001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017package org.apache.bcel.verifier.structurals;
018
019import java.util.ArrayList;
020import java.util.HashMap;
021import java.util.HashSet;
022import java.util.List;
023import java.util.Map;
024import java.util.Set;
025
026import org.apache.bcel.generic.ASTORE;
027import org.apache.bcel.generic.ATHROW;
028import org.apache.bcel.generic.BranchInstruction;
029import org.apache.bcel.generic.CodeExceptionGen;
030import org.apache.bcel.generic.GotoInstruction;
031import org.apache.bcel.generic.IndexedInstruction;
032import org.apache.bcel.generic.Instruction;
033import org.apache.bcel.generic.InstructionHandle;
034import org.apache.bcel.generic.JsrInstruction;
035import org.apache.bcel.generic.LocalVariableInstruction;
036import org.apache.bcel.generic.MethodGen;
037import org.apache.bcel.generic.RET;
038import org.apache.bcel.generic.ReturnInstruction;
039import org.apache.bcel.generic.Select;
040import org.apache.bcel.verifier.exc.AssertionViolatedException;
041import org.apache.bcel.verifier.exc.StructuralCodeConstraintException;
042
043/**
044 * Instances of this class contain information about the subroutines found in a code array of a method. This
045 * implementation considers the top-level (the instructions reachable without a JSR or JSR_W starting off from the first
046 * instruction in a code array of a method) being a special subroutine; see getTopLevel() for that. Please note that the
047 * definition of subroutines in the Java Virtual Machine Specification, Second Edition is somewhat incomplete.
048 * Therefore, JustIce uses an own, more rigid notion. Basically, a subroutine is a piece of code that starts at the
049 * target of a JSR of JSR_W instruction and ends at a corresponding RET instruction. Note also that the control flow of
050 * a subroutine may be complex and non-linear; and that subroutines may be nested. JustIce also mandates subroutines not
051 * to be protected by exception handling code (for the sake of control flow predictability). To understand JustIce's
052 * notion of subroutines, please read
053 *
054 * TODO: refer to the paper.
055 *
056 * @see #getTopLevel()
057 */
058public class Subroutines {
059    // Node coloring constants
060    private enum ColourConstants {
061        WHITE, GRAY, BLACK
062    }
063
064    /**
065     * This inner class implements the Subroutine interface.
066     */
067    private final class SubroutineImpl implements Subroutine {
068        /**
069         * UNSET, a symbol for an uninitialized localVariable field. This is used for the "top-level" Subroutine; i.e. no
070         * subroutine.
071         */
072        private static final int UNSET = -1;
073
074        private final SubroutineImpl[] EMPTY_ARRAY = {};
075
076        /**
077         * The Local Variable slot where the first instruction of this subroutine (an ASTORE) stores the JsrInstruction's
078         * ReturnAddress in and the RET of this subroutine operates on.
079         */
080        private int localVariable = UNSET;
081
082        /** The instructions that belong to this subroutine. */
083        private final Set<InstructionHandle> instructions = new HashSet<>(); // Elements: InstructionHandle
084
085        /**
086         * The JSR or JSR_W instructions that define this subroutine by targeting it.
087         */
088        private final Set<InstructionHandle> theJSRs = new HashSet<>();
089
090        /**
091         * The RET instruction that leaves this subroutine.
092         */
093        private InstructionHandle theRET;
094
095        /**
096         * The default constructor.
097         */
098        public SubroutineImpl() {
099        }
100
101        /**
102         * Adds a new JSR or JSR_W that has this subroutine as its target.
103         */
104        public void addEnteringJsrInstruction(final InstructionHandle jsrInst) {
105            if (jsrInst == null || !(jsrInst.getInstruction() instanceof JsrInstruction)) {
106                throw new AssertionViolatedException("Expecting JsrInstruction InstructionHandle.");
107            }
108            if (localVariable == UNSET) {
109                throw new AssertionViolatedException("Set the localVariable first!");
110            }
111            // Something is wrong when an ASTORE is targeted that does not operate on the same local variable than the rest of the
112            // JsrInstruction-targets and the RET.
113            // (We don't know out leader here so we cannot check if we're really targeted!)
114            if (localVariable != ((ASTORE) ((JsrInstruction) jsrInst.getInstruction()).getTarget().getInstruction()).getIndex()) {
115                throw new AssertionViolatedException("Setting a wrong JsrInstruction.");
116            }
117            theJSRs.add(jsrInst);
118        }
119
120        /*
121         * Adds an instruction to this subroutine. All instructions must have been added before invoking setLeavingRET().
122         *
123         * @see #setLeavingRET
124         */
125        void addInstruction(final InstructionHandle ih) {
126            if (theRET != null) {
127                throw new AssertionViolatedException("All instructions must have been added before invoking setLeavingRET().");
128            }
129            instructions.add(ih);
130        }
131
132        /*
133         * Refer to the Subroutine interface for documentation.
134         */
135        @Override
136        public boolean contains(final InstructionHandle inst) {
137            return instructions.contains(inst);
138        }
139
140        /*
141         * Satisfies Subroutine.getAccessedLocalIndices().
142         */
143        @Override
144        public int[] getAccessedLocalsIndices() {
145            // TODO: Implement caching.
146            final Set<Integer> acc = new HashSet<>();
147            if (theRET == null && this != getTopLevel()) {
148                throw new AssertionViolatedException("This subroutine object must be built up completely before calculating accessed locals.");
149            }
150            {
151                for (final InstructionHandle ih : instructions) {
152                    // RET is not a LocalVariableInstruction in the current version of BCEL.
153                    if (ih.getInstruction() instanceof LocalVariableInstruction || ih.getInstruction() instanceof RET) {
154                        final int idx = ((IndexedInstruction) ih.getInstruction()).getIndex();
155                        acc.add(Integer.valueOf(idx));
156                        // LONG? DOUBLE?.
157                        try {
158                            // LocalVariableInstruction instances are typed without the need to look into
159                            // the constant pool.
160                            if (ih.getInstruction() instanceof LocalVariableInstruction) {
161                                final int s = ((LocalVariableInstruction) ih.getInstruction()).getType(null).getSize();
162                                if (s == 2) {
163                                    acc.add(Integer.valueOf(idx + 1));
164                                }
165                            }
166                        } catch (final RuntimeException re) {
167                            throw new AssertionViolatedException("BCEL did not like NULL as a ConstantPoolGen object.", re);
168                        }
169                    }
170                }
171            }
172
173            {
174                final int[] ret = new int[acc.size()];
175                int j = -1;
176                for (final Integer accessedLocal : acc) {
177                    j++;
178                    ret[j] = accessedLocal.intValue();
179                }
180                return ret;
181            }
182        }
183
184        /*
185         * Refer to the Subroutine interface for documentation.
186         */
187        @Override
188        public InstructionHandle[] getEnteringJsrInstructions() {
189            if (this == getTopLevel()) {
190                throw new AssertionViolatedException("getLeavingRET() called on top level pseudo-subroutine.");
191            }
192            return theJSRs.toArray(InstructionHandle.EMPTY_ARRAY);
193        }
194
195        /*
196         * Refer to the Subroutine interface for documentation.
197         */
198        @Override
199        public InstructionHandle[] getInstructions() {
200            return instructions.toArray(InstructionHandle.EMPTY_ARRAY);
201        }
202
203        /*
204         * Refer to the Subroutine interface for documentation.
205         */
206        @Override
207        public InstructionHandle getLeavingRET() {
208            if (this == getTopLevel()) {
209                throw new AssertionViolatedException("getLeavingRET() called on top level pseudo-subroutine.");
210            }
211            return theRET;
212        }
213
214        /* Satisfies Subroutine.getRecursivelyAccessedLocalsIndices(). */
215        @Override
216        public int[] getRecursivelyAccessedLocalsIndices() {
217            final Set<Integer> s = new HashSet<>();
218            final int[] lvs = getAccessedLocalsIndices();
219            for (final int lv : lvs) {
220                s.add(Integer.valueOf(lv));
221            }
222            getRecursivelyAccessedLocalsIndicesHelper(s, subSubs());
223            final int[] ret = new int[s.size()];
224            int j = -1;
225            for (final Integer index : s) {
226                j++;
227                ret[j] = index.intValue();
228            }
229            return ret;
230        }
231
232        /**
233         * A recursive helper method for getRecursivelyAccessedLocalsIndices().
234         *
235         * @see #getRecursivelyAccessedLocalsIndices()
236         */
237        private void getRecursivelyAccessedLocalsIndicesHelper(final Set<Integer> set, final Subroutine[] subs) {
238            for (final Subroutine sub : subs) {
239                final int[] lvs = sub.getAccessedLocalsIndices();
240                for (final int lv : lvs) {
241                    set.add(Integer.valueOf(lv));
242                }
243                if (sub.subSubs().length != 0) {
244                    getRecursivelyAccessedLocalsIndicesHelper(set, sub.subSubs());
245                }
246            }
247        }
248
249        /**
250         * Sets the leaving RET instruction. Must be invoked after all instructions are added. Must not be invoked for top-level
251         * 'subroutine'.
252         */
253        void setLeavingRET() {
254            if (localVariable == UNSET) {
255                throw new AssertionViolatedException("setLeavingRET() called for top-level 'subroutine' or forgot to set local variable first.");
256            }
257            InstructionHandle ret = null;
258            for (final InstructionHandle actual : instructions) {
259                if (actual.getInstruction() instanceof RET) {
260                    if (ret != null) {
261                        throw new StructuralCodeConstraintException("Subroutine with more then one RET detected: '" + ret + "' and '" + actual + "'.");
262                    }
263                    ret = actual;
264                }
265            }
266            if (ret == null) {
267                throw new StructuralCodeConstraintException("Subroutine without a RET detected.");
268            }
269            if (((RET) ret.getInstruction()).getIndex() != localVariable) {
270                throw new StructuralCodeConstraintException(
271                    "Subroutine uses '" + ret + "' which does not match the correct local variable '" + localVariable + "'.");
272            }
273            theRET = ret;
274        }
275
276        /*
277         * Sets the local variable slot the ASTORE that is targeted by the JsrInstructions of this subroutine operates on. This
278         * subroutine's RET operates on that same local variable slot, of course.
279         */
280        void setLocalVariable(final int i) {
281            if (localVariable != UNSET) {
282                throw new AssertionViolatedException("localVariable set twice.");
283            }
284            localVariable = i;
285        }
286
287        /*
288         * Satisfies Subroutine.subSubs().
289         */
290        @Override
291        public Subroutine[] subSubs() {
292            final Set<Subroutine> h = new HashSet<>();
293
294            for (final InstructionHandle ih : instructions) {
295                final Instruction inst = ih.getInstruction();
296                if (inst instanceof JsrInstruction) {
297                    final InstructionHandle targ = ((JsrInstruction) inst).getTarget();
298                    h.add(getSubroutine(targ));
299                }
300            }
301            return h.toArray(EMPTY_ARRAY);
302        }
303
304        /**
305         * Returns a String representation of this object, merely for debugging purposes. (Internal) Warning: Verbosity on a
306         * problematic subroutine may cause stack overflow errors due to recursive subSubs() calls. Don't use this, then.
307         */
308        @Override
309        public String toString() {
310            final StringBuilder ret = new StringBuilder();
311            ret.append("Subroutine: Local variable is '").append(localVariable);
312            ret.append("', JSRs are '").append(theJSRs);
313            ret.append("', RET is '").append(theRET);
314            ret.append("', Instructions: '").append(instructions).append("'.");
315
316            ret.append(" Accessed local variable slots: '");
317            int[] alv = getAccessedLocalsIndices();
318            for (final int element : alv) {
319                ret.append(element);
320                ret.append(" ");
321            }
322            ret.append("'.");
323
324            ret.append(" Recursively (via subsub...routines) accessed local variable slots: '");
325            alv = getRecursivelyAccessedLocalsIndices();
326            for (final int element : alv) {
327                ret.append(element);
328                ret.append(" ");
329            }
330            ret.append("'.");
331
332            return ret.toString();
333        }
334
335    } // end Inner Class SubrouteImpl
336
337    /**
338     * A utility method that calculates the successors of a given InstructionHandle <B>in the same subroutine</B>. That
339     * means, a RET does not have any successors as defined here. A JsrInstruction has its physical successor as its
340     * successor (opposed to its target) as defined here.
341     */
342    private static InstructionHandle[] getSuccessors(final InstructionHandle instruction) {
343        final InstructionHandle[] single = new InstructionHandle[1];
344
345        final Instruction inst = instruction.getInstruction();
346
347        // Terminates method normally.
348        // Terminates method abnormally, because JustIce mandates
349        // subroutines not to be protected by exception handlers.
350        if (inst instanceof RET || inst instanceof ReturnInstruction || inst instanceof ATHROW) {
351            return InstructionHandle.EMPTY_ARRAY;
352        }
353
354        // See method comment.
355        if (inst instanceof JsrInstruction) {
356            single[0] = instruction.getNext();
357            return single;
358        }
359
360        if (inst instanceof GotoInstruction) {
361            single[0] = ((GotoInstruction) inst).getTarget();
362            return single;
363        }
364
365        if (inst instanceof BranchInstruction) {
366            if (inst instanceof Select) {
367                // BCEL's getTargets() returns only the non-default targets,
368                // thanks to Eli Tilevich for reporting.
369                final InstructionHandle[] matchTargets = ((Select) inst).getTargets();
370                final InstructionHandle[] ret = new InstructionHandle[matchTargets.length + 1];
371                ret[0] = ((Select) inst).getTarget();
372                System.arraycopy(matchTargets, 0, ret, 1, matchTargets.length);
373                return ret;
374            }
375            final InstructionHandle[] pair = new InstructionHandle[2];
376            pair[0] = instruction.getNext();
377            pair[1] = ((BranchInstruction) inst).getTarget();
378            return pair;
379        }
380
381        // default case: Fall through.
382        single[0] = instruction.getNext();
383        return single;
384    }
385
386    /**
387     * The map containing the subroutines found. Key: InstructionHandle of the leader of the subroutine. Elements:
388     * SubroutineImpl objects.
389     */
390    private final Map<InstructionHandle, Subroutine> subroutines = new HashMap<>();
391
392    /**
393     * This is referring to a special subroutine, namely the top level. This is not really a subroutine but we use it to
394     * distinguish between top level instructions and unreachable instructions.
395     */
396    // CHECKSTYLE:OFF
397    public final Subroutine TOPLEVEL; // TODO can this be made private?
398    // CHECKSTYLE:ON
399
400    /**
401     * Constructs a new instance.
402     *
403     * @param mg A MethodGen object representing method to create the Subroutine objects of. Assumes that JustIce strict
404     *        checks are needed.
405     */
406    public Subroutines(final MethodGen mg) {
407        this(mg, true);
408    }
409
410    /**
411     * Constructs a new instance.
412     *
413     * @param mg A MethodGen object representing method to create the Subroutine objects of.
414     * @param enableJustIceCheck whether to enable additional JustIce checks
415     * @since 6.0
416     */
417    public Subroutines(final MethodGen mg, final boolean enableJustIceCheck) {
418        final InstructionHandle[] all = mg.getInstructionList().getInstructionHandles();
419        final CodeExceptionGen[] handlers = mg.getExceptionHandlers();
420
421        // Define our "Toplevel" fake subroutine.
422        TOPLEVEL = new SubroutineImpl();
423
424        // Calculate "real" subroutines.
425        final Set<InstructionHandle> subLeaders = new HashSet<>(); // Elements: InstructionHandle
426        for (final InstructionHandle element : all) {
427            final Instruction inst = element.getInstruction();
428            if (inst instanceof JsrInstruction) {
429                subLeaders.add(((JsrInstruction) inst).getTarget());
430            }
431        }
432
433        // Build up the database.
434        for (final InstructionHandle astore : subLeaders) {
435            final SubroutineImpl sr = new SubroutineImpl();
436            sr.setLocalVariable(((ASTORE) astore.getInstruction()).getIndex());
437            subroutines.put(astore, sr);
438        }
439
440        // Fake it a bit. We want a virtual "TopLevel" subroutine.
441        subroutines.put(all[0], TOPLEVEL);
442        subLeaders.add(all[0]);
443
444        // Tell the subroutines about their JsrInstructions.
445        // Note that there cannot be a JSR targeting the top-level
446        // since "Jsr 0" is disallowed in Pass 3a.
447        // Instructions shared by a subroutine and the toplevel are
448        // disallowed and checked below, after the BFS.
449        for (final InstructionHandle element : all) {
450            final Instruction inst = element.getInstruction();
451            if (inst instanceof JsrInstruction) {
452                final InstructionHandle leader = ((JsrInstruction) inst).getTarget();
453                ((SubroutineImpl) getSubroutine(leader)).addEnteringJsrInstruction(element);
454            }
455        }
456
457        // Now do a BFS from every subroutine leader to find all the
458        // instructions that belong to a subroutine.
459        // we don't want to assign an instruction to two or more Subroutine objects.
460        final Set<InstructionHandle> instructionsAssigned = new HashSet<>();
461
462        // Graph coloring. Key: InstructionHandle, Value: ColourConstants enum .
463        final Map<InstructionHandle, ColourConstants> colors = new HashMap<>();
464
465        final List<InstructionHandle> qList = new ArrayList<>();
466        for (final InstructionHandle actual : subLeaders) {
467            // Do some BFS with "actual" as the root of the graph.
468            // Init colors
469            for (final InstructionHandle element : all) {
470                colors.put(element, ColourConstants.WHITE);
471            }
472            colors.put(actual, ColourConstants.GRAY);
473            // Init Queue
474
475            qList.clear();
476            qList.add(actual); // add(Obj) adds to the end, remove(0) removes from the start.
477
478            /*
479             * BFS ALGORITHM MODIFICATION: Start out with multiple "root" nodes, as exception handlers are starting points of
480             * top-level code, too. [why top-level? TODO: Refer to the special JustIce notion of subroutines.]
481             */
482            if (actual == all[0]) {
483                for (final CodeExceptionGen handler : handlers) {
484                    colors.put(handler.getHandlerPC(), ColourConstants.GRAY);
485                    qList.add(handler.getHandlerPC());
486                }
487            }
488            /* CONTINUE NORMAL BFS ALGORITHM */
489
490            // Loop until Queue is empty
491            while (!qList.isEmpty()) {
492                final InstructionHandle u = qList.remove(0);
493                final InstructionHandle[] successors = getSuccessors(u);
494                for (final InstructionHandle successor : successors) {
495                    if (colors.get(successor) == ColourConstants.WHITE) {
496                        colors.put(successor, ColourConstants.GRAY);
497                        qList.add(successor);
498                    }
499                }
500                colors.put(u, ColourConstants.BLACK);
501            }
502            // BFS ended above.
503            for (final InstructionHandle element : all) {
504                if (colors.get(element) == ColourConstants.BLACK) {
505                    ((SubroutineImpl) (actual == all[0] ? getTopLevel() : getSubroutine(actual))).addInstruction(element);
506                    if (instructionsAssigned.contains(element)) {
507                        throw new StructuralCodeConstraintException(
508                            "Instruction '" + element + "' is part of more than one subroutine (or of the top level and a subroutine).");
509                    }
510                    instructionsAssigned.add(element);
511                }
512            }
513            if (actual != all[0]) { // If we don't deal with the top-level 'subroutine'
514                ((SubroutineImpl) getSubroutine(actual)).setLeavingRET();
515            }
516        }
517
518        if (enableJustIceCheck) {
519            // Now make sure no instruction of a Subroutine is protected by exception handling code
520            // as is mandated by JustIces notion of subroutines.
521            for (final CodeExceptionGen handler : handlers) {
522                InstructionHandle protectedIh = handler.getStartPC();
523                while (protectedIh != handler.getEndPC().getNext()) {
524                    // Note the inclusive/inclusive notation of "generic API" exception handlers!
525                    for (final Subroutine sub : subroutines.values()) {
526                        if (sub != subroutines.get(all[0]) && sub.contains(protectedIh)) {
527                            throw new StructuralCodeConstraintException("Subroutine instruction '" + protectedIh + "' is protected by an exception handler, '"
528                                + handler + "'. This is forbidden by the JustIce verifier due to its clear definition of subroutines.");
529                        }
530                    }
531                    protectedIh = protectedIh.getNext();
532                }
533            }
534        }
535
536        // Now make sure no subroutine is calling a subroutine
537        // that uses the same local variable for the RET as themselves
538        // (recursively).
539        // This includes that subroutines may not call themselves
540        // recursively, even not through intermediate calls to other
541        // subroutines.
542        noRecursiveCalls(getTopLevel(), new HashSet<>());
543
544    }
545
546    /**
547     * Returns the Subroutine object associated with the given leader (that is, the first instruction of the subroutine).
548     * You must not use this to get the top-level instructions modeled as a Subroutine object.
549     *
550     * @see #getTopLevel()
551     */
552    public Subroutine getSubroutine(final InstructionHandle leader) {
553        final Subroutine ret = subroutines.get(leader);
554
555        if (ret == null) {
556            throw new AssertionViolatedException("Subroutine requested for an InstructionHandle that is not a leader of a subroutine.");
557        }
558
559        if (ret == TOPLEVEL) {
560            throw new AssertionViolatedException("TOPLEVEL special subroutine requested; use getTopLevel().");
561        }
562
563        return ret;
564    }
565
566    /**
567     * For easy handling, the piece of code that is <B>not</B> a subroutine, the top-level, is also modeled as a Subroutine
568     * object. It is a special Subroutine object where <B>you must not invoke getEnteringJsrInstructions() or
569     * getLeavingRET()</B>.
570     *
571     * @see Subroutine#getEnteringJsrInstructions()
572     * @see Subroutine#getLeavingRET()
573     */
574    public Subroutine getTopLevel() {
575        return TOPLEVEL;
576    }
577
578    /**
579     * This (recursive) utility method makes sure that no subroutine is calling a subroutine that uses the same local
580     * variable for the RET as themselves (recursively). This includes that subroutines may not call themselves recursively,
581     * even not through intermediate calls to other subroutines.
582     *
583     * @throws StructuralCodeConstraintException if the above constraint is not satisfied.
584     */
585    private void noRecursiveCalls(final Subroutine sub, final Set<Integer> set) {
586        final Subroutine[] subs = sub.subSubs();
587
588        for (final Subroutine sub2 : subs) {
589            final int index = ((RET) sub2.getLeavingRET().getInstruction()).getIndex();
590
591            if (!set.add(Integer.valueOf(index))) {
592                // Don't use toString() here because of possibly infinite recursive subSubs() calls then.
593                final SubroutineImpl si = (SubroutineImpl) sub2;
594                throw new StructuralCodeConstraintException("Subroutine with local variable '" + si.localVariable + "', JSRs '" + si.theJSRs + "', RET '"
595                    + si.theRET + "' is called by a subroutine which uses the same local variable index as itself; maybe even a recursive call?"
596                    + " JustIce's clean definition of a subroutine forbids both.");
597            }
598
599            noRecursiveCalls(sub2, set);
600
601            set.remove(Integer.valueOf(index));
602        }
603    }
604
605    /**
606     * Returns the subroutine object associated with the given instruction. This is a costly operation, you should consider
607     * using getSubroutine(InstructionHandle). Returns 'null' if the given InstructionHandle lies in so-called 'dead code',
608     * i.e. code that can never be executed.
609     *
610     * @see #getSubroutine(InstructionHandle)
611     * @see #getTopLevel()
612     */
613    public Subroutine subroutineOf(final InstructionHandle any) {
614        for (final Subroutine s : subroutines.values()) {
615            if (s.contains(any)) {
616                return s;
617            }
618        }
619        System.err.println("DEBUG: Please verify '" + any.toString(true) + "' lies in dead code.");
620        return null;
621        // throw new AssertionViolatedException("No subroutine for InstructionHandle found (DEAD CODE?).");
622    }
623
624    /**
625     * Returns a String representation of this object; merely for debugging puposes.
626     */
627    @Override
628    public String toString() {
629        return "---\n" + subroutines + "\n---\n";
630    }
631}