/*
 * [JavaState.java]
 *
 * Summary: State machine for JavaTokenizer.
 *
 * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  3.2 2009-05-04 now treat Javadoc tags specially.
 */
package com.mindprod.jprep;

import com.mindprod.common18.ST;
import com.mindprod.jtokens.CharLiteral;
import com.mindprod.jtokens.Gibberish;
import com.mindprod.jtokens.ImportantKeyword;
import com.mindprod.jtokens.Keyword;
import com.mindprod.jtokens.Label;
import com.mindprod.jtokens.NL;
import com.mindprod.jtokens.Noise;
import com.mindprod.jtokens.Operator;
import com.mindprod.jtokens.Semicolon;
import com.mindprod.jtokens.Space;
import com.mindprod.jtokens.Start;
import com.mindprod.jtokens.Stop;
import com.mindprod.jtokens.StringLiteral;
import com.mindprod.jtokens.Token;
import com.mindprod.jtokens.WhiteSpace;
import com.mindprod.jtokens.java.Annotation;
import com.mindprod.jtokens.java.CommentJavadoc;
import com.mindprod.jtokens.java.CommentJavadocTag;
import com.mindprod.jtokens.java.CommentSlashSlash;
import com.mindprod.jtokens.java.CommentSlashStar;
import com.mindprod.jtokens.java.Definable;
import com.mindprod.jtokens.java.Fence;
import com.mindprod.jtokens.java.InterfaceName;
import com.mindprod.jtokens.java.JavaClassName;
import com.mindprod.jtokens.java.JavaConstant;
import com.mindprod.jtokens.java.Method;
import com.mindprod.jtokens.java.NumericIndicator;
import com.mindprod.jtokens.java.NumericLiteralHigh;
import com.mindprod.jtokens.java.NumericLiteralLow;
import com.mindprod.jtokens.java.PackageName;
import com.mindprod.jtokens.java.Var;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static java.lang.System.*;

/**
 * State machine for JavaTokenizer.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.2 2009-05-04 now treat javadoc tags specially.
 * @since 2004-05-15
 */
// TODO: group digits to right of decimal in threes.
// TODO: figure out why making next private stops it from working.
@SuppressWarnings( {
        "NestedAssignment", "ValueOfIncrementOrDecrementUsed", "UnnecessaryContinue",
        "EnumeratedConstantNamingConvention" } )
public enum JavaState
    {
        /**
         * Someone has forwarded us an EOL. We deal with it and any subsequent EOLs. When we hit something
         * interesting we let the default next deal with it.
         */
        @SuppressWarnings( { "WeakerAccess" } ) AT_END_OF_LINE
                    {
                    /**
                     * Counts one more line end. The character itself is not stored.
                     *
                     * @param c the character being consumed; ignored.
                     */
                    @SuppressWarnings( { "UnusedParameters" } )
                    void consume( char c )
                        {
                        // tally only; leaving() converts the tally into a single NL token.
                        nlCount += 1;
                        }

                    /**
                     * Emits one NL token standing for the run of line ends just counted,
                     * capped at three, then resets the counters. Does nothing when no
                     * line end was counted.
                     */
                    void leaving()
                        {
                        if ( nlCount <= 0 )
                            {
                            // nothing counted, nothing to emit
                            return;
                            }
                        // never more than 3 NLs in a row.
                        nlCount = Math.min( nlCount, 3 );
                        // spaces pending before the line end are trailing spaces: drop them.
                        spaceCount = 0;
                        // the whole run collapses into a single token.
                        addToken( new NL( nlCount ) );
                        nlCount = 0;
                        }

                    /**
                     * Chooses the state for the character that follows a line end.
                     * Another EOL is consumed and keeps us here; every other known category
                     * is marked FORWARD and the choice of state is delegated to super.next.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    passed through unchanged to super.next.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        //  AT_END_OF_LINE
                        switch ( category )
                            {
                            case EOL:
                                // yet another line end: count it and stay
                                how = HowToProcess.CONSUME;
                                return AT_END_OF_LINE;
                            case AT:
                            case BACKSLASH:
                            case DIGIT:
                            case DOT:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                            case UNDERSCORE:
                                // something interesting: let the default logic pick the state
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    // end of enum constant AT_END_OF_LINE
                    },
        /**
         * in the middle of processing a string of {}()[] chars; each one gets its own token.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_FENCE
                    {
                    /**
                     * Appends the fence character to accumulatedFences; tokens are built later in leaving().
                     *
                     * @param c fence character to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedFences.append( c );
                        // end consume IN_FENCE
                        }

                    /**
                     * Empties the fence buffer and emits one Fence token per accumulated
                     * character, so that a run such as (( becomes separate tokens that can
                     * be rendered in different sizes.
                     */
                    void leaving()
                        {
                        final String pending = accumulatedFences.toString();
                        accumulatedFences.setLength( 0 );
                        for ( char bracket : pending.toCharArray() )
                            {
                            // depth is a placeholder of 0 here; not necessarily correct yet
                            addToken( new Fence( bracket, 0 ) );
                            }
                        }

                    /**
                     * Chooses the state for the character that follows a fence.
                     * Another fence is consumed and keeps us here; every other known category
                     * is marked FORWARD and the choice of state is delegated to super.next.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    passed through unchanged to super.next.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_FENCE
                        switch ( category )
                            {
                            case FENCE:
                                // more fences: keep accumulating
                                how = HowToProcess.CONSUME;
                                return IN_FENCE;
                            case AT:
                            case BACKSLASH:
                            case DIGIT:
                            case DOT:
                            case EOL:
                            case OTHER:
                            case PLAIN:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                            case UNDERSCORE:
                                // run of fences is over
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    // end of enum constant IN_FENCE
                    },
        /**
         * in name, keyword or identifier. We are overly strict on what constitutes an identifier.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_NAME
                    {
                    /**
                     * characters permitted in a name classified as a constant:
                     * upper-case A-Z, digits and underscore. Lower-case letters are NOT included.
                     */
                    private static final String LEGAL_CONSTANT_CHARS =
                            "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";

                    /**
                     * Tests whether a name looks like a Java static final constant.
                     * Delegates to ST.isLegal with LEGAL_CONSTANT_CHARS
                     * (upper-case letters, digits and underscore).
                     *
                     * @param candidate identifier to test.
                     *
                     * @return whatever ST.isLegal reports for the candidate against LEGAL_CONSTANT_CHARS;
                     *         presumably true when every character is in that set.
                     */
                    private boolean isNameAConstant( String candidate )
                        {
                        final boolean onlyConstantChars = ST.isLegal( candidate, LEGAL_CONSTANT_CHARS );
                        return onlyConstantChars;
                        }

                    /**
                     * Appends the character to accumulatedName; the name is classified later in leaving().
                     *
                     * @param c identifier character to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedName.append( c );
                        // end consume IN_NAME
                        }

                    /**
                     * Empties the name buffer and emits one token for the accumulated identifier.
                     * This is only a first stab at classification: keyword (important or ordinary),
                     * otherwise constant or class name when the first character is upper case,
                     * otherwise Var. Nothing is emitted when the buffer was empty.
                     */
                    void leaving()
                        {
                        final String identifier = accumulatedName.toString();
                        accumulatedName.setLength( 0 );
                        if ( identifier.isEmpty() )
                            {
                            return;
                            }
                        assert identifier.equals( identifier.trim() ) : "name not trimmed";
                        if ( keywords.contains( identifier ) )
                            {
                            if ( importantKeywords.contains( identifier ) )
                                {
                                addToken( new ImportantKeyword( identifier ) );
                                }
                            else
                                {
                                addToken( new Keyword( identifier ) );
                                }
                            return;
                            }
                        if ( !Character.isUpperCase( identifier.charAt( 0 ) ) )
                            {
                            // might really be a method, will find out later.
                            addToken( new Var( identifier, false ) );
                            return;
                            }
                        if ( isNameAConstant( identifier ) )
                            {
                            // nothing but upper case, digits and _ : constant
                            addToken( new JavaConstant( identifier, false ) );
                            }
                        else
                            {
                            // leading capital: class.
                            // later analysis finds classDef and interfaceDef
                            addToken( new JavaClassName( identifier, false ) );
                            }
                        }

                    /**
                     * Chooses the state for the character that follows part of a name.
                     * Letters, digits, underscore and OTHER (to allow Unicode names) are consumed
                     * and keep us here; every other known category is marked FORWARD and the
                     * choice of state is delegated to super.next.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    passed through unchanged to super.next.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_NAME
                        switch ( category )
                            {
                            case DIGIT:
                            case OTHER: /* allow Unicode names */
                            case PLAIN:
                            case UNDERSCORE:
                                // still inside the name
                                how = HowToProcess.CONSUME;
                                return IN_NAME;
                            case AT:
                            case BACKSLASH:
                            case DOT:
                            case EOL:
                            case FENCE:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                                // name has ended
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    },
        /**
         * in numeric literal e.g. 01234 0xabcdef099 1.2 2.4d 1.4f 2.30E-05 2.30E-05f 2.30E-05d 2.30E-4E 2.30E-3D
         * Any lead - will be treated as an operator.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_NUMERIC_LITERAL
                    {
                    /**
                     * PLAIN or PUNCTUATION characters that keep us inside a numeric literal:
                     * digits, the hex marker x/X, the letters a-f/A-F, the long suffix l/L,
                     * sign characters, dot and underscore.
                     */
                    private static final String LEGAL_NUMERIC_CHARS =
                            "0123456789xXabcdefABCDEFlL+-._";

                    /**
                     * Appends the character to accumulatedNumeric; the literal is analysed later in leaving().
                     *
                     * @param c character of the numeric literal to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedNumeric.append( c );
                        // end consume IN_NUMERIC_LITERAL
                        }

                    /**
                     * Empties the numeric buffer and hands the accumulated text to
                     * analyseNumericLiteral, which builds any tokens.
                     */
                    void leaving()
                        {
                        // text looks like 1.0 0xff 007 3.45E-93d 40L
                        final String literalText = accumulatedNumeric.toString();
                        // clear the buffer before analysis, ready for the next literal
                        accumulatedNumeric.setLength( 0 );
                        analyseNumericLiteral( literalText );
                        }

                    /**
                     * Chooses the state for the character that follows part of a numeric literal.
                     * Digits, dots and underscores are always consumed. PLAIN and PUNCTUATION
                     * characters are consumed only when they appear in LEGAL_NUMERIC_CHARS.
                     * Everything else is marked FORWARD and the choice of state is delegated
                     * to super.next.
                     * <p>
                     * NOTE(review): the LEGAL_NUMERIC_CHARS test is position-independent, so '+' and '-'
                     * extend the literal wherever they occur, not just after an exponent marker.
                     * Confirm analyseNumericLiteral copes with text such as 1+2.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    passed through unchanged to super.next.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_NUMERIC_LITERAL
                        switch ( category )
                            {
                            case DIGIT:
                            case DOT:
                            case UNDERSCORE:
                                how = HowToProcess.CONSUME;
                                return IN_NUMERIC_LITERAL;
                            case PLAIN:
                            case PUNCTUATION:
                                if ( LEGAL_NUMERIC_CHARS.indexOf( nextChar ) < 0 )
                                    {
                                    // not a char that can appear in a number: the literal has ended
                                    how = HowToProcess.FORWARD;
                                    return super.next( category, nextChar, first );
                                    }
                                // hex digit, trailing letter or sign: still in the number
                                how = HowToProcess.CONSUME;
                                return IN_NUMERIC_LITERAL;
                            case AT:
                            case BACKSLASH:
                            case EOL:
                            case FENCE:
                            case OTHER:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    },
        /**
         * in string or arithmetic operators, including ;, but not fences {}() []
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_OPERATOR
                    {
                    /**
                     * Appends the character to accumulatedOperators; tokens are built later in leaving().
                     *
                     * @param c operator character to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedOperators.append( c );
                        }

                    /**
                     * Empties the operator buffer and emits its contents as tokens.
                     * Each ';' becomes its own Semicolon token; the text before it and the
                     * text left after the last one each become an Operator token, even when
                     * that text is empty.
                     */
                    void leaving()
                        {
                        final String pending = accumulatedOperators.toString();
                        accumulatedOperators.setLength( 0 );
                        // index of the first char not yet emitted
                        int groupStart = 0;
                        for ( int semi = pending.indexOf( ';' );
                              semi >= 0;
                              semi = pending.indexOf( ';', groupStart ) )
                            {
                            // group ahead of the semicolon, possibly empty
                            addToken( new Operator( pending.substring( groupStart, semi ) ) );
                            addToken( new Semicolon() );
                            groupStart = semi + 1;
                            }
                        // whatever follows the last semicolon, possibly empty
                        addToken( new Operator( pending.substring( groupStart ) ) );
                        }

                    /**
                     * Chooses the state for the character that follows part of an operator run.
                     * AT, BACKSLASH, PUNCTUATION, SLASH and STAR are consumed and keep us here.
                     * A DOT is consumed too, unless numberComingAfterThisDot() reports true, in
                     * which case it is forwarded to IN_NUMERIC_LITERAL. Every other known category
                     * is marked FORWARD and the choice of state is delegated to super.next.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    passed through unchanged to super.next.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_OPERATOR
                        switch ( category )
                            {
                            case AT:
                            case BACKSLASH:
                            case PUNCTUATION:
                            case SLASH:
                            case STAR:
                                // still in the operator run
                                how = HowToProcess.CONSUME;
                                return IN_OPERATOR;
                            case DOT:
                                // a dot either leads a number like .5 or is just another operator char
                                final boolean leadsNumber = numberComingAfterThisDot();
                                how = leadsNumber ? HowToProcess.FORWARD : HowToProcess.CONSUME;
                                return leadsNumber ? IN_NUMERIC_LITERAL : IN_OPERATOR;
                            case DIGIT:
                            case EOL:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case QUOTE:
                            case SPACE:
                            case TICK:
                            case UNDERSCORE:
                                // operator run is over
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    },
        /**
         * saw @ now in annotation.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_ANNOTATION
                    {
                    /**
                     * Appends the character to accumulatedAnnotation; the token is built later in leaving().
                     *
                     * @param c annotation character to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedAnnotation.append( c );
                        }

                    /**
                     * Empties the annotation buffer and emits its contents as a single
                     * Annotation token, even when the buffer was empty.
                     */
                    void leaving()
                        {
                        final String text = accumulatedAnnotation.toString();
                        // clear first so the buffer is ready for the next annotation
                        accumulatedAnnotation.setLength( 0 );
                        addToken( new Annotation( text ) );
                        }

                    /**
                     * Chooses the state for the character that follows part of an annotation.
                     * The parsing is deliberately crude: we cannot easily tell where an annotation
                     * ends, so it ends after the first word. AT, DIGIT, OTHER, PLAIN and UNDERSCORE
                     * are consumed and keep us here; every other known category is marked FORWARD
                     * and the choice of state is delegated to super.next.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    passed through unchanged to super.next.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_ANNOTATION
                        switch ( category )
                            {
                            case AT:
                            case DIGIT:
                            case OTHER:
                            case PLAIN:
                            case UNDERSCORE:
                                // still in the first word
                                how = HowToProcess.CONSUME;
                                return IN_ANNOTATION;
                            case BACKSLASH:
                            case DOT:
                            case EOL:
                            case FENCE:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                                // first word is over, and with it the annotation
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    },
        /**
         * Someone saw a quote outside a comment and forwarded it to us. We accumulate the quotation terminated by a
         * quote. We don't save either lead or trail quote in the buffer, to save space. They are regenerated by the
         * token as needed. There are two complications: we can hit eol before hitting the end quote, in which case
         * we just render it verbatim; and \quote does not terminate the string.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_QUOTES
                    {
                    /**
                     * true if "..." are balanced, false if hit EOL too soon.
                     * Cleared by next() on the opening quote, set by next() on the closing quote,
                     * and set again by leaving() once an unbalanced literal has been reported.
                     */
                    private boolean balanced = false;
                    /**
                     * Used by IN_QUOTES to track whether a quote is preceded by \.
                     * Maintained by next(): while true, a quote that arrives is kept as part
                     * of the literal instead of terminating it.
                     */
                    private boolean prevWasBackslash = false;

                    /**
                     * Appends the character to accumulatedQuotation; the token is built later in leaving().
                     *
                     * @param c character inside the string literal to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedQuotation.append( c );
                        // end consume IN_QUOTES
                        }

                    /**
                     * Empties the quotation buffer and emits its contents as one token:
                     * a StringLiteral when the closing quote was seen, otherwise a Gibberish
                     * token carrying the lead quote plus the text, to flag the error.
                     */
                    void leaving()
                        {
                        final String text = accumulatedQuotation.toString();
                        accumulatedQuotation.setLength( 0 );
                        if ( !balanced )
                            {
                            // source had " ... with no trailing ".
                            // Not a literal; report it as an error, lead quote included.
                            addToken( new Gibberish( "\"" + text ) );
                            // anomaly has now been dealt with
                            balanced = true;
                            return;
                            }
                        // emitted even when empty. The surrounding quotes are not part of
                        // the text; they get generated as needed.
                        addToken( new StringLiteral( text ) );
                        }

                    /**
                     * Chooses the state for the next character while inside a string literal.
                     * <ul>
                     * <li>The opening quote (first is true) is discarded and resets the flags.</li>
                     * <li>A quote preceded by an unescaped backslash is consumed as data.</li>
                     * <li>Any other quote closes the literal: it is discarded, balanced is set
                     * and we move to IN_WHITESPACE.</li>
                     * <li>EOL means the literal was never closed: it is forwarded to AT_END_OF_LINE
                     * with balanced still false, so leaving() reports the error.</li>
                     * <li>Every other known category is consumed as part of the literal.</li>
                     * </ul>
                     * Backslashes are tracked in pairs: in "C:\\" the second backslash is escaped by
                     * the first, so it does not escape the quote that follows it.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    true for the opening quote that brought us into this state.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        //   IN_QUOTES
                        switch ( category )
                            {
                            case BACKSLASH:
                                // A backslash escapes the following char only when it is not itself
                                // escaped. Toggling keeps \\ from hiding the closing quote that follows it.
                                prevWasBackslash = !prevWasBackslash;
                                how = HowToProcess.CONSUME;
                                return IN_QUOTES;
                            case AT:
                            case DIGIT:
                            case DOT:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case UNDERSCORE:
                            case PUNCTUATION:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                                // ordinary content; any pending escape has now been used up
                                prevWasBackslash = false;
                                how = HowToProcess.CONSUME;
                                return IN_QUOTES;
                            case EOL:
                                // we had an unbalanced " ...\ then eol
                                // balanced will be false
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case QUOTE:
                                if ( first )
                                    {
                                    // opening quote: start a fresh literal, do not store the quote
                                    prevWasBackslash = false;
                                    balanced = false;
                                    how = HowToProcess.DISCARD;
                                    return IN_QUOTES;
                                    }
                                else if ( prevWasBackslash )
                                    {
                                    // escaped quote: treat as an ordinary char
                                    prevWasBackslash = false;
                                    how = HowToProcess.CONSUME;
                                    return IN_QUOTES;
                                    }
                                else
                                    {
                                    // was the final one
                                    prevWasBackslash = false;
                                    balanced = true;
                                    how = HowToProcess.DISCARD;
                                    // super would just send us back here
                                    return IN_WHITESPACE;
                                    }
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        // end next IN_QUOTES
                        }
                    // end of enum constant IN_QUOTES
                    },
        /**
         * Seen slash slash; we are in a one-line comment, which will be terminated by eol. The slash slash goes in
         * the token. We don't worry about embedded @tags just yet; we deal with them later.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_REM_SLASH_SLASH
                    {
                    /**
                     * Appends the character to accumulatedComment; the token is built later in leaving().
                     *
                     * @param c comment character to accumulate.
                     */
                    void consume( char c )
                        {
                        accumulatedComment.append( c );
                        // end consume IN_REM_SLASH_SLASH
                        }

                    /**
                     * Empties the comment buffer and, when it held anything, emits the text
                     * as a single CommentSlashSlash token. Simpler than the other comment
                     * kinds because the text cannot contain an embedded line end.
                     */
                    void leaving()
                        {
                        final String text = accumulatedComment.toString();
                        accumulatedComment.setLength( 0 );
                        if ( text.isEmpty() )
                            {
                            return;
                            }
                        // Embedded @tags are not split out here, and runs of lead, trail or
                        // embedded spaces are kept as is; both are dealt with later.
                        addToken( new CommentSlashSlash( text ) );
                        }

                    /**
                     * Chooses the state for the next character of a one-line comment.
                     * EOL ends the comment and is forwarded to AT_END_OF_LINE; every other
                     * known category is consumed as comment text and keeps us here.
                     *
                     * @param category category of nextChar.
                     * @param nextChar character being examined.
                     * @param first    not used by this state.
                     *
                     * @return next state; null for an unrecognised category when assertions are disabled.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_REM_SLASH_SLASH
                        switch ( category )
                            {
                            case EOL:
                                // only a line end terminates this kind of comment
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case AT:
                            case BACKSLASH:
                            case DIGIT:
                            case DOT:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                            case TICK:
                            case UNDERSCORE:
                                // anything else is comment text
                                how = HowToProcess.CONSUME;
                                return IN_REM_SLASH_SLASH;
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            }
                        }
                    // end of enum constant IN_REM_SLASH_SLASH
                    },
        /**
         * in slash star or slash star star. We don't leave this state on hitting EOL but just keep on trucking and deal
         * with the eol tokens later. Finally ended by star slash. We don't deal with running off the end of the program
         * unbalanced. That will show up as an error with stuff left un-tokenized.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_REM_SLASH_STAR
                    {
                    /**
                     * keep track of whether / was preceded by star to mark end of
                     * comment. Set by next() on every STAR, cleared by next() on every
                     * category other than STAR and SLASH, and also cleared on the opening slash.
                     */
                    private boolean prevWasStar = false;

                    /**
                     * Append one character to the shared comment buffer.
                     *
                     * @param c character to keep as part of the comment text.
                     */
                    void consume( char c )
                        {
                        // IN_REM_SLASH_STAR: leaving() later flushes this buffer and splits it into tokens
                        accumulatedComment.append( c );
                        }

                    /**
                     * Flush the accumulated comment text as tokens and empty the buffer.
                     * The text is cut at each newline: every line becomes comment token(s) followed by an NL token.
                     * Lines of a doc comment go through addJavadocToken so that embedded tags become their own
                     * tokens; this now includes the text after the last newline, so a one-line doc comment or a
                     * tag on the closing line is treated the same as any other line. Lines of an ordinary
                     * comment become CommentSlashStar tokens.
                     */
                    void leaving()
                        {
                        // break into several tokens if contains \n
                        String comments = accumulatedComment.toString();
                        accumulatedComment.setLength( 0 );
                        // A doc comment opens with slash star star. The four chars slash star star slash
                        // are merely an empty ordinary comment, so they are excluded.
                        final boolean javaDoc = comments.startsWith( "/**" )
                                                && !comments.startsWith( "/**/" );
                        int place;
                        while ( ( place = comments.indexOf( '\n' ) ) >= 0 )
                            {
                            String comment = comments.substring( 0, place );
                            if ( javaDoc )
                                {
                                // special add that splits out @tags, adds CommentJavadoc or commentJavadocTag
                                addJavadocToken( comment );
                                }
                            else
                                {
                                addToken( new CommentSlashStar( comment ) );
                                }
                            addToken( new NL() );
                            comments = comments.substring( place + 1 );
                            } // end while
                        // deal with whatever is left over in comments, same treatment as the earlier lines
                        if ( comments.length() > 0 )
                            {
                            if ( javaDoc )
                                {
                                // previously added as one plain CommentJavadoc, so tags here were never split out
                                addJavadocToken( comments );
                                }
                            else
                                {
                                addToken( new CommentSlashStar( comments ) );
                                }
                            }
                        // end leaving IN_REM_SLASH_STAR
                        }

                    /**
                     * Pick the successor state while inside a slash star comment.
                     * Everything, EOL included, is consumed into the comment. A slash that directly follows a star,
                     * and is not the opening slash, is consumed too and then moves us to IN_WHITESPACE.
                     *
                     * @param category category of the character being examined.
                     * @param nextChar the character itself, used here only in the assertion message.
                     * @param first    true for the opening slash of the comment.
                     *
                     * @return the state to move to, or null for an unexpected category when assertions are off.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_REM_SLASH_STAR
                        switch ( category )
                            {
                            case STAR:
                                // remember it, the very next char might be the closing slash
                                prevWasStar = true;
                                how = HowToProcess.CONSUME;
                                return IN_REM_SLASH_STAR;
                            case SLASH:
                                {
                                // NOTE(review): the star of the opening slash star also sets prevWasStar, so
                                // slash star slash looks like it would be taken as a complete comment - confirm.
                                final boolean closing = !first && prevWasStar;
                                if ( first )
                                    {
                                    // leading slash at begin of slash star or slash star star
                                    prevWasStar = false;
                                    }
                                how = HowToProcess.CONSUME;
                                // on the closing slash go to IN_WHITESPACE, super would just send us back here.
                                // Otherwise it is the opening slash or just an incidental one.
                                return closing ? IN_WHITESPACE : IN_REM_SLASH_STAR;
                                }
                            case AT:
                            case BACKSLASH:
                            case DIGIT:
                            case DOT:
                            case EOL:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case UNDERSCORE:
                            case PUNCTUATION:
                            case QUOTE:
                            case SPACE:
                            case TICK:
                                // ordinary comment text, any pending star no longer counts
                                prevWasStar = false;
                                how = HowToProcess.CONSUME;
                                return IN_REM_SLASH_STAR;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        // end next IN_REM_SLASH_STAR
                        }
                    // end of enum constant IN_REM_SLASH_STAR
                    },
        /**
         * Someone saw a ' outside a comment and forwarded it to us. We accumulate the char literal terminated by a
         * '. We don't put either lead or trail ' in the buffer to save space. They are regenerated by the token as
         * needed. There are two complications: we can hit eol before hitting the end ', in which case we just render it
         * verbatim, and \' does not terminate the literal.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_TICKS
                    {
                    /**
                     * true if 'x' are balanced, false if hit EOL too soon. next() clears it on the opening ' and
                     * sets it on the closing '. leaving() reads it to choose between a CharLiteral and a Gibberish
                     * token, and sets it back to true after emitting Gibberish.
                     */
                    private boolean balanced = false;
                    /**
                     * Used by IN_TICKS to track whether ' preceded by \, so that an escaped ' is kept as part
                     * of the literal instead of ending it.
                     */
                    private boolean prevWasBackslash = false;

                    /**
                     * Append one character of the char literal to the shared quotation buffer.
                     *
                     * @param c character to keep as part of the literal.
                     */
                    void consume( char c )
                        {
                        // IN_TICKS shares the buffer used for things inside quotes
                        accumulatedQuotation.append( c );
                        }

                    /**
                     * Flush the accumulated literal as a token and empty the buffer.
                     * A properly closed literal becomes a CharLiteral, even when it is empty. One that was never
                     * closed becomes Gibberish with the opening ' put back in front.
                     */
                    void leaving()
                        {
                        final String body = accumulatedQuotation.toString();
                        accumulatedQuotation.setLength( 0 );
                        if ( !balanced )
                            {
                            // the closing ' never arrived: render what we collected as an error, not a literal
                            addToken( new Gibberish( "\'" + body ) );
                            // the anomaly has been dealt with
                            balanced = true;
                            return;
                            }
                        // the surrounding ticks are not part of the text, the token regenerates them as needed
                        addToken( new CharLiteral( body ) );
                        // end leaving IN_TICKS
                        }

                    /**
                     * Pick the successor state while inside a '...' char literal.
                     * The opening and closing ' are discarded, everything between is consumed. A ' that is escaped by
                     * a backslash is ordinary text. A backslash that is itself escaped does not escape what follows,
                     * so the literal holding a single escaped backslash terminates correctly at its closing '.
                     * EOL before the closing ' is forwarded to AT_END_OF_LINE with balanced left false.
                     *
                     * @param category category of the character being examined.
                     * @param nextChar the character itself, used here only in the assertion message.
                     * @param first    true for the opening ' of the literal.
                     *
                     * @return the state to move to, or null for an unexpected category when assertions are off.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_TICKS
                        switch ( category )
                            {
                            case AT:
                            case DIGIT:
                            case DOT:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case UNDERSCORE:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case SPACE:
                            case STAR:
                                prevWasBackslash = false;
                                how = HowToProcess.CONSUME;
                                return IN_TICKS;
                            case BACKSLASH:
                                // Toggle rather than set: a backslash escapes the next char only when it is not
                                // itself the target of an escape. Always setting true here made the closing ' of
                                // '\\' look escaped, so that literal never terminated.
                                prevWasBackslash = !prevWasBackslash;
                                how = HowToProcess.CONSUME;
                                return IN_TICKS;
                            case EOL:
                                // we had an unbalanced ' then eol
                                // balanced will be false
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case TICK:
                                if ( first )
                                    {
                                    // opening tick: start afresh, not stored in the buffer
                                    prevWasBackslash = false;
                                    balanced = false;
                                    how = HowToProcess.DISCARD;
                                    return IN_TICKS;
                                    }
                                else if ( prevWasBackslash )
                                    {
                                    // escaped tick, treat as an ordinary char
                                    prevWasBackslash = false;
                                    how = HowToProcess.CONSUME;
                                    return IN_TICKS;
                                    }
                                else
                                    {
                                    // was the final one, not stored in the buffer
                                    prevWasBackslash = false;
                                    balanced = true;
                                    how = HowToProcess.DISCARD;
                                    return IN_WHITESPACE;// super would just send us back
                                    // here
                                    }
                                // break;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        // end next IN_TICKS
                        }
                    // end of enum constant IN_TICKS
                    },
        /**
         * in white space, but not EOL.
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_WHITESPACE
                    {
                    /**
                     * Count one more space. The character itself is not stored.
                     *
                     * @param c ignored.
                     */
                    @SuppressWarnings( { "UnusedParameters" } )
                    void consume( char c )
                        {
                        // IN_WHITESPACE only needs the length of the run
                        spaceCount += 1;
                        }

                    /**
                     * Emit the run of spaces counted so far as a single Space token and reset the count.
                     * Does nothing when no spaces were counted.
                     */
                    void leaving()
                        {
                        if ( spaceCount <= 0 )
                            {
                            // nothing accumulated, no token wanted
                            return;
                            }
                        // one token stands for the whole run
                        addToken( new Space( spaceCount ) );
                        spaceCount = 0;
                        // end leaving IN_WHITESPACE
                        }

                    /**
                     * Pick the successor state while in a run of spaces.
                     * Another space is consumed and keeps us here. Any other known category is forwarded, with
                     * the shared next() of the enum choosing where it goes.
                     *
                     * @param category category of the character being examined.
                     * @param nextChar the character itself.
                     * @param first    passed through unchanged to the shared next().
                     *
                     * @return the state to move to, or null for an unexpected category when assertions are off.
                     */
                    @SuppressWarnings( { "UnusedDeclaration" } )
                    JavaState next( JavaCharCategory category,
                                    char nextChar,
                                    boolean first )
                        {
                        // IN_WHITESPACE
                        if ( category == JavaCharCategory.SPACE )
                            {
                            // the run continues
                            how = HowToProcess.CONSUME;
                            return IN_WHITESPACE;
                            }
                        switch ( category )
                            {
                            case AT:
                            case BACKSLASH:
                            case DIGIT:
                            case DOT:
                            case EOL:
                            case FENCE:
                            case OTHER:
                            case PLAIN:
                            case UNDERSCORE:
                            case PUNCTUATION:
                            case QUOTE:
                            case SLASH:
                            case STAR:
                            case TICK:
                                // the run is over, set how before delegating just as the shared code expects
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        // end next IN_WHITESPACE
                        }
                    // end of enum constant IN_WHITESPACE
                    };
    // //////////////////////////////////////////////////////////////////
    // common to all enum constants
    // declarations

    /**
     * true if want extra debugging checks and output. When true, analyseNumericLiteral echoes each
     * literal it is given to out.
     */
    private static final boolean DEBUGGING = false;

    /**
     * list of tokens we have parsed out. Static, so shared by all the enum constants; addToken appends to it.
     * Created with an initial capacity of 50000.
     */
    private static final ArrayList<Token> tokens =
            new ArrayList<>( 50000 );

    /**
     * all legal important keywords. Every entry also appears in keywords.
     */
    private static final HashSet<String> importantKeywords =
            new HashSet<>( Arrays.asList( "break",
                    "class",
                    "continue",
                    "for",
                    "interface",
                    "package",
                    "return",
                    "while" ) );

    /**
     * complete list of Java keywords, aka reserved words, includes the important keywords too.
     * The words false, null and true are listed here as well.
     */
    private static final HashSet<String> keywords = new HashSet<>( Arrays
            .asList( "abstract",
                    "assert",
                    "boolean",
                    "break",
                    "byte",
                    "case",
                    "catch",
                    "char",
                    "class",
                    "const",
                    "continue",
                    "default",
                    "do",
                    "double",
                    "else",
                    "enum",
                    "extends",
                    "false",
                    "final",
                    "finally",
                    "float",
                    "for",
                    "goto",
                    "if",
                    "implements",
                    "import",
                    "instanceof",
                    "int",
                    "interface",
                    "long",
                    "native",
                    "new",
                    "null",
                    "package",
                    "private",
                    "protected",
                    "public",
                    "return",
                    "short",
                    "static",
                    "strictfp",
                    "super",
                    "switch",
                    "synchronized",
                    "this",
                    "throw",
                    "throws",
                    "transient",
                    "true",
                    "try",
                    "void",
                    "volatile",
                    "while" ) );

    /**
     * list of java keywords used to define primitive variables. void is deliberately not in this list.
     * Consulted by findConstantDefs.
     */
    private static final HashSet<String> primitives =
            new HashSet<>( Arrays
                    .asList( "boolean",
                            "byte",
                            "char",
                            "double",
                            "float",
                            "int",
                            "long",
                            "short" ) );

    /**
     * regex to detect a decimal string literal. This will also match octal patterns, so octals must be filtered out
     * first. matches int , long, float/double without exponent.
     * Group 1 is an optional minus sign, group 2 the run of digits, underscores and dots,
     * group 3 an optional type suffix d D f F l or L.
     */
    private static final Pattern decimalPattern = Pattern
            .compile( "([-]?)([_\\d\\.]+)([dDfFlL]?)" );

    /**
     * regex to detect a decimal string literal with an exponent.  float or double.
     * Group 1 is an optional minus sign, group 2 the mantissa (digits, underscores and dots), group 3 the e or E,
     * group 4 an optional minus sign on the exponent, group 5 the exponent digits with an optional d D f or F suffix.
     * A plus sign on the exponent is not accepted.
     */
    private static final Pattern floatPattern = Pattern
            .compile( "([-]?)([_\\d\\.]+)([eE])([-]?)(\\d+[dDfF]?)" );

    /**
     * regex to detect an hex string literal -0xffffL.
     * Group 1 is the optional minus sign plus the 0x lead (lower case x only), group 2 the hex digits and
     * underscores, group 3 an optional l or L suffix.
     */
    private static final Pattern hexPattern = Pattern
            .compile( "([-]?0x)([_\\p{XDigit}]+)([lL]?)" );

    /**
     * recognise a legit Javadoc tag: an at sign, one of the tag names listed, then one whitespace character.
     * Group 1 is the tag name without the at sign. addJavadocToken applies it with lookingAt, so the tag must be at
     * the very start of the text tested.
     */
    private static final Pattern JAVADOCTAG_RECOGNISER = Pattern.compile(
            "@(author|deprecated|inheritDoc|override|param|return|see|serial|serialData|serialField|since|throws" +
            "|version)\\s"
    );

    /**
     * regex to detect an octal string literal -07777L.
     * Group 1 is the optional minus sign plus the leading 0, group 2 the octal digits (underscores are not
     * accepted), group 3 an optional l or L suffix.
     */
    private static final Pattern octalPattern = Pattern
            .compile( "([-]?0)([0-7]++)([lL]?)" );

    /**
     * accumulates strings forming an annotation
     */
    private static final StringBuilder accumulatedAnnotation =
            new StringBuilder( 10 );
    // statics are shared common to all enum constants.
    // others are a separate field in each enum constant.

    /**
     * accumulates char ' ' strings.
     * NOTE(review): IN_TICKS appends to accumulatedQuotation rather than to this buffer; confirm whether
     * anything still writes here.
     */
    private static final StringBuilder accumulatedCharLiteral =
            new StringBuilder( 5 );

    /**
     * accumulates any of the various types of comment. Both IN_REM_SLASH_SLASH and IN_REM_SLASH_STAR
     * flush and empty it in their leaving().
     */
    private static final StringBuilder accumulatedComment =
            new StringBuilder( 80 );

    /**
     * accumulates a string of fence chars e.g. (){}[]
     */
    private static final StringBuilder accumulatedFences =
            new StringBuilder( 10 );

    /**
     * accumulate names of classes, variable, methods.
     */
    private static final StringBuilder accumulatedName =
            new StringBuilder( 50 );

    /**
     * accumulate number, hex, decimal, scientific, float, long
     */
    private static final StringBuilder accumulatedNumeric =
            new StringBuilder( 50 );

    /**
     * accumulates strings of operators
     */
    private static final StringBuilder accumulatedOperators =
            new StringBuilder( 10 );

    /**
     * accumulates things inside quotes. IN_TICKS uses it as well, for the text between the ' marks.
     */
    private static final StringBuilder accumulatedQuotation =
            new StringBuilder( 80 );

    /**
     * how far we are through parsing the program
     */
    private static int charIndex;

    /**
     * how the next character will be treated, usually consumed and stuffed in a buffer, or forwarded to the next
     * state to deal with. It can also be discarded. Each state's next() sets this before returning.
     */
    private static HowToProcess how;

    /**
     * count of how many new lines encountered, shared by several states
     */
    private static int nlCount;

    /**
     * the program or program fragment we are parsing
     */
    private static String program;

    /**
     * the length of the program fragment we are parsing.
     */
    private static int size;

    /**
     * count of how many spaces encountered. IN_WHITESPACE bumps it in consume() and zeroes it in leaving()
     * once the Space token has been added.
     */
    private static int spaceCount;
    // declarations
    // methods

    /**
     * Turn one line of a JavaDoc comment into tokens, giving each recognised tag a token of its own.
     * Text between tags becomes CommentJavadoc tokens, a recognised tag becomes a CommentJavadocTag, and an at sign
     * that does not start a recognised tag becomes a one-character CommentJavadoc. Runs of small tokens are left
     * as they are; they are optimised later.
     *
     * @param comment comment text. no \n, but may contain embedded tags.
     */
    private static void addJavadocToken( String comment )
        {
        String rest = comment;
        // each pass peels one piece off the front, so rest keeps shrinking
        while ( !rest.isEmpty() )
            {
            final int at = rest.indexOf( '@' );
            if ( at < 0 )
                {
                // no tag candidates remain, everything left is ordinary javadoc
                addToken( new CommentJavadoc( rest ) );
                break;
                }
            if ( at > 0 )
                {
                // peel off the ordinary text in front of the at sign
                addToken( new CommentJavadoc( rest.substring( 0, at ) ) );
                rest = rest.substring( at );
                continue;
                }
            // rest now starts with the at sign
            final Matcher tagMatch = JAVADOCTAG_RECOGNISER.matcher( rest );
            final String piece;
            if ( tagMatch.lookingAt() )
                {
                // a legit tag, it gets the special token
                piece = '@' + tagMatch.group( 1 );
                addToken( new CommentJavadocTag( piece ) );
                }
            else
                {
                // stray at sign, an ordinary comment char that will be coalesced later
                piece = "@";
                addToken( new CommentJavadoc( piece ) );
                }
            rest = rest.substring( piece.length() );
            } // end loop
        }// /method

    /**
     * Append a token to the list to be rendered, unless the token reports itself as useless.
     *
     * @param t a token. Useless tokens are silently dropped.
     */
    private static void addToken( Token t )
        {
        if ( t.isUseless() )
            {
            // not worth keeping
            return;
            }
        tokens.add( t );
        }// /method

    /**
     * Work out what sort of numeric literal a string is and hand its pieces to makeNumericLiterals.
     * The patterns are tried in the order hex, octal, float with exponent, plain decimal. Order matters because the
     * decimal pattern would also accept an octal. Text that fits none of them is added as Gibberish.
     *
     * @param name String representing the literal. Will not have lead -, but could be float, hex, octal, decimal,
     *             with decimal point...
     */
    private static void analyseNumericLiteral( String name )
        {
        if ( DEBUGGING )
            {
            out.println( name );
            }
        // group 0 of a matcher is the whole match; the pieces we want are groups 1 up to groupCount inclusive.
        final Matcher hex = hexPattern.matcher( name );
        if ( hex.matches() )
            {
            assert hex.groupCount() == 3 : "hex literal pattern matcher failure";
            makeNumericLiterals( hex.group( 1 ), hex.group( 2 ), 4/* group by 4s */, hex.group( 3 ), 16 );
            return;
            }
        final Matcher octal = octalPattern.matcher( name );
        if ( octal.matches() )
            {
            assert octal.groupCount() == 3 : "octal literal pattern matcher failure";
            makeNumericLiterals( octal.group( 1 ), octal.group( 2 ), 3, octal.group( 3 ), 8 );
            return;
            }
        final Matcher floating = floatPattern.matcher( name );
        if ( floating.matches() )
            {
            assert floating.groupCount() == 5 : "float literal pattern matcher failure";
            // the exponent marker, its sign and its digits travel together as the trailing piece
            final String exponent = floating.group( 3 ) + floating.group( 4 ) + floating.group( 5 );
            makeNumericLiterals( floating.group( 1 ), floating.group( 2 ), 3, exponent, 10 );
            return;
            }
        final Matcher decimal = decimalPattern.matcher( name );
        if ( decimal.matches() )
            {
            assert decimal.groupCount() == 3 : "decimal literal pattern matcher failure";
            makeNumericLiterals( decimal.group( 1 ), decimal.group( 2 ), 3, decimal.group( 3 ), 10 );
            return;
            }
        // got garbage, but we have to render it anyway.
        addToken( new Gibberish( name ) );
        }// /method

    /**
     * Walk the token list and stamp every Fence with its nesting depth.
     * Braces, parentheses and brackets are counted independently. An opener gets the depth after stepping in, its
     * closer gets the depth before stepping out, so a matched pair carries the same number.
     *
     * @throws IllegalStateException if a Fence holds a character that is not one of the six fence characters.
     */
    private static void calcNestingDepths()
        {
        int curly = 0;
        int round = 0;
        int square = 0;
        for ( Token candidate : tokens )
            {
            if ( candidate instanceof Fence )
                {
                final Fence fence = ( Fence ) candidate;
                switch ( fence.getChar() )
                    {
                    case '{':
                        curly++;
                        fence.setNestingDepth( curly );
                        break;
                    case '}':
                        fence.setNestingDepth( curly );
                        curly--;
                        break;
                    case '(':
                        round++;
                        fence.setNestingDepth( round );
                        break;
                    case ')':
                        fence.setNestingDepth( round );
                        round--;
                        break;
                    case '[':
                        square++;
                        fence.setNestingDepth( square );
                        break;
                    case ']':
                        fence.setNestingDepth( square );
                        square--;
                        break;
                    default:
                        throw new IllegalStateException(
                                "JavaTokenizer.calcNestingDepths: invalid fence character" );
                    }
                }
            // tokens that are not fences have no depth to record
            } // end for
        }// /method

    /**
     * Swap a troublesome character for a harmless one.
     * Anything categorised as SPACE comes back as a plain blank; every other character is returned untouched.
     *
     * @param category category of this char
     * @param c        the char
     *
     * @return c if char is clean, a replacement if it were dirty e.g. tab
     */
    private static char clean( JavaCharCategory category, char c )
        {
        return category == JavaCharCategory.SPACE ? ' ' : c;
        }// /method

    /**
     * Merge neighbouring tokens wherever possible, to shorten the token list.
     * The list is swept from the back towards the front, and the sweep is repeated until one full pass removes
     * nothing. Three things happen in a sweep: a Space directly before an NL is dropped, a Space of at most 10 is
     * folded into a predecessor that allows it, and two tokens that declare themselves collapsible are joined.
     */
    private static void crunch()
        {
        int countBefore;
        do
            {
            countBefore = tokens.size();
            // After any removal the ordinary step down of idx is exactly what is wanted: the surviving token has
            // slid into the slot below and gets compared with its new predecessor on the next turn.
            for ( int idx = countBefore - 1; idx >= 1; idx-- )
                {
                final Token later = tokens.get( idx );
                final Token earlier = tokens.get( idx - 1 );
                if ( later instanceof NL && earlier instanceof Space )
                    {
                    // trim trailing blanks on line
                    tokens.remove( idx - 1 );
                    continue;
                    }
                if ( later instanceof Space )
                    {
                    final Space gap = ( Space ) later;
                    if ( gap.length() <= 10 && earlier.isCollapsible() )
                        {
                        // fold the blanks into the token in front
                        earlier.setText( earlier.getText() + gap.getText() );
                        tokens.remove( idx );
                        }
                    continue;
                    }
                if ( earlier.isCollapsible( later ) )
                    {
                    // join the pair into the earlier token
                    earlier.setText( earlier.getText() + later.getText() );
                    tokens.remove( idx );
                    }
                } // end for
            // go round again for as long as the last sweep shrank the list
            }
        while ( tokens.size() < countBefore );
        }// /method

    /**
     * Debugging aid: print one line describing a step of the state machine.
     * A CONSUME step goes to out in a slightly shorter form. Any other step goes to err with the how value added
     * at the end.
     *
     * @param theChar  char we are processing
     * @param category category of the char
     * @param first    did we just enter this state
     * @param oldState old state
     * @param state    current state
     * @param newState next state
     * @param how      do we consume, forward or discard this character.
     */
    private static void dumpState( char theChar,
                                   JavaCharCategory category,
                                   boolean first,
                                   JavaState oldState,
                                   JavaState state,
                                   JavaState newState,
                                   HowToProcess how )
        {
        // the part of the line that both forms share
        final String trace = theChar
                             + " "
                             + category
                             + " "
                             + first
                             + " o:"
                             + oldState
                             + " s:"
                             + state
                             + " n:"
                             + newState;
        if ( how == HowToProcess.CONSUME )
            {
            /* abbreviated form, black */
            out.println( trace );
            return;
            }
        /* full form, in red */
        err.println( trace + " " + how );
        }// /method

    /**
     * Make sure nothing is left accumulated in the shared buffers from parsing not yet converted to tokens.
     * With assertions enabled, a non-empty buffer raises an AssertionError naming the buffer and its content.
     * With assertions off, each buffer is simply emptied so stale text cannot leak into later tokens.
     * Covers every accumulator, including accumulatedAnnotation which used to be missed.
     */
    private static void ensureNoLeftovers()
        {
        assert accumulatedAnnotation.length() == 0 : "Annotation residual : "
                                                     + accumulatedAnnotation.toString();
        accumulatedAnnotation.setLength( 0 );
        assert accumulatedCharLiteral.length() == 0 : "CharLiteral residual : "
                                                      + accumulatedCharLiteral.toString();
        accumulatedCharLiteral.setLength( 0 );
        assert accumulatedComment.length() == 0 : "Comment residual : "
                                                  + accumulatedComment.toString();
        accumulatedComment.setLength( 0 );
        assert accumulatedFences.length() == 0 : "Fences residual : "
                                                 + accumulatedFences.toString();
        accumulatedFences.setLength( 0 );
        assert accumulatedName.length() == 0 : "Name residual : "
                                               + accumulatedName.toString();
        accumulatedName.setLength( 0 );
        assert accumulatedNumeric.length() == 0 : "Numeric residual : "
                                                  + accumulatedNumeric.toString();
        accumulatedNumeric.setLength( 0 );
        assert accumulatedOperators.length() == 0 : "Operators residual : "
                                                    + accumulatedOperators.toString();
        accumulatedOperators.setLength( 0 );
        assert accumulatedQuotation.length() == 0 : "Quotation residual : "
                                                    + accumulatedQuotation.toString();
        accumulatedQuotation.setLength( 0 );
        }// /method

    /**
     * Mark class definitions. The first class name that follows the keyword class, with nothing but
     * whitespace in between, is flagged as a definition rather than a reference.
     */
    private static void findClassDefs()
        {
        // true from the moment we see "class" until the name, or anything unexpected, turns up.
        boolean awaitingName = false;
        for ( Token token : tokens )
            {
            if ( token instanceof Keyword && token.getTrimmedText().equals( "class" ) )
                {
                awaitingName = true;
                continue;
                }
            if ( !awaitingName )
                {
                // not inside a "class xxxx" pattern, nothing to do.
                continue;
                }
            if ( token instanceof JavaClassName )
                {
                // promote the class reference to a class definition.
                ( ( Definable ) token ).setDefining( true );
                awaitingName = false;
                }
            else if ( !( token instanceof WhiteSpace ) )
                {
                // whitespace is skipped, anything else breaks the pattern.
                awaitingName = false;
                }
            }
        }// /method

    /**
     * Mark JavaConstant definitions. A JavaConstant that follows a keyword listed in primitives, or a
     * class name, with only whitespace in between, is flagged as a definition rather than a reference.
     */
    private static void findConstantDefs()
        {
        // true while the most recent significant token was a type.
        boolean typeSeen = false;
        for ( Token token : tokens )
            {
            final boolean isType =
                    ( token instanceof Keyword && primitives.contains( token.getTrimmedText() ) )
                    || token instanceof JavaClassName;
            if ( isType )
                {
                typeSeen = true;
                }
            else if ( token instanceof JavaConstant )
                {
                if ( typeSeen )
                    {
                    // a type directly ahead of the constant: this is where it is declared.
                    ( ( Definable ) token ).setDefining( true );
                    typeSeen = false;
                    }
                }
            else if ( !( token instanceof WhiteSpace ) )
                {
                // whitespace keeps the pattern alive, anything else ends it.
                typeSeen = false;
                }
            }
        }// /method

    /**
     * Mark constructor definitions. A class name that is not preceded by the keyword new and is
     * followed by an opening parenthesis (ignoring whitespace) is flagged as a definition.
     */
    private static void findConstructorDefs()
        {
        // class name waiting for its "(", or null when none is pending.
        Definable candidate = null;
        // true when the last significant token was the keyword new.
        boolean afterNew = false;
        for ( Token token : tokens )
            {
            if ( token instanceof WhiteSpace )
                {
                // whitespace of any flavour never disturbs the pattern.
                continue;
                }
            if ( token instanceof Keyword && token.getTrimmedText().equals( "new" ) )
                {
                afterNew = true;
                candidate = null;
                continue;
                }
            if ( token instanceof JavaClassName )
                {
                if ( !afterNew )
                    {
                    // might be a constructor name, wait and see what follows.
                    candidate = ( Definable ) token;
                    continue;
                    }
                // "new Xxxx" is an instantiation, not a definition; fall through to the reset.
                }
            else if ( token instanceof Fence && token.getTrimmedText().equals( "(" ) && candidate != null )
                {
                // class name directly followed by ( : flag it as a definition.
                candidate.setDefining( true );
                }
            // every path that did not continue above starts the pattern afresh.
            afterNew = false;
            candidate = null;
            }
        }// /method

    /**
     * Mark interface definitions. The first class name that follows the keyword interface, with only
     * whitespace in between, is replaced in the token list by a defining InterfaceName.
     */
    private static void findInterfaceDefs()
        {
        // pattern sought: interface xxxx
        boolean awaitingName = false;
        for ( int index = 0, count = tokens.size(); index < count; index++ )
            {
            final Token token = tokens.get( index );
            if ( token instanceof Keyword && token.getTrimmedText().equals( "interface" ) )
                {
                awaitingName = true;
                continue;
                }
            if ( !awaitingName )
                {
                continue;
                }
            if ( token instanceof JavaClassName )
                {
                // swap the class-name reference for an interface definition.
                tokens.set( index, new InterfaceName( token.getText(), true ) );
                awaitingName = false;
                }
            else if ( !( token instanceof WhiteSpace ) )
                {
                // something other than whitespace got in the way, give up on this one.
                awaitingName = false;
                }
            }
        }// /method

    /**
     * Mark interface references. Every class name in the list that follows the keyword implements is
     * replaced by a non-defining InterfaceName. The list continues across whitespace and commas and
     * ends at any other token. Interfaces used elsewhere are not detected.
     */
    private static void findInterfaceRefs()
        {
        // pattern sought: implements xxxx , xxxx
        boolean inList = false;
        for ( int index = 0, count = tokens.size(); index < count; index++ )
            {
            final Token token = tokens.get( index );
            if ( token instanceof Keyword && token.getTrimmedText().equals( "implements" ) )
                {
                inList = true;
                continue;
                }
            if ( !inList )
                {
                continue;
                }
            if ( token instanceof JavaClassName )
                {
                // swap the class-name reference for an interface reference, and keep going.
                tokens.set( index, new InterfaceName( token.getText(), false ) );
                }
            else
                {
                final boolean separator =
                        token instanceof WhiteSpace
                        || ( token instanceof Operator && token.getTrimmedText().equals( "," ) );
                if ( !separator )
                    {
                    // anything but whitespace or a comma terminates the list.
                    inList = false;
                    }
                }
            }
        }// /method

    /**
     * Mark labels. A Var that comes after a { fence or a semicolon (Noise tokens in between are
     * skipped) and whose immediately following token is the : operator is replaced by a Label.
     */
    private static void findLabels()
        {
        // true while only Noise has been seen since the last { or ;
        boolean atStatementStart = false;
        for ( int index = 0; index < tokens.size(); index++ )
            {
            final Token token = tokens.get( index );
            final boolean opensStatement =
                    ( token instanceof Fence && token.getTrimmedText().equals( "{" ) )
                    || token instanceof Semicolon;
            if ( opensStatement )
                {
                atStatementStart = true;
                continue;
                }
            if ( !atStatementStart || token instanceof Noise )
                {
                // either no pattern in progress, or noise that does not disturb it.
                continue;
                }
            if ( token instanceof Var && index + 1 < tokens.size() )
                {
                // only the very next token is inspected for the colon.
                final Token follower = tokens.get( index + 1 );
                if ( follower instanceof Operator && follower.getTrimmedText().equals( ":" ) )
                    {
                    // convert from var to label
                    tokens.set( index, new Label( token.getText() ) );
                    }
                }
            // a var, label or not, or any other token, ends the label pattern.
            atStatementStart = false;
            }
        }// /method

    /**
     * Mark method definitions. A Method token that follows a return type (the keyword void, a keyword
     * listed in primitives, or a class name) is flagged as a definition. Noise tokens and fences whose
     * text ST.isLegal accepts against "[ ]" (presumably array brackets) may sit between type and name.
     */
    private static void findMethodDefs()
        {
        // true while the last significant thing seen was a possible return type.
        boolean typeSeen = false;
        for ( Token token : tokens )
            {
            final String text = token.getTrimmedText();
            final boolean isReturnType =
                    ( token instanceof Keyword && ( text.equals( "void" ) || primitives.contains( text ) ) )
                    || token instanceof JavaClassName;
            if ( isReturnType )
                {
                typeSeen = true;
                continue;
                }
            if ( !typeSeen )
                {
                // no type pending, so nothing here can be a definition.
                continue;
                }
            if ( ( token instanceof Fence && ST.isLegal( text, "[ ]" ) ) || token instanceof Noise )
                {
                // brackets and noise leave the pending type in force.
                continue;
                }
            if ( token instanceof Method )
                {
                // was recorded as a method reference, but really is a method definition.
                ( ( Definable ) token ).setDefining( true );
                }
            // method found, or something else intervened: either way the pattern is over.
            typeSeen = false;
            }
        }// /method

    /**
     * Find methods. A Var that is followed by an opening parenthesis, with only Noise tokens in
     * between, is replaced in the token list by a non-defining Method.
     */
    private static void findMethodRefs()
        {
        // index of the most recent Var that may still turn out to be a method name, -1 when none.
        int candidate = -1;
        for ( int index = 0, count = tokens.size(); index < count; index++ )
            {
            final Token token = tokens.get( index );
            if ( token instanceof Var )
                {
                candidate = index;
                continue;
                }
            if ( candidate < 0 )
                {
                continue;
                }
            if ( token instanceof Fence && token.getTrimmedText().equals( "(" ) )
                {
                // the pending var was really a method name.
                final String name = tokens.get( candidate ).getText();
                tokens.set( candidate, new Method( name, false/* ref */ ) );
                candidate = -1;
                }
            else if ( !( token instanceof Noise ) )
                {
                // something other than noise followed the var, so it was not a method.
                candidate = -1;
                }
            }
        }// /method

    /**
     * Mark the package being defined. Every Var after the keyword package is replaced by a defining
     * PackageName. The name continues across dot operators and Noise tokens and ends at any other token.
     */
    private static void findPackageDefs()
        {
        // pattern sought: package xxxx . xxxx . xxxx ;
        boolean inName = false;
        for ( int index = 0, count = tokens.size(); index < count; index++ )
            {
            final Token token = tokens.get( index );
            if ( token instanceof Keyword && token.getTrimmedText().equals( "package" ) )
                {
                inName = true;
                continue;
                }
            if ( !inName )
                {
                continue;
                }
            if ( token instanceof Var )
                {
                // one segment of the package name; more may follow.
                tokens.set( index, new PackageName( token.getText(), true ) );
                }
            else
                {
                final boolean filler =
                        ( token instanceof Operator && token.getTrimmedText().equals( "." ) )
                        || token instanceof Noise;
                if ( !filler )
                    {
                    /* end of the name, usually the ; */
                    inName = false;
                    }
                }
            }
        }// /method

    /**
     * Mark package names used in imports. Every Var after the keyword import is replaced by a
     * non-defining PackageName. The name continues across Noise tokens, dot operators and any token
     * whose trimmed text is "*" (whatever its type), and ends at any other token.
     */
    private static void findPackageRefs()
        {
        // pattern sought: import xxxx . xxxx . xxxx.* ;
        boolean inName = false;
        for ( int index = 0, count = tokens.size(); index < count; index++ )
            {
            final Token token = tokens.get( index );
            if ( token instanceof Keyword && token.getTrimmedText().equals( "import" ) )
                {
                inName = true;
                continue;
                }
            if ( !inName )
                {
                continue;
                }
            if ( token instanceof Var )
                {
                // one segment of the imported name; more may follow.
                tokens.set( index, new PackageName( token.getText(), false ) );
                }
            else
                {
                final boolean filler =
                        token instanceof Noise
                        || ( token instanceof Operator && token.getTrimmedText().equals( "." ) )
                        || token.getTrimmedText().equals( "*" );
                if ( !filler )
                    {
                    /* end of the name, usually the ; */
                    inName = false;
                    }
                }
            }
        }// /method

    /**
     * Mark variable definitions. A Var that follows a type (a keyword listed in primitives, or a class
     * name) is flagged as defining. Noise tokens and fences whose text ST.isLegal accepts against
     * "[ ]" (presumably array brackets) may sit between the type and the name.
     */
    private static void findVarDefs()
        {
        // true while the last significant thing seen was a type.
        boolean typeSeen = false;
        for ( Token token : tokens )
            {
            if ( ( token instanceof Keyword && primitives.contains( token.getTrimmedText() ) )
                 || token instanceof JavaClassName )
                {
                typeSeen = true;
                }
            else if ( typeSeen )
                {
                final boolean skippable =
                        ( token instanceof Fence && ST.isLegal( token.getTrimmedText(), "[ ]" ) )
                        || token instanceof Noise;
                if ( !skippable )
                    {
                    if ( token instanceof Var )
                        {
                        // the Var right after a type is where the variable is declared.
                        ( ( Definable ) token ).setDefining( true );
                        }
                    // the Var has been handled, or something else intervened.
                    typeSeen = false;
                    }
                }
            }
        }// /method

    /**
     * Is a given string coming up in the stream, starting at the current character (charIndex)?
     * Compares ignoring case.
     * <p>
     * A match that ends exactly on the last character of the program counts. The previous
     * bounds test (charIndex + length &lt; size) wrongly demanded at least one more character after the
     * match, so for example a comment marker at the very end of the text was not recognised.
     *
     * @param expected string to test for in the stream
     *
     * @return true if this string is coming up, case-insensitive. False if too few characters remain.
     */
    private static boolean isComing( String expected )
        {
        // regionMatches does the bounds check itself: it returns false, rather than throwing,
        // when fewer than expected.length() characters remain from charIndex onward.
        return program.regionMatches( true, charIndex, expected, 0, expected.length() );
        }// /method

    /**
     * Split numeric literal into groups of three or four, with slightly different colours for head and tail indicator
     * characters not part of the number proper.
     * <p>
     * Tokens are added in order: head indicator, digits, tail indicator. A body containing an
     * underscore is emitted exactly once as a single token, since the author already grouped the digits.
     * Otherwise the part left of the decimal point is cut into alternating high/low groups, arranged so
     * that the last group is a low one, and the decimal point with everything after it forms one low token.
     *
     * @param head     e.g. 0x for hex or 0 for octal. contains optional sign.
     * @param body     the number e.g. 122345  possibly with embedded decimal point
     * @param grouping 3 for decimal and octal 4 for hex
     * @param tail     tail indicator letter D F L d f l. For scientific  notation, tail will have form E-07d
     * @param base     radix of the number to display
     */
    private static void makeNumericLiterals( final String head,
                                             final String body,
                                             int grouping,
                                             final String tail,
                                             final int base )
        {
        if ( DEBUGGING )
            {
            out.println( head + ":" + body + ":" + grouping + ":" + tail + ":" + base );
            }
        if ( head != null && head.length() > 0 )
            {
            addToken( new NumericIndicator( head ) );
            }
        if ( body != null && body.length() > 0 )
            {
            if ( body.contains( "_" ) )
                {
                // Underscores already group the digits, so the body goes out whole.
                // It must be added only once; adding it twice duplicated the literal in the output.
                addToken( new NumericLiteralLow( body, base ) );
                }
            else
                {
                // split off part to right of decimal point, if any. right keeps the point itself.
                final int dotPlace = body.lastIndexOf( '.' );
                final String left = dotPlace >= 0 ? body.substring( 0, dotPlace ) : body;
                final String right = dotPlace >= 0 ? body.substring( dotPlace ) : null;
                final int length = left.length();
                // Group in blocks of 3 or 4, with alternating colours, working left to right.
                // Choose the starting colour so the final group ends up ordinary (low):
                // e.g. 111222 = length 6 = 2 groups, even, so start with high.
                // 2 = length 1 = 1 group, odd, so start with low.
                boolean useHigh = ( ( ( length + grouping - 1 ) / grouping ) & 1 ) == 0;
                // the leftmost group is the short one when length is not a multiple of grouping.
                int groupStart = 0;
                int groupEnd = length % grouping;
                if ( groupEnd == 0 )
                    {
                    groupEnd = grouping;
                    }
                while ( groupStart < length )
                    {
                    final String digits = left.substring( groupStart, groupEnd );
                    if ( useHigh )
                        {
                        addToken( new NumericLiteralHigh( digits, base ) );
                        }
                    else
                        {
                        addToken( new NumericLiteralLow( digits, base ) );
                        }
                    // toggle for next group
                    useHigh = !useHigh;
                    groupStart = groupEnd;
                    groupEnd += grouping;
                    }
                // handle stuff from decimal point to the right, never grouped.
                if ( right != null )
                    {
                    addToken( new NumericLiteralLow( right, base ) );
                    }
                }
            }
        // handle any trailing L F D l f d
        if ( tail != null && tail.length() > 0 )
            {
            addToken( new NumericIndicator( tail ) );
            }
        }// /method

    /**
     * Check whether the character right after the current one (the dot at charIndex) is a digit 0..9.
     *
     * @return true if a digit follows the dot we have seen, false otherwise or if the dot is the last
     * character of the program.
     */
    private static boolean numberComingAfterThisDot()
        {
        final int nextIndex = charIndex + 1;
        if ( nextIndex < size )
            {
            final char following = program.charAt( nextIndex );
            return following >= '0' && following <= '9';
            }
        // the dot is the final character, nothing can follow it.
        return false;
        }// /method

    /**
     * Clear out the state machine ready to parse a new program: zero the NL and space counters, empty
     * every accumulation buffer and the token list, forget how, and rewind charIndex to 0.
     */
    private static void reset()
        {
        nlCount = 0;
        spaceCount = 0;
        accumulatedCharLiteral.setLength( 0 );
        accumulatedComment.setLength( 0 );
        accumulatedFences.setLength( 0 );
        accumulatedName.setLength( 0 );
        // The numeric buffer must be cleared like its siblings. Otherwise digits left behind by a
        // parse that was aborted part way would leak into the next program.
        accumulatedNumeric.setLength( 0 );
        accumulatedOperators.setLength( 0 );
        accumulatedQuotation.setLength( 0 );
        tokens.clear();
        how = null;
        charIndex = 0;
        // keeping certain variables local or global is crucial.
        // Don't mess with them without thinking carefully and
        // changing the docs:
        // local: category, first, oldState, state, theChar
        // global: charIndex, how
        // we make these local to discourage accidental snooping or
        // accidentally picking up the static version instead of the parm.
        }// /method

    /**
     * Strip leading and trailing NL tokens from the token list, then bracket the list with a Start
     * token at the front and a Stop token at the end. It is easier to do this afterwards than during parsing.
     */
    private static void trimNLs()
        {
        // peel NLs off the front, one at a time.
        while ( tokens.size() != 0 )
            {
            if ( !( tokens.get( 0 ) instanceof NL ) )
                {
                break;
                }
            tokens.remove( 0 );
            }
        // peel NLs off the back.
        for ( int last = tokens.size() - 1; last >= 0 && tokens.get( last ) instanceof NL; last-- )
            {
            tokens.remove( last );
            }
        // no NL is wanted at either end; wrap what is left in the pre element.
        tokens.add( 0, new Start( "<pre class=\"java\">" ) );
        addToken( new Stop( "</pre>" ) );
        }// /method

    /**
     * Consume one character in this state. It has been predecided that you can and will consume it:
     * parse calls this on the state whose next() set how to HowToProcess.CONSUME, before calling
     * leaving() if that same call to next() also changed the state.
     *
     * @param c char to consume, as returned by clean()
     */
    abstract void consume( char c );// /method

    /**
     * What to do on leaving this state, after its last char is consumed. parse calls it on the old
     * state after a CONSUME or DISCARD that changed state, on every FORWARD, and once more at the end
     * of the program if the final step did not change state.
     */
    abstract void leaving();// /method

    /**
     * Default next method: picks the state that should handle a character purely from the
     * character's category. Only valid when how is already HowToProcess.FORWARD, which is asserted.
     *
     * @param category class of next character
     * @param nextChar next character to process, used only in the assertion message here
     * @param first    true if we are just entering this state, not used by this default
     *
     * @return next JavaState, or null (after a failed assertion) for a category that should never arrive here
     * D o   n o t   m a k e   p r i v a t e ! ! !
     */
    @SuppressWarnings( { "WeakerAccess" } )
    JavaState next( JavaCharCategory category, char nextChar, boolean first )
        {
        assert how == HowToProcess.FORWARD : "default next used without forwarding. how: " + how;
        switch ( category )
            {
            case AT:
                return IN_ANNOTATION;
            case DIGIT:
                return IN_NUMERIC_LITERAL;
            case DOT:
                // a dot directly ahead of a digit starts a number such as .5
                return numberComingAfterThisDot() ? IN_NUMERIC_LITERAL : IN_OPERATOR;
            case SLASH:
                if ( isComing( "//" ) )
                    {
                    return IN_REM_SLASH_SLASH;
                    }
                if ( isComing( "/*" ) )
                    {
                    // includes /**
                    return IN_REM_SLASH_STAR;
                    }
                // a lone slash is just an operator.
                return IN_OPERATOR;
            case BACKSLASH:
            case PUNCTUATION:
            case STAR:
                return IN_OPERATOR;
            case EOL:
                return AT_END_OF_LINE;
            case FENCE:
                return IN_FENCE;
            case SPACE:
                return IN_WHITESPACE;
            case QUOTE:
                return IN_QUOTES;
            case TICK:
                return IN_TICKS;
            case OTHER:
            case PLAIN:
            case UNDERSCORE:
                // a leading _ is not treated as a number
                return IN_NAME;
            case IGNORE:
                // should never get this far
            default:
                assert false :
                        "bad state " + category + " " + nextChar;
                return null;
            }
        }// /method

    /**
     * Parse program and leave a list of Tokens in tokens ArrayList.
     * <p>
     * Each character is categorised and cleaned; characters of category IGNORE are skipped. For every
     * other character the current state's next() is called. It returns the new state and, through the
     * static how, says what to do with the character: CONSUME (the old state consumes it), DISCARD
     * (drop it) or FORWARD (offer the same character to the new state, up to three attempts in all).
     * leaving() is called on the old state whenever the state changes. After the last character the
     * token list is post-processed by crunch, trimNLs, the find* passes and calcNestingDepths.
     *
     * @param program the text we are going to parse and eventually render.
     *
     * @return an array of tokens representing the text and how it will be rendered.
     */
    @SuppressWarnings( { "UnusedAssignment" } )
    public static Token[] parse( String program )
        {
        reset();
        JavaState.program = program;
        size = program.length();
        /*
         * keeping certain variables local or global is crucial. Don't mess with
         * them without thinking carefully and changing the docs: LOCAL:
         * category, first, oldState, state, theChar. GLOBAL: charIndex, how. We
         * make these local to discourage accidental snooping or accidentally
         * picking up the static version instead of the parm.
         */
        // where we were
        JavaState oldState = AT_END_OF_LINE;
        // where we are
        JavaState state = AT_END_OF_LINE;
        // where we will be next
        JavaState newState;
        // how is global however, so next can return both a state and
        // how.
        how = null;
        // Note, NO int charIndex !! Don't "repair that".
        // charIndex is a static variable globally known so "isComing" can
        // use it.
        for ( charIndex = 0; charIndex < size; charIndex++ )
            {
            // next char to process
            char theChar = program.charAt( charIndex );
            // decide which general category the char falls in
            final JavaCharCategory category = JavaCharCategory
                    .categorise( theChar );
            theChar = clean( category, theChar );
            if ( category != JavaCharCategory.IGNORE )
                {
                /*
                 * keep going till some state consumes/discards the character.
                 * Allow up to three forwarding attempts to deal with the
                 * character. Usually we should succeed on the first or second
                 * attempt. We always make at least one trip through.
                 */
                attempts:
                for ( int times = 0; times < 3; times++ )
                    {
                    // first is deliberately local. True when the previous step changed state.
                    final boolean first = state != oldState;
                    /*
                     * crank the state machine one cycle. State should modify
                     * how in addition to returning the new state. A little ugly
                     * but simplest way to return a pair of values: state and
                     * how.
                     */
                    how = null;
                    /*
                     * setting to null ensures not setting it will be caught.
                     */
                    /*
                     * This is the guts of the finite state automaton: decide the
                     * next state
                     */
                    // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
                    newState = state.next( category, theChar, first );
                    // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    if ( DEBUGGING )
                        {
                        dumpState( theChar,
                                category,
                                first,
                                oldState,
                                state,
                                newState,
                                how );
                        }
                    // kick over to the next generation,
                    // we are now in the newState. We make the
                    // transition here. From this point on, oldState is the
                    // state whose next() was just called.
                    oldState = state;
                    state = newState;
                    newState = null;
                    if ( how == null )
                        {
                        throw new NullPointerException( "JavaState bug: how not set. OldState=" + oldState + " " +
                                                        "newState=" + state + " next() must not be private." );
                        }
                    switch ( how )
                        {
                        case CONSUME:
                            // the state that made the decision consumes its own char.
                            oldState.consume( theChar );
                            if ( state != oldState )
                                {
                                oldState.leaving();
                                }
                            // and on to the next char
                            break attempts;
                        case DISCARD:
                            if ( state != oldState )
                                {
                                oldState.leaving();
                                }
                            // and on to the next char
                            break attempts;
                        default:
                            // should never get here
                            assert false :
                                    "JavaTokenizer state machine failed to set how variable.";
                            break attempts;
                        case FORWARD:
                            assert state
                                   != oldState :
                                    "JavaTokenizer state machine attempted to forward a char to the same state.";
                            oldState.leaving();
                            /*
                             * we give that character another try with the new
                             * state
                             */
                        } // end switch
                    /*
                     * we will only ever get here if we are forwarding. The
                     * other cases leave the loop early.
                     */
                    }
                // end attempts loop
                // we fall out the bottom and land here no matter what.
                // If how is still FORWARD, all three attempts were used up.
                assert how == HowToProcess.CONSUME || how == HowToProcess
                        .DISCARD :
                        "JavaTokenizer state machine failed to consume char in three state forwarding attempts.";
                } // end if ignore
            } // end for each character
        if ( state == oldState )
            {
            // force a final wrapup -- e.g. slash star comment without
            // terminator
            oldState.leaving();
            }
        /*
         * make sure nothing still sitting in accumulation buffer after we have
         * finished parsing the entire program.
         */
        ensureNoLeftovers();
        crunch();
        // tidy up the list of tokens.
        trimNLs();
        // the ref passes run first, then the def passes.
        findPackageRefs();
        // findClassRefs(); not needed, already picked out by caps
        findInterfaceRefs();
        // findConstantRefs(); not needed already picked out by caps
        // findVarRefs(); not needed, all identifiers assumed Var to
        // start.
        // findConstructorRefs(), not needed, already treated like class
        // ref
        findMethodRefs();
        findPackageDefs();
        findClassDefs();
        findInterfaceDefs();
        findConstantDefs();
        findVarDefs();
        findLabels();
        findConstructorDefs();
        findMethodDefs();
        calcNestingDepths();
        // convert to vanilla array for even more efficient use in the
        // final Applet.
        return tokens.toArray( new Token[ tokens.size() ] );
        }// /method
    // /methods
    }