/*
 * [BatState.java]
 *
 * Summary: Bat file parser, finite state automaton.
 *
 * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  3.1 2009-04-12 shorter style names, improved highlighting.
 */
/**
 * State machine for BatTokenizer.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.1 2009-04-12 shorter style names, improved highlighting.
 * @since 2004-05-15
 */
package com.mindprod.jprep;

import com.mindprod.jtokens.Gibberish;
import com.mindprod.jtokens.Keyword;
import com.mindprod.jtokens.Label;
import com.mindprod.jtokens.NL;
import com.mindprod.jtokens.Operator;
import com.mindprod.jtokens.Semicolon;
import com.mindprod.jtokens.Separator;
import com.mindprod.jtokens.Space;
import com.mindprod.jtokens.Start;
import com.mindprod.jtokens.Stop;
import com.mindprod.jtokens.StringLiteral;
import com.mindprod.jtokens.Token;
import com.mindprod.jtokens.bat.BatComment;
import com.mindprod.jtokens.bat.BatText;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;

import static java.lang.System.*;

/**
 * Bat file parser, finite state automaton.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.1 2009-04-12 shorter style names, improved highlighting.
 * @since 2004-05-02
 */
@SuppressWarnings( { "NestedAssignment", "UnnecessaryContinue", "WeakerAccess", "EnumeratedConstantNamingConvention" } )
public enum BatState
    {
        /**
         * somebody noticed an EOL and forwarded it to us. The EOL will be the first character we consume.
         */
        AT_END_OF_LINE
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        nlCount++;
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        // emit pending nls and white space
                        if ( nlCount > 3 )
                            {
                            nlCount = 3;
                            }
                        if ( nlCount > 0 )
                            {
                            addToken( new NL( nlCount ) );
                            nlCount = 0;
                            spaceCount = 0;
                            }
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case OTHER:
                            case PUNCTUATION:
                            case SEPARATOR:
                            case QUOTE:
                            case SPACE:
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            case PLAIN:
                                if ( isComingWord( "rem" ) || isComingWord( "remark" )
                                        )
                                    {
                                    how = HowToProcess.FORWARD;
                                    return IN_COMMENT;
                                    }
                                else
                                    {
                                    how = HowToProcess.FORWARD;
                                    return IN_TEXT;
                                    }
                            case COLON:
                                if ( isComing( "::" ) )
                                    {
                                    how = HowToProcess.FORWARD;
                                    return IN_COMMENT;
                                    }
                                else
                                    {
                                    how = HowToProcess.FORWARD;
                                    return IN_LABEL;
                                    }
                            case EOL:
                                how = HowToProcess.CONSUME;
                                return AT_END_OF_LINE;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end AT_END_OF_LINE
        /**
         * in name, keyword or identifier
         */
        IN_TEXT
                    {
                    /**
                     * Append one character to the name being accumulated. It has been
                     * predecided that you can and will consume it.
                     *
                     * @param c char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedName.append( c );
                        }

                    /**
                     * What to do on leaving state, after last char is consumed.
                     * Emits the accumulated name as a Keyword token if it is a bat
                     * keyword (compared case-insensitively), otherwise as a BatText
                     * token. Emits nothing if no characters were accumulated.
                     */
                    void leaving()
                        {
                        String name = accumulatedName.toString();
                        accumulatedName.setLength( 0 );
                        if ( name.length() > 0 )
                            {
                            // Fold case with Locale.ROOT so the lookup does not depend on the
                            // JVM default locale. Under a Turkish locale "IF".toLowerCase()
                            // yields a dotless i and would never match the keyword "if".
                            if ( keywords.contains( name.toLowerCase( Locale.ROOT ).trim() ) )
                                {
                                // keyword
                                addToken( new Keyword( name ) );
                                }
                            else
                                {
                                // bat command
                                addToken( new BatText( name ) );
                                } // end else
                            } // end if
                        } // end leaving

                    /**
                     * Decide which state handles the next character and record in
                     * <code>how</code> whether that character is consumed here or
                     * forwarded unconsumed to the state returned.
                     *
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @param first    not examined by this state; passed through to the default
                     *                 transition in super.next
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case COLON:// just punctuation except at SOL
                            case EOL:
                            case OTHER:
                            case PUNCTUATION:
                            case SEPARATOR:
                            case SPACE:
                            case QUOTE:
                                // the name has ended, use the default transition
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            case PLAIN:
                                // still inside the name
                                how = HowToProcess.CONSUME;
                                return IN_TEXT;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_TEXT
        /**
         * in comment :: or REM, or REMARK, perhaps in the :; rem remark
         */
        IN_COMMENT
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedComment.append( c );
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        String comment = accumulatedComment.toString();
                        accumulatedComment.setLength( 0 );
                        if ( comment.length() != 0 )
                            {
                            // token added includes the :: rem remark
                            addToken( new BatComment( comment ) );
                            }
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case COLON:
                            case OTHER:
                            case PLAIN:
                            case PUNCTUATION:
                            case SEPARATOR:
                            case QUOTE:
                            case SPACE:// we don't split space out as separate Space
                                // tokens.
                                // Just treat as an ordinary char inside a comment.
                                // In contrast, it does get split out in commands.
                                // stay in comment
                                how = HowToProcess.CONSUME;
                                return IN_COMMENT;
                            case EOL:
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_TEXT_COMMENT
        /**
         * in label :here
         */
        IN_LABEL
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedLabel.append( c );
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        String label = accumulatedLabel.toString();
                        accumulatedLabel.setLength( 0 );
                        if ( label.length() != 0 )
                            {
                            addToken( new Label( label ) );
                            }
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case COLON:
                                if ( first )
                                    {
                                    how = HowToProcess.CONSUME;
                                    return IN_LABEL;
                                    }
                                else
                                    {
                                    how = HowToProcess.FORWARD;
                                    return IN_SEPARATOR;
                                    }
                            case EOL:
                            case OTHER:
                            case PUNCTUATION:
                            case SEPARATOR:
                            case QUOTE:
                            case SPACE:
                                // hit end of label
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            case PLAIN:
                                // stay in label
                                how = HowToProcess.CONSUME;
                                return IN_LABEL;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_LABEL
        /**
         * in string of operator characters, e.g. + - ( ) etc., but not the separators / : \
         */
        IN_OPERATOR
                    {
                    /**
                     * Append one character to the run of operators being accumulated.
                     * It has been predecided that you can and will consume it.
                     *
                     * @param c char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedOperators.append( c );
                        }

                    /**
                     * What to do on leaving state, after last char is consumed.
                     * Splits the accumulated run at each ';': every non-empty group
                     * between semicolons becomes an Operator token and every ';'
                     * becomes a Semicolon token. Zero-length groups (before a leading
                     * ';', between adjacent ';', or after a trailing ';') emit nothing,
                     * in line with the other states, which never emit empty tokens.
                     */
                    void leaving()
                        {
                        final String operators = accumulatedOperators.toString();
                        accumulatedOperators.setLength( 0 );
                        // treat ; specially.
                        int start = 0;
                        int place;
                        while ( ( place = operators.indexOf( ';', start ) ) >= 0 )
                            {
                            if ( place > start )
                                {
                                // operators sitting in front of this semicolon
                                addToken( new Operator( operators.substring( start, place ) ) );
                                }
                            addToken( new Semicolon() );
                            start = place + 1;
                            } // end while
                        // deal with whatever is left over after the last semicolon
                        if ( start < operators.length() )
                            {
                            addToken( new Operator( operators.substring( start ) ) );
                            }
                        } // end leaving

                    /**
                     * Decide which state handles the next character and record in
                     * <code>how</code> whether that character is consumed here or
                     * forwarded unconsumed to the state returned.
                     *
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @param first    not examined by this state; passed through to the default
                     *                 transition in super.next
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case EOL:
                            case PLAIN:
                            case QUOTE:
                            case SPACE:
                            case COLON:
                            case SEPARATOR:
                                // run of operators has ended, use the default transition
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            case OTHER:
                            case PUNCTUATION:
                                // continue this token made of operators
                                how = HowToProcess.CONSUME;
                                return IN_OPERATOR;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_OPERATOR
        /**
         * inside " ... ". No mechanism exists in the bat language to put a " inside a quoted string.
         * We may also be sitting on the opening or closing " itself.
         */
        IN_QUOTES
                    {
                    /**
                     * true if "..." are balanced, false if hit EOL too soon.
                     */
                    boolean balanced = false;

                    /**
                     * Append one character to the quotation being accumulated. The
                     * caller has already decided that this character is to be consumed
                     * here.
                     *
                     * @param c char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedQuotation.append( c );
                        }

                    /**
                     * Called on leaving the state, after the last char is consumed.
                     * A balanced quotation is emitted as a StringLiteral token, even
                     * when empty. An unbalanced one is emitted as a Gibberish token
                     * with the leading " put back.
                     */
                    void leaving()
                        {
                        // the accumulated text carries neither the lead nor the trail ",
                        // which is how the token wants it.
                        final String quotation = accumulatedQuotation.toString();
                        accumulatedQuotation.setLength( 0 );
                        if ( !balanced )
                            {
                            // document had unbalanced " ...", missing trailing ".
                            // We treat it not as a literal, but as ordinary command text.
                            addToken( new Gibberish( "\"" + quotation ) );
                            // now that we have handled this anomaly
                            balanced = true;
                            return;
                            }
                        // add even if 0 length
                        // surrounding quotes generated as needed.
                        addToken( new StringLiteral( quotation ) );
                        } // end leaving

                    /**
                     * Decide which state handles the next character and record in
                     * <code>how</code> whether that character is consumed here,
                     * forwarded unconsumed to the state returned, or discarded.
                     *
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @param first    when true, a " is taken as the opening quote, otherwise as
                     *                 the closing quote; presumably set by the caller for the
                     *                 first character seen in this state (confirm against the
                     *                 tokenizer)
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case QUOTE:
                                // Opening " when first, closing " otherwise. Either way the
                                // " itself is dropped; only a closing " balances the pair.
                                balanced = !first;
                                how = HowToProcess.DISCARD;
                                return first ? IN_QUOTES : IN_TEXT;
                            case EOL:
                                // Treat eol like a missing quote. The text is shown exactly as
                                // written, without a final quote: we have no business
                                // correcting it. It is treated as if it were command text all
                                // along, with lead " intact.
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case SPACE:
                            case SEPARATOR:
                            case PUNCTUATION:
                            case PLAIN:
                            case OTHER:
                            case COLON:
                                // anything else is part of the quotation
                                how = HowToProcess.CONSUME;
                                return IN_QUOTES;
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_QUOTES
        /**
         * in string of separator chars e.g. / \ :
         */
        IN_SEPARATOR
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedSeparators.append( c );
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        String separators = accumulatedSeparators.toString();
                        accumulatedSeparators.setLength( 0 );
                        // treat semicolon separately
                        for ( int i = 0; i < separators.length(); i++ )
                            {
                            char c = separators.charAt( i );
                            if ( c == ';' )
                                {
                                addToken( new Semicolon() );
                                }
                            else
                                {
                                addToken( new Separator( c ) );
                                }
                            }
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case EOL:
                            case OTHER:
                            case PLAIN:
                            case PUNCTUATION:
                            case QUOTE:
                            case SPACE:
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            case COLON:
                            case SEPARATOR:
                                // continue this token made of separators
                                how = HowToProcess.CONSUME;
                                return IN_SEPARATOR;
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_QUOTES
        /**
         * in white space including at end of line.
         */
        IN_WHITESPACE
                    {
                    /**
                     * Count one more space. The caller has already decided that this
                     * character is to be consumed here.
                     *
                     * @param c char to consume; it is only counted, its value is not examined
                     */
                    void consume( char c )
                        {
                        spaceCount += 1;
                        }

                    /**
                     * Called on leaving the state, after the last char is consumed.
                     * Emits the pending run of spaces as one Space token, if there is
                     * any, and clears the space counter.
                     */
                    void leaving()
                        {
                        // Whitespace has ended. We don't attempt to trim trailing blanks,
                        // or to fold blanks into the neighbouring command tokens instead of
                        // giving each run its own token. It is not as bad as it sounds,
                        // it makes life easy for GZIP.
                        if ( spaceCount <= 0 )
                            {
                            spaceCount = 0;
                            return;
                            }
                        addToken( new Space( spaceCount ) );
                        spaceCount = 0;
                        } // end leaving

                    /**
                     * Decide which state handles the next character and record in
                     * <code>how</code> whether that character is consumed here or
                     * forwarded unconsumed to the state returned.
                     *
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @param first    not examined by this state; passed through to the default
                     *                 transition in super.next
                     * @return next state to go in
                     */
                    BatState next( BatCharCategory category,
                                   char nextChar,
                                   boolean first )
                        {
                        switch ( category )
                            {
                            case SPACE:
                                // still in the run of spaces
                                how = HowToProcess.CONSUME;
                                return IN_WHITESPACE;
                            case QUOTE:
                            case SEPARATOR:
                            case PUNCTUATION:
                            case PLAIN:
                            case OTHER:
                            case EOL:
                            case COLON:
                                // whitespace has ended, use the default transition
                                how = HowToProcess.FORWARD;
                                return super.next( category, nextChar, first );
                            default:
                                assert false : "bad state " + category + " " + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_WHITESPACE
        ;

    /**
     * true if want voluminous debugging output
     */
    static final boolean DEBUGGING = false;

    // The fields below are static, so they are shared by all the enum constants.
    // A non-static field would be a separate field in each enum constant.

    /**
     * list of tokens we have parsed out, created with an initial capacity of 10000.
     */
    @SuppressWarnings( { "WeakerAccess" } )
    static final ArrayList<Token> tokens = new ArrayList<>( 10000 );

    /**
     * used to accumulate comments; appended to by IN_COMMENT and emptied when that state is left.
     */
    static final StringBuilder accumulatedComment =
            new StringBuilder( 80 );

    /**
     * accumulates :done style labels; appended to by IN_LABEL and emptied when that state is left.
     */
    static final StringBuilder accumulatedLabel =
            new StringBuilder( 40 );

    /**
     * accumulates a name, e.g. program name, filename, general text, parameter; appended to by
     * IN_TEXT and emptied when that state is left.
     */
    static final StringBuilder accumulatedName =
            new StringBuilder( 50 );

    /**
     * accumulates a string of operators; appended to by IN_OPERATOR and emptied when that state
     * is left.
     */
    static final StringBuilder accumulatedOperators =
            new StringBuilder( 10 );

    /**
     * accumulates something between quotes, without the surrounding quote characters; appended
     * to by IN_QUOTES and emptied when that state is left.
     */
    static final StringBuilder accumulatedQuotation =
            new StringBuilder( 80 );

    /**
     * accumulates strings of / \ : ; appended to by IN_SEPARATOR and emptied when that state
     * is left.
     */
    static final StringBuilder accumulatedSeparators =
            new StringBuilder( 10 );

    /**
     * Complete list of bat language keywords, aka words, all spelled in lower case.
     * Only IN_TEXT need know about it.
     */
    private static final HashSet<String> keywords = new HashSet<>( Arrays
            .asList( "assoc",
                    "at",
                    "attrib",
                    "break",
                    "cacls",
                    "call",
                    "cd",
                    "cdd",
                    "chcp",
                    "chdir",
                    "chkdsk",
                    "chkntfs",
                    "cls",
                    "cmd",
                    "color",
                    "comp",
                    "compact",
                    "convert",
                    "copy",
                    "date",
                    "del",
                    "describe",
                    "dir",
                    "diskcomp",
                    "diskcopy",
                    "do",
                    "doskey",
                    "echo",
                    "else",
                    "endlocal",
                    "erase",
                    "exit",
                    "fc",
                    "find",
                    "findstr",
                    "for",
                    "format",
                    "ftype",
                    "goto",
                    "help",
                    "if",
                    "label",
                    "md",
                    "mkdir",
                    "mode",
                    "more",
                    "move",
                    "not",
                    "off",
                    "on",
                    "path",
                    "pause",
                    "popd",
                    "print",
                    "prompt",
                    "pushd",
                    "rd",
                    "recover",
                    "rem",
                    "remark",
                    "ren",
                    "rename",
                    "replace",
                    "rmdir",
                    "set",
                    "setlocal",
                    "shift",
                    "sort",
                    "start",
                    "subst",
                    // the bat command is "time"; keep the list alphabetical.
                    "time",
                    "title",
                    "tree",
                    "type",
                    "ver",
                    "verify",
                    "vol",
                    "xcopy" ) );

    /**
     * how far we are through parsing the program: the 0-based offset of the char being processed.
     * Deliberately static so that isComing can peek ahead from the current position.
     */
    @SuppressWarnings( { "WeakerAccess" } )
    static int charIndex;

    /**
     * how we plan to process this character, consume, procrastinate to next state, or discard.
     * parse() nulls it before each call to next(), and expects next() to set it.
     */
    static HowToProcess how;

    /**
     * the program or program fragment we are parsing. Set by parse().
     */
    static String program;

    /**
     * the length of the program fragment we are parsing. Set by parse() to program.length().
     */
    static int size;

    /**
     * count of how many spaces encountered. Zeroed by reset().
     */
    static int spaceCount;

    /**
     * count of how many new lines encountered, shared by several states. Zeroed by reset().
     */
    private static int nlCount;
    // declarations
    // methods

    /**
     * Append a token to the end of the list to be rendered, unless the token reports itself useless.
     *
     * @param t the token to record. It is silently dropped when {@code t.isUseless()} is true.
     */
    private static void addToken( Token t )
        {
        if ( t.isUseless() )
            {
            // pointless token, not worth keeping.
            return;
            }
        tokens.add( t );
        }// /method

    /**
     * Normalise whitespace: any char whose category is SPACE is replaced by an ordinary blank.
     * Chars of every other category pass through untouched.
     *
     * @param category category of this char
     * @param c        the char
     *
     * @return ' ' when category is SPACE, otherwise c itself.
     */
    private static char clean( BatCharCategory category, char c )
        {
        return category == BatCharCategory.SPACE ? ' ' : c;
        }// /method

    /**
     * Merge adjacent tokens into single tokens where feasible, and drop blanks that trail at the end of a line.
     * Sweeps over the token list repeatedly until a sweep removes nothing.
     */
    private static void crunch()
        {
        int countBefore;
        do
            {
            countBefore = tokens.size();
            // Work from the tail toward the head. After any removal the surviving token sits
            // at i - 1, so the loop's own i-- re-examines it against its new predecessor.
            for ( int i = countBefore - 1; i >= 1; i-- )
                {
                final Token here = tokens.get( i );
                final Token before = tokens.get( i - 1 );
                if ( here instanceof NL && before instanceof Space )
                    {
                    // blanks just ahead of a newline are trailing blanks: discard them.
                    tokens.remove( i - 1 );
                    continue;
                    }
                if ( here instanceof Space )
                    {
                    final Space blanks = ( Space ) here;
                    if ( blanks.length() <= 10 && before.isCollapsible() )
                        {
                        // fold a short run of blanks into the token ahead of it.
                        before.setText( before.getText() + blanks.getText() );
                        tokens.remove( i );
                        }
                    // a Space is never offered to the general merge below.
                    continue;
                    }
                if ( before.isCollapsible( here ) )
                    {
                    // two compatible tokens become one.
                    before.setText( before.getText() + here.getText() );
                    tokens.remove( i );
                    }
                } // end for
            // go around again only if this sweep shrank the list.
            }
        while ( tokens.size() < countBefore );
        }// /method

    /**
     * Debugging aid: print one line describing the current step of the state machine.
     * A CONSUME step goes to System.out in abbreviated form; any other step goes to System.err
     * with the how value appended.
     *
     * @param theChar  char we are processing
     * @param category category of the char
     * @param first    did we just enter this state
     * @param oldState old state
     * @param state    current state
     * @param newState next state
     * @param how      how to process this char
     */
    private static void dumpState( char theChar,
                                   BatCharCategory category,
                                   boolean first,
                                   BatState oldState,
                                   BatState state,
                                   BatState newState,
                                   HowToProcess how )
        {
        // text common to both forms of the report.
        final String snapshot = theChar
                                + " "
                                + category
                                + " "
                                + first
                                + " o:"
                                + oldState
                                + " s:"
                                + state
                                + " n:"
                                + newState;
        if ( how == HowToProcess.CONSUME )
            {
            /* slightly abbreviated form, black */
            out.println( snapshot );
            return;
            }
        /* everything else in red, with how tacked on */
        err.println( snapshot + " " + how );
        }// /method

    /**
     * Make sure nothing is left accumulated in the buffers from parsing that has not yet been
     * converted to tokens. With assertions enabled, a non-empty buffer fails with a message showing
     * the residue. Every accumulation buffer, including the separators buffer, is emptied afterwards.
     */
    private static void ensureNoLeftovers()
        {
        assert accumulatedComment.length() == 0 : "Comment residual : "
                                                  + accumulatedComment.toString();
        accumulatedComment.setLength( 0 );
        assert accumulatedLabel.length() == 0 : "Label residual : "
                                                + accumulatedLabel.toString();
        accumulatedLabel.setLength( 0 );
        assert accumulatedName.length() == 0 : "Name residual : "
                                               + accumulatedName.toString();
        accumulatedName.setLength( 0 );
        assert accumulatedOperators.length() == 0 : "Operators residual : "
                                                    + accumulatedOperators.toString();
        accumulatedOperators.setLength( 0 );
        assert accumulatedQuotation.length() == 0 : "Quotation residual : "
                                                    + accumulatedQuotation.toString();
        accumulatedQuotation.setLength( 0 );
        // separators are an accumulation buffer like the others, so they get the same check.
        assert accumulatedSeparators.length() == 0 : "Separators residual : "
                                                     + accumulatedSeparators.toString();
        accumulatedSeparators.setLength( 0 );
        }

    /**
     * Is a given string coming up in the stream, starting with the character at charIndex?
     * Compares ignoring case. A match that ends exactly on the last character of the program counts.
     *
     * @param expected string to test if isComing in the stream
     *
     * @return true if this string isComing up, case-insensitive
     */
    private static boolean isComing( String expected )
        {
        final int length = expected.length();
        // Need at least length chars left. <= (not <) so that a match ending exactly
        // at the end of the program is still recognised.
        return charIndex + length <= size
               && program.regionMatches( true /* ignore case */, charIndex, expected, 0, length );
        }// /method

    /**
     * Is a given whole word coming up in the stream, starting with the character at charIndex?
     * Compares ignoring case. The text must match per isComing, and what follows the match must be
     * either the end of the program or a char that is neither a letter nor a digit.
     *
     * @param expectedWord word to test if it is coming up in the stream
     *
     * @return true if this word is coming up, case-insensitive, and is not merely the prefix of a longer word.
     */
    private static boolean isComingWord( String expectedWord )
        {
        if ( isComing( expectedWord ) )
            {
            // offset of the char just past the candidate word.
            final int following = charIndex + expectedWord.length();
            if ( following >= size )
                {
                // nothing follows, so the word is complete.
                return true;
                }
            return !Character.isLetterOrDigit( program.charAt( following ) );
            }
        return false;
        }// /method

    /**
     * Clear out the state machine ready to parse a new program: zero the counters, empty every
     * accumulation buffer and the token list, and rewind charIndex.
     */
    private static void reset()
        {
        nlCount = 0;
        spaceCount = 0;
        accumulatedComment.setLength( 0 );
        accumulatedLabel.setLength( 0 );
        accumulatedName.setLength( 0 );
        accumulatedOperators.setLength( 0 );
        accumulatedQuotation.setLength( 0 );
        // separators too, otherwise residue from an aborted parse would leak into the next one.
        accumulatedSeparators.setLength( 0 );
        tokens.clear();
        how = null;
        charIndex = 0;
        // keeping certain variables local or global is crucial.
        // Don't mess with them without thinking carefully and
        // changing the docs:
        // local: category, first, oldState, state, theChar
        // global: charIndex, how
        // we make these local to discourage accidental snooping or
        // accidentally picking up the static version instead of the parm.
        }// /method

    /**
     * Get rid of leading and trailing NL tokens, then bracket the list with a Start token at the head
     * and a Stop token at the tail. It is easier to handle the NLs here than during parsing.
     */
    private static void trimNLs()
        {
        // strip NLs from the head.
        while ( !tokens.isEmpty() && tokens.get( 0 ) instanceof NL )
            {
            tokens.remove( 0 );
            }
        // strip NLs from the tail, working backwards from the last token.
        for ( int last = tokens.size() - 1;
              last >= 0 && tokens.get( last ) instanceof NL;
              last-- )
            {
            tokens.remove( last );
            }
        // We don't need an NL at either beginning or end.
        // opening tag goes in front of everything
        tokens.add( 0, new Start( "<pre class=\"bat\">" ) );
        // closing tag goes last
        addToken( new Stop( "</pre>" ) );
        }// /method

    /**
     * Consume one character. It has been predecided that you can and will consume it.
     * There is no default: the method is abstract, so each state supplies its own version.
     * parse() invokes it, when next() chose CONSUME, on the state that was current when the char arrived.
     *
     * @param c char to consume
     */
    abstract void consume( char c );// /method

    /**
     * What to do on leaving a state, after its last char is consumed.
     * There is no default: the method is abstract, so each state supplies its own version.
     * parse() invokes it on the old state when a CONSUME or DISCARD changes state, on every FORWARD,
     * and once at the end of the program if the final state has not already been left.
     */
    abstract void leaving();// /method

    /**
     * Default next method: picks the next state purely from the category of the next char.
     * It expects the caller to have already set how to FORWARD.
     *
     * @param category class of next character
     * @param nextChar next character to process
     * @param first    true if this is the first character after we entered this state.
     *
     * @return next BatState, or null (after a failed assertion) for IGNORE or an unexpected category.
     * D o   n o t   m a k e   p r i v a t e ! ! !
     */
    BatState next( BatCharCategory category, char nextChar, boolean first )
        {
        /* generic way to recognise the next state */
        assert how == HowToProcess.FORWARD : "default next used without forwarding";
        final BatState target;
        switch ( category )
            {
            case PLAIN:
                target = IN_TEXT;
                break;
            case SPACE:
                target = IN_WHITESPACE;
                break;
            case QUOTE:
                target = IN_QUOTES;
                break;
            case EOL:
                target = AT_END_OF_LINE;
                break;
            case COLON:
            case SEPARATOR:
                target = IN_SEPARATOR;
                break;
            case OTHER:
            case PUNCTUATION:
                target = IN_OPERATOR;
                break;
            case IGNORE:// should never get this far
            default:
                assert false :
                        "bad state " + category + " " + nextChar;
                target = null;
                break;
            } // end switch
        return target;
        }// /method

    /**
     * Parse program and leave a list of Tokens in tokens ArrayList.
     * Steps: reset the shared static state, run the finite state automaton over every char,
     * leave the final state, check that no accumulation buffer holds residue, crunch adjacent tokens,
     * trim leading/trailing NLs and bracket the list with Start/Stop tokens.
     * All working state lives in static fields shared by every call.
     *
     * @param program the text we are going to parse and eventually render.
     *
     * @return an array of tokens representing the text and how it will be rendered.
     */
    @SuppressWarnings( { "UnusedAssignment" } )
    public static Token[] parse( String program )
        {
        reset();
        BatState.program = program;
        size = program.length();
        /*
         * keeping certain variables local or global is crucial. Don't mess with
         * them without thinking carefully and changing the docs:
         * local: category, first, oldState, state, theChar
         * global: charIndex, how
         * We make these local to discourage accidental snooping or accidentally
         * picking up the static version instead of the parm.
         */
        // where we were
        BatState oldState = AT_END_OF_LINE;
        // where we are
        BatState state = AT_END_OF_LINE;
        // where we will be next
        BatState newState;
        // how is global however, so next can return both a state and how.
        how = null;
        // Note, NO int charIndex !! Don't "repair that".
        // charIndex is a static variable globally known so "isComing" can use it.
        for ( charIndex = 0; charIndex < size; charIndex++ )
            {
            // next char to process
            char theChar = program.charAt( charIndex );
            // decide which general category the char falls in
            final BatCharCategory category = BatCharCategory
                    .categorise( theChar );
            // whitespace chars are normalised to a plain blank before any state sees them.
            theChar = clean( category, theChar );
            // chars categorised IGNORE are skipped entirely; no state ever sees them.
            if ( category != BatCharCategory.IGNORE )
                {
                /*
                 * keep going till some state consumes/discards the character.
                 * Allow up to three forwarding attempts to deal with the
                 * character. Usually we should succeed on the first or second
                 * attempt. We always make at least one trip through.
                 */
                attempts:
                for ( int times = 0; times < 3; times++ )
                    {
                    // first is deliberately local.
                    // It is true when the previous step changed state.
                    final boolean first = state != oldState;
                    /*
                     * crank the state machine one cycle. State should modify
                     * how in addition to returning the new state. A little ugly
                     * but simplest way to return a pair of values: state and
                     * how
                     */
                    how = null;// setting to null ensures not setting it will
                    // be caught.
                    /*
                     * This is the guts of the finite state automaton: decide the
                     * next state
                     */
                    // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
                    newState = state.next( category, theChar, first );
                    // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    if ( DEBUGGING )
                        {
                        dumpState( theChar,
                                category,
                                first,
                                oldState,
                                state,
                                newState,
                                how );
                        }
                    // kick over to the next generation,
                    // we are now in the newState. We make the transition here.
                    // From here on, oldState is the state that decided what to do with the char.
                    oldState = state;
                    state = newState;
                    newState = null;
                    if ( how == null )
                        {
                        throw new NullPointerException( "BatSTate bug: how not set. OldState=" + oldState + " " +
                                                        "newState=" + state + " next() must not be private." );
                        }
                    switch ( how )
                        {
                        case CONSUME:
                            // the deciding state eats the char, then is left if we changed state.
                            oldState.consume( theChar );
                            if ( state != oldState )
                                {
                                oldState.leaving();
                                }
                            // and on to the next char
                            break attempts;
                        case DISCARD:
                            // the char is dropped without any state consuming it.
                            if ( state != oldState )
                                {
                                oldState.leaving();
                                }
                            // and on to the next char
                            break attempts;
                        default:
                            // should never get here
                            assert false :
                                    "BatTokenizer state machine failed to set how variable.";
                            break attempts;
                        case FORWARD:
                            assert state
                                   != oldState :
                                    "BatTokenizer state machine attempted to forward a char to the same state.";
                            oldState.leaving();
                            // we give that character another try with the new
                            // state. No break: we drop out of the switch and go
                            // around the attempts loop again.
                        } // end switch
                    /*
                     * we will only ever get here if we are forwarding. The
                     * other cases leave the loop early.
                     */
                    }
                // end attempts loop
                // we fall out the bottom and land here no matter what
                assert how == HowToProcess.CONSUME || how == HowToProcess
                        .DISCARD :
                        "BatTokenizer state machine failed to consume char in three state forwarding attempts.";
                } // end if ignore
            } // end for each character
        // we must leave the last state if we have not already:
        // when state != oldState, the final step changed state and already called leaving() on the old one.
        if ( state == oldState )
            {
            oldState.leaving();
            }
        // make sure nothing still sitting in accumulation buffer after we have
        // finished parsing the entire program.
        ensureNoLeftovers();
        // collapse tokens into fewer if possible
        crunch();
        trimNLs();
        // convert to vanilla array for even more efficient use in the final
        // Applet.
        return tokens.toArray( new Token[ tokens.size() ] );
        }// /method
    // /methods
    }