/*
 * [BatState.java]
 *
 * Summary: Bat file parser, finite state automaton.
 *
 * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  3.1 2009-04-12 shorter style names, improved highlighting.
 */
/**
 * State machine for BatTokenizer.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.1 2009-04-12 shorter style names, improved highlighting.
 * @since 2004-05-15
 */
package com.mindprod.jprep;

import com.mindprod.jtokens.Gibberish;
import com.mindprod.jtokens.Keyword;
import com.mindprod.jtokens.Label;
import com.mindprod.jtokens.NL;
import com.mindprod.jtokens.Operator;
import com.mindprod.jtokens.Semicolon;
import com.mindprod.jtokens.Separator;
import com.mindprod.jtokens.Space;
import com.mindprod.jtokens.Start;
import com.mindprod.jtokens.Stop;
import com.mindprod.jtokens.StringLiteral;
import com.mindprod.jtokens.Token;
import com.mindprod.jtokens.bat.BatComment;
import com.mindprod.jtokens.bat.BatText;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;

import static java.lang.System.*;

/**
 * Bat file parser, finite state automaton.
 * <p>
 * Each enum constant is one state. For every input character the current state's
 * {@link #next} decides the following state and records in the static {@link #how}
 * whether the character is consumed by the current state, discarded, or forwarded
 * to the following state for another attempt.
 * <p>
 * All parsing state lives in static fields shared by every constant, so only one
 * {@link #parse} can be in progress at a time; this class is not thread-safe.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.1 2009-04-12 shorter style names, improved highlighting.
 * @since 2004-05-02
 */
@SuppressWarnings( { "NestedAssignment", "UnnecessaryContinue", "WeakerAccess", "EnumeratedConstantNamingConvention" } )
public enum BatState {
    /**
     * Somebody noticed an EOL and forwarded it to us. The EOL will be the first character we consume.
     * This is also the state the parser starts in.
     */
    AT_END_OF_LINE {
        /**
         * Count one more line end.
         *
         * @param c char to consume, always an EOL.
         */
        @Override
        void consume( char c ) {
            nlCount++;
        }

        /**
         * Emit the pending line ends as a single NL token, collapsing runs to at most three.
         */
        @Override
        void leaving() {
            if ( nlCount > 3 ) {
                nlCount = 3;
            }
            if ( nlCount > 0 ) {
                addToken( new NL( nlCount ) );
                nlCount = 0;
                spaceCount = 0;
            }
        }

        /**
         * Decide the state for the first character of a line. Only here are
         * <code>rem</code>, <code>remark</code>, <code>::</code> and <code>:label</code> recognised.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case OTHER:
                case PUNCTUATION:
                case SEPARATOR:
                case QUOTE:
                case SPACE:
                    how = HowToProcess.FORWARD;
                    return super.next( category, nextChar, first );

                case PLAIN:
                    how = HowToProcess.FORWARD;
                    if ( isComingWord( "rem" ) || isComingWord( "remark" ) ) {
                        return IN_COMMENT;
                    }
                    return IN_TEXT;

                case COLON:
                    how = HowToProcess.FORWARD;
                    // :: starts a comment, a single : starts a label
                    if ( isComing( "::" ) ) {
                        return IN_COMMENT;
                    }
                    return IN_LABEL;

                case EOL:
                    how = HowToProcess.CONSUME;
                    return AT_END_OF_LINE;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end AT_END_OF_LINE

    /**
     * In name, keyword or identifier.
     */
    IN_TEXT {
        /**
         * Append the char to the name being accumulated.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            accumulatedName.append( c );
        }

        /**
         * Emit the accumulated name as a Keyword if it is in the keyword table, otherwise as BatText.
         */
        @Override
        void leaving() {
            final String name = accumulatedName.toString();
            accumulatedName.setLength( 0 );
            if ( name.length() > 0 ) {
                // Locale.ROOT so the lookup does not depend on the default locale
                // (e.g. Turkish lower-cases "IF" to a dotless i, which would miss the table).
                if ( keywords.contains( name.toLowerCase( java.util.Locale.ROOT ).trim() ) ) {
                    addToken( new Keyword( name ) );
                } else {
                    // bat command or other ordinary text
                    addToken( new BatText( name ) );
                }
            }
        }

        /**
         * Stay in text for PLAIN chars, hand anything else to the default dispatch.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case COLON: // just punctuation except at start of line
                case EOL:
                case OTHER:
                case PUNCTUATION:
                case SEPARATOR:
                case SPACE:
                case QUOTE:
                    how = HowToProcess.FORWARD;
                    return super.next( category, nextChar, first );

                case PLAIN:
                    how = HowToProcess.CONSUME;
                    return IN_TEXT;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_TEXT

    /**
     * In a comment introduced by ::, rem or remark.
     */
    IN_COMMENT {
        /**
         * Append the char to the comment being accumulated.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            accumulatedComment.append( c );
        }

        /**
         * Emit the accumulated comment. The token text includes the leading :: / rem / remark.
         */
        @Override
        void leaving() {
            final String comment = accumulatedComment.toString();
            accumulatedComment.setLength( 0 );
            if ( comment.length() != 0 ) {
                addToken( new BatComment( comment ) );
            }
        }

        /**
         * Everything up to the end of line belongs to the comment.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case COLON:
                case OTHER:
                case PLAIN:
                case PUNCTUATION:
                case SEPARATOR:
                case QUOTE:
                case SPACE:
                    // Spaces are not split out as separate Space tokens inside a comment,
                    // in contrast to commands; they are just ordinary comment chars.
                    how = HowToProcess.CONSUME;
                    return IN_COMMENT;

                case EOL:
                    how = HowToProcess.FORWARD;
                    return AT_END_OF_LINE;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_COMMENT

    /**
     * In a label, e.g. :here
     */
    IN_LABEL {
        /**
         * Append the char to the label being accumulated.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            accumulatedLabel.append( c );
        }

        /**
         * Emit the accumulated label, including its leading colon.
         */
        @Override
        void leaving() {
            final String label = accumulatedLabel.toString();
            accumulatedLabel.setLength( 0 );
            if ( label.length() != 0 ) {
                addToken( new Label( label ) );
            }
        }

        /**
         * Consume the leading colon and the PLAIN chars after it; anything else ends the label.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case COLON:
                    if ( first ) {
                        // the colon that introduces the label is part of the label
                        how = HowToProcess.CONSUME;
                        return IN_LABEL;
                    }
                    // a later colon is just a separator
                    how = HowToProcess.FORWARD;
                    return IN_SEPARATOR;

                case EOL:
                case OTHER:
                case PUNCTUATION:
                case SEPARATOR:
                case QUOTE:
                case SPACE:
                    // hit end of label
                    how = HowToProcess.FORWARD;
                    return super.next( category, nextChar, first );

                case PLAIN:
                    // stay in label
                    how = HowToProcess.CONSUME;
                    return IN_LABEL;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_LABEL

    /**
     * In a string of arithmetic operators + - () etc. but not / : \
     */
    IN_OPERATOR {
        /**
         * Append the char to the run of operators being accumulated.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            accumulatedOperators.append( c );
        }

        /**
         * Emit the accumulated operators. Each ; becomes its own Semicolon token;
         * the pieces between semicolons become Operator tokens.
         */
        @Override
        void leaving() {
            String operators = accumulatedOperators.toString();
            accumulatedOperators.setLength( 0 );
            int place;
            while ( ( place = operators.indexOf( ';' ) ) >= 0 ) {
                // the piece before the ; is possibly empty; addToken filters useless tokens
                addToken( new Operator( operators.substring( 0, place ) ) );
                addToken( new Semicolon() );
                operators = operators.substring( place + 1 );
            }
            // deal with whatever is left over after the last ;
            addToken( new Operator( operators ) );
        }

        /**
         * Stay in the operator run for OTHER and PUNCTUATION chars, hand anything else to the default dispatch.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case EOL:
                case PLAIN:
                case QUOTE:
                case SPACE:
                case COLON:
                case SEPARATOR:
                    how = HowToProcess.FORWARD;
                    return super.next( category, nextChar, first );

                case OTHER:
                case PUNCTUATION:
                    // continue this token made of operators
                    how = HowToProcess.CONSUME;
                    return IN_OPERATOR;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_OPERATOR

    /**
     * Inside " ... ". No mechanism exists in the bat language to put a " inside a quoted string.
     * We may also be sitting on the opening or closing " itself.
     */
    IN_QUOTES {
        /**
         * true if "..." are balanced, false if hit EOL too soon.
         */
        boolean balanced = false;

        /**
         * Append the char to the quotation being accumulated. The surrounding quotes are discarded, never consumed.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            accumulatedQuotation.append( c );
        }

        /**
         * Emit the accumulated quotation: a StringLiteral when the closing " was seen,
         * otherwise Gibberish with the opening " put back.
         */
        @Override
        void leaving() {
            // quotation has been stripped of lead and trail " as the token wants it.
            final String quotation = accumulatedQuotation.toString();
            accumulatedQuotation.setLength( 0 );
            if ( balanced ) {
                // add even if 0 length; surrounding quotes are generated as needed.
                addToken( new StringLiteral( quotation ) );
            } else {
                // Document had unbalanced " ...", missing trailing ".
                // We treat it not as a literal, but as ordinary text with its lead " intact.
                // NOTE(review): in the collapsed copy under review it is ambiguous whether this
                // call was live or commented out; it is kept live because Gibberish is imported
                // and the comments say the text is preserved. Confirm against the canonical source.
                addToken( new Gibberish( "\"" + quotation ) );
                // note that we have handled this anomaly
                balanced = true;
            }
        }

        /**
         * Discard the opening and closing quotes, consume everything between them,
         * and bail out to AT_END_OF_LINE if the line ends before the closing quote.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case COLON:
                case OTHER:
                case PLAIN:
                case PUNCTUATION:
                case SEPARATOR:
                case SPACE:
                    how = HowToProcess.CONSUME;
                    return IN_QUOTES;

                case QUOTE:
                    how = HowToProcess.DISCARD;
                    if ( first ) {
                        // opening quote
                        balanced = false;
                        return IN_QUOTES;
                    }
                    // closing quote
                    balanced = true;
                    return IN_TEXT;

                case EOL:
                    // Treat eol like a missing quote. Display exactly as written in the original
                    // text, without a final quote; we have no business correcting it.
                    how = HowToProcess.FORWARD;
                    return AT_END_OF_LINE;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_QUOTES

    /**
     * In a string of separator chars e.g. / \ :
     */
    IN_SEPARATOR {
        /**
         * Append the char to the run of separators being accumulated.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            accumulatedSeparators.append( c );
        }

        /**
         * Emit one token per accumulated separator char: Semicolon for ; and Separator for the rest.
         */
        @Override
        void leaving() {
            final String separators = accumulatedSeparators.toString();
            accumulatedSeparators.setLength( 0 );
            for ( int i = 0; i < separators.length(); i++ ) {
                final char c = separators.charAt( i );
                if ( c == ';' ) {
                    addToken( new Semicolon() );
                } else {
                    addToken( new Separator( c ) );
                }
            }
        }

        /**
         * Stay in the separator run for COLON and SEPARATOR chars, hand anything else to the default dispatch.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case EOL:
                case OTHER:
                case PLAIN:
                case PUNCTUATION:
                case QUOTE:
                case SPACE:
                    how = HowToProcess.FORWARD;
                    return super.next( category, nextChar, first );

                case COLON:
                case SEPARATOR:
                    // continue this token made of separators
                    how = HowToProcess.CONSUME;
                    return IN_SEPARATOR;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_SEPARATOR

    /**
     * In white space including at end of line.
     */
    IN_WHITESPACE {
        /**
         * Count one more space.
         *
         * @param c char to consume
         */
        @Override
        void consume( char c ) {
            spaceCount++;
        }

        /**
         * Emit the pending spaces as a single Space token.
         */
        @Override
        void leaving() {
            // We don't attempt here to trim trailing blanks or to embed blanks in
            // neighbouring tokens; crunch() does that after parsing.
            if ( spaceCount > 0 ) {
                addToken( new Space( spaceCount ) );
            }
            spaceCount = 0;
        }

        /**
         * Stay in white space for SPACE chars, hand anything else to the default dispatch.
         *
         * @param category category of character
         * @param nextChar character in the stream we are processing
         * @param first    true if this is the first character after we entered this state
         *
         * @return next state to go in
         */
        @Override
        BatState next( BatCharCategory category, char nextChar, boolean first ) {
            switch ( category ) {
                case COLON:
                case EOL:
                case OTHER:
                case PLAIN:
                case PUNCTUATION:
                case SEPARATOR:
                case QUOTE:
                    how = HowToProcess.FORWARD;
                    return super.next( category, nextChar, first );

                case SPACE:
                    how = HowToProcess.CONSUME;
                    return IN_WHITESPACE;

                default:
                    assert false : "bad state " + category + " " + nextChar;
                    return null;
            }
        }
    }, // end IN_WHITESPACE
    ;

    // declarations
    // statics are shared, common to all enum constants.
    // others are a separate field in each enum constant.

    /**
     * true if want voluminous debugging output
     */
    static final boolean DEBUGGING = false;

    /**
     * list of tokens we have parsed out.
     * NOTE(review): the type argument was missing in the copy under review (raw ArrayList),
     * which does not type-check against the Token-typed reads below; restored as Token.
     */
    @SuppressWarnings( { "WeakerAccess" } )
    static final ArrayList<Token> tokens = new ArrayList<>( 10000 );

    /**
     * used to accumulate comments
     */
    static final StringBuilder accumulatedComment = new StringBuilder( 80 );

    /**
     * accumulates :done style labels
     */
    static final StringBuilder accumulatedLabel = new StringBuilder( 40 );

    /**
     * accumulates a name, e.g. program name, filename, general text, parameter
     */
    static final StringBuilder accumulatedName = new StringBuilder( 50 );

    /**
     * accumulates a string of operators
     */
    static final StringBuilder accumulatedOperators = new StringBuilder( 10 );

    /**
     * accumulates something between quotes
     */
    static final StringBuilder accumulatedQuotation = new StringBuilder( 80 );

    /**
     * accumulates strings of / \ : ;
     */
    static final StringBuilder accumulatedSeparators = new StringBuilder( 10 );

    /**
     * complete list of bat language keywords, all lower case. Only IN_TEXT need know about it.
     * Lookups are done on the lower-cased name, so every entry must be lower case.
     */
    private static final HashSet<String> keywords = new HashSet<>( Arrays.asList(
            "assoc", "at", "attrib", "break", "cacls", "call", "cd", "cdd", "chcp", "chdir",
            "chkdsk", "chkntfs", "cls", "cmd", "color", "comp", "compact", "convert", "copy",
            "date", "del", "describe", "dir", "diskcomp", "diskcopy", "do", "doskey", "echo",
            "else", "endlocal", "erase", "exit", "fc", "find", "findstr", "for", "format",
            "ftype", "goto", "help", "if", "label", "md", "mkdir", "mode", "more", "move",
            "not", "off", "on", "path", "pause", "popd", "print", "prompt", "pushd", "rd",
            "recover", "rem", "remark", "ren", "rename", "replace", "rmdir", "set", "setlocal",
            "shift", "sort", "start", "subst",
            // was "elapsedTime", which could never match a lower-cased lookup;
            // the time command belongs in this alphabetical slot.
            "time",
            "title", "tree", "type", "ver", "verify", "vol", "xcopy" ) );

    /**
     * how far we are through parsing the program
     */
    @SuppressWarnings( { "WeakerAccess" } )
    static int charIndex;

    /**
     * how we plan to process this character, consume, procrastinate to next state, or discard
     */
    static HowToProcess how;

    /**
     * the program or program fragment we are parsing
     */
    static String program;

    /**
     * the length of the program fragment we are parsing.
     */
    static int size;

    /**
     * count of how many spaces encountered.
     */
    static int spaceCount;

    /**
     * count of how many new lines encountered, shared by several states
     */
    private static int nlCount;
    // /declarations

    // methods

    /**
     * Add a token to the end of the list to be rendered.
     *
     * @param t a token. Tokens reporting themselves useless are not added.
     */
    private static void addToken( Token t ) {
        if ( !t.isUseless() ) {
            tokens.add( t );
        }
    }// /method

    /**
     * Replace junk chars with something that won't cause trouble.
     *
     * @param category category of this char
     * @param c        the char
     *
     * @return a plain space for any char categorised as SPACE (e.g. tab), otherwise c unchanged.
     */
    private static char clean( BatCharCategory category, char c ) {
        return category == BatCharCategory.SPACE ? ' ' : c;
    }// /method

    /**
     * Crunch multiple tokens into a single token where feasible.
     * Sweeps the token list from the end towards the front, repeating until a sweep removes nothing.
     */
    private static void crunch() {
        int sizeBeforeSweep;
        do {
            sizeBeforeSweep = tokens.size();
            // After any removal the loop's own i-- lands on the surviving token, so it is
            // compared against its new predecessor on the next iteration.
            for ( int i = sizeBeforeSweep - 1; i >= 1; i-- ) {
                final Token current = tokens.get( i );
                final Token prev = tokens.get( i - 1 );
                if ( current instanceof NL && prev instanceof Space ) {
                    // trim trailing blanks on line
                    tokens.remove( i - 1 );
                } else if ( current instanceof Space ) {
                    final Space sp = ( Space ) current;
                    if ( sp.length() <= 10 && prev.isCollapsible() ) {
                        // combine short run of spaces into previous token
                        prev.setText( prev.getText() + sp.getText() );
                        tokens.remove( i );
                    }
                } else if ( prev.isCollapsible( current ) ) {
                    // combine two tokens into one
                    prev.setText( prev.getText() + current.getText() );
                    tokens.remove( i );
                }
            }
            // keep going while it is still finding something to crunch
        } while ( tokens.size() < sizeBeforeSweep );
    }// /method

    /**
     * Debugging dump of system state. Consumed chars go to stdout, everything else to stderr.
     *
     * @param theChar  char we are processing
     * @param category category of the char
     * @param first    did we just enter this state
     * @param oldState old state
     * @param state    current state
     * @param newState next state
     * @param how      how to process this char
     */
    private static void dumpState( char theChar,
                                   BatCharCategory category,
                                   boolean first,
                                   BatState oldState,
                                   BatState state,
                                   BatState newState,
                                   HowToProcess how ) {
        final String common = theChar + " " + category + " " + first
                              + " o:" + oldState + " s:" + state + " n:" + newState;
        if ( how == HowToProcess.CONSUME ) {
            /* use slightly abbreviated form */
            out.println( common );
        } else {
            err.println( common + " " + how );
        }
    }// /method

    /**
     * Make sure nothing is left accumulated in buffers from parsing not yet converted to tokens.
     * With assertions enabled a residue is an error; either way every buffer ends up empty.
     */
    private static void ensureNoLeftovers() {
        expectEmpty( "Comment", accumulatedComment );
        expectEmpty( "Label", accumulatedLabel );
        expectEmpty( "Name", accumulatedName );
        expectEmpty( "Operators", accumulatedOperators );
        expectEmpty( "Quotation", accumulatedQuotation );
        expectEmpty( "Separators", accumulatedSeparators );
    }// /method

    /**
     * Assert that one accumulation buffer is empty, then empty it regardless.
     *
     * @param what   name of the buffer for the assertion message
     * @param buffer the accumulation buffer to check and clear
     */
    private static void expectEmpty( String what, StringBuilder buffer ) {
        assert buffer.length() == 0 : what + " residual : " + buffer.toString();
        buffer.setLength( 0 );
    }// /method

    /**
     * Is a given string coming up in the stream starting with this character. Compares ignoring case.
     *
     * @param expected string to test if is coming in the stream
     *
     * @return true if this string is coming up at charIndex, case-insensitive. A match that ends
     *         exactly at the end of the program counts.
     */
    private static boolean isComing( String expected ) {
        // regionMatches returns false rather than throwing when fewer than
        // expected.length() chars remain, so no separate bounds check is needed.
        return program.regionMatches( true, charIndex, expected, 0, expected.length() );
    }// /method

    /**
     * Is a given word coming up in the stream starting with this character. Compares ignoring case.
     *
     * @param expectedWord word to test if is coming up in the stream
     *
     * @return true if this word is coming up, case-insensitive, and is followed by the end of the
     *         program or by a char that is not a letter or digit.
     */
    private static boolean isComingWord( String expectedWord ) {
        if ( !isComing( expectedWord ) ) {
            return false;
        }
        final int index = charIndex + expectedWord.length();
        return index >= size || !Character.isLetterOrDigit( program.charAt( index ) );
    }// /method

    /**
     * Clear out the state machine ready to parse a new program.
     */
    private static void reset() {
        nlCount = 0;
        spaceCount = 0;
        accumulatedComment.setLength( 0 );
        accumulatedLabel.setLength( 0 );
        accumulatedName.setLength( 0 );
        accumulatedOperators.setLength( 0 );
        accumulatedQuotation.setLength( 0 );
        accumulatedSeparators.setLength( 0 );
        tokens.clear();
        how = null;
        charIndex = 0;
        // keeping certain variables local or global is crucial.
        // Don't mess with them without thinking carefully and
        // changing the docs:
        // local: category, first, oldState, state, theChar
        // global: charIndex, how
        // we make these local to discourage accidental snooping or
        // accidentally picking up the static version instead of the parm.
    }// /method

    /**
     * Get rid of leading and trailing NL tokens, then bracket the list with Start and Stop tokens.
     * It is easier to handle it here than during parsing.
     */
    private static void trimNLs() {
        // remove leading NLs.
        while ( !tokens.isEmpty() && ( tokens.get( 0 ) instanceof NL ) ) {
            tokens.remove( 0 );
        }
        // remove trailing NLs
        int count;
        while ( ( count = tokens.size() ) > 0 && ( tokens.get( count - 1 ) instanceof NL ) ) {
            tokens.remove( count - 1 );
        }
        // We don't need an NL at either beginning or end.
        // NOTE(review): in the copy under review both literals below were damaged (each string
        // was split across a line break, consistent with an HTML tag having been stripped from
        // the text). "<pre>" and "</pre>" are reconstructions; confirm against the canonical source.
        // insert at the beginning
        tokens.add( 0, new Start( "<pre>" ) );
        // add to end
        addToken( new Stop( "</pre>" ) );
    }// /method

    /**
     * Consume one character. It has been predecided that you can and will consume it.
     *
     * @param c char to consume
     */
    abstract void consume( char c );// /method

    /**
     * What to do on leaving state, after last char is consumed.
     */
    abstract void leaving();// /method

    /**
     * Default next method determines the next state based only on the category of the next char.
     * States call it via super.next after setting how to FORWARD. Implementations must set the
     * static how in addition to returning the state.
     *
     * @param category class of next character
     * @param nextChar next character to process
     * @param first    true if this is the first character after we entered this state.
     *
     * @return next BatState
     * Do not make private!!! The enum constants must be able to override it.
     */
    BatState next( BatCharCategory category, char nextChar, boolean first ) {
        /* default way to recognise next state */
        assert how == HowToProcess.FORWARD : "default next used without forwarding";
        switch ( category ) {
            case COLON:
            case SEPARATOR:
                return IN_SEPARATOR;

            case EOL:
                return AT_END_OF_LINE;

            case OTHER:
            case PUNCTUATION:
                return IN_OPERATOR;

            case PLAIN:
                return IN_TEXT;

            case QUOTE:
                return IN_QUOTES;

            case SPACE:
                return IN_WHITESPACE;

            case IGNORE: // should never get this far
            default:
                assert false : "bad state " + category + " " + nextChar;
                return null;
        }
    }// /method

    /**
     * Parse program and leave a list of Tokens in tokens ArrayList.
     * Uses shared static state, so calls must not overlap.
     *
     * @param program the text we are going to parse and eventually render.
     *
     * @return an array of tokens representing the text and how it will be rendered.
     * @throws NullPointerException if program is null, or if a state's next() failed to set how.
     */
    @SuppressWarnings( { "UnusedAssignment" } )
    public static Token[] parse( String program ) {
        reset();
        BatState.program = program;
        size = program.length();
        /*
         * keeping certain variables local or global is crucial. Don't mess with
         * them without thinking carefully and changing the docs:
         * local: category, first, oldState, state, theChar
         * global: charIndex, how
         * we make these local to discourage accidental snooping or accidentally
         * picking up the static version instead of the parm.
         */
        // where we were
        BatState oldState = AT_END_OF_LINE;
        // where we are
        BatState state = AT_END_OF_LINE;
        // where we will be next
        BatState newState;
        // how is global however, so next can return both a state and how.
        how = null;
        // Note, NO int charIndex !! Don't "repair that".
        // charIndex is a static variable globally known so "isComing" can use it.
        for ( charIndex = 0; charIndex < size; charIndex++ ) {
            // next char to process
            char theChar = program.charAt( charIndex );
            // decide which general category the char falls in
            final BatCharCategory category = BatCharCategory.categorise( theChar );
            theChar = clean( category, theChar );
            if ( category != BatCharCategory.IGNORE ) {
                /*
                 * keep going till some state consumes/discards the character.
                 * Allow up to three forwarding attempts to deal with the
                 * character. Usually we should succeed on the first or second
                 * attempt. We always make at least one trip through.
                 */
                attempts:
                for ( int times = 0; times < 3; times++ ) {
                    // first is deliberately local
                    final boolean first = state != oldState;
                    /*
                     * crank the state machine one cycle. State should modify
                     * how in addition to returning the new state. A little ugly
                     * but simplest way to return a pair of values: state and how.
                     * Setting to null ensures not setting it will be caught.
                     */
                    how = null;
                    // This is the guts of the finite state automaton: decide the next state.
                    newState = state.next( category, theChar, first );
                    if ( DEBUGGING ) {
                        dumpState( theChar, category, first, oldState, state, newState, how );
                    }
                    // kick over to the next generation,
                    // we are now in the newState. We make the transition here.
                    oldState = state;
                    state = newState;
                    newState = null;
                    if ( how == null ) {
                        throw new NullPointerException( "BatSTate bug: how not set. OldState="
                                                        + oldState
                                                        + " "
                                                        + "newState="
                                                        + state
                                                        + " next() must not be private." );
                    }
                    switch ( how ) {
                        case CONSUME:
                            // the state we were in takes the char, then we leave it if we moved on
                            oldState.consume( theChar );
                            if ( state != oldState ) {
                                oldState.leaving();
                            }
                            // and on to the next char
                            break attempts;

                        case DISCARD:
                            if ( state != oldState ) {
                                oldState.leaving();
                            }
                            // and on to the next char
                            break attempts;

                        case FORWARD:
                            assert state != oldState : "BatTokenizer state machine attempted to forward a char to the same state.";
                            oldState.leaving();
                            // we give that character another try with the new state
                            break;

                        default:
                            // should never get here
                            assert false : "BatTokenizer state machine failed to set how variable.";
                            break attempts;
                    } // end switch
                    /*
                     * we will only ever get here if we are forwarding. The
                     * other cases leave the loop early.
                     */
                } // end attempts loop
                // we fall out the bottom and land here no matter what
                assert how == HowToProcess.CONSUME || how == HowToProcess.DISCARD : "BatTokenizer state machine failed to consume char in three state forwarding attempts.";
            } // end if ignore
        } // end for each character
        // we must leave the last state if we have not already:
        if ( state == oldState ) {
            oldState.leaving();
        }
        // make sure nothing still sitting in accumulation buffer after we have
        // finished parsing the entire program.
        ensureNoLeftovers();
        // collapse tokens into fewer if possible
        crunch();
        trimNLs();
        // convert to vanilla array for even more efficient use in the final Applet.
        return tokens.toArray( new Token[ tokens.size() ] );
    }// /method
    // /methods
}