/* * [IniState.java] * * Summary: Parser for ini files to colourise them. * * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 3.1 2009-04-12 shorter style names, improved highlighting. */ /** * State machine for PropTokenizer. * * @author Roedy Green, Canadian Mind Products * @version 3.1 2009-04-12 shorter style names, improved highlighting. * @since 2004-05-15 */ package com.mindprod.jprep; import com.mindprod.jtokens.NL; import com.mindprod.jtokens.Operator; import com.mindprod.jtokens.Start; import com.mindprod.jtokens.Stop; import com.mindprod.jtokens.Token; import com.mindprod.jtokens.ini.IniComment; import com.mindprod.jtokens.ini.IniKey; import com.mindprod.jtokens.ini.IniSection; import com.mindprod.jtokens.ini.IniValue; import java.util.ArrayList; import static java.lang.System.*; /** * Parser for ini files to colourise them. * * @author Roedy Green, Canadian Mind Products * @version 3.1 2009-04-12 shorter style names, improved highlighting. * @since 2005-12-22 */ @SuppressWarnings( { "NestedAssignment", "UnnecessaryContinue", "EnumeratedConstantNamingConvention", "WeakerAccess" } ) public enum IniState { /** * somebody noticed an EOL and forwarded it to us. The EOL will be the first character we consume. */ AT_END_OF_LINE { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { nlCount++; } /** * what to do on leaving state, after last char is consumed. */ void leaving() { // emit pending nls if ( nlCount > 3 ) { nlCount = 3; } if ( nlCount > 0 ) { addToken( new NL( nlCount ) ); nlCount = 0; } } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_OPEN: how = HowToProcess.FORWARD; return IN_SECTION; case BRACKET_CLOSE: case EQUALS: case ORDINARY: case QUOTE: how = HowToProcess.FORWARD; return IN_KEYWORD; case EOL: how = HowToProcess.CONSUME; return AT_END_OF_LINE; case START_COMMENT: how = HowToProcess.FORWARD; return IN_COMMENT; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next }, // end AT_END_OF_LINE /** * in comment # ... */ IN_COMMENT { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { // includes lead ; accumulatedComment.append( c ); } /** * what to do on leaving state, after last char is consumed. */ void leaving() { String comment = accumulatedComment.toString(); accumulatedComment.setLength( 0 ); if ( comment.length() != 0 ) { addToken( new IniComment( comment ) ); } } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_CLOSE: case BRACKET_OPEN: case EQUALS: case ORDINARY: case START_COMMENT: case QUOTE: how = HowToProcess.CONSUME; return IN_COMMENT; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next }, // end IN_KEYWORD /** * keyword= */ IN_KEYWORD { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { accumulatedKeyword.append( c ); } /** * what to do on leaving state, after last char is consumed. */ void leaving() { String name = accumulatedKeyword.toString(); accumulatedKeyword.setLength( 0 ); if ( name.length() > 0 ) { addToken( new IniKey( name ) ); } // end if } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_CLOSE: case ORDINARY: how = HowToProcess.CONSUME; return IN_KEYWORD; case BRACKET_OPEN: how = HowToProcess.FORWARD; return IN_SECTION; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case EQUALS: leaving(); addToken( new Operator( '=' ) ); how = HowToProcess.DISCARD; return IN_VALUE; case QUOTE: how = HowToProcess.CONSUME; return IN_KEYWORD_QUOTES; case START_COMMENT: how = HowToProcess.FORWARD; return IN_COMMENT; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next // end IN_KEYWORD }, /** * "keyword" */ IN_KEYWORD_QUOTES { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { accumulatedKeyword.append( c ); } /** * what to do on leaving state, after last char is consumed. */ void leaving() { String name = accumulatedKeyword.toString(); accumulatedKeyword.setLength( 0 ); if ( name.length() > 0 ) { addToken( new IniKey( name ) ); } // end if } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_CLOSE: case BRACKET_OPEN: case EQUALS: case ORDINARY: case START_COMMENT: how = HowToProcess.CONSUME; return IN_KEYWORD_QUOTES; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case QUOTE: how = HowToProcess.CONSUME; return IN_KEYWORD; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next // end IN_KEYWORD_QUOTES }, /** * inside [ssss] */ IN_SECTION { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { accumulatedSection.append( c ); } /** * what to do on leaving state, after last char is consumed. */ void leaving() { String name = accumulatedSection.toString(); accumulatedSection.setLength( 0 ); if ( name.length() > 0 ) { addToken( new IniSection( name ) ); } // end if } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_CLOSE: how = HowToProcess.CONSUME; return IN_KEYWORD; case BRACKET_OPEN: case EQUALS: case ORDINARY: how = HowToProcess.CONSUME; return IN_SECTION; case QUOTE: how = HowToProcess.CONSUME; return IN_SECTION_QUOTES; case START_COMMENT: how = HowToProcess.FORWARD; return IN_COMMENT; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next // end IN_SECTION }, /** * inside [ssss] */ IN_SECTION_QUOTES { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { accumulatedSection.append( c ); } /** * what to do on leaving state, after last char is consumed. */ void leaving() { String name = accumulatedSection.toString(); accumulatedSection.setLength( 0 ); if ( name.length() > 0 ) { addToken( new IniSection( name ) ); } // end if } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_CLOSE: case BRACKET_OPEN: case EQUALS: case ORDINARY: case START_COMMENT: how = HowToProcess.CONSUME; return IN_SECTION_QUOTES; case QUOTE: how = HowToProcess.CONSUME; return IN_SECTION; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next // end IN_SECTION_QUOTES }, /** * keyword=value */ IN_VALUE { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { accumulatedValue.append( c ); } /** * what to do on leaving state, after last char is consumed. */ void leaving() { String name = accumulatedValue.toString(); accumulatedValue.setLength( 0 ); if ( name.length() > 0 ) { addToken( new IniValue( name ) ); } // end if } // end leaving /** * Figure out what state we should go in after we process this * character and whether we can consume it. We must set consume * = false if we cannot consume the character and have to * postpone processing to the next state. * @param category category of character * @param nextChar character in the stream we are processing * @return next state to go in */ IniState next( IniCharCategory category, char nextChar ) { switch ( category ) { case BRACKET_CLOSE: case BRACKET_OPEN: case ORDINARY: case EQUALS: case QUOTE: how = HowToProcess.CONSUME; return IN_VALUE; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case START_COMMENT: how = HowToProcess.FORWARD; return IN_COMMENT; case IGNORE: default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } // end next // end IN_VALUE },; /** * true if want voluminous debugging output */ static final boolean DEBUGGING = false; /** * list of tokens we have parsed out. */ private static final ArrayList tokens = new ArrayList<>( 10000 ); // statics are shared common to all enum constants. // others are a separate field in each enum constant. /** * used to accumulate comments */ private static final StringBuilder accumulatedComment = new StringBuilder( 100 ); /** * accumulates left hand side of equal sign */ private static final StringBuilder accumulatedKeyword = new StringBuilder( 80 ); /** * accumulates [section] */ private static final StringBuilder accumulatedSection = new StringBuilder( 100 ); /** * accumulates right hand side of equal sign */ private static final StringBuilder accumulatedValue = new StringBuilder( 100 ); /** * how far we are through parsing the program */ static int charIndex; /** * count of how many new lines encountered, shared by several states */ static int nlCount; /** * the length of the program fragment we are parsing. */ static int size; /** * how we plan to process this character, consume, procrastinate to next state, or discard */ private static HowToProcess how; /** * add a token to the end of the list to be rendered. * * @param t a token. Useless tokens will be not be added. */ private static void addToken( Token t ) { // check out token for validity, if pointless, don't bother adding it. if ( !t.isUseless() ) { tokens.add( t ); } } /** * crunch multiple tokens into a single token where feasible. */ private static void crunch() { int size; do { size = tokens.size(); for ( int i = size - 1; i >= 1; i-- ) { // this version does not deal with Space tokens. Token current = tokens.get( i ); Token prev = tokens.get( i - 1 ); if ( prev.isCollapsible( current ) ) { // combine two tokens into one prev.setText( prev.getText() + current.getText() ); tokens.remove( i ); // don't i--. combined token will be compared with its // predecessor } } // end for // keep going while it is still finding something to crunch } while ( tokens.size() < size ); } /** * debugging dump system state * * @param theChar char we are processing * @param category category of the char * @param first did we just enter this state * @param oldState old state * @param state current state * @param newState next state * @param how do we consume, forward or discard this character. */ private static void dumpState( char theChar, IniCharCategory category, boolean first, IniState oldState, IniState state, IniState newState, HowToProcess how ) { if ( how == HowToProcess.CONSUME ) { /* use slightly abbreviated form, black */ out.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState ); } else {/* in red */ err.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState + " " + how ); } } /** * Make sure nothing there is left accumulated in buffers from parsing not yet converted to tokens */ private static void ensureNoLeftovers() { assert accumulatedComment.length() == 0 : "comment residual : " + accumulatedComment.toString(); accumulatedComment.setLength( 0 ); assert accumulatedKeyword.length() == 0 : "keyword residual : " + accumulatedKeyword.toString(); accumulatedKeyword.setLength( 0 ); assert accumulatedSection.length() == 0 : "section residual : " + accumulatedSection.toString(); accumulatedSection.setLength( 0 ); assert accumulatedValue.length() == 0 : "value residual : " + accumulatedValue.toString(); accumulatedValue.setLength( 0 ); } /** * clear out the state machine ready to parse a new program */ private static void reset() { nlCount = 0; accumulatedComment.setLength( 0 ); accumulatedKeyword.setLength( 0 ); accumulatedSection.setLength( 0 ); accumulatedValue.setLength( 0 ); tokens.clear(); how = null; charIndex = 0; // keeping certain variables local or global is crucial. // Don't mess with them without thinking carefully and // changing the docs: // local: category, first, oldState, state, theChar // global: charIndex, how // we make these local to discourage accidental snooping or // accidentally picking up the static version instead of the parm. } /** * get rid of leading and trailing NLs tokens. It is easier to handle it later than during parsing. */ private static void trimNLs() { // remove leading NLs. while ( tokens.size() > 0 && ( tokens.get( 0 ) instanceof NL ) ) { tokens.remove( 0 ); } // remove trailing NLs int count; while ( ( count = tokens.size() ) > 0 && ( tokens.get( count - 1 ) instanceof NL ) ) { tokens.remove( count - 1 ); } // We don't need an NL at either beginning or end. // insert at the beginning tokens.add( 0, new Start( "
" ) );
        // add to end
        addToken( new Stop( "
" ) ); } /** * Default Consume one character. It has been predecided that you can and will consume it. * * @param c char to consume */ abstract void consume( char c ); /** * default what to do on leaving state, after last char is consumed. */ abstract void leaving(); /** * default next method determines the next state based on current state, and next char * * @param category class of next character * @param nextChar next character to process * * @return next IniCharCategoryState */ abstract IniState next( IniCharCategory category, char nextChar ); /** * Parse program and leave a list of Tokens in tokens ArrayList. * * @param program the text we are going parse and eventually render. * * @return an array of tokens representing the text and how it will be rendered. */ @SuppressWarnings( { "UnusedAssignment" } ) public static Token[] parse( String program ) { reset(); size = program.length(); if ( program.charAt( 0 ) == '#' ) { IniCharCategory.setCommentChar( '#' ); } // where we were IniState oldState = AT_END_OF_LINE; // where we are IniState state = AT_END_OF_LINE; // were we will be next IniState newState = null; // how is global however, so next can return both a state and how. how = null; // Note, NO int charIndex !! Don't "repair that". // charIndex is a static variable globally known so "coming" can use it. for ( charIndex = 0; charIndex < size; charIndex++ ) { // next char to process char theChar = program.charAt( charIndex ); // decide which general category the char falls in final IniCharCategory category = IniCharCategory .categorise( theChar ); if ( category != IniCharCategory.IGNORE ) { /* * keep going till some state consumes/discards the character. * Allow up to three forwarding attempts to deal with the * character. Usually we should succeed on the first or second * attempt. We always make at least on trip through */ attempts: for ( int times = 0; times < 3; times++ ) { // first is deliberately local final boolean first = state != oldState; /* * crank the state machine one cycle, State should modify * how in addition to returning the new state. A little ugly * but simplest way to return a pair of values: state and * how */ how = null;// setting to null ensures not setting it will // be caught. /* * This is the guts of the finite state automaton decide the * next state */ // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv newState = state.next( category, theChar ); // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ if ( DEBUGGING ) { dumpState( theChar, category, first, oldState, state, newState, how ); } // kick over to the next generation, // we are now in the newState. We make the transition here. oldState = state; state = newState; newState = null; if ( how == null ) { throw new NullPointerException( "IniState bug: how not set. OldState=" + oldState + " " + "newState=" + state + " next() must not be private." ); } switch ( how ) { case CONSUME: oldState.consume( theChar ); if ( state != oldState ) { oldState.leaving(); } // and on to the next char break attempts; case DISCARD: if ( state != oldState ) { oldState.leaving(); } // and on to the next char break attempts; default: // should never get here assert false : "IniTokenizer state machine failed to set how variable."; break attempts; case FORWARD: assert state != oldState : "IniTokenizer state machine attempted to forward a char to the same state."; oldState.leaving(); // we give that character another try with the new // state } // end switch /* * we will only ever get here if we are forwarding. The * other cases leave the loop early. */ } // end attempts loop // we fall out the bottom and land here no matter what assert how == HowToProcess.CONSUME || how == HowToProcess .DISCARD : "BatTokenizer state machine failed to consume char in three state forwarding attempts."; } // end if ignore } // end for each character // we must leave the last state if we have not already: if ( state == oldState ) { oldState.leaving(); } // make sure nothing still sitting in accumulation buffer after we have // finished parsing the entire program. ensureNoLeftovers(); // collapse tokens into fewer if possible crunch(); trimNLs(); // covert to vanilla array for even more efficient use in the final // Applet. return tokens.toArray( new Token[ tokens.size() ] ); } // end parse ; }