/* * [CSSState.java] * * Summary: Parse a CSS style sheet into tokens. * * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2011-11-09 0 initial version. */ package com.mindprod.jprep; import com.mindprod.common18.ST; import com.mindprod.jtokens.NL; import com.mindprod.jtokens.Start; import com.mindprod.jtokens.Stop; import com.mindprod.jtokens.Token; import java.util.ArrayList; import static java.lang.System.*; /** * Parse a CSS style sheet into tokens. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2011-11-09 initial version. * @since 2011-11-09 */ enum CSSState { BETWEEN_ITEMS { void consume( char c ) { } CSSState next( CSSCharCategory category, char nextChar ) { // dummy return null; } /** * what to do on leaving state, after last char is consumed. */ void leaving() { } }, IN_PROPERTY { void consume( char c ) { } CSSState next( CSSCharCategory category, char nextChar ) { // dummy return null; } /** * what to do on leaving state, after last char is consumed. */ void leaving() { } }, // IN_PSEUDO // IN_REM_SLASH_SLASH // IN_REM_SLASH_STAR IN_SELECTOR { void consume ( char c ) { } CSSState next ( CSSCharCategory category, char nextChar ) { // dummy return null; } /** * what to do on leaving state, after last char is consumed. */ void leaving() { } }, IN_VALUE { void consume( char c ) { } CSSState next ( CSSCharCategory category, char nextChar ) { // dummy return null; } /** * what to do on leaving state, after last char is consumed. */ void leaving() { } }, // IN_VALUE_STRING AT_END_OF_LINE { /** * Consume one character. It has been predecided that you can * and will consume it. * @param c char to consume */ void consume( char c ) { nlCount++; } CSSState next ( CSSCharCategory category, char nextChar ) { // dummy return AT_END_OF_LINE; } /** * what to do on leaving state, after last char is consumed. */ void leaving() { // emit pending nls if ( nlCount > 3 ) { nlCount = 3; } if ( nlCount > 0 ) { addToken( new NL( nlCount ) ); nlCount = 0; } } // end leaving }; /** * true if want voluminous debugging output */ private static final boolean DEBUGGING = false; /** * list of tokens we have parsed out. */ private static final ArrayList tokens = new ArrayList<>( 10000 ); // statics are shared common to all enum constants. // others are a separate field in each enum constant. /** * used to accumulate comments */ private static final StringBuilder accumulatedComment = new StringBuilder( 100 ); /** * accumulates left hand side of : */ private static final StringBuilder accumulatedKeyword = new StringBuilder( 80 ); /** * accumulates right hand side of equal sign */ private static final StringBuilder accumulatedValue = new StringBuilder( 100 ); /** * how far we are through parsing the stylesheet */ private static int charIndex; /** * how we plan to process this character, consume, procrastinate to next state, or discard */ private static HowToProcess how; /** * count of how many new lines encountered, shared by several states */ private static int nlCount; /** * the length of the stylesheet fragment we are parsing. */ private static int size; /** * the stylesheet or stylesheet fragment we are parsing */ private static String stylesheet; /** * add a token to the end of the list to be rendered. * * @param t a token. Useless tokens will be not be added. */ private static void addToken( Token t ) { // check out token for validity, if pointless, don't bother adding it. if ( !t.isUseless() ) { tokens.add( t ); } } /** * crunch multiple tokens into a single token where feasible. */ private static void crunch() { int size; do { size = tokens.size(); for ( int i = size - 1; i >= 1; i-- ) { // this version does not deal with Space tokens. Token current = tokens.get( i ); Token prev = tokens.get( i - 1 ); if ( prev.isCollapsible( current ) ) { // combine two tokens into one prev.setText( prev.getText() + current.getText() ); tokens.remove( i ); // don't i--. combined token will be compared with its // predecessor } } // end for // keep going while it is still finding something to crunch } while ( tokens.size() < size ); } /** * debugging dump system state * * @param theChar char we are processing * @param category category of the char * @param first did we just enter this state * @param oldState old state * @param state current state * @param newState next state * @param how do we consume, forward or discard this character. */ private static void dumpState( char theChar, CSSCharCategory category, boolean first, CSSState oldState, CSSState state, CSSState newState, HowToProcess how ) { if ( how == HowToProcess.CONSUME ) { /* use slightly abbreviated form, black */ out.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState ); } else { /* in red */ err.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState + " " + how ); } } /** * Make sure nothing there is left accumulated in buffers from parsing not yet converted to tokens */ private static void ensureNoLeftovers() { assert accumulatedComment.length() == 0 : "comment residual : " + accumulatedComment.toString(); accumulatedComment.setLength( 0 ); assert accumulatedKeyword.length() == 0 : "keyword residual : " + accumulatedKeyword.toString(); accumulatedKeyword.setLength( 0 ); assert accumulatedValue.length() == 0 : "value residual : " + accumulatedValue.toString(); accumulatedValue.setLength( 0 ); } /** * Is there /uxxxx coming? * * @return true if valid hex unicode char in stream */ private static boolean isUnicodeEscapeComing() { if ( charIndex + 6 >= size ) { return false; } String suxxxx = stylesheet.substring( charIndex, charIndex + 6 ); // we know first char is backslash already. return suxxxx.charAt( 1 ) == 'u' && ST.isLegal( suxxxx.substring( 2, 6 ), "0123456789abcdefABCDEF" ); } /** * clear out the state machine ready to parse a new stylesheet */ private static void reset() { nlCount = 0; accumulatedComment.setLength( 0 ); accumulatedKeyword.setLength( 0 ); accumulatedValue.setLength( 0 ); tokens.clear(); how = null; charIndex = 0; // keeping certain variables local or global is crucial. // Don't mess with them without thinking carefully and // changing the docs: // local: category, first, oldState, state, theChar // global: charIndex, how // we make these local to discourage accidental snooping or // accidentally picking up the static version instead of the parm. } /** * get rid of leading and trailing NLs tokens. It is easier to handle it later than during parsing. */ private static void trimNLs() { // remove leading NLs. while ( tokens.size() > 0 && ( tokens.get( 0 ) instanceof NL ) ) { tokens.remove( 0 ); } // remove trailing NLs int count; while ( ( count = tokens.size() ) > 0 && ( tokens.get( count - 1 ) instanceof NL ) ) { tokens.remove( count - 1 ); } // We don't need an NL at either beginning or end. // insert at the beginning tokens.add( 0, new Start( "
" ) );
        // add to end
        addToken( new Stop( "
" ) ); } /** * Default Consume one character. It has been predecided that you can and will consume it. * * @param c char to consume */ abstract void consume( char c ); /** * default what to do on leaving state, after last char is consumed. */ abstract void leaving(); /** * default next method determines the next state based on current state, and next char * * @param category class of next character * @param nextChar next character to process * * @return next CSSCharCategory */ abstract CSSState next( CSSCharCategory category, char nextChar ); /** * Parse stylesheet and leave a list of Tokens in tokens ArrayList. * * @param stylesheet the text we are going parse and eventually render. * * @return an array of tokens representing the text and how it will be rendered. */ @SuppressWarnings( { "UnusedAssignment" } ) public static Token[] parse( String stylesheet ) { reset(); CSSState.stylesheet = stylesheet; size = stylesheet.length(); // where we were CSSState oldState = AT_END_OF_LINE; // where we are CSSState state = AT_END_OF_LINE; // were we will be next CSSState newState; // how is global however, so next can return both a state and how. how = null; // Note, NO int charIndex !! Don't "repair that". // charIndex is a static variable globally known so "coming" can use it. for ( charIndex = 0; charIndex < size; charIndex++ ) { // next char to process char theChar = stylesheet.charAt( charIndex ); // decide which general category the char falls in final CSSCharCategory category = CSSCharCategory.categorise( theChar ); if ( category != CSSCharCategory.IGNORE ) { /* * keep going till some state consumes/discards the character. * Allow up to three forwarding attempts to deal with the * character. Usually we should succeed on the first or second * attempt. We always make at least on trip through */ attempts: for ( int times = 0; times < 3; times++ ) { // first is deliberately local final boolean first = state != oldState; /* * crank the state machine one cycle, State should modify * how in addition to returning the new state. A little ugly * but simplest way to return a pair of values: state and * how */ how = null;// setting to null ensures not setting it will // be caught. /* * This is the guts of the finite state automaton decide the * next state */ // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv newState = state.next( category, theChar ); // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ if ( DEBUGGING ) { dumpState( theChar, category, first, oldState, state, newState, how ); } // kick over to the next generation, // we are now in the newState. We make the transition here. oldState = state; state = newState; newState = null; if ( how == null ) { throw new NullPointerException( "CSSState bug: how not set. OldState=" + oldState + " newState=" + state + " next() " + "must not be private." ); } switch ( how ) { case CONSUME: oldState.consume( theChar ); if ( state != oldState ) { oldState.leaving(); } // and on to the next char break attempts; case DISCARD: if ( state != oldState ) { oldState.leaving(); } // and on to the next char break attempts; default: // should never get here assert false : "CSSState parser failed to set how variable."; break attempts; case FORWARD: assert state != oldState : "CSSState parser attempted to forward a char to the same state."; oldState.leaving(); // we give that character another try with the new // state } // end switch /* * we will only ever get here if we are forwarding. The * other cases leave the loop early. */ } // end attempts loop // we fall out the bottom and land here no matter what assert how == HowToProcess.CONSUME || how == HowToProcess .DISCARD : "CSSState parser failed to consume char in three state forwarding attempts."; } // end if ignore } // end for each character // we must leave the last state if we have not already: if ( state == oldState ) { oldState.leaving(); } // make sure nothing still sitting in accumulation buffer after we have // finished parsing the entire stylesheet. ensureNoLeftovers(); // collapse tokens into fewer if possible crunch(); trimNLs(); // covert to vanilla array for even more efficient use in the final // Applet. return tokens.toArray( new Token[ tokens.size() ] ); } // end parse ; } // end CSSState