/*
 * [PropState.java]
 *
 * Summary: properties file parser, finite state automaton.
 *
 * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  3.1 2009-04-12 shorter style names, improved highlighting.
 */
package com.mindprod.jprep;

import com.mindprod.common18.ST;
import com.mindprod.jtokens.Gibberish;
import com.mindprod.jtokens.NL;
import com.mindprod.jtokens.Operator;
import com.mindprod.jtokens.Start;
import com.mindprod.jtokens.Stop;
import com.mindprod.jtokens.Token;
import com.mindprod.jtokens.Value;
import com.mindprod.jtokens.prop.PropComment;
import com.mindprod.jtokens.prop.PropKey;
import com.mindprod.jtokens.prop.UnicodeLiteral;

import java.util.ArrayList;

import static java.lang.System.*;

/**
 * properties file parser, finite state automaton.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.1 2009-04-12 shorter style names, improved highlighting.
 * @since 2005-12-22
 */
@SuppressWarnings( { "NestedAssignment", "UnnecessaryContinue", "EnumeratedConstantNamingConvention" } )
public enum PropState
    {
        /**
         * somebody noticed an EOL and forwarded it to us. The EOL will be the first character we consume.
         */
        @SuppressWarnings( { "WeakerAccess" } ) AT_END_OF_LINE
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        nlCount++;
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        // emit pending nls
                        if ( nlCount > 3 )
                            {
                            nlCount = 3;
                            }
                        if ( nlCount > 0 )
                            {
                            addToken( new NL( nlCount ) );
                            nlCount = 0;
                            }
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    PropState next( PropCharCategory category, char nextChar )
                        {
                        switch ( category )
                            {
                            case BACKSLASH:
                            case EQUALS:
                            case ORDINARY:
                            case OTHER:
                                how = HowToProcess.FORWARD;
                                return IN_KEYWORD;
                            case SHARP:
                                how = HowToProcess.FORWARD;
                                return IN_COMMENT;
                            case EOL:
                                how = HowToProcess.CONSUME;
                                return AT_END_OF_LINE;
                            case IGNORE:
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end AT_END_OF_LINE
        /**
         * in comment # ...
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_COMMENT
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        // includes lead #
                        accumulatedComment.append( c );
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        String comment = accumulatedComment.toString();
                        accumulatedComment.setLength( 0 );
                        if ( comment.length() != 0 )
                            {
                            addToken( new PropComment( comment ) );
                            }
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    PropState next( PropCharCategory category, char nextChar )
                        {
                        switch ( category )
                            {
                            case BACKSLASH:
                                if ( isUnicodeEscapeComing() )
                                    {
                                    leaving();
                                    processUnicodeEscape();
                                    how = HowToProcess.DISCARD;
                                    return IN_COMMENT;
                                    }
                                else
                                    {
                                    how = HowToProcess.CONSUME;
                                    return IN_COMMENT;
                                    }
                            case OTHER:
                                leaving();
                                addToken( new Gibberish( nextChar ) );
                                how = HowToProcess.DISCARD;
                                return IN_COMMENT;
                            case EQUALS:
                            case ORDINARY:
                            case SHARP:
                                // Just treat as an ordinary char inside a comment.
                                // stay in comment
                                how = HowToProcess.CONSUME;
                                return IN_COMMENT;
                            case EOL:
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case IGNORE:
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    }, // end IN_KEYWORD
        /**
         * keyword=value
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_KEYWORD
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedKeyword.append( c );
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        String name = accumulatedKeyword.toString();
                        accumulatedKeyword.setLength( 0 );
                        if ( name.length() > 0 )
                            {
                            addToken( new PropKey( name ) );
                            } // end if
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    PropState next( PropCharCategory category, char nextChar )
                        {
                        switch ( category )
                            {
                            case ORDINARY:
                                how = HowToProcess.CONSUME;
                                return IN_KEYWORD;
                            case BACKSLASH:
                                if ( isUnicodeEscapeComing() )
                                    {
                                    leaving();
                                    processUnicodeEscape();
                                    how = HowToProcess.DISCARD;
                                    return IN_KEYWORD;
                                    }
                                else
                                    {
                                    how = HowToProcess.CONSUME;
                                    return IN_KEYWORD;
                                    }
                            case EQUALS:
                                leaving();
                                addToken( new Operator( '=' ) );
                                how = HowToProcess.DISCARD;
                                return IN_VALUE;
                            case OTHER:
                                leaving();
                                addToken( new Gibberish( nextChar ) );
                                how = HowToProcess.DISCARD;
                                return IN_KEYWORD;
                            case SHARP:
                                how = HowToProcess.FORWARD;
                                return IN_COMMENT;
                            case EOL:
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case IGNORE:
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    // end IN_KEYWORD
                    },
        /**
         * keyword=value
         */
        @SuppressWarnings( { "WeakerAccess" } ) IN_VALUE
                    {
                    /**
                     * Consume one character. It has been predecided that you can
                     * and will consume it.
                     * @param c        char to consume
                     */
                    void consume( char c )
                        {
                        accumulatedValue.append( c );
                        }

                    /**
                     * what to do on leaving state, after last char is consumed.
                     */
                    void leaving()
                        {
                        String name = accumulatedValue.toString();
                        accumulatedValue.setLength( 0 );
                        if ( name.length() > 0 )
                            {
                            addToken( new Value( name ) );
                            } // end if
                        } // end leaving

                    /**
                     * Figure out what state we should go in after we process this
                     * character and whether we can consume it. We must set consume
                     * = false if we cannot consume the character and have to
                     * postpone processing to the next state.
                     * @param category category of character
                     * @param nextChar character in the stream we are processing
                     * @return next state to go in
                     */
                    PropState next( PropCharCategory category, char nextChar )
                        {
                        switch ( category )
                            {
                            case ORDINARY:
                            case EQUALS:// second equals is just ordinary char
                                how = HowToProcess.CONSUME;
                                return IN_VALUE;
                            case BACKSLASH:
                                if ( isUnicodeEscapeComing() )
                                    {
                                    leaving();
                                    processUnicodeEscape();
                                    how = HowToProcess.DISCARD;
                                    return IN_VALUE;
                                    }
                                else
                                    {
                                    how = HowToProcess.CONSUME;
                                    return IN_VALUE;
                                    }
                            case OTHER:
                                leaving();
                                addToken( new Gibberish( nextChar ) );
                                how = HowToProcess.DISCARD;
                                return IN_VALUE;
                            case SHARP:
                                how = HowToProcess.FORWARD;
                                return IN_COMMENT;
                            case EOL:
                                how = HowToProcess.FORWARD;
                                return AT_END_OF_LINE;
                            case IGNORE:
                            default:
                                assert false :
                                        "bad state "
                                        + category
                                        + " "
                                        + nextChar;
                                return null;
                            } // end switch
                        } // end next
                    // end IN_VALUE
                    };

    /**
     * true if want voluminous debugging output; when set, parse dumps every state transition
     * via dumpState.
     */
    private static final boolean DEBUGGING = false;

    /**
     * list of tokens we have parsed out. Cleared by reset at the start of every parse.
     */
    private static final ArrayList<Token> tokens =
            new ArrayList<>( 10000 );
    // statics are shared, common to all enum constants.
    // others are a separate field in each enum constant.
    // Every field below is static and is mutated without synchronisation during parse,
    // so two parses must not run at the same time.

    /**
     * used to accumulate comment text, appended to by IN_COMMENT.
     */
    private static final StringBuilder accumulatedComment =
            new StringBuilder( 100 );

    /**
     * accumulates left hand side of equal sign, appended to by IN_KEYWORD.
     */
    private static final StringBuilder accumulatedKeyword =
            new StringBuilder( 80 );

    /**
     * accumulates right hand side of equal sign, appended to by IN_VALUE.
     */
    private static final StringBuilder accumulatedValue =
            new StringBuilder( 100 );

    /**
     * how far we are through parsing the program: offset of the char currently being processed.
     * Static rather than local to parse so the Unicode-escape lookahead helpers can read and advance it.
     */
    private static int charIndex;

    /**
     * how we plan to process this character: consume, procrastinate (forward) to next state, or discard.
     * Set by each state's next method as a second return value alongside the next state.
     */
    private static HowToProcess how;

    /**
     * count of how many new lines encountered and not yet emitted as an NL token.
     */
    private static int nlCount;

    /**
     * the program or program fragment we are parsing
     */
    private static String program;

    /**
     * the length in chars of the program fragment we are parsing.
     */
    private static int size;

    /**
     * Append a token to the list to be rendered, unless the token reports itself useless.
     *
     * @param t a token. Useless tokens are silently dropped.
     */
    private static void addToken( Token t )
        {
        if ( t.isUseless() )
            {
            // pointless token, not worth keeping
            return;
            }
        tokens.add( t );
        }

    /**
     * Merge adjacent tokens into a single token wherever the earlier one says the pair is collapsible.
     * Passes are repeated over the list until a pass merges nothing.
     */
    private static void crunch()
        {
        boolean mergedSomething;
        do
            {
            mergedSomething = false;
            // walk backwards so that removing element i never disturbs the indexes still to visit
            for ( int i = tokens.size() - 1; i >= 1; i-- )
                {
                // this version does not deal with Space tokens.
                final Token right = tokens.get( i );
                final Token left = tokens.get( i - 1 );
                if ( !left.isCollapsible( right ) )
                    {
                    continue;
                    }
                // fold the right token's text into the left one and drop the right one.
                left.setText( left.getText() + right.getText() );
                tokens.remove( i );
                mergedSomething = true;
                // the usual i-- then pairs the combined token with its own predecessor
                } // end for
            }
        while ( mergedSomething );
        }

    /**
     * Debugging aid: print one line describing a single state transition.
     * Consumed chars go to System.out in a slightly shorter form; everything else goes to
     * System.err with the how value appended.
     *
     * @param theChar  char we are processing
     * @param category category of the char
     * @param first    did we just enter this state
     * @param oldState old state
     * @param state    current state
     * @param newState next state
     * @param how      do we consume, forward or discard this character.
     */
    private static void dumpState( char theChar,
                                   PropCharCategory category,
                                   boolean first,
                                   PropState oldState,
                                   PropState state,
                                   PropState newState,
                                   HowToProcess how )
        {
        // the part common to both forms of the line
        final String transition = theChar
                                  + " "
                                  + category
                                  + " "
                                  + first
                                  + " o:"
                                  + oldState
                                  + " s:"
                                  + state
                                  + " n:"
                                  + newState;
        if ( how != HowToProcess.CONSUME )
            {
            /* unusual case, goes to err so it stands out */
            err.println( transition + " " + how );
            return;
            }
        /* usual case, slightly abbreviated form */
        out.println( transition );
        }

    /**
     * Make sure nothing is left in the accumulation buffers that has not yet been converted
     * to a token. With assertions enabled a leftover is reported; in any case all three
     * buffers end up empty on normal return.
     */
    private static void ensureNoLeftovers()
        {
        // complain first, in the order comment, keyword, value...
        assert accumulatedComment.length() == 0 : "comment residual : " + accumulatedComment;
        assert accumulatedKeyword.length() == 0 : "keyword residual : " + accumulatedKeyword;
        assert accumulatedValue.length() == 0 : "value residual : " + accumulatedValue;
        // ...then clear, which only has an effect when assertions are disabled.
        accumulatedComment.setLength( 0 );
        accumulatedKeyword.setLength( 0 );
        accumulatedValue.setLength( 0 );
        }

    /**
     * Is there a /uxxxx style Unicode escape (with a backslash as lead char) starting at charIndex?
     * The caller has already established that the char at charIndex is a backslash; this checks
     * that it is followed by a lower case u and exactly four hex digits.
     * An escape that ends exactly at the end of the program is recognised too.
     *
     * @return true if valid hex unicode escape in stream at the current position
     */
    private static boolean isUnicodeEscapeComing()
        {
        // The escape occupies the six chars charIndex .. charIndex+5, so it fits whenever
        // charIndex + 6 <= size. (Testing with >= wrongly rejected an escape that was the
        // very last thing in the program.)
        if ( charIndex + 6 > size )
            {
            return false;
            }
        // we know first char is backslash already.
        return program.charAt( charIndex + 1 ) == 'u'
               && ST.isLegal( program.substring( charIndex + 2, charIndex + 6 ),
                "0123456789abcdefABCDEF" );
        }

    /**
     * Emit a UnicodeLiteral token for the six-char /uxxxx style Unicode escape starting at
     * charIndex, and step charIndex over the extra chars so the main loop resumes after the escape.
     * Only call this after isUnicodeEscapeComing has returned true.
     */
    private static void processUnicodeEscape()
        {
        // we know the Unicode Escape is well formed, six chars long
        final String escape = program.substring( charIndex, charIndex + 6 );
        addToken( new UnicodeLiteral( escape ) );
        // The main loop will step over one char itself; skip the other five here.
        // should refactor this with code such as used for CDATA
        charIndex += 5;
        }

    /**
     * Return the state machine to a clean slate, ready to parse a new program:
     * empty token list, empty accumulation buffers, zeroed counters.
     */
    private static void reset()
        {
        // results of any previous parse
        tokens.clear();
        // partially accumulated text
        accumulatedComment.setLength( 0 );
        accumulatedKeyword.setLength( 0 );
        accumulatedValue.setLength( 0 );
        // position and per-char bookkeeping
        charIndex = 0;
        nlCount = 0;
        how = null;
        // keeping certain variables local or global is crucial.
        // Don't mess with them without thinking carefully and
        // changing the docs:
        // local: category, first, oldState, state, theChar
        // global: charIndex, how
        // we make these local to discourage accidental snooping or
        // accidentally picking up the static version instead of the parm.
        }

    /**
     * Strip leading and trailing NL tokens, then bracket what remains with the Start and Stop
     * tokens. It is easier to trim here, after the fact, than during parsing.
     */
    private static void trimNLs()
        {
        // drop NLs from the front
        while ( !tokens.isEmpty() && tokens.get( 0 ) instanceof NL )
            {
            tokens.remove( 0 );
            }
        // drop NLs from the back, always looking at whatever is currently last
        for ( int last = tokens.size() - 1;
              last >= 0 && tokens.get( last ) instanceof NL;
              last-- )
            {
            tokens.remove( last );
            }
        // We don't need an NL at either beginning or end.
        // opening tag goes in front of everything
        tokens.add( 0, new Start( "<pre class=\"prop\">" ) );
        // closing tag goes at the very end
        addToken( new Stop( "</pre>" ) );
        }

    /**
     * Consume one character. It has been predecided that you can and will consume it.
     * Each state supplies its own implementation; parse calls it on the state that was
     * current when next chose CONSUME.
     *
     * @param c char to consume
     */
    abstract void consume( char c );

    /**
     * What to do on leaving the state, after its last char is consumed.
     * Each state supplies its own implementation. It may be called more than once in a row
     * (the states themselves call it before emitting other tokens), so implementations
     * must tolerate having nothing pending.
     */
    abstract void leaving();

    /**
     * Determines the next state based on the current state and the next char.
     * Each state supplies its own implementation, which must also set the static
     * <code>how</code> field to say whether the char is consumed, discarded or forwarded;
     * parse throws if it is left null.
     *
     * @param category class of next character
     * @param nextChar next character to process
     *
     * @return next PropState to go in
     */
    abstract PropState next( PropCharCategory category, char nextChar );

    /**
     * Parse program and leave a list of Tokens in tokens ArrayList.
     * Drives the finite state automaton one character at a time, then merges collapsible
     * tokens, trims leading and trailing NL tokens and wraps the result in Start and Stop tokens.
     * All working state is held in static fields, so only one parse may be in progress at a time.
     *
     * @param program the text we are going to parse and eventually render. Must not be null.
     *
     * @return an array of tokens representing the text and how it will be rendered.
     *
     * @throws NullPointerException if a state's next method fails to set how.
     */
    @SuppressWarnings( { "UnusedAssignment" } )
    public static Token[] parse( String program )
        {
        reset();
        PropState.program = program;
        size = program.length();
        // where we were
        PropState oldState = AT_END_OF_LINE;
        // where we are
        PropState state = AT_END_OF_LINE;
        // where we will be next
        PropState newState;
        // how is global however, so next can return both a state and how.
        how = null;
        // Note, NO int charIndex !! Don't "repair that".
        // charIndex is a static variable globally known so "coming" can use it.
        // processUnicodeEscape also advances it to skip over lookahead chars.
        for ( charIndex = 0; charIndex < size; charIndex++ )
            {
            // next char to process
            char theChar = program.charAt( charIndex );
            // decide which general category the char falls in
            final PropCharCategory category = PropCharCategory
                    .categorise( theChar );
            // chars categorised as IGNORE are skipped entirely; the states never see them.
            if ( category != PropCharCategory.IGNORE )
                {
                /*
                 * keep going till some state consumes/discards the character.
                 * Allow up to three forwarding attempts to deal with the
                 * character. Usually we should succeed on the first or second
                 * attempt. We always make at least one trip through.
                 */
                attempts:
                for ( int times = 0; times < 3; times++ )
                    {
                    // first is deliberately local; it is only used for the debugging dump.
                    final boolean first = state != oldState;
                    /*
                     * crank the state machine one cycle, State should modify
                     * how in addition to returning the new state. A little ugly
                     * but simplest way to return a pair of values: state and
                     * how
                     */
                    how = null;// setting to null ensures not setting it will
                    // be caught.
                    /*
                     * This is the guts of the finite state automaton: decide the
                     * next state
                     */
                    // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
                    newState = state.next( category, theChar );
                    // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    if ( DEBUGGING )
                        {
                        dumpState( theChar,
                                category,
                                first,
                                oldState,
                                state,
                                newState,
                                how );
                        }
                    // kick over to the next generation,
                    // we are now in the newState. We make the transition here.
                    // From here on oldState is the state that just decided, state is the one it chose.
                    oldState = state;
                    state = newState;
                    newState = null;
                    if ( how == null )
                        {
                        throw new NullPointerException( "PropState bug: how not set. OldState=" + oldState + " " +
                                                        "newState=" + state + " next() must not be private." );
                        }
                    switch ( how )
                        {
                        case CONSUME:
                            // the state that made the decision is the one that takes the char
                            oldState.consume( theChar );
                            if ( state != oldState )
                                {
                                oldState.leaving();
                                }
                            // and on to the next char
                            break attempts;
                        case DISCARD:
                            // the char is dropped; next() has already emitted any token it wanted for it
                            if ( state != oldState )
                                {
                                oldState.leaving();
                                }
                            // and on to the next char
                            break attempts;
                        default:
                            // should never get here
                            assert false :
                                    "PropTokenizer state machine failed to set how variable.";
                            break attempts;
                        case FORWARD:
                            assert state
                                   != oldState :
                                    "PropTokenizer state machine attempted to forward a char to the same state.";
                            oldState.leaving();
                            // we give that character another try with the new
                            // state
                        } // end switch
                    /*
                     * we will only ever get here if we are forwarding. The
                     * other cases leave the loop early.
                     */
                    }
                // end attempts loop
                // we fall out the bottom and land here no matter what
                assert how == HowToProcess.CONSUME || how == HowToProcess
                        .DISCARD :
                        "PropTokenizer state machine failed to consume char in three state forwarding attempts.";
                } // end if ignore
            } // end for each character
        // we must leave the last state if we have not already:
        // when state differs from oldState, oldState.leaving() was already called at the transition.
        if ( state == oldState )
            {
            oldState.leaving();
            }
        // make sure nothing still sitting in accumulation buffer after we have
        // finished parsing the entire program.
        ensureNoLeftovers();
        // collapse tokens into fewer if possible
        crunch();
        trimNLs();
        // convert to vanilla array for even more efficient use in the final
        // Applet.
        return tokens.toArray( new Token[ tokens.size() ] );
        } // end parse ;
    } // end PropState