/* * [MASMState.java] * * Summary: State machine for parsing MASM to align the columns. * * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2011-01-01 initial version. */ package com.mindprod.masmtidy; import com.mindprod.common18.ST; import com.mindprod.fastcat.FastCat; import static java.lang.System.*; /** * State machine for parsing MASM to align the columns. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2011-01-02 initial version * @since 2011-01-02 */ public enum MASMState { AT_FIRST_COLUMN { void consume( char c ) { // ignore spaces. } void leaving() { // just ignore spaces. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case SPACE: how = HowToProcess.DISCARD; return SEEKING_OPERATOR; case PLAIN: if ( isComing() ) // case-insensitive { how = HowToProcess.FORWARD; return IN_MULTILINE_COMMENT; } else { how = HowToProcess.FORWARD; return IN_LABEL; } case QUOTE: case TICK: how = HowToProcess.FORWARD; return IN_LABEL; case SEMICOLON: how = HowToProcess.FORWARD; return IN_LINE_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } }, IN_LABEL { @SuppressWarnings( { "UnusedParameters" } ) void consume( char c ) { accumulatedLabel.append( c ); } void leaving() { // just leave Accumulated label till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case SPACE: how = HowToProcess.DISCARD; return SEEKING_OPERATOR; case PLAIN: case QUOTE: case TICK: how = HowToProcess.CONSUME; return IN_LABEL; case SEMICOLON: how = HowToProcess.FORWARD; return IN_TAIL_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, SEEKING_OPERATOR { @SuppressWarnings( { "UnusedParameters" } ) void consume( char c ) { // just ignore the spaces. } void leaving() { // just ignore spaces. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case SPACE: how = HowToProcess.DISCARD; return SEEKING_OPERATOR; case PLAIN: case QUOTE: case TICK: how = HowToProcess.FORWARD; return IN_OPERATOR; case SEMICOLON: how = HowToProcess.FORWARD; return IN_TAIL_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } }, IN_OPERATOR { void consume( char c ) { accumulatedOperator.append( c ); } void leaving() { // just leave Accumulated operator till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case SPACE: how = HowToProcess.DISCARD; return SEEKING_OPERAND; case PLAIN: case QUOTE: case TICK: how = HowToProcess.CONSUME; return IN_OPERATOR; case SEMICOLON: how = HowToProcess.FORWARD; return IN_TAIL_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, SEEKING_OPERAND { @SuppressWarnings( { "UnusedParameters" } ) void consume( char c ) { // just ignore the spaces. } void leaving() { // just ignore spaces. } @SuppressWarnings( { "UnusedDeclaration" } ) MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case SPACE: how = HowToProcess.DISCARD; return SEEKING_OPERAND; case PLAIN: case QUOTE: case TICK: how = HowToProcess.FORWARD; return IN_OPERAND; case SEMICOLON: how = HowToProcess.FORWARD; return IN_TAIL_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } }, IN_OPERAND { void consume( char c ) { accumulatedOperand.append( c ); } void leaving() { // just leave Accumulated operator till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case PLAIN: case SPACE: how = HowToProcess.CONSUME; return IN_OPERAND; case QUOTE: how = HowToProcess.FORWARD; return IN_QUOTE_OPERAND; case TICK: how = HowToProcess.FORWARD; return IN_TICK_OPERAND; case SEMICOLON: how = HowToProcess.FORWARD; return IN_TAIL_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, IN_TICK_OPERAND { void consume( char c ) { accumulatedOperand.append( c ); } void leaving() { // just leave Accumulated operator till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case PLAIN: case QUOTE: case SEMICOLON: case SPACE: how = HowToProcess.CONSUME; return IN_TICK_OPERAND; case TICK: how = HowToProcess.CONSUME; return IN_OPERAND; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, IN_QUOTE_OPERAND { void consume( char c ) { accumulatedOperand.append( c ); } void leaving() { // just leave Accumulated operator till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case PLAIN: case SEMICOLON: case SPACE: case TICK: how = HowToProcess.CONSUME; return IN_QUOTE_OPERAND; case QUOTE: how = HowToProcess.CONSUME; return IN_OPERAND; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, IN_LINE_COMMENT { void consume( char c ) { accumulatedLineComment.append( c ); } void leaving() { // just leave Accumulated tail comment till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case PLAIN: case QUOTE: case SEMICOLON: case SPACE: case TICK: how = HowToProcess.CONSUME; return IN_LINE_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, IN_TAIL_COMMENT { void consume( char c ) { accumulatedTailComment.append( c ); } void leaving() { // just leave Accumulated tail comment till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { switch ( category ) { case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; case PLAIN: case QUOTE: case SEMICOLON: case SPACE: case TICK: how = HowToProcess.CONSUME; return IN_TAIL_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, IN_MULTILINE_COMMENT { char delimiter; void consume( char c ) { if ( c == '\n' ) { accumulatedMultilineComment.append( lineSeparator ); } else { accumulatedMultilineComment.append( c ); } } void leaving() { // just leave Accumulated tail comment till end of line.. } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { if ( first ) { delimiter = 0; } switch ( category ) { case EOL: how = HowToProcess.CONSUME; return IN_MULTILINE_COMMENT; case PLAIN: if ( delimiter == 0 && !ST.isLegal( nextChar, "coment COMENT" ) ) { delimiter = nextChar; // first non-blank after common } else if ( nextChar == delimiter ) { // found end of comment how = HowToProcess.CONSUME; return AT_FIRST_COLUMN; // it is not really the first column, but logically it is. } how = HowToProcess.CONSUME; return IN_MULTILINE_COMMENT; case QUOTE: case SEMICOLON: case SPACE: case TICK: how = HowToProcess.CONSUME; return IN_MULTILINE_COMMENT; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch} } }, /** * Someone has forwarded us an EOL. We deal with it and any subsequent EOLs. When we hit something * interesting we * let the default next deal with it. */ AT_END_OF_LINE { void consume( char c ) { } void leaving() { buildLine(); } MASMState next( MASMCharCategory category, char nextChar, boolean first ) { // AT_END_OF_LINE switch ( category ) { case EOL: how = HowToProcess.DISCARD; return AT_FIRST_COLUMN; case PLAIN: case QUOTE: case SEMICOLON: case SPACE: case TICK: how = HowToProcess.FORWARD; return AT_FIRST_COLUMN; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } }; // ////////////////////////////////////////////////////////////////// // common to all enum constants // declarations /** * true if want extra debugging checks and output */ private static final boolean DEBUGGING = false; /** * configure false to allow mixed case operators in second column. */ private static final boolean LOWER_CASE_OPERATORS = true; /** * configure 0-based column for the third operand column */ private static final int COL_FOR_OPERAND = 25; /** * configure 0-based column for the second operator column */ private static final int COL_FOR_OPERATOR = 15; /** * configure 0-based column for the fourth commend column */ private static final int COL_FOR_TAIL_COMMENT = 40; /** * CrLf or lf used for line endings */ private static final String lineSeparator = System.getProperty( "line.separator" ); /** * accumulates first col */ private static final StringBuilder accumulatedLabel = new StringBuilder( 20 ); /** * accumulates comment starting in column 1 */ private static final StringBuilder accumulatedLineComment = new StringBuilder( 150 ); /** * accumulates a multiline comment. */ private static final StringBuilder accumulatedMultilineComment = new StringBuilder( 1024 ); /** * accumulates second col */ private static final StringBuilder accumulatedOperator = new StringBuilder( 20 ); /** * accumulates fourth col */ private static final StringBuilder accumulatedTailComment = new StringBuilder( 10 ); /* accumulates third col */ private static final StringBuilder accumulatedOperand = new StringBuilder( 80 ); /** * used to track blank lines. More than 1 is suppressed */ private static boolean prevLineEmpty = true; /** * how far we are through parsing the program */ private static int charIndex; /** * the length of the program fragment we are parsing. */ private static int size; /** * the program or program fragment we are parsing */ private static String program; /** * how the next next character will be treated, usually consumed and stuffed in a buffer, or forwarded to the next * state to deal with. */ private static HowToProcess how; /** * used to accumulate one whole indented program in memory */ private static java.lang.StringBuilder sb; // /declarations // methods /** * build and append one line of HTML, or one multiline comment */ private static void buildLine() { // Three basic pieces, multiline, line comment, quadruple label, operator, operand, tail comment.. final String multiLineComment = accumulatedMultilineComment.toString(); if ( multiLineComment.length() > 0 ) { sb.append( multiLineComment ); // ends with special delimiter, not NL. accumulatedMultilineComment.setLength( 0 ); } final String lineComment = accumulatedLineComment.toString().trim(); if ( lineComment.length() > 0 ) { sb.append( lineComment ); accumulatedLineComment.setLength( 0 ); sb.append( lineSeparator ); assert accumulatedLabel.length() + accumulatedOperator.length() + accumulatedOperand.length() == 0 : "bug: line comment with label/operator/operand."; } else { final String label = accumulatedLabel.toString().trim(); accumulatedLabel.setLength( 0 ); String operator = accumulatedOperator.toString().trim(); accumulatedOperator.setLength( 0 ); if ( LOWER_CASE_OPERATORS ) { operator = operator.toLowerCase(); } final String operand = accumulatedOperand.toString().trim(); accumulatedOperand.setLength( 0 ); final String tailComment = accumulatedTailComment.toString(); accumulatedTailComment.setLength( 0 ); // calculate how many spaces needed to separate fields; int col = label.length(); final int spacesBeforeOperator = Math.max( 1, COL_FOR_OPERATOR - col ); col += spacesBeforeOperator; col += operator.length(); final int spacesBeforeOperand = Math.max( 1, COL_FOR_OPERAND - col ); col += spacesBeforeOperand; col += operand.length(); final int spacesBeforeTailComment = Math.max( 1, COL_FOR_TAIL_COMMENT - col ); // glue line back together with spaces to make things fit is desired cols as best as possible. FastCat fb = new FastCat( 7 ); if ( label.length() > 0 || operator.length() > 0 || operand.length() > 0 || tailComment.length() > 0 ) { fb.append( label ); if ( operator.length() > 0 || operand.length() > 0 || tailComment.length() > 0 ) { fb.append( ST.spaces( spacesBeforeOperator ) ); fb.append( operator ); if ( operand.length() > 0 || tailComment.length() > 0 ) { fb.append( ST.spaces( spacesBeforeOperand ) ); fb.append( operand ); if ( tailComment.length() > 0 ) { fb.append( ST.spaces( spacesBeforeTailComment ) ); fb.append( tailComment ); } } } } final String theLine = fb.toString(); if ( theLine.length() == 0 ) { if ( !prevLineEmpty ) { sb.append( lineSeparator ); prevLineEmpty = true; } } else { sb.append( theLine ); sb.append( lineSeparator ); prevLineEmpty = false; } } }// /method /** * debugging dump system state * * @param theChar char we are processing * @param category category of the char * @param first did we just enter this state * @param oldState old state * @param state current state * @param newState next state * @param how do we consume, forward or discard this character. */ private static void dumpState( char theChar, MASMCharCategory category, boolean first, MASMState oldState, MASMState state, MASMState newState, HowToProcess how ) { if ( how == HowToProcess.CONSUME ) { /* use slightly abbreviated form, black */ out.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState ); } else {/* in red */ err.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState + " " + how ); } }// /method /** * Is a given string isComing up in the stream starting with this character. Compares ignoring case. * * @return true if this string isComing up, case-insensitive */ private static boolean isComing() { // check if there are enough characters left for a match. return charIndex + "comment ".length() < size && program.substring( charIndex, charIndex + "comment ".length() ) .equalsIgnoreCase( "comment " ); } /** * clear out the state machine ready to parse a new program */ private static void reset() { accumulatedLabel.setLength( 0 ); accumulatedOperator.setLength( 0 ); accumulatedOperand.setLength( 0 ); accumulatedTailComment.setLength( 0 ); accumulatedLineComment.setLength( 0 ); accumulatedMultilineComment.setLength( 0 ); how = null; charIndex = 0; // keeping certain variables local or global is crucial. // Don't mess with them without thinking carefully and // changing the docs: // local: category, first, oldState, state, theChar // global: charIndex, how // we make these local to discourage accidental snooping or // accidentally picking up the static version instead of the parm. }// /method /** * Default Consume one character. It has been predecided that you can and will consume it. * * @param c char to consume */ abstract void consume( char c );// /method /** * default what to do on leaving state, after last char is consumed. */ abstract void leaving();// /method /** * default next method determines the next state based on current state, and next char * * @param category class of next character * @param nextChar next character to process * @param first true if we are just entering this state. * * @return next JavaState * D o n o t m a k e p r i v a t e ! ! ! */ @SuppressWarnings( { "WeakerAccess" } ) abstract MASMState next( MASMCharCategory category, char nextChar, boolean first );// /method /** * Parse program and leave a list of Tokens in tokens ArrayList. * * @param program the text we are going parse and eventually render. Its loop calls next on oldstate, then * optionally consume on the old state, State whose next made the consume decision consumes it own * char. then optionally leaving on the old state * * @return the aligned program */ @SuppressWarnings( { "UnusedAssignment" } ) public static String align( String program ) { reset(); MASMState.program = program; size = program.length(); /* * keeping certain variables local or global is crucial. Don't mess with * them without thinking carefully and changing the docs: LOCAL: * category, first, oldState, state, theChar. GLOBALl: charIndex, how we * make these local to discourage accidental snooping or accidentally * picking up the static version instead of the parm. */ // where we were MASMState oldState = AT_FIRST_COLUMN; // where we are MASMState state = AT_FIRST_COLUMN; // were we will be next MASMState newState; // how is global however, so next can return both a state and // how. how = null; sb = new java.lang.StringBuilder( size * 2 ); // Note, NO int charIndex !! Don't "repair that". // charIndex is a static variable globally known so "isComing" can // use // it. for ( charIndex = 0; charIndex < size; charIndex++ ) { // next char to process char theChar = program.charAt( charIndex ); // decide which general category the char falls in final MASMCharCategory category = MASMCharCategory.categorise( theChar ); if ( category != MASMCharCategory.IGNORE ) { /* * keep going till some state consumes/discards the character. * Allow up to three forwarding attempts to deal with the * character. Usually we should succeed on the first or second * attempt. We always make at least on trip through */ attempts: for ( int times = 0; times < 3; times++ ) { // first is deliberately local final boolean first = state != oldState; /* * crank the state machine one cycle, State should modify * how in addition to returning the new state. A little ugly * but simplest way to return a pair of values: state and * how. */ how = null; /* * setting to null ensures not setting it will be caught. */ /* * This is the guts of the finite state automaton decide the * next state */ // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv newState = state.next( category, theChar, first ); // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ if ( DEBUGGING ) { dumpState( theChar, category, first, oldState, state, newState, how ); } // kick over to the next generation, // we are now in the newState. We make the // transition // here. oldState = state; state = newState; newState = null; if ( how == null ) { throw new NullPointerException( "MASMState bug: how not set. OldState=" + oldState + " " + "newState=" + state + " next() must not be private." ); } switch ( how ) { case CONSUME: oldState.consume( theChar ); if ( state != oldState ) { oldState.leaving(); } // and on to the next char break attempts; case DISCARD: if ( state != oldState ) { oldState.leaving(); } // and on to the next char break attempts; default: // should never get here assert false : "MASMState failed to set how variable."; break attempts; case FORWARD: assert state != oldState : "MASMState attempted to forward a char to the same state."; oldState.leaving(); /* * we give that character another try with the new * state */ } // end switch /* * we will only ever get here if we are forwarding. The * other case leave the loop early. */ } // end attempts loop // we fall out the bottom and land here no matter what assert how == HowToProcess.CONSUME || how == HowToProcess .DISCARD : "MASMState failed to consume char in three state forwarding attempts."; } // end if ignore } // end for each character if ( state == oldState ) { // force a final wrapup -- e.g. slash star comment without // terminator oldState.leaving(); } final String result = sb.toString(); sb = null; return result; }// /method // /methods }