/* * [ParmParser.java] * * Summary: parse macro parameters. * * Copyright: (c) 2002-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.1 2002-07-13 */ package com.mindprod.htmlmacros.support; import java.util.ArrayList; import static java.lang.System.*; /** * various states of the finite state machine used to parse macro parameters */ enum ParmState { /** * We are on whitespace or = between parms. */ BETWEEN_PARMS, /** * We are in the middle of an unquoted parm. */ IN_UNQUOTED_PARM, /** * We are in the middle of a quoted parm. */ IN_QUOTED_PARM, /** * We are in the middle of a {} parm. */ IN_BRACED_PARM, /** * We are in the middle of a '..' parm. */ IN_TICKED_PARM } /** * parse macro parameters. * * @author Roedy Green, Canadian Mind Products * @version 1.1 2002-07-13 * @since 2002-07-12 */ public final class ParmParser { /** * Estimated number of parms we will find. Used to initialise size of ArrayList so it should be slightly on the high * side. */ private static final int EST_NUMBER_OF_PARMS = 20; /** * Estimated parameter length, used to initialise StringBuilder size for accumulating a parameter. */ private static final int EST_PARM_LENGTH = 30; /** * TEST driver * * @param args not used */ public static void main( String[] args ) { String[] s = parseParms( "flavour=strawberry colour=\""Chestnut" chocolate\" myFile.txt {abc=def} {\"abc\"} " + "\'123 456\' " + "{*.png}" ); for ( String value : s ) { out.println( value ); // prints flavour // strawberry // colour // "Chestnut" chocolate // myFile.txt // abc=def // "abc" // 123 456 // *.png } } /** * parse macro parameters. *

* Parses macro parameters of the form: flavour=strawberry * colour=""Chestnut" chocolate" myFile.txt this={"that"} using a * finite state automaton. Quoted string may not contain any embedded * quotes. * *Using a finite state automaton, parse a group of parameters of * the form: flavour=strawberry colour=""Chestnut" chocolate * myFile.txt into the array of strings: flavour, strawberry, colour, Brown's * chocolate, myFile.txt The pattern of parm delimiters we look for are: xxxx * xxxx xxxxx" "yyyy" 'xxxxx' 'yyyy' {xxx=xx} {yyyy} xxxx=xxxx "xxxxx"="yyyy" * 'xxxxx'='yyyy' {xxxxx}={yyyy} {*.png} or * mixtures. {} string may not contain any embedded }. the "= and {}, * lead/trail space are stripped off each parm. * * @param parms the parameter string to split up into fields. Just one string containing all the parms. * * @return Array of strings, one string for each element, delimited by space/equals. Surrounding quotes are stripped * and embedded "s are turned back into quotes. Parms with embedded " may be surrounded in {} or ' " * instead. There are no possible syntax errors. Every string will parse in some way. */ public static String[] parseParms( String parms ) { // where we accumulate the parm strings ArrayList result = new ArrayList<>( EST_NUMBER_OF_PARMS ); // where we accumulate the characters for a single parm. // Can't be final. StringBuilder sb = null; // internal state of the finite state parsing automaton. ParmState state = ParmState.BETWEEN_PARMS; // This code is tricky using nested switches. // It is legacy from before enums made state machines easier. // Break up string into parms. for ( int i = 0; i < parms.length(); i++ ) { char c = parms.charAt( i ); switch ( c ) { case ' ': case '\t': case '\r': case '\n': // whitespace. switch ( state ) { case BETWEEN_PARMS: // ignore break; case IN_UNQUOTED_PARM: // finished with that parm result.add( sb.toString().trim() ); sb = null; state = ParmState.BETWEEN_PARMS; break; case IN_QUOTED_PARM: case IN_BRACED_PARM: case IN_TICKED_PARM: // convert nls to whitespace. sb.append( ' ' ); break; } // end state switch break; case '=': switch ( state ) { case BETWEEN_PARMS: // ignore break; case IN_UNQUOTED_PARM: // finished with that parm result.add( sb.toString().trim() ); sb = null; state = ParmState.BETWEEN_PARMS; break; case IN_QUOTED_PARM: case IN_BRACED_PARM: case IN_TICKED_PARM: sb.append( c ); break; } // end state switch break; case '\"': // start or end of quoted string switch ( state ) { case BETWEEN_PARMS: sb = new StringBuilder( EST_PARM_LENGTH ); state = ParmState.IN_QUOTED_PARM; break; case IN_UNQUOTED_PARM: // treat as though were space " result.add( sb.toString().trim() ); sb = new StringBuilder( EST_PARM_LENGTH ); state = ParmState.IN_QUOTED_PARM; break; case IN_QUOTED_PARM: // finished with that parm result.add( sb.toString().trim() ); sb = null; state = ParmState.BETWEEN_PARMS; break; case IN_TICKED_PARM: case IN_BRACED_PARM: // it is just an ordinary letter sb.append( c ); break; } // end state switch break; case '\'': // start or end of ticked string switch ( state ) { case BETWEEN_PARMS: sb = new StringBuilder( EST_PARM_LENGTH ); state = ParmState.IN_TICKED_PARM; break; case IN_UNQUOTED_PARM: // treat as though were space ' result.add( sb.toString().trim() ); sb = new StringBuilder( EST_PARM_LENGTH ); state = ParmState.IN_TICKED_PARM; break; case IN_TICKED_PARM: // finished with that parm result.add( sb.toString().trim() ); sb = null; state = ParmState.BETWEEN_PARMS; break; case IN_QUOTED_PARM: case IN_BRACED_PARM: // it is just an ordinary letter sb.append( c ); break; } // end state switch break; case '{': // start of braced string switch ( state ) { case BETWEEN_PARMS: sb = new StringBuilder( EST_PARM_LENGTH ); state = ParmState.IN_BRACED_PARM; break; case IN_UNQUOTED_PARM: // treat as though were space ' result.add( sb.toString().trim() ); sb = new StringBuilder( EST_PARM_LENGTH ); state = ParmState.IN_BRACED_PARM; break; default: case IN_QUOTED_PARM: case IN_BRACED_PARM:// need } not { to terminate case IN_TICKED_PARM: // it is just an ordinary letter sb.append( c ); break; } // end state switch break; case '}': // end of braced string switch ( state ) { case BETWEEN_PARMS: // treat as ordinary char if starts a parm. sb = new StringBuilder( EST_PARM_LENGTH ); sb.append( c ); state = ParmState.IN_UNQUOTED_PARM; break; case IN_UNQUOTED_PARM: case IN_QUOTED_PARM: case IN_TICKED_PARM: // it is just an ordinary letter sb.append( c ); break; case IN_BRACED_PARM: // finished with that parm result.add( sb.toString().trim() ); sb = null; state = ParmState.BETWEEN_PARMS; break; } // end state switch break; default: // e.g. &xxx; \\p{Lower} A-Z 0-9 ordinary punctuation switch ( state ) { case BETWEEN_PARMS: sb = new StringBuilder( EST_PARM_LENGTH ); sb.append( c ); state = ParmState.IN_UNQUOTED_PARM; break; case IN_UNQUOTED_PARM: case IN_QUOTED_PARM: case IN_BRACED_PARM: case IN_TICKED_PARM: // ordinary char sb.append( c ); break; } // end state switch break; } // end switch } // end for // deal with last unterminated parm switch ( state ) { case BETWEEN_PARMS: break; case IN_UNQUOTED_PARM: case IN_QUOTED_PARM: case IN_BRACED_PARM: case IN_TICKED_PARM: // finished with that parm result.add( sb.toString() ); break; } // end state switch // NO LONGER convert any entities in any of the parms return result.toArray( new String[ result.size() ] ); } // end parseParms } // end class ParmParser