/*
* [ParmParser.java]
*
* Summary: parse macro parameters.
*
* Copyright: (c) 2002-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.1 2002-07-13
*/
package com.mindprod.htmlmacros.support;
import java.util.ArrayList;
import static java.lang.System.*;
/**
 * States of the finite state machine that {@link ParmParser#parseParms(String)} uses to split a parameter string.
 */
enum ParmState
{
    /**
     * We are on whitespace or = between parms; no parm is being accumulated.
     */
    BETWEEN_PARMS,
    /**
     * We are in the middle of a bare parm, one that did not start with a delimiter.
     */
    IN_UNQUOTED_PARM,
    /**
     * We are in the middle of a "..." parm.
     */
    IN_QUOTED_PARM,
    /**
     * We are in the middle of a {...} parm.
     */
    IN_BRACED_PARM,
    /**
     * We are in the middle of a '...' parm.
     */
    IN_TICKED_PARM
}

/**
 * Parse macro parameters.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.1 2002-07-13
 * @since 2002-07-12
 */
public final class ParmParser
{
    /**
     * Estimated number of parms we will find. Used to initialise size of ArrayList so it should be slightly on the
     * high side.
     */
    private static final int EST_NUMBER_OF_PARMS = 20;

    /**
     * Estimated parameter length, used to initialise StringBuilder size for accumulating a parameter.
     */
    private static final int EST_PARM_LENGTH = 30;

    /**
     * TEST driver: parses a sample parameter string and prints one parm per line.
     *
     * @param args not used
     */
    public static void main( String[] args )
    {
        // The parm containing embedded double quotes is delimited with ticks,
        // so the quotes are kept as ordinary characters.
        final String[] parsed =
                parseParms(
                        "flavour=strawberry colour='\"Chestnut\" chocolate' myFile.txt {abc=def} {\"abc\"} "
                        + "'123 456' "
                        + "{*.png}" );
        for ( String value : parsed )
        {
            System.out.println( value );
        }
        // prints flavour
        // strawberry
        // colour
        // "Chestnut" chocolate
        // myFile.txt
        // abc=def
        // "abc"
        // 123 456
        // *.png
    }

    /**
     * Split a macro parameter string into its individual parms using a finite state automaton.
     *
     * <p>Outside of delimiters, parms are separated by whitespace (space, tab, CR, LF) or by {@code =}, so
     * {@code flavour=strawberry} yields the two parms {@code flavour} and {@code strawberry}. A parm may be bare
     * ({@code myFile.txt}), or enclosed in {@code "..."}, {@code '...'} or <code>{...}</code>. The enclosing
     * delimiters are removed. Mixtures such as <code>'xxx'={yyy}</code> are fine.
     *
     * <p>Inside a delimited parm:
     * <ul>
     * <li>each whitespace character (including a newline) is replaced by one space;</li>
     * <li>{@code =} and the other kinds of delimiter are ordinary characters, so a parm with embedded {@code "} can
     * be written in ticks or braces;</li>
     * <li>the parm ends at the first matching closing delimiter, so a quoted parm cannot contain {@code "}, a ticked
     * parm cannot contain {@code '} and a braced parm cannot contain <code>}</code>.</li>
     * </ul>
     *
     * <p>An opening delimiter met in the middle of a bare parm ends that bare parm and starts a new delimited one.
     * A <code>}</code> outside a braced parm is an ordinary character. Leading and trailing whitespace is trimmed
     * from every parm, including a final parm whose closing delimiter is missing. An empty delimited parm such as
     * {@code ""} yields an empty string. No entity conversion is done on the parms.
     *
     * <p>There are no possible syntax errors. Every string will parse in some way.
     *
     * @param parms the parameter string to split up into fields. Just one string containing all the parms.
     *
     * @return array of strings, one for each parm, in the order they appeared. Empty if there were no parms.
     *
     * @throws NullPointerException if parms is null.
     */
    public static String[] parseParms( String parms )
    {
        // where we accumulate the parm strings
        final ArrayList<String> result = new ArrayList<>( EST_NUMBER_OF_PARMS );
        // where we accumulate the characters for a single parm; null while between parms.
        StringBuilder sb = null;
        // internal state of the finite state parsing automaton.
        ParmState state = ParmState.BETWEEN_PARMS;

        for ( int i = 0; i < parms.length(); i++ )
        {
            final char c = parms.charAt( i );
            switch ( state )
            {
                case BETWEEN_PARMS:
                    if ( !isSeparator( c ) )
                    {
                        // Anything but a separator starts a new parm.
                        sb = new StringBuilder( EST_PARM_LENGTH );
                        state = stateOpenedBy( c );
                        if ( state == ParmState.IN_UNQUOTED_PARM )
                        {
                            // Not a delimiter, so c is the first char of a bare parm.
                            sb.append( c );
                        }
                    }
                    break;

                case IN_UNQUOTED_PARM:
                    if ( isSeparator( c ) )
                    {
                        // finished with that parm
                        result.add( sb.toString().trim() );
                        sb = null;
                        state = ParmState.BETWEEN_PARMS;
                    }
                    else
                    {
                        final ParmState opened = stateOpenedBy( c );
                        if ( opened == ParmState.IN_UNQUOTED_PARM )
                        {
                            // ordinary char, including a stray }
                            sb.append( c );
                        }
                        else
                        {
                            // An opening delimiter acts as though a space preceded it:
                            // finish the bare parm and start a delimited one.
                            result.add( sb.toString().trim() );
                            sb = new StringBuilder( EST_PARM_LENGTH );
                            state = opened;
                        }
                    }
                    break;

                case IN_QUOTED_PARM:
                case IN_TICKED_PARM:
                case IN_BRACED_PARM:
                    if ( c == closerOf( state ) )
                    {
                        // finished with that parm
                        result.add( sb.toString().trim() );
                        sb = null;
                        state = ParmState.BETWEEN_PARMS;
                    }
                    else
                    {
                        // convert nls and other whitespace to a space; everything else is kept verbatim.
                        sb.append( isBlank( c ) ? ' ' : c );
                    }
                    break;
            } // end state switch
        } // end for

        // deal with last unterminated parm, trimmed the same way as every other parm.
        if ( state != ParmState.BETWEEN_PARMS )
        {
            result.add( sb.toString().trim() );
        }
        // NO LONGER convert any entities in any of the parms
        return result.toArray( new String[ result.size() ] );
    } // end parseParms

    /**
     * Is this one of the four characters the parser treats as whitespace?
     *
     * @param c character to test.
     *
     * @return true for space, tab, carriage return or line feed.
     */
    private static boolean isBlank( char c )
    {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    }

    /**
     * Does this character separate parms when it appears outside delimiters?
     *
     * @param c character to test.
     *
     * @return true for whitespace or =.
     */
    private static boolean isSeparator( char c )
    {
        return c == '=' || isBlank( c );
    }

    /**
     * Which kind of parm does this character start?
     *
     * @param c first character of a parm.
     *
     * @return the delimited state for an opening ", ' or {, otherwise IN_UNQUOTED_PARM.
     */
    private static ParmState stateOpenedBy( char c )
    {
        switch ( c )
        {
            case '\"':
                return ParmState.IN_QUOTED_PARM;
            case '\'':
                return ParmState.IN_TICKED_PARM;
            case '{':
                return ParmState.IN_BRACED_PARM;
            default:
                return ParmState.IN_UNQUOTED_PARM;
        }
    }

    /**
     * Which character terminates a delimited parm?
     *
     * @param state one of the three delimited states.
     *
     * @return the closing delimiter for that state.
     *
     * @throws IllegalStateException if state is not a delimited state.
     */
    private static char closerOf( ParmState state )
    {
        switch ( state )
        {
            case IN_QUOTED_PARM:
                return '\"';
            case IN_TICKED_PARM:
                return '\'';
            case IN_BRACED_PARM:
                return '}';
            default:
                throw new IllegalStateException( "not inside a delimited parm: " + state );
        }
    }
} // end class ParmParser