/* * [JavaState.java] * * Summary: State machine for JavaTokenizer. * * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 3.2 2009-05-04 now treat Javadoc tags specially. */ package com.mindprod.jprep; import com.mindprod.common18.ST; import com.mindprod.jtokens.CharLiteral; import com.mindprod.jtokens.Gibberish; import com.mindprod.jtokens.ImportantKeyword; import com.mindprod.jtokens.Keyword; import com.mindprod.jtokens.Label; import com.mindprod.jtokens.NL; import com.mindprod.jtokens.Noise; import com.mindprod.jtokens.Operator; import com.mindprod.jtokens.Semicolon; import com.mindprod.jtokens.Space; import com.mindprod.jtokens.Start; import com.mindprod.jtokens.Stop; import com.mindprod.jtokens.StringLiteral; import com.mindprod.jtokens.Token; import com.mindprod.jtokens.WhiteSpace; import com.mindprod.jtokens.java.Annotation; import com.mindprod.jtokens.java.CommentJavadoc; import com.mindprod.jtokens.java.CommentJavadocTag; import com.mindprod.jtokens.java.CommentSlashSlash; import com.mindprod.jtokens.java.CommentSlashStar; import com.mindprod.jtokens.java.Definable; import com.mindprod.jtokens.java.Fence; import com.mindprod.jtokens.java.InterfaceName; import com.mindprod.jtokens.java.JavaClassName; import com.mindprod.jtokens.java.JavaConstant; import com.mindprod.jtokens.java.Method; import com.mindprod.jtokens.java.NumericIndicator; import com.mindprod.jtokens.java.NumericLiteralHigh; import com.mindprod.jtokens.java.NumericLiteralLow; import com.mindprod.jtokens.java.PackageName; import com.mindprod.jtokens.java.Var; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import static 
java.lang.System.*; /** * State machine for JavaTokenizer. * * @author Roedy Green, Canadian Mind Products * @version 3.2 2009-05-04 now treat javadoc tags specially. * @since 2004-05-15 */ // TODO: group digits to right of decimal in threes. // TODO: figure out why making next private stops it from working. @SuppressWarnings( { "NestedAssignment", "ValueOfIncrementOrDecrementUsed", "UnnecessaryContinue", "EnumeratedConstantNamingConvention" } ) public enum JavaState { /** * Someone has forwarded us an EOL. We deal with it and any subsequent EOLs. When we hit something * interesting we * let the default next deal with it. */ @SuppressWarnings( { "WeakerAccess" } ) AT_END_OF_LINE { @SuppressWarnings( { "UnusedParameters" } ) void consume( char c ) { nlCount++; // end consume AT_END_OF_LINE } void leaving() { // no more than 3 NLs in a row. if ( nlCount > 3 ) { nlCount = 3; } // collapse multiple NLs into a single token. if ( nlCount > 0 ) { // ignore trailing spaces spaceCount = 0; addToken( new NL( nlCount ) ); nlCount = 0; } // end leaving AT_END_OF_LINE } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // AT_END_OF_LINE switch ( category ) { case AT: case BACKSLASH: case DIGIT: case DOT: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case QUOTE: case SLASH: case SPACE: case STAR: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case EOL: how = HowToProcess.CONSUME; return AT_END_OF_LINE; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next AT_END_OF_LINE } // end of enum constant AT_END_OF_LINE }, /** * in the middle of processing a string of {}()[] chars each one gets it own token. 
*/ @SuppressWarnings( { "WeakerAccess" } ) IN_FENCE { void consume( char c ) { accumulatedFences.append( c ); // end consume IN_FENCE } void leaving() { String fences = accumulatedFences.toString(); accumulatedFences.setLength( 0 ); // split (( up into separate tokens so can be rendered different // sizes. for ( int i = 0; i < fences.length(); i++ ) { char fence = fences.charAt( i ); // depth not necessarily correct yet addToken( new Fence( fence, 0/* depth */ ) ); } // end leaving IN_FENCE } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_FENCE switch ( category ) { case AT: case BACKSLASH: case DIGIT: case DOT: case EOL: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case QUOTE: case SLASH: case SPACE: case STAR: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case FENCE: // stay how = HowToProcess.CONSUME; return IN_FENCE; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_FENCE } // end of enum constant IN_FENCE }, /** * in name, keyword or identifier. We are overly strict on what constitutes an identifier. */ @SuppressWarnings( { "WeakerAccess" } ) IN_NAME { /** * characters legal in an identifier, case-insensitive */ private static final String LEGAL_CONSTANT_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; /** * Ensure the string contains only legal characters * @param candidate string to TEST to see if it is a Java static * final constant name. 
* @return true if identifier is all upper case and numbers and * _ */ private boolean isNameAConstant( String candidate ) { return ST.isLegal( candidate, LEGAL_CONSTANT_CHARS ); } void consume( char c ) { accumulatedName.append( c ); // end consume IN_NAME } void leaving() { final String name = accumulatedName.toString(); accumulatedName.setLength( 0 ); if ( name.length() > 0 ) { final char firstChar = name.charAt( 0 ); assert name.equals( name.trim() ) : "name not trimmed"; // make a first stab at what sort of identifier it is. if ( keywords.contains( name ) ) { // keyword if ( importantKeywords.contains( name ) ) { addToken( new ImportantKeyword( name ) ); } else { addToken( new Keyword( name ) ); } } else if ( Character.isUpperCase( firstChar ) ) { if ( isNameAConstant( name ) ) { // all upper case name, constant addToken( new JavaConstant( name, false ) ); } else { // start with upper case, class // later analyse to find classDef and interfaceDef addToken( new JavaClassName( name, false ) ); } } else { // might really be a method, will find out later. addToken( new Var( name, false ) ); } } // end if // end leaving IN_NAME } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_NAME switch ( category ) { case AT: case BACKSLASH: case DOT: case EOL: case FENCE: case PUNCTUATION: case QUOTE: case SLASH: case SPACE: case STAR: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case DIGIT: case OTHER:/* allow Unicode names */ case PLAIN: case UNDERSCORE: // stay in name how = HowToProcess.CONSUME; return IN_NAME; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_NAME } }, /** * in numeric literal e.g. 01234 0xabcdef099 1.2 2.4d 1.4f 2.30E-05 2.30E-05f 2.30E-05d 2.30E-4E 2.30E-3D * Any lead - will be treated as an operator. 
*/ @SuppressWarnings( { "WeakerAccess" } ) IN_NUMERIC_LITERAL { /** * characters legal in an identifier, case-insensitive */ private static final String LEGAL_NUMERIC_CHARS = "0123456789xXabcdefABCDEFlL+-._"; void consume( char c ) { accumulatedNumeric.append( c ); // end consume IN_NUMERIC_LITERAL } void leaving() { final String number = accumulatedNumeric.toString(); accumulatedNumeric.setLength( 0 ); // build any tokens. // name will look like 1.0 0xff 007 3.45E-93d 40L analyseNumericLiteral( number ); // end leaving IN_NUMERIC_LITERAL } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_NUMERIC_LITERAL switch ( category ) { case AT: case BACKSLASH: case EOL: case FENCE: case OTHER:/* allow unicode names */ case QUOTE: case SLASH: case SPACE: case STAR: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case DIGIT: case DOT: case UNDERSCORE: how = HowToProcess.CONSUME; return IN_NUMERIC_LITERAL; case PLAIN: case PUNCTUATION: // hex, trail letters, - all ok. if ( LEGAL_NUMERIC_CHARS.indexOf( nextChar ) >= 0 ) { // stay in number, since this char was a valid numeric how = HowToProcess.CONSUME; return IN_NUMERIC_LITERAL; } else { // found end of number how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); } default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_NUMERIC_LITERAL } }, /** * in string or arithmetic operators, including ;, but not fences {}() [] */ @SuppressWarnings( { "WeakerAccess" } ) IN_OPERATOR { void consume( char c ) { accumulatedOperators.append( c ); } void leaving() { String operators = accumulatedOperators.toString(); accumulatedOperators.setLength( 0 ); // treat ; specially. 
int place; while ( ( place = operators.indexOf( ';' ) ) >= 0 ) { // possibly empty String group = operators.substring( 0, place ); addToken( new Operator( group ) ); addToken( new Semicolon() ); operators = operators.substring( place + 1 ); } // end while // deal with whatever is left over in operators addToken( new Operator( operators ) ); } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_OPERATOR switch ( category ) { case DIGIT: case EOL: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case QUOTE: case SPACE: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case AT: case BACKSLASH: case PUNCTUATION: case SLASH: case STAR: // stay how = HowToProcess.CONSUME; return IN_OPERATOR; case DOT: if ( numberComingAfterThisDot() ) { how = HowToProcess.FORWARD; return IN_NUMERIC_LITERAL; } else { how = HowToProcess.CONSUME; return IN_OPERATOR; } default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } }, /** * saw @ now in annotation. */ @SuppressWarnings( { "WeakerAccess" } ) IN_ANNOTATION { void consume( char c ) { accumulatedAnnotation.append( c ); } void leaving() { String annotation = accumulatedAnnotation.toString(); accumulatedAnnotation.setLength( 0 ); addToken( new Annotation( annotation ) ); } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { /* this is pretty crude parsing. We cannot easily tell when annotation ends, so end it after the first word. 
*/ // IN_ANNOTATION switch ( category ) { case BACKSLASH: case DOT: case EOL: case FENCE: case PUNCTUATION: case QUOTE: case SLASH: case SPACE: case STAR: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case AT: case DIGIT: case OTHER: case PLAIN: case UNDERSCORE: // stay how = HowToProcess.CONSUME; return IN_ANNOTATION; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch } }, /** * Someone saw a quote in outside a comment and forwarded it to us. we accumulate the quotation terminated by a * quote. We don't save either lead or trail quote in the buffer to save space. They are regenerated by the * token * as needed. There are two complications, we can hit eol before hitting the end quote, in which case we just * render it verbatim. \quote does not terminate the string */ @SuppressWarnings( { "WeakerAccess" } ) IN_QUOTES { /** * true if "..." are balanced, false if hit EOL too soon. */ private boolean balanced = false; /** * Used by IN_QUOTES to track whether quote preceded by \ */ private boolean prevWasBackslash = false; void consume( char c ) { accumulatedQuotation.append( c ); // end consume IN_QUOTES } void leaving() { String quotation = accumulatedQuotation.toString(); accumulatedQuotation.setLength( 0 ); if ( balanced ) { // add even if 0 length // surrounding quotes generated as needed. not part of quotation addToken( new StringLiteral( quotation ) ); } else { // document had unbalanced " ...", missing trailing ". 
// We treat not as literal, but as an Error addToken( new Gibberish( "\"" + quotation ) ); // note that we have handled this anomaly balanced = true; } // end leaving IN_QUOTES } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_QUOTES switch ( category ) { case BACKSLASH: prevWasBackslash = true; how = HowToProcess.CONSUME; return IN_QUOTES; case AT: case DIGIT: case DOT: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case SLASH: case SPACE: case STAR: case TICK: prevWasBackslash = false; how = HowToProcess.CONSUME; return IN_QUOTES; case EOL: // we had an unbalanced " ...\ then eol // balanced will be false how = HowToProcess.FORWARD; return AT_END_OF_LINE; case QUOTE: if ( first ) { prevWasBackslash = false; balanced = false; how = HowToProcess.DISCARD; return IN_QUOTES; } else if ( prevWasBackslash ) { // treat as an ordinary char prevWasBackslash = false; how = HowToProcess.CONSUME; return IN_QUOTES; } else { // was the final one prevWasBackslash = false; balanced = true; how = HowToProcess.DISCARD; return IN_WHITESPACE;// super would just send us back // here } // break; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_QUOTES } // end of enum constant IN_QUOTES }, /** * seen slash slash, in one line comment... eol Will be terminated by eol slash slash goes in the token * we don't worry about embedded @tags just yet. We deal with them later. */ @SuppressWarnings( { "WeakerAccess" } ) IN_REM_SLASH_SLASH { void consume( char c ) { accumulatedComment.append( c ); // end consume IN_REM_SLASH_SLASH } void leaving() { // simpler than other comments. Can't contain embedded \nl String comment = accumulatedComment.toString(); accumulatedComment.setLength( 0 ); if ( comment.length() > 0 ) { // if there are embedded @tag, we want to split them out as their own tokens. 
// we will lead with long strings of lead, trail or embedded spaces we will optimise them later. addToken( new CommentSlashSlash( comment ) ); } // end leaving IN_REM_SLASH_SLASH } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_REM_SLASH_SLASH switch ( category ) { case AT: case BACKSLASH: case DIGIT: case DOT: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case QUOTE: case SLASH: case SPACE: case STAR: case TICK: how = HowToProcess.CONSUME; return IN_REM_SLASH_SLASH; case EOL: how = HowToProcess.FORWARD; return AT_END_OF_LINE; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_REM_SLASH_SLASH } // end of enum constant IN_REM_SLASH_SLASH }, /** * in slash star or slash star star. We don't leave this state on hitting EOL but just keep on trucking and deal * with the eol tokens later. Finally ended by star slash. We don't deal with running of the end of the program * unbalanced. That will show ups as an error with stuff left un-tokenized. */ @SuppressWarnings( { "WeakerAccess" } ) IN_REM_SLASH_STAR { /** * keep track of whether / was preceded by star to mark end of * comment. 
*/ private boolean prevWasStar = false; void consume( char c ) { accumulatedComment.append( c ); // end consume IN_REM_SLASH_STAR } void leaving() { // break into several tokens if contains \n String comments = accumulatedComment.toString(); accumulatedComment.setLength( 0 ); boolean javaDoc = comments.startsWith( "/**" ); int place; Token token; while ( ( place = comments.indexOf( '\n' ) ) >= 0 ) { String comment = comments.substring( 0, place ); if ( javaDoc ) { // special add that splits out @tags, adds CommentJavadoc or commentJavadocTag addJavadocToken( comment ); } else { addToken( new CommentSlashStar( comment ) ); } addToken( new NL() ); comments = comments.substring( place + 1 ); } // end while // deal with whatever is left over in comments if ( comments.length() > 0 ) { if ( javaDoc ) { token = new CommentJavadoc( comments ); } else { token = new CommentSlashStar( comments ); } addToken( token ); } // end leaving IN_REM_SLASH_STAR } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_REM_SLASH_STAR switch ( category ) { case AT: case BACKSLASH: case DIGIT: case DOT: case EOL: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case QUOTE: case SPACE: case TICK: prevWasStar = false; how = HowToProcess.CONSUME; return IN_REM_SLASH_STAR; case SLASH: if ( first ) { // leading slash at begin of slash star or slash star star prevWasStar = false; how = HowToProcess.CONSUME; return IN_REM_SLASH_STAR; } else if ( prevWasStar ) { // hit star slash end marker of comment how = HowToProcess.CONSUME; return IN_WHITESPACE;// super would just send us back // here } else { // just incidental / how = HowToProcess.CONSUME; return IN_REM_SLASH_STAR; } // break; case STAR: prevWasStar = true; how = HowToProcess.CONSUME; return IN_REM_SLASH_STAR; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_REM_SLASH_STAR } // end of 
enum constant IN_REM_SLASH_STAR }, /** * Someone saw a ' in outside a comment and forwarded it to us. we accumulate the char literal terminated by a * '. We don't put either lead or trail ' in the buffer to save space. They are regenerated by the token as * needed. There are two complications, we can hit eol before hitting the end ', in which case we just render it * verbatim. \' does not terminate the literal */ @SuppressWarnings( { "WeakerAccess" } ) IN_TICKS { /** * true if 'x' are balanced, false if hit EOL too soon. */ private boolean balanced = false; /** * Used by IN_TICKS to track whether ' preceded by \ */ private boolean prevWasBackslash = false; void consume( char c ) { accumulatedQuotation.append( c ); // end consume } void leaving() { String quotation = accumulatedQuotation.toString(); accumulatedQuotation.setLength( 0 ); if ( balanced ) { // add even if 0 length // surrounding quotes generated as needed. not part of quotation addToken( new CharLiteral( quotation ) ); } else { // document had unbalanced " ...", missing trailing ". // We treat not as literal, but as an error. 
addToken( new Gibberish( "\'" + quotation ) ); // note that we have handled this anomaly balanced = true; } // end leaving IN_TICKS } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_TICKS switch ( category ) { case AT: case DIGIT: case DOT: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case QUOTE: case SLASH: case SPACE: case STAR: prevWasBackslash = false; how = HowToProcess.CONSUME; return IN_TICKS; case BACKSLASH: prevWasBackslash = true; how = HowToProcess.CONSUME; return IN_TICKS; case EOL: // we had an unbalanced '\ then eol // balanced will be false how = HowToProcess.FORWARD; return AT_END_OF_LINE; case TICK: if ( first ) { prevWasBackslash = false; balanced = false; how = HowToProcess.DISCARD; return IN_TICKS; } else if ( prevWasBackslash ) { // treat as an ordinary char prevWasBackslash = false; how = HowToProcess.CONSUME; return IN_TICKS; } else { // was the final one prevWasBackslash = false; balanced = true; how = HowToProcess.DISCARD; return IN_WHITESPACE;// super would just send us back // here } // break; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_TICKS } // end of enum constant IN_TICKS }, /** * in white space, but not EOL. 
*/ @SuppressWarnings( { "WeakerAccess" } ) IN_WHITESPACE { @SuppressWarnings( { "UnusedParameters" } ) void consume( char c ) { spaceCount++; // end consume IN_WHITESPACE } void leaving() { // collapse multiple spaces into a single token if ( spaceCount > 0 ) { addToken( new Space( spaceCount ) ); spaceCount = 0; } // end leaving IN_WHITESPACE } @SuppressWarnings( { "UnusedDeclaration" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { // IN_WHITESPACE switch ( category ) { case AT: case BACKSLASH: case DIGIT: case DOT: case EOL: case FENCE: case OTHER: case PLAIN: case UNDERSCORE: case PUNCTUATION: case QUOTE: case SLASH: case STAR: case TICK: how = HowToProcess.FORWARD; return super.next( category, nextChar, first ); case SPACE: how = HowToProcess.CONSUME; return IN_WHITESPACE; default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch // end next IN_WHITESPACE } // end of enum constant IN_WHITESPACE }; // ////////////////////////////////////////////////////////////////// // common to all enum constants // declarations /** * true if want extra debugging checks and output */ private static final boolean DEBUGGING = false; /** * list of tokens we have parsed out. */ private static final ArrayList tokens = new ArrayList<>( 50000 ); /** * all legal important keywords */ private static final HashSet importantKeywords = new HashSet<>( Arrays.asList( "break", "class", "continue", "for", "interface", "package", "return", "while" ) ); /** * complete list of Java keywords, aka reserved words, includes the important keywords too. 
*/
    private static final HashSet<String> keywords =
            new HashSet<>( Arrays.asList( "abstract",
                    "assert",
                    "boolean",
                    "break",
                    "byte",
                    "case",
                    "catch",
                    "char",
                    "class",
                    "const",
                    "continue",
                    "default",
                    "do",
                    "double",
                    "else",
                    "enum",
                    "extends",
                    "false",
                    "final",
                    "finally",
                    "float",
                    "for",
                    "goto",
                    "if",
                    "implements",
                    "import",
                    "instanceof",
                    "int",
                    "interface",
                    "long",
                    "native",
                    "new",
                    "null",
                    "package",
                    "private",
                    "protected",
                    "public",
                    "return",
                    "short",
                    "static",
                    "strictfp",
                    "super",
                    "switch",
                    "synchronized",
                    "this",
                    "throw",
                    "throws",
                    "transient",
                    "true",
                    "try",
                    "void",
                    "volatile",
                    "while" ) );

    /**
     * list of java keywords used to define primitive variables.
     */
    private static final HashSet<String> primitives =
            new HashSet<>( Arrays.asList( "boolean",
                    "byte",
                    "char",
                    "double",
                    "float",
                    "int",
                    "long",
                    "short" ) );

    /**
     * regex to detect a decimal literal. This will also match octal patterns, so octals must be
     * filtered out first. Matches int, long, float/double without exponent.
     * Groups: sign, digits, type suffix.
     */
    private static final Pattern decimalPattern = Pattern
            .compile( "([-]?)([_\\d\\.]+)([dDfFlL]?)" );

    /**
     * regex to detect a float or double literal with exponent. The exponent may carry
     * either sign, e.g. 2.30E-05 or 1e+5.
     * Groups: sign, mantissa, exponent marker, exponent sign, exponent digits with suffix.
     */
    private static final Pattern floatPattern = Pattern
            .compile( "([-]?)([_\\d\\.]+)([eE])([-+]?)(\\d+[dDfF]?)" );

    /**
     * regex to detect a hex literal e.g. -0xffffL. The x may be either case.
     * Groups: sign with 0x prefix, hex digits, long suffix.
     */
    private static final Pattern hexPattern = Pattern
            .compile( "([-]?0[xX])([_\\p{XDigit}]+)([lL]?)" );

    /**
     * recognise a legit Javadoc tag. The tag name must end at a word boundary, so tags at the
     * very end of a line or directly before a closing brace are recognised too.
     * Group 1 is the tag name without the lead @.
     */
    private static final Pattern JAVADOCTAG_RECOGNISER = Pattern.compile(
            "@(author|deprecated|inheritDoc|override|param|return|see|serial|serialData|serialField|since|throws"
            + "|version)\\b" );

    /**
     * regex to detect an octal literal e.g. -07777L
     * Groups: sign with lead 0, octal digits, long suffix.
     */
    private static final Pattern octalPattern = Pattern
            .compile( "([-]?0)([0-7]++)([lL]?)" );

    /**
     * accumulates strings forming an annotation
     */
    private static final StringBuilder accumulatedAnnotation = new StringBuilder( 10 );

    // statics are shared common to all enum constants.
// others are a separate field in each enum constant. /** * accumulates char ' ' strings. */ private static final StringBuilder accumulatedCharLiteral = new StringBuilder( 5 ); /** * accumulates any of the various types of comment */ private static final StringBuilder accumulatedComment = new StringBuilder( 80 ); /** * accumulates a string of fence chars e.g. (){}[] */ private static final StringBuilder accumulatedFences = new StringBuilder( 10 ); /** * accumulate names of classes, variable, methods. */ private static final StringBuilder accumulatedName = new StringBuilder( 50 ); /** * accumulate number, hex, decimal, scientific, float, long */ private static final StringBuilder accumulatedNumeric = new StringBuilder( 50 ); /** * accumulates strings of operators */ private static final StringBuilder accumulatedOperators = new StringBuilder( 10 ); /** * accumulates things inside quotes. */ private static final StringBuilder accumulatedQuotation = new StringBuilder( 80 ); /** * how far we are through parsing the program */ private static int charIndex; /** * how the next next character will be treated, usually consumed and stuffed in a buffer, or forwarded to the next * state to deal with. */ private static HowToProcess how; /** * count of how many new lines encountered, shared by several states */ private static int nlCount; /** * the program or program fragment we are parsing */ private static String program; /** * the length of the program fragment we are parsing. */ private static int size; /** * count of how many spaces encountered. */ private static int spaceCount; // declarations // methods /** * create tokens for for a JavaDoc Comment. * * @param comment comment text. no \n, but may contain embedded @tags. */ private static void addJavadocToken( String comment ) { // if there are embedded @tag, we want to split them out as their own tokens. // we will lead with long strings of lead, trail or embedded spaces we will optimise them later. 
// We loop to process any embedded tag tokens in that comment. while ( comment.length() > 0 ) { int atPlace = comment.indexOf( '@' ); if ( atPlace < 0 ) { // no more tags, add rest as ordinary javadoc addToken( new CommentJavadoc( comment ) ); return; } else if ( atPlace > 0 ) { // add comment prior to @ addToken( new CommentJavadoc( comment.substring( 0, atPlace ) ) ); comment = comment.substring( atPlace ); } else if ( atPlace == 0 ) { // remaining comment starts with @ Matcher m = JAVADOCTAG_RECOGNISER.matcher( comment ); if ( m.lookingAt() ) { // was a legit tag, treat it specially String tag = m.group( 1 ); addToken( new CommentJavadocTag( '@' + tag ) ); comment = comment.substring( tag.length() + 1 ); } else { // no match, just a stray at. Treat it as an ordinary comment char, later will be coalesced.. addToken( new CommentJavadoc( "@" ) ); comment = comment.substring( 1 ); } } // end time around te loop the comment gets a little shorter. } // end loop }// /method /** * add a token to the end of the list to be rendered. * * @param t a token. Useless tokens will be not be added. */ private static void addToken( Token t ) { // check out token for validity if ( !t.isUseless() ) { tokens.add( t ); } }// /method /** * analyse string composing a numeric literal and break it into pieces and create the tokens. * * @param name String representing the literal. Will not have lead -, but could be float, hex, octal, decimal, * with decimal point... */ private static void analyseNumericLiteral( String name ) { if ( DEBUGGING ) { out.println( name ); } Matcher m; if ( ( m = hexPattern.matcher( name ) ).matches() ) { assert m.groupCount() == 3 : "hex literal pattern matcher failure"; makeNumericLiterals( m.group( 1 ), m.group( 2 ), 4/* group by 4s */, m.group( 3 ), 16 ); // group 0 is the whole pattern matched, // loops runs from from 0 to gc, not 0 to gc-1 as is // traditional. 
} else if ( ( m = octalPattern.matcher( name ) ).matches() ) { assert m.groupCount() == 3 : "octal literal pattern matcher failure"; makeNumericLiterals( m.group( 1 ), m.group( 2 ), 3, m.group( 3 ), 8 ); } else if ( ( m = floatPattern.matcher( name ) ).matches() ) { assert m.groupCount() == 5 : "float literal pattern matcher failure"; makeNumericLiterals( m.group( 1 ), m.group( 2 ), 3, m.group( 3 ) + m.group( 4 ) + m.group( 5 ), 10 ); } else if ( ( m = decimalPattern.matcher( name ) ).matches() ) { assert m.groupCount() == 3 : "decimal literal pattern matcher failure"; makeNumericLiterals( m.group( 1 ), m.group( 2 ), 3, m.group( 3 ), 10 ); } else { // got garbage, but we have to render it anyway. addToken( new Gibberish( name ) ); } }// /method /** * calculate depth of {} () and [] */ private static void calcNestingDepths() { int braceDepth = 0; int parenDepth = 0; int bracketDepth = 0; for ( Token t : tokens ) { if ( !( t instanceof Fence ) ) { continue; } Fence b = ( Fence ) t; char c = b.getChar(); switch ( c ) { case '{': b.setNestingDepth( ++braceDepth ); break; case '}': b.setNestingDepth( braceDepth-- ); break; case '(': b.setNestingDepth( ++parenDepth ); break; case ')': b.setNestingDepth( parenDepth-- ); break; case '[': b.setNestingDepth( ++bracketDepth ); break; case ']': b.setNestingDepth( bracketDepth-- ); break; default: throw new IllegalStateException( "JavaTokenizer.calcNestingDepths: invalid fence character" ); } } // end for }// /method /** * Replace junk chars with something that won't cause trouble * * @param category category of this char * @param c the char * * @return c if char is clean, a replacement if it were dirty e.g. tab */ private static char clean( JavaCharCategory category, char c ) { if ( category == JavaCharCategory.SPACE ) { return ' '; } else { return c; } }// /method /** * crunch multiple tokens into a single token where feasible. 
*/ private static void crunch() { int size; do { size = tokens.size(); for ( int i = size - 1; i >= 1; i-- ) { Token current = tokens.get( i ); Token prev = tokens.get( i - 1 ); if ( current instanceof NL && prev instanceof Space ) { // trim trailing blanks on line tokens.remove( i - 1 ); // don't i--. same NL token will be repeatedly compared // against prev. } else if ( current instanceof Space ) { Space sp = ( Space ) ( current ); if ( sp.length() <= 10 && prev.isCollapsible() ) { // combine space into previous token prev.setText( prev.getText() + sp.getText() ); tokens.remove( i ); // don't i--, combined token will be compared against // its predecessor } } else if ( prev.isCollapsible( current ) ) { // combine two tokens into one prev.setText( prev.getText() + current.getText() ); tokens.remove( i ); // don't i--. combined token will be compared with its // predecessor } } // end for // keep going while it is still finding something to crunch } while ( tokens.size() < size ); }// /method /** * debugging dump system state * * @param theChar char we are processing * @param category category of the char * @param first did we just enter this state * @param oldState old state * @param state current state * @param newState next state * @param how do we consume, forward or discard this character. 
*/ private static void dumpState( char theChar, JavaCharCategory category, boolean first, JavaState oldState, JavaState state, JavaState newState, HowToProcess how ) { if ( how == HowToProcess.CONSUME ) { /* use slightly abbreviated form, black */ out.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState ); } else {/* in red */ err.println( theChar + " " + category + " " + first + " o:" + oldState + " s:" + state + " n:" + newState + " " + how ); } }// /method /** * Make sure nothing there is left accumulated in buffers from parsing not yet converted to tokens */ private static void ensureNoLeftovers() { assert accumulatedCharLiteral.length() == 0 : "CharLiteral residual : " + accumulatedCharLiteral.toString(); accumulatedCharLiteral.setLength( 0 ); assert accumulatedComment.length() == 0 : "Comment residual : " + accumulatedComment.toString(); accumulatedComment.setLength( 0 ); assert accumulatedFences.length() == 0 : "Fences residual : " + accumulatedFences.toString(); accumulatedFences.setLength( 0 ); assert accumulatedName.length() == 0 : "Name residual : " + accumulatedName.toString(); accumulatedName.setLength( 0 ); assert accumulatedNumeric.length() == 0 : "Numeric residual : " + accumulatedNumeric.toString(); accumulatedNumeric.setLength( 0 ); assert accumulatedOperators.length() == 0 : "Operators residual : " + accumulatedOperators.toString(); accumulatedOperators.setLength( 0 ); assert accumulatedQuotation.length() == 0 : "Quotation residual : " + accumulatedQuotation.toString(); accumulatedQuotation.setLength( 0 ); }// /method /** * find places where classes are defined, after the keyword class. 
*/ private static void findClassDefs() { // class defs in is form final xxxx boolean transform = false; for ( Token t : tokens ) { if ( t instanceof Keyword && t.getTrimmedText().equals( "class" ) ) { transform = true; } else if ( transform && t instanceof JavaClassName ) { // transform from class ref to class Definition. ( ( Definable ) t ).setDefining( true ); transform = false; } else if ( transform && t instanceof WhiteSpace ) { // ignore } else { // hit something else transform = false; } } }// /method /** * find JavaConstant defs, indicated by preceding primitive or Class */ private static void findConstantDefs() { boolean transform = false; for ( Token t : tokens ) { if ( t instanceof Keyword && primitives.contains( t.getTrimmedText() ) ) { transform = true; } else if ( t instanceof JavaClassName ) { transform = true; } else if ( t instanceof JavaConstant ) { if ( transform ) { // was JavaConstant reference, but really is a JavaConstant // definition. ( ( Definable ) t ).setDefining( true ); transform = false; } } else if ( transform && t instanceof WhiteSpace ) { // ignore } else {// hit something else. That was not a JavaConstant. transform = false; } } // end for }// /method /** * find Constructor defs, indicated by class Ref not preceded by new, and followed by ( Just another type of final * class def. */ private static void findConstructorDefs() { Definable prev = null; boolean seenNew = false; boolean seenClass = false; for ( Token t : tokens ) { // ignore all flavours of whitespace if ( t instanceof WhiteSpace ) { continue; } else if ( t instanceof Keyword && t.getTrimmedText().equals( "new" ) ) { seenNew = true; seenClass = false; continue; } else if ( t instanceof JavaClassName ) { if ( !seenNew ) { prev = ( Definable ) t; seenClass = true; continue; } } else if ( t instanceof Fence && t.getTrimmedText().equals( "(" ) ) { if ( seenClass ) { // replace previous class with a Def. 
prev.setDefining( true ); } } // else saw something else // default fall through for anything without continue. seenNew = false; seenClass = false; } // end for }// /method /** * find places where interfaces are defined, after the keyword interface. */ private static void findInterfaceDefs() { // Interface defs in is form interface xxxx int size = tokens.size(); boolean transform = false; for ( int i = 0; i < size; i++ ) { Token t = tokens.get( i ); if ( t instanceof Keyword && t.getTrimmedText().equals( "interface" ) ) { transform = true; } else if ( transform && t instanceof JavaClassName ) { // transform from JavaClassName ref to Interface Definition. tokens.set( i, new InterfaceName( t.getText(), true ) ); transform = false; } else if ( transform && t instanceof WhiteSpace ) { // ignore } else { // hit something else transform = false; } } }// /method /** * find places where interfaces are used, after the keyword implements. Are others, but we can't find them. */ private static void findInterfaceRefs() { // interface refs in is form implements xxxx , xxxx ; int size = tokens.size(); boolean transform = false; for ( int i = 0; i < size; i++ ) { Token t = tokens.get( i ); if ( t instanceof Keyword && t.getTrimmedText().equals( "implements" ) ) { transform = true; } else if ( transform && t instanceof JavaClassName ) { // transform from JavaClassName ref to Interface ref. tokens.set( i, new InterfaceName( t.getText(), false ) ); // keep transforming. 
} else if ( transform && t instanceof WhiteSpace ) { // ignore } else if ( transform && t instanceof Operator && t.getTrimmedText() .equals( "," ) ) { // ignore } else { // hit something else transform = false; } } }// /method /** * find labels, indicated by preceding ; or { then a var then a colon */ private static void findLabels() { boolean transform = false; for ( int i = 0; i < tokens.size(); i++ ) { final Token t = tokens.get( i ); if ( t instanceof Fence && t.getTrimmedText().equals( "{" ) ) { transform = true; } else if ( t instanceof Semicolon ) { transform = true; } else if ( transform && t instanceof Noise ) { // ignore } else if ( t instanceof Var ) { if ( transform ) { if ( i + 1 < tokens.size() ) { final Token next = tokens.get( i + 1 ); if ( next instanceof Operator && next.getTrimmedText().equals( ":" ) ) { // convert from var to label tokens.set( i, new Label( t.getText() ) ); } } transform = false; } } else {// hit something else. This is not a label pattern transform = false; } } // end for }// /method /** * find method defs, indicated by preceding primitive or Class */ private static void findMethodDefs() { boolean transform = false; for ( Token t : tokens ) { final String word = t.getTrimmedText(); if ( t instanceof Keyword && ( word.equals( "void" ) || primitives.contains( word ) ) ) { transform = true; } else if ( t instanceof JavaClassName ) { transform = true; } else if ( transform && t instanceof Fence && ST.isLegal( t.getTrimmedText(), "[ ]" ) ) { /* leave transform set the way it was */ } else if ( transform && t instanceof Noise ) { // ignore } else if ( t instanceof Method ) { if ( transform ) { // this was a Method ref, but really is a Method definition. ( ( Definable ) t ).setDefining( true ); transform = false; } } else {// hit something else. That was not a var. 
transform = false; } } // end for }// /method /** * find methods, indicated by following ( */ private static void findMethodRefs() { int size = tokens.size(); boolean transform = false; int prev = 0; for ( int i = 0; i < size; i++ ) { Token t = tokens.get( i ); if ( t instanceof Var ) { prev = i; transform = true; } else if ( transform && t instanceof Fence && t.getTrimmedText().equals( "(" ) ) { // transform previous var, was really a method name tokens.set( prev, new Method( tokens.get( prev ) .getText(), false/* ref */ ) ); transform = false; } else if ( transform && t instanceof Noise ) { // ignore } else {// hit something else. That was not a var transform = false; } } // end for }// /method /** * find package definitions after keyword package */ private static void findPackageDefs() { // package defs in is form package xxxx . xxxx . xxxx ; int size = tokens.size(); boolean transform = false; for ( int i = 0; i < size; i++ ) { Token t = tokens.get( i ); if ( t instanceof Keyword && t.getTrimmedText().equals( "package" ) ) { transform = true; } else if ( transform && t instanceof Var ) { // transform from var to Package Definition. tokens.set( i, new PackageName( t.getText(), true ) ); // keep transforming } else if ( transform && t instanceof Operator && t.getTrimmedText().equals( "." ) ) { } else if ( transform && t instanceof Noise ) { // ignore } else {/* hit something else, usually ; */ transform = false; } } }// /method /** * find package names after keyword import */ private static void findPackageRefs() { // package defs in is form package xxxx . xxxx . xxxx.* ; int size = tokens.size(); boolean transform = false; for ( int i = 0; i < size; i++ ) { Token t = tokens.get( i ); if ( t instanceof Keyword && t.getTrimmedText().equals( "import" ) ) { transform = true; } else if ( transform && t instanceof Var ) { // transform from Var to Package Ref. tokens.set( i, new PackageName( t.getText(), false ) ); // keep transforming. 
} else if ( transform && t instanceof Noise ) { // ignore } else if ( transform && ( t instanceof Operator && t.getTrimmedText().equals( "." ) || t.getTrimmedText() .equals( "*" ) ) ) { } else { /* hit something else usually ; */ transform = false; } } }// /method /** * find var defs, indicated by preceding primitive or Class, transformVar int VarDef by setting defining true. */ private static void findVarDefs() { boolean transform = false; for ( Token t : tokens ) { if ( t instanceof Keyword && primitives.contains( t.getTrimmedText() ) ) { transform = true; } else if ( t instanceof JavaClassName ) { transform = true; } else if ( transform && t instanceof Fence && ST.isLegal( t.getTrimmedText(), "[ ]" ) ) { /* leave transform set the way it was */ } else if ( transform && t instanceof Noise ) { // ignore } else if ( t instanceof Var ) { if ( transform ) { // this is a Var definition, was a Var ( ( Definable ) t ).setDefining( true ); transform = false; } } else {// hit something else. That was not a Var transform = false; } } // end for }// /method /** * Is a given string isComing up in the stream starting with this character. Compares ignoring case. * * @param expected string to TEST if isComing in the stream * * @return true if this string isComing up, case-insensitive */ private static boolean isComing( String expected ) { // check if there are enough characters left for a match. return charIndex + expected.length() < size && program.substring( charIndex, charIndex + expected.length() ) .equalsIgnoreCase( expected ); }// /method /** * Split numeric literal into groups of three or four, with slightly different colours for head and tail indicator * characters not part of the number proper. * * @param head e.g. 0x for hex or 0 for octal. contains optional sign. * @param body the number e.g. 122345 possibly with embedded decimal point * @param grouping 3 for decimal and octal 4 for hex * @param tail tail indicator letter D F L d f l. 
For scientific notation, tail will have form E-07d * @param base radix of the number to display */ private static void makeNumericLiterals( final String head, final String body, int grouping, final String tail, final int base ) { if ( DEBUGGING ) { out.println( head + ":" + body + ":" + grouping + ":" + tail + ":" + base ); } if ( head != null && head.length() > 0 ) { addToken( new NumericIndicator( head ) ); } if ( body != null && body.length() > 0 ) { if ( body.contains( "_" ) ) { addToken( new NumericLiteralLow( body, base ) ); } // split off part to right of decimal point if any final String left; final String right; int dotPlace = body.lastIndexOf( '.' ); if ( dotPlace >= 0 ) { left = body.substring( 0, dotPlace ); right = body.substring( dotPlace ); } else { left = body; right = null; } int length = left.length(); // turn off grouping if the literal contains an underscore, which already handles it. if ( body.contains( "_" ) ) { addToken( new NumericLiteralLow( body, base ) ); // handle the 999.999 part. if ( tail != null && tail.length() > 0 ) { addToken( new NumericIndicator( tail ) ); } return; } // group in blocks of 3 or 4, with alternating colours. // We work left to right. Figure out if we should start with alt to // end up on ordinary colour for literal. // e.g. 111222 = length 6 = 2 groups, even so start with alt. // 2 = length 1 = 1 group, so start with ordinary, not alt. 
boolean useHigh = ( ( ( length + grouping - 1 ) / grouping ) & 1 ) == 0; // Do the first partial group starting on the left, if grouping not even // multiple int firstGroupSize = length % grouping; if ( firstGroupSize != 0 ) { final String digits = left.substring( 0, firstGroupSize ); if ( useHigh ) { addToken( new NumericLiteralHigh( digits, base ) ); } else { addToken( new NumericLiteralLow( digits, base ) ); } // toggle for next section useHigh = !useHigh; } // do the rest of the groups for ( int i = firstGroupSize; i < length; i += grouping ) { final String digits = left.substring( i, i + grouping ); if ( useHigh ) { addToken( new NumericLiteralHigh( digits, base ) ); } else { addToken( new NumericLiteralLow( digits, base ) ); } useHigh = !useHigh; } // handle stuff from decimal point to the right, digits. if ( right != null ) { addToken( new NumericLiteralLow( right, base ) ); } } // handle and trail L F D l f d if ( tail != null && tail.length() > 0 ) { addToken( new NumericIndicator( tail ) ); } }// /method /** * check if u number is coming up next in the program * * @return true if number follows . we have seen. */ private static boolean numberComingAfterThisDot() { if ( charIndex + 1 >= size ) { return false; } char c = program.charAt( charIndex + 1 ); return ( '0' <= c && c <= '9' ); }// /method /** * clear out the state machine ready to parse a new program */ private static void reset() { nlCount = 0; spaceCount = 0; accumulatedCharLiteral.setLength( 0 ); accumulatedComment.setLength( 0 ); accumulatedFences.setLength( 0 ); accumulatedName.setLength( 0 ); accumulatedOperators.setLength( 0 ); accumulatedQuotation.setLength( 0 ); tokens.clear(); how = null; charIndex = 0; // keeping certain variables local or global is crucial. 
// Don't mess with them without thinking carefully and // changing the docs: // local: category, first, oldState, state, theChar // global: charIndex, how // we make these local to discourage accidental snooping or // accidentally picking up the static version instead of the parm. }// /method /** * get rid of leading and trailing NLs tokens. It is easier to handle it later than during parsing. */ private static void trimNLs() { // remove leading NLs. while ( tokens.size() > 0 && ( tokens.get( 0 ) instanceof NL ) ) { tokens.remove( 0 ); } // remove trailing NLs int count; while ( ( count = tokens.size() ) > 0 && ( tokens.get( count - 1 ) instanceof NL ) ) { tokens.remove( count - 1 ); } // we don't need an NL at either beginning or end // insert at the beginning tokens.add( 0, new Start( "
" ) );
        // add to end
        addToken( new Stop( "
" ) ); }// /method /** * Default Consume one character. It has been predecided that you can and will consume it. * * @param c char to consume */ abstract void consume( char c );// /method /** * default what to do on leaving state, after last char is consumed. */ abstract void leaving();// /method /** * default next method determines the next state based on current state, and next char * * @param category class of next character * @param nextChar next character to process * @param first true if we are just entering this state. * * @return next JavaState * D o n o t m a k e p r i v a t e ! ! ! */ @SuppressWarnings( { "WeakerAccess" } ) JavaState next( JavaCharCategory category, char nextChar, boolean first ) { /* default way to recognise next state */ assert how == HowToProcess.FORWARD : "default next used without forwarding. how: " + how; // default switch ( category ) { case AT: return IN_ANNOTATION; case BACKSLASH: case PUNCTUATION: return IN_OPERATOR; case DIGIT: return IN_NUMERIC_LITERAL; case DOT: if ( numberComingAfterThisDot() ) { return IN_NUMERIC_LITERAL; } else { return IN_OPERATOR; } case EOL: return AT_END_OF_LINE; case FENCE: return IN_FENCE; case OTHER: case PLAIN: case UNDERSCORE: // lead _ is not treated as number return IN_NAME; case QUOTE: return IN_QUOTES; case SLASH: if ( isComing( "//" ) ) { return IN_REM_SLASH_SLASH; } else if ( isComing( "/*" ) ) { return IN_REM_SLASH_STAR;// includes /** } else { return IN_OPERATOR; } case STAR: return IN_OPERATOR; case SPACE: return IN_WHITESPACE; case TICK: return IN_TICKS; case IGNORE:// should never get this far default: assert false : "bad state " + category + " " + nextChar; return null; } // end switch }// /method /** * Parse program and leave a list of Tokens in tokens ArrayList. * * @param program the text we are going parse and eventually render. Its loop calls next on oldstate, then * optionally consume on the old state, State whose next made the consume decision consumes it own * char. 
then optionally leaving on the old state
     *
     * @return an array of tokens representing the text and how it will be rendered.
     */
    @SuppressWarnings( { "UnusedAssignment" } )
    public static Token[] parse( String program ) {
        reset();
        JavaState.program = program;
        size = program.length();
        /*
         * keeping certain variables local or global is crucial. Don't mess with
         * them without thinking carefully and changing the docs: LOCAL:
         * category, first, oldState, state, theChar. GLOBAL: charIndex, how.
         * We make the others local to discourage accidental snooping or accidentally
         * picking up the static version instead of the parm.
         */
        // where we were
        JavaState oldState = AT_END_OF_LINE;
        // where we are
        JavaState state = AT_END_OF_LINE;
        // how is global, so next can return both a state and how.
        how = null;
        // Note, NO int charIndex !! Don't "repair" that.
        // charIndex is a static variable so isComing can use it.
        for ( charIndex = 0; charIndex < size; charIndex++ ) {
            // decide which general category the next char falls in, then clean it
            final char rawChar = program.charAt( charIndex );
            final JavaCharCategory category = JavaCharCategory.categorise( rawChar );
            final char theChar = clean( category, rawChar );
            if ( category == JavaCharCategory.IGNORE ) {
                // ignorable chars never reach the state machine
                continue;
            }
            /*
             * keep going till some state consumes/discards the character.
             * Allow up to three forwarding attempts to deal with the
             * character. We always make at least one trip through.
             */
            for ( int times = 0; times < 3; times++ ) {
                // first is deliberately local
                final boolean first = state != oldState;
                /*
                 * crank the state machine one cycle. next must set how in
                 * addition to returning the new state. Nulling how first
                 * ensures that not setting it will be caught.
                 */
                how = null;
                final JavaState newState = state.next( category, theChar, first );
                if ( DEBUGGING ) {
                    dumpState( theChar, category, first, oldState, state, newState, how );
                }
                // kick over to the next generation; we are now in the new state.
                oldState = state;
                state = newState;
                if ( how == null ) {
                    throw new NullPointerException( "JavaState bug: how not set. OldState="
                                                    + oldState
                                                    + " newState="
                                                    + state
                                                    + " next() must not be private." );
                }
                if ( how == HowToProcess.FORWARD ) {
                    assert state != oldState : "JavaTokenizer state machine attempted to forward a char to the same state.";
                    oldState.leaving();
                    // give that character another try with the new state
                    continue;
                }
                if ( how == HowToProcess.CONSUME ) {
                    oldState.consume( theChar );
                    if ( state != oldState ) {
                        oldState.leaving();
                    }
                } else if ( how == HowToProcess.DISCARD ) {
                    if ( state != oldState ) {
                        oldState.leaving();
                    }
                } else {
                    // should never get here
                    assert false : "JavaTokenizer state machine failed to set how variable.";
                }
                // the char is dealt with; on to the next char
                break;
            } // end attempts loop
            // we land here whether the attempts ran out or the char was dealt with
            assert how == HowToProcess.CONSUME || how == HowToProcess.DISCARD : "JavaTokenizer state machine failed to consume char in three state forwarding attempts.";
        } // end for each character
        if ( state == oldState ) {
            // force a final wrapup -- e.g. slash star comment without terminator
            oldState.leaving();
        }
        /*
         * make sure nothing still sitting in accumulation buffer after we have
         * finished parsing the entire program.
         */
        ensureNoLeftovers();
        // tidy up the list of tokens.
        crunch();
        trimNLs();
        // refs first.
        findPackageRefs();
        // findClassRefs(); not needed, already picked out by caps
        findInterfaceRefs();
        // findConstantRefs(); not needed already picked out by caps
        // findVarRefs(); not needed, all identifiers assumed Var to start.
        // findConstructorRefs(), not needed, already treated like class ref
        findMethodRefs();
        // then defs.
        findPackageDefs();
        findClassDefs();
        findInterfaceDefs();
        findConstantDefs();
        findVarDefs();
        findLabels();
        findConstructorDefs();
        findMethodDefs();
        calcNestingDepths();
        // convert to vanilla array for even more efficient use in the final Applet.
        return tokens.toArray( new Token[ tokens.size() ] );
    }// /method
    // /methods
}