/* * [AlignJava.java] * * Summary: Align converts tabs and other control chars to spaces, and aligns columns. * * Copyright: (c) 2002-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 4.5 2009-02-26 add both Java string quoting and plain for Java search/regexes. */ package com.mindprod.quoter; import static com.mindprod.quoter.AlignBlankState.IN_LEADING; import static com.mindprod.quoter.AlignBlankState.IN_MIDDLE; import static com.mindprod.quoter.AlignBlankState.IN_TRAILING; import static java.lang.System.*; /** * Align converts tabs and other control chars to spaces, and aligns columns. *

* Considers Java source code conventions. * * @author Roedy Green, Canadian Mind Products * @version 4.5 2009-02-26 add both Java string quoting and plain for Java search/regexes. * @since 2002-06-19 */ final class AlignJava extends TextProcessor { /** * true if want extra debugging output. */ private static final boolean DEBUGGING = false; /** * maximum number of columns we can handle */ private static final int MAX_COLS = 1024; // how much blank space to put between the columns private static final int padding = 2; /** * widths of the columns */ private final int[] biggestWidth = new int[ MAX_COLS ]; /** * state of finite state automaton that categorises characters. */ private AlignJavaCommentState commentState; /** * where we accumulate the output . */ private StringBuilder cooked; /** * the raw text we are processing. */ private String raw; /** * which field/column we are working on. 0 is first. */ private int colIndex; /** * how many columns there are. */ private int cols; /** * pass=1 when deciding col widths, and pass=2 when outputting. */ private int pass = 1; /** * width of current column. */ private int width; /** * categorise the character, in a context sensitive way, thinking in terms of parsing Java source code. * * @param c char to categorise. * * @return COMMENT, QUOTED, ORDINARY, WHITESPACE, NEWLINE */ private AlignJavaCategory categorise( char c ) /** * accept a character and categorise it. COMMENT -- inside // or /* comment * QUOTED -- inside single or double quote string ORDINARY -- normal code * WHITESPACE -- whitespace in code. Whitespace in comments and quotes * counts as comment or quote. NEWLINE -- newline character. Newline inside * comment counts as comment. Comments require two chars to start them. The * first char will be considered code, and only the second as comment. */ { /** * commentState remembered between calls. We implement this as yet * another finite state automaton. Don't confuse it with blankState used * another finite state automaton. Don't confuse it with blankState used * by doAPass. = OUTSIDE_QUOTES normal C code = INSIDE_QUOTES inside a " " = * INSIDE_SINGLE_QUOTES inside a ' ' = SEEN_QUOTE_BACK just seen "\ = * SEEN_SINGLE_QUOTE_BACK just seen '\ = INSIDE_STAR_COMMENT inside |* *| = * SEEN_SLASH just seen | = SEEN_SLASH_STAR_STAR just seen |* ... * = * INSIDE_SLASH_SLASH inside || */ switch ( c ) { case '\n':/* new line */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen / */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.NEWLINE; } case ' ':/* blanks */ case 0x00:/* all control chars, except =\n */ case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:/* \n */ case 0x08: case 0x09: // not 0x0A: \n case 0x0B: case 0x0C: case 0x0D:/* \r */ case 0x0E: case 0x0F: case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.WHITESPACE; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen / */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.WHITESPACE; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } case '\"':/* double quote */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen / */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } case '\'':/* singlequote */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen / */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } case '*':/* star */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.ORDINARY; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.SEEN_SLASH_STAR_STAR; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen | */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.SEEN_SLASH_STAR_STAR; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } case '\\':/* backslash */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.ORDINARY; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.SEEN_QUOTE_BACK; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.SEEN_SINGLE_QUOTE_BACK; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen | */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.ORDINARY; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } case '/':/* forwardslash */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.SEEN_SLASH; return AlignJavaCategory.ORDINARY;/* * might be comment, but don't * know that yet */ case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen | */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } case ',':/* comma, very similar to default */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.COMMA; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen | */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.COMMA; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.SEEN_SLASH_STAR_STAR; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } default:/* non blank */ switch ( commentState ) { default: case OUTSIDE_QUOTES:/* normal code */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.ORDINARY; case INSIDE_QUOTES:/* inside a " " */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_SINGLE_QUOTES:/* inside a ' ' */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_QUOTE_BACK:/* just seen "\ */ commentState = AlignJavaCommentState.INSIDE_QUOTES; return AlignJavaCategory.QUOTED; case SEEN_SINGLE_QUOTE_BACK:/* just seen '\ */ commentState = AlignJavaCommentState.INSIDE_SINGLE_QUOTES; return AlignJavaCategory.QUOTED; case INSIDE_STAR_COMMENT:/* inside |* *| comment */ commentState = AlignJavaCommentState.INSIDE_STAR_COMMENT; return AlignJavaCategory.COMMENT; case SEEN_SLASH:/* just seen | */ commentState = AlignJavaCommentState.OUTSIDE_QUOTES; return AlignJavaCategory.ORDINARY; case SEEN_SLASH_STAR_STAR:/* just seen |* ... * */ commentState = AlignJavaCommentState.SEEN_SLASH_STAR_STAR; return AlignJavaCategory.COMMENT; case INSIDE_SLASH_SLASH:/* inside || C++ style comment */ commentState = AlignJavaCommentState.INSIDE_SLASH_SLASH; return AlignJavaCategory.COMMENT; } // end switch } // end outer switch(c) } // end categorise /* ==================================== */ /** * Calculate how wide each column is and store it in biggestWidth[colIndex] on pass1. First column in index 0. On * pass2, generate the output by appending to cooked StringBuilder. */ private void doAPass() { /* * blankState - we implement the algorithm as a finite state machine. * Don't confuse it with commentState inside categorise. =BlankState.IN_LEADING * reading leading blanks on a field. =BlankState.IN_MIDDLE reading non-blanks or * quotes in middle of a field. =BlankState.IN_TRAILING reading trailing blanks. * when call endField ----------o---o--o--------o * ____xxxx__xx__,__,__xxx___ ----o-----o---o--o-------- when call * startField */ AlignBlankState blankState = IN_LEADING; commentState = AlignJavaCommentState.OUTSIDE_QUOTES; startLine(); int rawLen = raw.length(); for ( int i = 0; i < rawLen; i++ ) { char c = raw.charAt( i ); switch ( categorise( c ) ) { case WHITESPACE:/* blanks */ switch ( blankState ) { case IN_LEADING: blankState = IN_LEADING; break; case IN_MIDDLE: blankState = IN_TRAILING; break; case IN_TRAILING: blankState = IN_TRAILING; break; } break; case COMMA: switch ( blankState ) {/* comma */ case IN_LEADING: startField();/* null field */ inField( c ); endField(); blankState = IN_LEADING; break; case IN_MIDDLE: inField( c ); endField(); blankState = IN_LEADING; break; case IN_TRAILING: inField( c ); endField(); blankState = IN_LEADING; break; } // end switch break; case ORDINARY: switch ( blankState ) {/* ordinary non-blank */ case IN_LEADING: startField(); inField( c ); blankState = IN_MIDDLE; break; case IN_MIDDLE: inField( c ); blankState = IN_MIDDLE; break; case IN_TRAILING: endField(); startField(); inField( c ); blankState = IN_MIDDLE; break; } break; case COMMENT:/* * treat comments like a non-blank */ case QUOTED:/* something in quoted string */ switch ( blankState ) { case IN_LEADING: startField(); inField( c ); blankState = IN_MIDDLE; break; case IN_MIDDLE: inField( c ); blankState = IN_MIDDLE; break; case IN_TRAILING: endField(); startField(); inField( c ); blankState = IN_MIDDLE; break; } break; case NEWLINE:/* new line */ switch ( blankState ) { case IN_LEADING: break; case IN_MIDDLE: endField(); break; case IN_TRAILING: endField(); break; } endLine(); startLine(); blankState = IN_LEADING; break; } // end switch on char } // end for // nothing special needed to simulate NEWLINE processing // if there isn't one at the very end. // if input is missing terminal \n, so will the output. } // end doAPass /** * Field may have lead and trail spaces on it. We just hit the first space etc. after the last non-blank. */ private void endField() { switch ( pass ) { case 1: break; case 2: /* * pad all but the last column with spaces */ if ( colIndex < ( cols - 1 ) ) { for ( width = biggestWidth[ colIndex ] - width;// how many // chars too // short we are. width > 0; width-- ) { /* * pad column on right with spaces */ cooked.append( ' ' ); } // end for } // end if break; } // end switch } // end endField /* ==================================== */ /* ==================================== */ /* ==================================== */ private void endLine() { switch ( pass ) { case 1: break; case 2: cooked.append( '\n' ); break; } } /** * Field may have lead and trail spaces on it. This is called to process non-space chars in the middle of a field. * * @param c char processing. */ private void inField( char c ) { switch ( pass ) { case 1: if ( ++width > biggestWidth[ colIndex ] ) { biggestWidth[ colIndex ] = width; } break; case 2: ++width; cooked.append( c ); break; } } // end inField /** * Field may have lead and trail spaces on it. We have just hit the first non-blank. */ private void startField() { switch ( pass ) { case 1: width = 0; if ( ++colIndex > ( cols - 1 ) ) { cols = colIndex + 1; } break; case 2: width = 0; ++colIndex; break; } } // end startField /* ==================================== */ /** * We have just started a new line. */ private void startLine() { colIndex = -1; width = 0; } /* ==================================== */ /** * debugging harness. * * @param args not used. * * @noinspection ConstantConditions */ public static void main( String[] args ) { if ( DEBUGGING ) { Align a = new Align(); String test = " abc def 999\n" + " abc def 999\n"; out.println( test ); out.println( a.process( test ) ); test = " abc, def,999\n"; out.println( test ); out.println( a.process( test ) ); test = " \"abc 88\", def,999 0000000000000000 \n" + " abc def 999 /* this is a comment*/ followed\n" + " abc // stuff to do\n" + " abc def 999\n"; out.println( test ); out.println( a.process( test ) ); test = " \"abc ,, 88\" , ,def,999 0000000000000000 \n" + " abc def 999 /* this is a comment*/ followed\n" + " abc // stuff to do\n"; out.println( test ); out.println( a.process( test ) ); test = " abc def ghi\n"; out.println( test ); out.println( a.process( test ) ); test = " abc,def, ghi, hef"; out.println( test ); out.println( a.process( test ) ); } // end if debugging } // end main /** * Aligns text into columns. Columns are delimited by space, tabs, commas or other control characters. Column * breaking rules take into account the Java source code conventions, e.g. char and string literals and comments. * * @param raw input to be aligned. String typically with embedded \ns. May or may not have a terminal \n. * * @return raw input aligned in columns. If raw had a terminal \n, result will too, otherwise it will not. No commas * will be added or removed. */ public String process( String raw ) { if ( raw == null ) { return null; } this.raw = raw; cooked = new StringBuilder( raw.length() * 2 ); /* * calculate how wide each column is and store it in * biggestWidth[colIndex] */ /* zero out column widths */ for ( int i = 0; i < 40; i++ ) { biggestWidth[ i ] = 0; } cols = 0; pass = 1; doAPass(); /* * pad the column widths to put a little space between the columns */ for ( int i = 0; i < cols; i++ ) { biggestWidth[ i ] += padding; } /* * repass the file, this time appending to the cooked StringBuilder */ pass = 2; doAPass(); return cooked.toString(); } // end align }