/*
 * [Align.java]
 *
 * Summary: Align converts tabs and other control chars to spaces, and aligns columns.
 *
 * Copyright: (c) 2002-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  4.5 2009-02-26 add both Java string quoting and plain for Java search/regexes.
 */
package com.mindprod.quoter;

import java.util.Arrays;

import static com.mindprod.quoter.AlignBlankState.IN_LEADING;
import static com.mindprod.quoter.AlignBlankState.IN_MIDDLE;
import static com.mindprod.quoter.AlignBlankState.IN_TRAILING;
import static com.mindprod.quoter.AlignCategory.*;
import static com.mindprod.quoter.AlignQuotedState.INSIDE_QUOTES;
import static com.mindprod.quoter.AlignQuotedState.OUTSIDE_QUOTES;
import static java.lang.System.*;

/**
 * Align converts tabs and other control chars to spaces, and aligns columns.
 * <p>
 * There is no special treatment for Java comments or for 'xxx' single-quoted text.
 * Columns are comma, tab, or space delimited. Text enclosed in double quotes
 * "abc def" is considered a single column.
 * <p>
 * Processing is done in two passes over the input: pass 1 measures the widest entry
 * of every column, pass 2 emits the text with each column padded to that width.
 * All working state lives in instance fields, so one instance must not be used by
 * several threads at the same time.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 4.5 2009-02-26 add both Java string quoting and plain for Java search/regexes.
 * @since 2002-06-19
 */
final class Align extends TextProcessor
    {
    /**
     * true if want debugging output.
     */
    private static final boolean DEBUGGING = false;

    /**
     * number of columns the width table can hold initially. The table grows on demand,
     * so this is a starting capacity, not a hard limit.
     */
    private static final int INITIAL_COLS = 1024;

    /**
     * how many chars to pad between aligned columns.
     */
    private static final int PADDING = 2;

    /**
     * width of each column: the widest entry seen on any line. Index 0 is the first column.
     * Only the first {@link #cols} entries are meaningful.
     */
    private int[] biggestWidth = new int[ INITIAL_COLS ];

    /**
     * state of the finite state automaton that categorises characters.
     */
    private AlignQuotedState quotedState = OUTSIDE_QUOTES;

    /**
     * where we accumulate the output.
     */
    private StringBuilder cooked;

    /**
     * the raw text we are processing.
     */
    private String raw;

    /**
     * which field/column we are working on. 0 is first, -1 means no field started on this line yet.
     */
    private int colIndex;

    /**
     * how many columns there are, i.e. the largest column count of any line.
     */
    private int cols;

    /**
     * pass=1 when deciding col widths, and pass=2 when outputting.
     */
    private int pass = 1;

    /**
     * width of current column.
     */
    private int width;

    /**
     * Categorise a character in a context sensitive way: the answer depends on whether we are
     * currently inside a double-quoted string, which is remembered in quotedState between calls.
     * This automaton is separate from the blankState automaton used by doAPass.
     * <ul>
     * <li>NEWLINE -- the newline character. It always terminates any open quoted string.</li>
     * <li>QUOTED -- a double quote character, or any other character inside a double-quoted string,
     * including blanks, control chars and commas.</li>
     * <li>WHITESPACE -- space or a control char (0x00..0x1F other than newline) outside quotes.</li>
     * <li>COMMA -- a comma outside quotes.</li>
     * <li>ORDINARY -- anything else outside quotes.</li>
     * </ul>
     * Java comments and single-quoted text get no special treatment.
     *
     * @param c char to categorise.
     *
     * @return one of COMMA, NEWLINE, QUOTED, ORDINARY, WHITESPACE.
     */
    final AlignCategory categorise( char c )
        {
        if ( c == '\n' )
            {
            // an unbalanced quote never runs past the end of the line
            quotedState = OUTSIDE_QUOTES;
            return NEWLINE;
            }

        final boolean insideQuotes = ( quotedState == INSIDE_QUOTES );

        if ( c == '\"' )
            {
            // the quote characters themselves count as part of the quoted text
            quotedState = insideQuotes ? OUTSIDE_QUOTES : INSIDE_QUOTES;
            return QUOTED;
            }

        if ( insideQuotes )
            {
            return QUOTED;
            }

        quotedState = OUTSIDE_QUOTES;
        if ( c <= ' ' )
            {
            // blank, or control char 0x00..0x1F; newline 0x0A was dealt with above
            return WHITESPACE;
            }
        if ( c == ',' )
            {
            return COMMA;
            }
        return ORDINARY;
        }

    /**
     * Calculate how wide each column is and store it in biggestWidth[colIndex] on pass 1.
     * First column is in index 0. On pass 2, generate the output by appending to the
     * cooked StringBuilder.
     * <p>
     * The algorithm is a finite state machine driven by blankState. Don't confuse it
     * with quotedState inside categorise.
     * <pre>
     * IN_LEADING   reading leading blanks on a field.
     * IN_MIDDLE    reading non-blanks or quotes in middle of a field.
     * IN_TRAILING  reading trailing blanks.
     *
     * when call endField
     * ----------o---o--o--------o
     * ____xxxx__xx__,__,__xxx___
     * ----o-----o---o--o--------
     * when call startField
     * </pre>
     * A comma is kept as the last character of the field it terminates; a comma with no
     * preceding text forms a field of its own.
     * Nothing special is done to simulate NEWLINE processing if there isn't one at the very
     * end: if the input is missing the terminal \n, so will the output.
     */
    final void doAPass()
        {
        AlignBlankState blankState = IN_LEADING;
        quotedState = OUTSIDE_QUOTES;
        startLine();
        final int rawLen = raw.length();
        for ( int i = 0; i < rawLen; i++ )
            {
            final char c = raw.charAt( i );
            switch ( categorise( c ) )
                {
                case WHITESPACE:
                    // blanks are never copied; they only mark where a field has ended
                    if ( blankState == IN_MIDDLE )
                        {
                        blankState = IN_TRAILING;
                        }
                    break;

                case COMMA:
                    if ( blankState == IN_LEADING )
                        {
                        // null field: the comma stands alone
                        startField();
                        }
                    inField( c );
                    endField();
                    blankState = IN_LEADING;
                    break;

                case ORDINARY:
                case QUOTED:
                    if ( blankState == IN_TRAILING )
                        {
                        // blanks separated us from the previous field, close it first
                        endField();
                        }
                    if ( blankState != IN_MIDDLE )
                        {
                        startField();
                        }
                    inField( c );
                    blankState = IN_MIDDLE;
                    break;

                case NEWLINE:
                    if ( blankState != IN_LEADING )
                        {
                        endField();
                        }
                    endLine();
                    startLine();
                    blankState = IN_LEADING;
                    break;

                default:
                    // categorise returns no other category
                    break;
                }
            }
        }

    /**
     * Field may have lead and trail spaces on it. We just hit the first space etc. after the
     * last non-blank. On pass 2, pads every column but the last one out to its aligned width.
     */
    final void endField()
        {
        if ( pass == 2 && colIndex < cols - 1 )
            {
            // how many chars too short we are
            final int shortBy = biggestWidth[ colIndex ] - width;
            for ( int i = 0; i < shortBy; i++ )
                {
                cooked.append( ' ' );
                }
            }
        }

    /**
     * handle end of line. On pass 2, terminates the output line.
     */
    final void endLine()
        {
        if ( pass == 2 )
            {
            cooked.append( '\n' );
            }
        }

    /**
     * Field may have lead and trail spaces on it. This is called to process non-space chars in
     * the middle of a field. Pass 1 tracks the widest entry of the column, pass 2 copies the char
     * to the output.
     *
     * @param c char we are processing.
     */
    final void inField( char c )
        {
        switch ( pass )
            {
            case 1:
                if ( ++width > biggestWidth[ colIndex ] )
                    {
                    biggestWidth[ colIndex ] = width;
                    }
                break;

            case 2:
                ++width;
                cooked.append( c );
                break;

            default:
                break;
            }
        }

    /**
     * Field may have lead and trail spaces on it. We have just hit the first non-blank.
     * Pass 1 also records the column count and makes sure the width table has a slot
     * for the new column.
     */
    final void startField()
        {
        switch ( pass )
            {
            case 1:
                width = 0;
                if ( ++colIndex > ( cols - 1 ) )
                    {
                    cols = colIndex + 1;
                    }
                ensureColumnCapacity( colIndex );
                break;

            case 2:
                width = 0;
                ++colIndex;
                break;

            default:
                break;
            }
        }

    /**
     * We have just started a new line.
     */
    final void startLine()
        {
        colIndex = -1;
        width = 0;
        }

    /**
     * Grow the column width table, if necessary, so that index is a valid slot.
     * Newly added slots are zero.
     *
     * @param index column index that is about to be used.
     */
    private void ensureColumnCapacity( int index )
        {
        if ( index >= biggestWidth.length )
            {
            // at least double, so that growth cost is amortised
            final int newLength = Math.max( index + 1, biggestWidth.length * 2 );
            biggestWidth = Arrays.copyOf( biggestWidth, newLength );
            }
        }

    /**
     * test harness. Only does anything when DEBUGGING is true.
     *
     * @param args not used.
     *
     * @noinspection ConstantConditions
     */
    public static void main( String[] args )
        {
        if ( DEBUGGING )
            {
            Align a = new Align();
            String test = " abc def 999\n" + " abc def 999\n";
            out.println( test );
            out.println( a.process( test ) );

            test = " abc, def,999\n";
            out.println( test );
            out.println( a.process( test ) );

            test = " \"abc 88\", def,999 0000000000000000 \n"
                   + " abc def 999 /* this is a comment*/ followed\n"
                   + " abc // stuff to do\n"
                   + " abc def 999\n";
            out.println( test );
            out.println( a.process( test ) );

            test = " \"abc ,, 88\" , ,def,999 0000000000000000 \n"
                   + " abc def 999 /* this is a comment*/ followed\n"
                   + " abc // stuff to do\n";
            out.println( test );
            out.println( a.process( test ) );

            test = " abc def ghi\n";
            out.println( test );
            out.println( a.process( test ) );

            test = " abc,def, ghi, hef";
            out.println( test );
            out.println( a.process( test ) );
            }
        }

    /**
     * Aligns text into columns. Columns are delimited by space, tabs, commas or other control
     * characters. Column breaking IGNORES the Java source code conventions, e.g. string literals
     * and comments. I.e. there is no special treatment for Java comments or 'xxx'. Text enclosed
     * in double quotes "abc def" is considered a single column.
     * <p>
     * The same instance may be reused for any number of calls; the column widths measured for
     * one input never leak into the next.
     *
     * @param raw input to be aligned. String typically with embedded \ns. May or may not have a
     *            terminal \n. May be null.
     *
     * @return raw input aligned in columns, or null if raw was null. If raw had a terminal \n,
     *         result will too, otherwise it will not. No commas will be added or removed.
     */
    public final String process( String raw )
        {
        if ( raw == null )
            {
            return null;
            }
        this.raw = raw;
        cooked = new StringBuilder( raw.length() * 2 );

        // forget every width left over from a previous call, not just the first few columns
        Arrays.fill( biggestWidth, 0 );
        cols = 0;

        // pass 1: measure the widest entry of each column
        pass = 1;
        doAPass();

        // pad the column widths to put a little space between the columns
        for ( int i = 0; i < cols; i++ )
            {
            biggestWidth[ i ] += PADDING;
            }

        // pass 2: go over the text again, this time appending to the cooked StringBuilder
        pass = 2;
        doAPass();
        return cooked.toString();
        }
    }