/*
 * [Chaos.java]
 *
 * Summary: Measures how chaotic the line lengths of a text or html file are.
 *
 * Copyright: (c) 2013-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2013-02-16 initial version
 */
package com.mindprod.chaos;

import com.mindprod.commandline.CommandLine;
import com.mindprod.common18.EIO;
import com.mindprod.common18.ST;
import com.mindprod.filter.AllButSVNDirectoriesFilter;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.hunkio.HunkIO;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.regex.Pattern;

import static java.lang.System.*;

/**
 * Measures how chaotic the line lengths of a text or html file are.
 * <p>
 * An indication the file needs to be tidied.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2013-02-16 initial version
 * @since 2013-02-16
 */
public class Chaos {
    /**
     * year first released
     */
    private static final int FIRST_COPYRIGHT_YEAR = 2013;

    /**
     * undisplayed copyright notice.
     *
     * @noinspection UnusedDeclaration
     */
    private static final String EMBEDDED_COPYRIGHT =
            "Copyright: (c) 2013-2017 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * date this version released.
     *
     * @noinspection UnusedDeclaration
     */
    private static final String RELEASE_DATE = "2013-02-16";

    /**
     * how to use the program; appended to the error thrown when no files are found.
     */
    private static final String USAGE =
            "\n\nChaos requires a list of *.html or *.txt files to process. -q means all "
            + "files in dirs.";

    /**
     * embedded version string.
     *
     * @noinspection UnusedDeclaration
     */
    private static final String VERSION_STRING = "1.0";

    /**
     * break file into lines at \n or \r\n
     */
    private static final Pattern LINE_SPLITTER = Pattern.compile( "\\r\\n|\\n", Pattern.MULTILINE );

    /**
     * break a line into words at runs of whitespace
     */
    private static final Pattern WORD_SPLITTER = Pattern.compile( "\\s+" );

    /**
     * Counts the whitespace-separated words in one line.
     *
     * @param line a single line of text, without its line terminator
     *
     * @return number of non-empty words in the line
     */
    private static int countWords( String line ) {
        final String[] words = WORD_SPLITTER.split( line );
        // An empty line, or one that starts with whitespace, produces an empty
        // first element from split; that element is not a word.
        if ( words.length > 0 && words[ 0 ].isEmpty() ) {
            return words.length - 1;
        }
        return words.length;
    }

    /**
     * Calculates and prints one row of statistics for a file: average line length,
     * average deviation from that average (mean absolute deviation, not least squares),
     * longest line, line count, word count, character count and the file's path.
     * <p>
     * Averages use integer division, so they are truncated. Lines come from
     * {@link Pattern#split(CharSequence)}: trailing empty lines are dropped, and a
     * completely empty file is reported as one empty line.
     *
     * @param file file to compute
     *
     * @throws IOException if the file cannot be read
     */
    private static void displayChaos( File file ) throws IOException {
        final String contents = HunkIO.readEntireFile( file );
        final String[] lines = LINE_SPLITTER.split( contents );
        // split yields no elements when the file holds nothing but line terminators;
        // use 1 so the averages below do not divide by zero.
        final int divisor = Math.max( lines.length, 1 );

        // one pass: total length, longest line and word count
        int total = 0;
        int max = 0;
        int wordCount = 0;
        for ( String line : lines ) {
            final int length = line.length();
            total += length;
            if ( length > max ) {
                max = length;
            }
            wordCount += countWords( line );
        }
        final int average = total / divisor;

        // second pass needs the average: sum of absolute deviations.
        // Accumulated in a long because the sum can exceed the total character count.
        long totalDev = 0;
        for ( String line : lines ) {
            totalDev += Math.abs( line.length() - average );
        }
        // the mean deviation never exceeds the largest single deviation, so it fits an int
        final int averageDev = ( int ) ( totalDev / divisor );

        // one output row; column widths line up with the headings printed by main
        out.println( ST.rightJustified( average, 4, false )
                     + ST.rightJustified( averageDev, 4, false )
                     + ST.rightJustified( max, 6, false )
                     + ST.rightJustified( lines.length, 7, false )
                     + ST.rightJustified( wordCount, 7, false )
                     + ST.rightJustified( contents.length(), 8, false )
                     + " "
                     + EIO.getCanOrAbsPath( file ) );
    }

    /**
     * Takes a list of files to process and displays the chaos numbers for each.
     * <p>
     * A file that cannot be processed is reported on its own line and the run
     * continues with the next file.
     * <p>
     * NOTE(review): USAGE mentions a -q switch while the earlier documentation of this
     * method mentioned -s; confirm which switches CommandLine actually honours.
     *
     * @param args command line arguments, handed to CommandLine to obtain the files to process
     *
     * @throws IllegalArgumentException if the command line yields no files
     */
    public static void main( final String[] args ) {
        final CommandLine commandLine = new CommandLine( args,
                new AllButSVNDirectoriesFilter(),
                new ExtensionListFilter( "html", "htm", "txt", "bat", "btm" ) );
        if ( commandLine.size() == 0 ) {
            throw new IllegalArgumentException( "No files found to process\n" + USAGE );
        }
        out.println( " -line length- -------count--------" );
        out.println( " avg dev max lines words chars" );
        for ( File file : commandLine ) {
            try {
                displayChaos( file );
            }
            catch ( FileNotFoundException e ) {
                out.println( "Error: " + EIO.getCanOrAbsPath( file ) + " not found." );
            }
            catch ( Exception e ) {
                // Some exceptions carry no detail message; fall back to toString()
                // so the report names the exception instead of printing "null".
                final String reason = e.getMessage() != null ? e.getMessage() : e.toString();
                out.println( reason + " in file " + EIO.getCanOrAbsPath( file ) );
            }
        } // end for
    } // end main
}