/*
 * [FindUndone.java]
 *
 * Summary: Find block quotes not yet done.
 *
 * Copyright: (c) 2008-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2008-06-08
 */
package com.mindprod.repair;

import com.mindprod.commandline.CommandLine;
import com.mindprod.entities.DeEntifyStrings;
import com.mindprod.filter.AllButSVNDirectoriesFilter;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.hunkio.HunkIO;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static java.lang.System.*;

/**
 * Find block quotes not yet done.
 * <p>
 * Scans every html file named on the command line, looks at each blockquote in it, and
 * tallies the author text of every blockquote that does not match one of the "done" forms.
 * The tally is printed most-quoted author first.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2008-06-08
 * @since 2008-06-08
 */
public class FindUndone
{
    // ------------------------------ CONSTANTS ------------------------------

    /**
     * estimated number of authors whose dates we collect, with some padding
     */
    private static final int EST_AUTHORS = 600;

    /**
     * longest author text kept; anything longer is truncated before it is tallied
     */
    private static final int MAX_AUTHOR_LENGTH = 150;

    /**
     * count of how many times each author is referenced, keyed by the flattened author text
     */
    private static final HashMap<String, RankedAuthor> authorLookup = new HashMap<>( EST_AUTHORS );

    /**
     * what we look for surrounding text of interest
     */
    // NOTE(review): the literal was empty in the damaged copy this file was rebuilt from
    // (everything between '<' and '>' had been stripped). Reconstructed from context - confirm.
    private static final String CLOSE_TAG = "</blockquote>";

    /**
     * what we look for surrounding text of interest.
     * Deliberately has no closing '>' so that opening tags carrying attributes are found too.
     */
    // NOTE(review): reconstructed from context, see CLOSE_TAG - confirm.
    private static final String OPEN_TAG = "<blockquote";

    // $ ( ) * + - . ? [ \ ] ^ { | } <- quotable regex chars

    /**
     * pattern of a quote whose author has not yet been processed. Group 1 is the author text.
     */
    // NOTE(review): only the skeleton  (.+?).*  survived in the damaged copy. The leading wildcard
    // and the markup around group 1 are GUESSES - restore the real literal from version control.
    private static final Pattern AUTHOR =
            Pattern.compile( ".*<span class=\"author\">(.+?)</span>.*", Pattern.DOTALL );

    /**
     * pattern of quote already handled
     */
    // NOTE(review): only the skeleton  .+(.+)[ \n\r]+(.+)<space>  survived in the damaged copy.
    // Every tag in this literal is a GUESS - restore the real literal from version control.
    private static final Pattern DONE1 =
            Pattern.compile(
                    ".+<span class=\"author\">(.+)</span>[ \n\r]+<span class=\"born\">(.+)</span> </blockquote>",
                    Pattern.DOTALL );

    /**
     * second pattern of quote already handled
     */
    // NOTE(review): nothing at all of this pattern survived in the damaged copy. "(?!)" is a
    // placeholder that can never match, so quotes it used to recognise are reported as undone
    // rather than silently hidden. Restore the real literal from version control.
    private static final Pattern DONE2 = Pattern.compile( "(?!)", Pattern.DOTALL );

    // -------------------------- STATIC METHODS --------------------------

    /**
     * Study one blockquote to see if it is done. Ones not complete have their author tallied
     * in {@link #authorLookup}.
     *
     * @param rawBlockQuote blockquote contents as they are now, including the opening and
     *                      closing blockquote tags
     */
    private static void processDoneBlockQuote( String rawBlockQuote )
    {
        final boolean alreadyDone = DONE1.matcher( rawBlockQuote ).matches()
                                    || DONE2.matcher( rawBlockQuote ).matches()
                                    || rawBlockQuote.contains( "macro KJV" );
        if ( alreadyDone )
        {
            return;
        }

        // not yet done.
        final Matcher a = AUTHOR.matcher( rawBlockQuote );
        if ( !( a.matches() && a.groupCount() == 1 ) )
        {
            // block quote without parseable author; nothing to tally.
            return;
        }

        // unprocessed author
        String author = a.group( 1 );
        if ( author.length() > MAX_AUTHOR_LENGTH )
        {
            author = author.substring( 0, MAX_AUTHOR_LENGTH );
        }
        // presumably strips markup/entities so the same author always yields the same key
        author = DeEntifyStrings.flattenHTML( author, ' ' );

        final RankedAuthor existing = authorLookup.get( author );
        if ( existing != null )
        {
            existing.inc();
        }
        else
        {
            // a new RankedAuthor starts with a count of 1
            authorLookup.put( author, new RankedAuthor( author ) );
        }
    }

    // --------------------------- main() method ---------------------------

    /**
     * Scan the html files named on the command line and list the authors of undone blockquotes.
     *
     * @param args files and directories to process, interpreted by CommandLine
     *
     * @throws IllegalArgumentException if a file has an opening blockquote tag with no closing tag
     */
    @SuppressWarnings( { "ResultOfMethodCallIgnored" } )
    public static void main( String[] args )
    {
        // get files to process from command line.
        out.println( "Gathering files to find non-standard ..." );
        CommandLine wantedFiles = new CommandLine( args,
                new AllButSVNDirectoriesFilter(),
                new ExtensionListFilter( "html" ) );

        for ( File file : wantedFiles )
        {
            try
            {
                final String big = HunkIO.readEntireFile( file );
                int start0 = 0;
                int start1;
                while ( ( start1 = big.indexOf( OPEN_TAG, start0 ) ) >= 0 )
                {
                    final int start2 = start1 + OPEN_TAG.length();
                    final int end1 = big.indexOf( CLOSE_TAG, start2 );
                    if ( end1 < 0 )
                    {
                        // name the offending file so the broken markup can be found
                        throw new IllegalArgumentException( file + " missing " + CLOSE_TAG );
                    }
                    final int end2 = end1 + CLOSE_TAG.length();
                    // the slice runs from the start of the opening tag through the end of the closing tag
                    final String rawBlockQuote = big.substring( start1, end2 );
                    // process text
                    processDoneBlockQuote( rawBlockQuote );
                    // resume the search just past this blockquote
                    start0 = end2;
                } // end while
            }
            catch ( IOException e )
            {
                // report and carry on with the remaining files
                err.println();
                err.println( "Unable to read " + file );
                e.printStackTrace( err );
                err.println();
            }
        } // end for to process each file

        // export to array for easy sort.
        RankedAuthor[] authors = authorLookup.values().toArray( new RankedAuthor[ authorLookup.size() ] );
        Arrays.sort( authors );
        out.println();
        out.println( authors.length + " fixes yet to do" );
        for ( RankedAuthor author : authors )
        {
            out.println( author.count + " " + author.author );
        }
        out.println( "done" );
    }
}

/**
 * used to count how many times each author is quoted
 */
class RankedAuthor implements Comparable<RankedAuthor>
{
    // ------------------------------ FIELDS ------------------------------

    /**
     * author text, used both for display and as the tie breaker when sorting
     */
    public final String author;

    /**
     * how many times this author was quoted on the website
     */
    public int count;

    // --------------------------- CONSTRUCTORS ---------------------------

    /**
     * Record the first sighting of an author.
     *
     * @param author author text; the count starts at 1
     */
    RankedAuthor( String author )
    {
        this.author = author;
        this.count = 1;
    }

    // -------------------------- OTHER METHODS --------------------------

    /**
     * Record one more sighting of this author.
     */
    void inc()
    {
        count++;
    }

    // -------------------------- PUBLIC INSTANCE METHODS --------------------------

    /**
     * sort by rank big to small, author tie breaker.
     * Defines the default sort order for RankedAuthor Objects.
     * Compares descending count, then author ignoring case.
     *
     * @param other other RankedAuthor to compare with this one
     *
     * @return -ve if this sorts before other, 0 if they sort equal, +ve if this sorts after other
     */
    @Override
    public final int compareTo( RankedAuthor other )
    {
        // Integer.compare avoids the overflow that subtracting the two counts could produce.
        final int diff = Integer.compare( other.count, this.count );
        if ( diff != 0 )
        {
            return diff;
        }
        return this.author.compareToIgnoreCase( other.author );
    }
}