/*
 * [AccountForQuotes.java]
 *
 * Summary: When a quotation file is split, checks no quotes were lost. Works best with StripGenerated files.
 *
 * Copyright: (c) 2015-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2015-12-30 initial version
 */
package com.mindprod.htmlmacros;

import com.mindprod.common18.FNV1a64;
import com.mindprod.entities.DeEntifyStrings;
import com.mindprod.hunkio.HunkIO;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.regex.Pattern;

import static java.lang.System.*;

/**
 * When a quotation file is split, checks no quotes were lost. Works best with StripGenerated files.
 * <p>
 * Every quote found in the new files (command-line arguments 2..n) is reduced to a hash of its
 * whitespace-separated words. The old file (argument 1) is then scanned and any quote whose hash
 * was not seen in the new files is reported on stderr.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2015-12-30 initial version
 * @since 2015-12-30
 */
public class AccountForQuotes
    {
    /**
     * what we look for surrounding text of interest.
     * NOTE(review): the literal was lost in the copy of the file this was restored from (markup was
     * stripped); reconstructed from the rawBlockQuote/plainBlockQuote naming -- confirm against the original.
     */
    private static final String CLOSE_TAG = "</blockquote>";

    /**
     * what we look for surrounding text of interest. Deliberately has no closing &gt; so that a tag
     * carrying attributes still matches.
     * NOTE(review): the literal was lost in the copy of the file this was restored from (markup was
     * stripped); reconstructed from the rawBlockQuote/plainBlockQuote naming -- confirm against the original.
     */
    private static final String OPEN_TAG = "<blockquote";

    /**
     * hashes of all the quotes found in the new split-up files.
     * NOTE(review): the declaration was truncated in the copy this was restored from; the element
     * type Long is inferred from the long hashes added to it.
     */
    private static final HashSet<Long> extantQuotes = new HashSet<>( 10000 );

    /**
     * used to break a flattened quote into words
     */
    private static final Pattern SPLIT_ON_SPACE = Pattern.compile( "\\s+" );

    /**
     * find where the quote opening at openAt ends.
     *
     * @param big    entire contents of the file being scanned
     * @param openAt offset in big where OPEN_TAG was found
     * @param file   file being scanned, used only for the error message
     *
     * @return offset just past the matching CLOSE_TAG
     * @throws IllegalArgumentException if there is no CLOSE_TAG after the open tag
     */
    private static int endOfQuote( final String big, final int openAt, final File file )
        {
        final int closeAt = big.indexOf( CLOSE_TAG, openAt + OPEN_TAG.length() );
        if ( closeAt < 0 )
            {
            throw new IllegalArgumentException( ">>>missing " + CLOSE_TAG + " in " + file );
            }
        return closeAt + CLOSE_TAG.length();
        }

    /**
     * reduce one quote, tags included, to a hash of its words.
     *
     * @param rawBlockQuote quote as it appears in the file, from OPEN_TAG through CLOSE_TAG
     *
     * @return hash computed by FNV1a64 over the whitespace-separated words of the flattened quote
     */
    private static long hashOf( final String rawBlockQuote )
        {
        // process text
        final String plainBlockQuote = DeEntifyStrings.flattenHTML( rawBlockQuote, ' ' );
        // break into words
        final String[] words = SPLIT_ON_SPACE.split( plainBlockQuote );
        return FNV1a64.computeHash( words );
        }

    /**
     * read all quotes in new split up files, and remember the hash of each one.
     *
     * @param args old new1 new2 ... Only new1 new2 ... are read here.
     */
    private static void readNewQuotes( final String[] args )
        {
        for ( int i = 1; i < args.length; i++ )
            {
            final File file = new File( args[ i ] );
            try
                {
                final String big = HunkIO.readEntireFile( file );
                int start0 = 0;
                int start1;
                while ( ( start1 = big.indexOf( OPEN_TAG, start0 ) ) >= 0 )
                    {
                    final int end2 = endOfQuote( big, start1, file );
                    final String rawBlockQuote = big.substring( start1, end2 );
                    extantQuotes.add( hashOf( rawBlockQuote ) );
                    // resume the search just past this quote
                    start0 = end2;
                    } // end while
                }
            catch ( IOException e )
                {
                e.printStackTrace( err );
                err.println( "Fatal error: unable to open " + file + " possibly locked in the editor." );
                System.exit( 2 );
                }
            }
        }

    /**
     * read original quotes, all in one file, first on command line. Each quote whose hash was not
     * recorded by readNewQuotes is reported on stderr; the total is printed on stdout.
     *
     * @param args old new1 new2 ... Only old is read here.
     */
    private static void readOldQuotes( final String[] args )
        {
        int errorCount = 0;
        final File file = new File( args[ 0 ] );
        try
            {
            final String big = HunkIO.readEntireFile( file );
            int start0 = 0;
            int start1;
            while ( ( start1 = big.indexOf( OPEN_TAG, start0 ) ) >= 0 )
                {
                final int end2 = endOfQuote( big, start1, file );
                final String rawBlockQuote = big.substring( start1, end2 );
                if ( !extantQuotes.contains( hashOf( rawBlockQuote ) ) )
                    {
                    errorCount++;
                    err.println( "Error: quote in " + file + " is missing from the new files" );
                    err.println( rawBlockQuote );
                    err.println();
                    }
                // resume the search just past this quote
                start0 = end2;
                } // end while
            }
        catch ( IOException e )
            {
            e.printStackTrace( err );
            err.println( "Fatal error: unable to open " + file + " possibly locked in the editor." );
            System.exit( 2 );
            }
        out.println( errorCount + " errors" );
        }

    /**
     * read all quotes and store checksum of those in new1 new2 ... then process old, and see if its checksums exist
     *
     * @param args old new1 new2 ...
     *
     * @throws IOException if cannot read quotations
     */
    public static void main( String[] args ) throws IOException
        {
        if ( args.length == 0 )
            {
            // without the old file there is nothing to check; args[ 0 ] would otherwise blow up
            err.println( "Usage: AccountForQuotes old new1 new2 ..." );
            System.exit( 1 );
            }
        readNewQuotes( args );
        readOldQuotes( args );
        }
    }