/*
 * [Lazy.java]
 *
 * Summary: Lets us avoid the work of expanding macros if they were done successfully earlier.
 *
 * Copyright: (c) 2012-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2014-04-21 initial version.
 */
package com.mindprod.htmlmacros.support;

import com.mindprod.common18.EIO;
import com.mindprod.common18.FNV1a64;
import com.mindprod.common18.ST;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;

import static java.lang.System.*;

/**
 * Lets us avoid the work of expanding macros if they were done successfully earlier.
 *
 * To use it:
 * 1. Call the constructor to create a Lazy object.
 * 2. Call lazy.open.
 * 3. For each file you are considering processing, call lazy.isAlreadyDone.
 * 4. After you have processed each file, call lazy.markStatus.
 * 5. When you are done, call lazy.close.
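 *
 * A minimal usage sketch; {@code filesToProcess} and {@code expandMacrosIn} are hypothetical
 * stand-ins for the caller's own file list and processing step, and the cache location and
 * estimated file count are illustrative:
 * <pre>{@code
 * final Lazy lazy = new Lazy();
 * lazy.open( new File( "E:/mindprod/embellishments/cacheFile.bin" ), "E:/mindprod/", ".html", 1000 );
 * for ( File f : filesToProcess )
 *     {
 *     if ( !lazy.isAlreadyDone( f ) )
 *         {
 *         final boolean ok = expandMacrosIn( f );  // caller's own processing step
 *         lazy.markStatus( f, ok );                // remember success so the next run can skip it
 *         // lazy.markStatus( f, false ) would instead force f to be reprocessed next run.
 *         }
 *     }
 * lazy.close();  // persist the cache for the next run
 * }</pre>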
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2014-04-21 initial version
 * @since 2014-04-21
 */
public class Lazy
    {
    // declarations

    /**
     * size of buffer in bytes
     */
    private static final int BUFFERSIZE_IN_BYTES = 64 * 1024;

    /**
     * allow 5 seconds of slop in matching dates
     */
    private static final long SLOP = TimeUnit.SECONDS.toMillis( 5 );

    /**
     * length of each record in the serialised cache file in bytes.
     * two 64-bit longs.
     */
    private static final int RECORD_LENGTH = ( 64 + 64 ) / 8;

    /**
     * lead ignorable string on most files
     */
    private String filenamePrefix;

    /**
     * trailing ignorable string on most files
     */
    private String filenameSuffix;

    /**
     * binary file where we store state between runs.
     */
    private File cacheFile;

    /**
     * look up filename hash-64 to timestamp of last successful processing
     */
    private HashMap<Long, Long> hashToTimestamp;
    // /declarations

    /**
     * true if this Lazy is open
     */
    private boolean isOpen;

    // methods

    /**
     * constructor
     */
    public Lazy()
        {
        isOpen = false;
        }

    /**
     * calculate a hash-64 of the name of the file, not its contents.
     * The ignorable filenamePrefix and filenameSuffix are chopped off before hashing.
     *
     * @param file file to be processed
     *
     * @return 64-bit FNV1a64 hash.
     */
    private long calcHash( final File file )
        {
        // prune down if possible
        // internally use / instead of \ in filenames
        final String chopped = ST.chopLeadingString( ST.chopTrailingString( file.getAbsolutePath().replace( '\\', '/' ), this.filenameSuffix ), this.filenamePrefix );
        return FNV1a64.computeHash( chopped );
        }// /method

    /**
     * Save the contents of the hashToTimestamp lookup table into the cacheFile.
     * It is a binary file of pairs hash-64, timestamp-64.
     */
    public void close()
        {
        if ( !isOpen )
            {
            return;
            }
        try
            {
            // O P E N
            final DataOutputStream dos = EIO.getDataOutputStream( this.cacheFile, BUFFERSIZE_IN_BYTES );
            for ( Entry<Long, Long> entry : hashToTimestamp.entrySet() )
                {
                // W R I T E
                final long hash = entry.getKey();
                final long timestamp = entry.getValue();
                // writing in big-endian binary to be compact and fast.
                dos.writeLong( hash );  // we write primitive longs, not boxed Longs.
                dos.writeLong( timestamp );
                }
            // C L O S E
            dos.close();
            } // end try
        catch ( IOException e )
            {
            err.println( ">>> Warning. Unable to write " + this.cacheFile.getAbsolutePath() + " file " + e.getMessage() );
            }
        isOpen = false;
        }// /method

    /**
     * Has this file already been processed, and is it unchanged since that time?
     *
     * @param file file we are processing.
     *
     * @return true if the file has already been successfully processed.
     */
    public boolean isAlreadyDone( File file )
        {
        if ( !isOpen )
            {
            throw new IllegalStateException( "Lazy.open() has not yet been called." );
            }
        final long hash = calcHash( file );
        final Long timestampL = hashToTimestamp.get( hash );
        // if no entry, it was not registered as done.
        if ( timestampL == null )
            {
            return false;
            }
        // if all is well, the last modified date should not have changed since we recorded the file as
        // successfully processed.
        if ( file.lastModified() > timestampL + SLOP )
            {
            // the file has been modified since we last processed it.
            // we will have to reprocess it.
            // This cacheFile entry is useless. We might as well get rid of it now to save some space.
            hashToTimestamp.remove( hash );
            return false;
            }
        else
            {
            // it has not been touched since we last successfully processed it.
            // the cacheFile entry is fine as is. This is the whole point, to save reprocessing it.
            return true;
            }
        }// /method

    /**
     * Mark the status of this file.
     * markStatus( file, false ) can be used to force a file to be reprocessed soon,
     * e.g. because includes it depends on have changed.
     *
     * @param file   file we are processing.
     * @param status true=file successfully processed, false=file was not successfully processed.
     */
    public void markStatus( File file, boolean status )
        {
        if ( !isOpen )
            {
            throw new IllegalStateException( "Lazy.open() has not yet been called." );
            }
        final long hash = calcHash( file );
        if ( status )
            {
            // GOOD
            // we record the fact by leaving an entry with hash/now.
            // the file was just given, or will soon be given, a timestamp close to this.
            hashToTimestamp.put( hash, System.currentTimeMillis() );
            // collisions, two files sharing a hash, are so rare we do not worry about them.
            }
        else
            {
            // BAD
            // erase all record of it. There may be no record already.
            hashToTimestamp.remove( hash );
            }
        }// /method

    /**
     * Open and read the cacheFile.
     *
     * @param cacheFile      cacheFile with state stored from the last run. If the file does not exist,
     *                       we start over. e.g. new File( "E:\\mindprod\\embellishments\\cacheFile.bin" )
     * @param filePrefix     If nearly all filenames start the same way, the common lead string, null or "" otherwise.
     *                       e.g. "E:\mindprod\". Use either / or \ in names.
     * @param fileSuffix     If nearly all filenames end the same way, the common trailing string, null or "" otherwise.
     *                       e.g. ".html". Use either / or \ in names.
     * @param estimatedFiles estimate of how many files we will process
     */
    public void open( final File cacheFile,
                      final String filePrefix,
                      final String fileSuffix,
                      final int estimatedFiles )
        {
        if ( isOpen )
            {
            return;
            }
        this.cacheFile = cacheFile;
        // internally we work with / in filenames, not \
        this.filenamePrefix = ST.canonical( filePrefix ).replace( '\\', '/' );
        this.filenameSuffix = ST.canonical( fileSuffix ).replace( '\\', '/' );
        if ( cacheFile.exists() && cacheFile.canRead() )
            {
            // load up the HashMap we use to track when files were last successfully processed.
            final int elts = Math.max( estimatedFiles, ( int ) cacheFile.length() / RECORD_LENGTH );
            // allow some padding to avoid collisions
            hashToTimestamp = new HashMap<>( elts + elts / 4 );  // 25% padding
            // load binary long pairs from it.
            DataInputStream dis = null;
            try
                {
                try
                    {
                    // O P E N
                    dis = EIO.getDataInputStream( cacheFile, BUFFERSIZE_IN_BYTES );
                    while ( true )
                        {
                        // R E A D pairs hash-64, timestamp-64
                        long hash = dis.readLong();
                        long timestamp = dis.readLong();
                        hashToTimestamp.put( hash, timestamp );
                        } // end loop
                    } // end inner try
                catch ( EOFException e )
                    {
                    // normal end of file, nothing to do
                    }
                finally
                    {
                    // C L O S E
                    if ( dis != null )
                        {
                        dis.close();
                        }
                    }
                } // end outer try
            catch ( IOException e )
                {
                err.println( ">>> Warning. Unable to read " + cacheFile.getAbsolutePath() + " file" );
                // we carry on, using as much as we could read.
                }
            } // end if
        else
            {
            hashToTimestamp = new HashMap<>( estimatedFiles + estimatedFiles / 4 );
            }
        isOpen = true;
        }// /method
    // /methods
    } // end class