/*
* [Lazy.java]
*
* Summary: Lets us avoid the work of expanding macros if they were done successfully earlier.
*
* Copyright: (c) 2012-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2014-04-21 initial version.
*/
package com.mindprod.htmlmacros.support;
import com.mindprod.common18.EIO;
import com.mindprod.common18.FNV1a64;
import com.mindprod.common18.ST;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import static java.lang.System.*;
/**
* Lets us avoid the work of expanding macros if they were done successfully earlier.
*
* To use it:
* 1. Call the constructor to create a Lazy object.
* 2. Call lazy.open.
* 3. For each file you are considering processing, call lazy.isAlreadyDone.
* 4. After you have processed each file, call lazy.markStatus.
* 5. When you are done, call lazy.close.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2014-04-21 initial version
* @since 2014-04-21
*/
public class Lazy
{
    // declarations
    /**
     * size of I/O buffer in bytes
     */
    private static final int BUFFERSIZE_IN_BYTES = 64 * 1024;

    /**
     * allow 5 seconds of slop in matching dates
     */
    private static final long SLOP = TimeUnit.SECONDS.toMillis( 5 );

    /**
     * length of each record in the serialised cache file in bytes.
     * two 64-bit longs: hash-64 of the filename, timestamp-64.
     */
    private static final int RECORD_LENGTH = ( 64 + 64 ) / 8;

    /**
     * lead ignorable string on most files, stored internally with / separators.
     */
    private String filenamePrefix;

    /**
     * trailing ignorable string on most files, stored internally with / separators.
     */
    private String filenameSuffix;

    /**
     * binary file where we store state between runs.
     */
    private File cacheFile;

    /**
     * look up filename hash-64 to timestamp of the last successful processing.
     */
    private HashMap<Long, Long> hashToTimestamp;
    // /declarations

    /**
     * true if this Lazy is open
     */
    private boolean isOpen;

    // methods
    /**
     * constructor
     */
    public Lazy()
    {
        isOpen = false;
    }

    /**
     * calculate a hash-64 of the name of the file, not its contents.
     *
     * @param file file to be processed
     *
     * @return 64-bit FNV1a64 hash of the absolute path, with the common
     *         prefix/suffix chopped off and \ normalised to /.
     */
    private long calcHash( final File file )
    {
        // prune down if possible
        // internally use / instead of \ in filenames
        final String chopped = ST.chopLeadingString( ST.chopTrailingString( file.getAbsolutePath().replace( '\\', '/' ), this.filenameSuffix ), this.filenamePrefix );
        return FNV1a64.computeHash( chopped );
    }// /method

    /**
     * save the contents of the lookup cache into the cache file.
     * It is a binary file of pairs hash-64, timestamp-64.
     * Safe to call when already closed; it then does nothing.
     */
    public void close()
    {
        if ( !isOpen )
        {
            return;
        }
        // try-with-resources ensures the stream is closed even if a write fails,
        // where the old explicit close() leaked the stream on IOException.
        try ( DataOutputStream dos = EIO.getDataOutputStream( this.cacheFile, BUFFERSIZE_IN_BYTES ) )
        {
            for ( Entry<Long, Long> entry : hashToTimestamp.entrySet() )
            {
                // W R I T E
                // writing in big-endian binary to be compact and fast.
                // auto-unboxed to primitive long; we write long, not Long.
                dos.writeLong( entry.getKey() );
                dos.writeLong( entry.getValue() );
            }
        }
        catch ( IOException e )
        {
            // best effort: warn and carry on; next run simply reprocesses everything.
            System.err.println( ">>> Warning. Unable to write " + this.cacheFile.getAbsolutePath() + " file "
                                + e.getMessage() );
        }
        isOpen = false;
    }// /method

    /**
     * has this file already been processed and is unchanged since that time?
     *
     * @param file file we are processing.
     *
     * @return true if the file has already been successfully processed.
     *
     * @throws IllegalStateException if open() has not been called yet.
     */
    public boolean isAlreadyDone( final File file )
    {
        if ( !isOpen )
        {
            // object-state violation, not a bad argument.
            throw new IllegalStateException( "Lazy.open() has not yet been called." );
        }
        final long hash = calcHash( file );
        final Long timestampL = hashToTimestamp.get( hash );
        // if no entry, it was not registered as done.
        if ( timestampL == null )
        {
            return false;
        }
        // if all is well, the last modified date should not have changed since we
        // recorded the file as successfully processed. SLOP absorbs clock skew.
        if ( file.lastModified() > timestampL + SLOP )
        {
            // the file has been modified since we last processed it.
            // we will have to reprocess it.
            // This cache entry is useless. We might as well get rid of it now to save some space.
            hashToTimestamp.remove( hash );
            return false;
        }
        else
        {
            // it has not been touched since we last successfully processed it.
            // the cache entry is fine as is. This is the whole point, to save reprocessing it.
            return true;
        }
    }// /method

    /**
     * Mark the status of this file.
     * markStatus false can be used to force a file to be reprocessed soon,
     * e.g. because includes have changed.
     *
     * @param file   file we are processing.
     * @param status true = file successfully processed, false = file was not successfully processed.
     *
     * @throws IllegalStateException if open() has not been called yet.
     */
    public void markStatus( final File file, final boolean status )
    {
        if ( !isOpen )
        {
            // object-state violation, not a bad argument.
            throw new IllegalStateException( "Lazy.open() has not yet been called." );
        }
        final long hash = calcHash( file );
        if ( status )
        {
            // GOOD
            // we record the fact by leaving an entry with hash/Now.
            // file was just given or will soon be given a timestamp close to this.
            hashToTimestamp.put( hash, System.currentTimeMillis() );
            // collisions are so rare, we do not worry about them. Two files sharing a hash.
        }
        else
        {
            // BAD
            // erase all record of it. There may be no record already.
            hashToTimestamp.remove( hash );
        }
    }// /method

    /**
     * Open and read the cache file.
     * Safe to call when already open; it then does nothing.
     *
     * @param cacheFile      cache file with state from the last time, sorted. If the file does not exist,
     *                       we start over. e.g. new File( "E:\\mindprod\\embellishments\\cacheFIle.bin" )
     * @param filePrefix     If nearly all filenames start the same way, the common lead string, null or "" otherwise.
     *                       e.g. "E:\mindprod\". Use either / or \ in names.
     * @param fileSuffix     If nearly all filenames end the same way, the common trailing string, null or "" otherwise.
     *                       e.g. ".html". Use either / or \ in names.
     * @param estimatedFiles estimate of how many files we will process
     */
    public void open( final File cacheFile, final String filePrefix, final String fileSuffix, final int estimatedFiles )
    {
        if ( isOpen )
        {
            return;
        }
        this.cacheFile = cacheFile;
        // internally we work with / in filenames, not \
        this.filenamePrefix = ST.canonical( filePrefix ).replace( '\\', '/' );
        this.filenameSuffix = ST.canonical( fileSuffix ).replace( '\\', '/' );
        if ( cacheFile.exists() && cacheFile.canRead() )
        {
            // load up the HashMap we use to track when files were last successfully processed.
            // divide the long length BEFORE casting to int, so a huge cache file
            // cannot overflow the cast. (The old code cast first, then divided.)
            final int elts = Math.max( estimatedFiles, ( int ) ( cacheFile.length() / RECORD_LENGTH ) );
            // allow some padding to avoid collisions
            hashToTimestamp = new HashMap<>( elts + elts / 4 ); // 25% padding
            // load binary long pairs from it.
            // try-with-resources replaces the old nested try/finally and closes on all paths.
            try ( DataInputStream dis = EIO.getDataInputStream( cacheFile, BUFFERSIZE_IN_BYTES ) )
            {
                while ( true )
                {
                    // R E A D pairs hash-64, timestamp-64
                    final long hash = dis.readLong();
                    final long timestamp = dis.readLong();
                    hashToTimestamp.put( hash, timestamp );
                } // end loop
            }
            catch ( EOFException e )
            {
                // normal loop exit: we read pairs until we run off the end of the file.
            }
            catch ( IOException e )
            {
                System.err.println( ">>> Warning. Unable to read " + cacheFile.getAbsolutePath() + " file" );
                // we carry on, using as much as we could read.
            }
        } // end if
        else
        {
            hashToTimestamp = new HashMap<>( estimatedFiles + estimatedFiles / 4 );
        }
        isOpen = true;
    }// /method
    // /methods
} // end class