/*
 * [JPrep.java]
 *
 * Summary: Pre-parse snippets to prepare tokenized serialised versions for JDisplay.
 *
 * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.7 2005-07-27 major rewrite using new tokenizer interface
 *  1.8 2005-12-25 adds support for *.properties files.
 *  1.9 2005-12-25 add parser for *.csv files
 *  2.0 2005-12-25 add parser for *.ini files
 *  2.1 2006-01-22 correct bug in handling of entities in HTMLState
 *  2.2 2006-03-06 reformat with IntelliJ, add Javadoc
 *  2.3 2007-05-05 add iformat rendering, use of snippet/ser and snippet/iformat
 *  2.4 2007-07-26 add support for annotations.
 *  2.5 2007-08-20 IntelliJ inspector cleanup of code.
 *  2.6 2008-01-11 add support for hex and octal numerics.
 *  2.7 2008-02-23 fix Java parser to bold variable definitions.
 *  2.8 2008-04-18 get JDisplay and CSS font renderings in closer sync
 *  2.9 2008-04-30 improve way numeric literals are rendered in Java.
 *  3.0 2008-08-08 add parser for vanilla text
 *  3.1 2009-04-12 shorter style names, improved highlighting.
 *  3.2 2009-08-30 tone down colour for keywords.
 *  3.3 2010-02-08 highlight begin and ends of comments and CDATAs specially.
 *  3.4 2010-02-10 add manifest tokenizer.
 *  3.5 2014-08-01 change *.html to *.htm, *.adler to *.checksum, use 64bit FNV1a64 checksums.
 */
package com.mindprod.jprep;

import com.mindprod.common18.EIO;
import com.mindprod.common18.FNV1a64Digester;
import com.mindprod.common18.Misc;
import com.mindprod.compactor.Compactor;
import com.mindprod.fastcat.FastCat;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.htmlmacros.support.JPrepConfiguration;
import com.mindprod.hunkio.HunkIO;
import com.mindprod.jdisplay.Footprint;
import com.mindprod.jtokens.Token;

import java.awt.Panel;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.zip.GZIPOutputStream;

import static java.lang.System.*;

/**
 * Pre-parse snippets to prepare tokenized serialised versions for JDisplay.
 * <p>
 * JDisplay is an Applet to render large snippets. JDisplayAux handles inserting code into the HTML for
 * htmlmacros. JPrep parses the snippets.
 * <p>
 * Each Tokenizer decides on its own which extensions it will handle. If two tokenizers handle the same
 * extension, the one that looks at it first will process it. Subsequent ones will see the *.ser file already
 * done and leave it be. That is determined by the order of invoking Tokenizers in
 * prepareAllSnippetsForOneDirectory.
 * <p>
 * Currently prepareAllSnippetsForOneDirectory invokes Tokenizers for: bat, csv, http (headers), html, ini,
 * java, mft, properties, sql, text and bali. Some tokenizers handle multiple extensions.
 * <p>
 * TODO: tokenizers for css, C source, ASM source
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 3.5 2014-08-01 change *.html to *.htm, *.adler to *.checksum, use 64bit FNV1a64 checksums.
 * @since 2004-06-06
 */
public final class JPrep
    {
    // declarations

    /**
     * normally true. Might set false to have a look at the binary stream during debugging.
     */
    private static final boolean COMPRESS_SER = true;

    /**
     * DOCTYPE_DTD for iframe snippet, case-sensitive, using HTML5
     */
    private static final String DOCTYPE_FOR_IFRAME = "";

    /**
     * undisplayed copyright notice
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String EMBEDDED_COPYRIGHT =
            "Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * date this version was released.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String RELEASE_DATE = "2014-08-01";

    /**
     * embedded version string.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String VERSION_STRING = "3.5";

    /**
     * Needed to get at FontMetrics
     */
    private static final Panel dummyPanelForFontMetrics = new Panel();

    /*
     * R E S P O N S I B I L I T I E S :
     *
     * JPrep : preparing the tokens, calculating approximate footprint, the approximate lineNumber footprint,
     * preparing *.ser file. Knows names of various possible JTokenizers.
     * Also prepares the iframe html snippets, compacted in snippet/iframe.
     *
     * PrepTokenizer : analysing text and producing an array of Tokens to render it in pretty form.
     * Knows Token classes used in rendering that class of document.
     *
     * Token : decides on the text, the font, the colour for the token, and how it would be rendered inline
     * as prettified HTML, normally using a CSS style. Knows TokenColourScheme, TokenFonts.
     *
     * JDisplay macro : deciding how much screen real estate to use, whether to inline, whether
     * bar/line numbers are needed. Knows Geometry(margins)
     *
     * JDisplay Applet : recalculating the geometry based on the user's font metrics, deciding whether scroll
     * bars are needed, display, both pretty and plain as a TextArea that supports copy/paste.
     * Knows Geometry(margins)
     */

    /*
     * V O C A B U L A R Y :
     *
     * The Payload footprint: width x height in pixels. This is the space required to render just the tokens.
     * It does not include the line numbers, margins, bars etc. However, when scrolling it does include
     * the parts currently offscreen. So it may sometimes be smaller than the Applet footprint and
     * sometimes larger.
     */
    private static final int FIRST_COPYRIGHT_YEAR = 2004;
    // /declarations

    // methods

    /**
     * Render the tokens of one snippet as compacted HTML and leave the result on disk in iframe/*.htm.
     * The bytes that are checksummed are exactly the bytes written to the file.
     *
     * @param tokens      array of Tokens forming the snippet.
     * @param snippetDir  directory where the snippet file is; output goes to its iframe subdirectory.
     * @param snippetName unqualified snippetName to tokenize e.g. "sample.javafrag"
     *
     * @return FNV1a64 checksum of the iframe file contents.
     * @throws java.io.IOException if can't write iframe file.
     */
    private static long buildAndSaveIframe( Token[] tokens, final File snippetDir, String snippetName )
            throws IOException
        {
        // produce the decorated HTML and put in the iframe dir.
        final File iframe = new File( snippetDir, "iframe/" + snippetName + ".htm" );
        final FastCat sb = new FastCat( tokens.length + 18 );
        // unix newline conventions. End user does not directly touch this file.
        // NOTE(review): DOCTYPE_FOR_IFRAME is empty and the literals below carry no markup even though the
        // comments talk about a doctype and a stylesheet -- confirm the tags were not lost from this copy.
        sb.append( DOCTYPE_FOR_IFRAME );
        sb.append( "Snippet : " );
        sb.append( snippetName );
        sb.append( "\n" );
        sb.append( "\n" );
        sb.append( "\n" );
        sb.append( "\n" );
        // we only need jdisplay.css, not mindprod.css
        sb.append( "\n" );
        // could be screen, print
        sb.append( "\n" );
        sb.append( "" );
        for ( Token token : tokens )
            {
            sb.append( token.getHTML() );
            }
        sb.append( "\n" );
        sb.append( "\n" );

        // compact whole HTML file, embedded PRE will be undisturbed.
        final String contents = Compactor.compactStringKeepingMacrosAndComments( sb.toString(), snippetName );

        // Encode once, so the checksum and the file on disk always describe the same bytes,
        // independent of the platform default charset.
        final byte[] encoded = contents.getBytes( EIO.UTF8 );
        final FNV1a64Digester digester = new FNV1a64Digester();
        digester.update( encoded );
        final long checksumForIFrame = digester.getValue();

        // O P E N, W R I T E, C L O S E : stream is closed even if the write fails.
        try ( FileOutputStream fos = new FileOutputStream( iframe ) )
            {
            fos.write( encoded );
            }
        return checksumForIFrame;
        }
    // /method

    /**
     * Build the (normally gzip-compressed) serialised token representation of the snippet, write it to disk,
     * and checksum it.
     *
     * @param ser         file to write serialised snippet to.
     * @param footprint   snippet geometry.
     * @param tokens      array of Tokens representing parsed snippet.
     * @param snippetFile file of the original source of the snippet (currently unused).
     *
     * @return FNV1a64 checksum of the bytes written to the *.ser file.
     * @throws IOException if cannot write the *.ser file.
     */
    private static long buildAndSaveSer( File ser, Footprint footprint, Token[] tokens, final File snippetFile )
            throws IOException
        {
        // will grow as needed
        final ByteArrayOutputStream baos = new ByteArrayOutputStream( 10000 );

        // O P E N
        // Closing the ObjectOutputStream flushes it and closes the packer, which for gzip also
        // writes the trailer. No explicit finish()/flush() ordering to get wrong.
        try ( OutputStream packer = COMPRESS_SER
                                    ? new GZIPOutputStream( baos, 64 * 1024/* buffsize */ )
                                    : new BufferedOutputStream( baos, 64 * 1024/* buffsize */ );
              ObjectOutputStream oos = new ObjectOutputStream( packer ) )
            {
            // W R I T E
            // Footprint version #, footprint, tokens[], dateStamp
            oos.writeObject( Footprint.serialVersionUID );
            oos.writeObject( footprint );
            oos.writeObject( tokens );
            // Internal datestamp foils untouch putting *.ser back prior to source's date.
            // It is out past the end. We never read it. It just changes the file checksum.
            // If the source is unmodified since the last time the *.ser was generated,
            // the ser file could be redated, but it would never be redated earlier than the source.
            oos.writeObject( System.currentTimeMillis() );
            }
        // C L O S E has happened; baos now holds the complete stream.

        final byte[] result = baos.toByteArray();
        final FNV1a64Digester digester = new FNV1a64Digester();
        digester.update( result );
        final long checksumForSer = digester.getValue();
        HunkIO.writeEntireFileAsBytes( ser, result );
        return checksumForSer;
        }
    // /method

    /**
     * process one directory, in turn with each of the tokenizers
     *
     * @param snippetDir directory whose files we process, not recursive.
     */
    private static void prepareAllSnippetsForOneDirectory( File snippetDir )
        {
        // tokenizers contain a list of the extensions the tokenizer can handle.
        // do them in batches by type, within directory.
        // Order matters: the first tokenizer claiming an extension wins.
        prepareSnippetsForOneTokenizerInOneDirectory( new BatTokenizer(), snippetDir );
        // prepareSnippetsForOneTokenizerInOneDirectory( new CSSTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new CSVTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new HttpTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new HTMLTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new IniTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new JavaTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new MFTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new PropTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new SQLTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new TextTokenizer(), snippetDir );
        prepareSnippetsForOneTokenizerInOneDirectory( new BaliTokenizer(), snippetDir );
        }
    // /method

    /**
     * process one file with the given tokenizer and leave the results on disk: ser/*.ser, iframe/*.htm and
     * ser/*.checksum. If the ser file is already at least as new as the snippet, it does nothing.
     * An IOException is reported on System.err and processing of other snippets continues.
     *
     * @param tokenizer   Tokenizer instance to parse this particular type of file. We have checked earlier
     *                    that this Tokenizer can handle this extension.
     * @param snippetDir  directory where the snippet we will ultimately display is.
     * @param snippetName unqualified snippetName to tokenize e.g. "sample.javafrag"
     */
    private static void prepareOneSnippet( JPrepTokenizer tokenizer, final File snippetDir, String snippetName )
        {
        try
            {
            // original source
            final File snippetFile = new File( snippetDir, snippetName );
            // add .ser
            final File ser = new File( snippetDir, "ser/" + snippetName + ".ser" );
            if ( ser.exists() && ser.lastModified() >= snippetFile.lastModified() )
                {
                // silently bypass work if already done
                return;
                }
            out.println( snippetName );

            final byte[] bigBytes = HunkIO.readEntireFileAsBytes( snippetFile );
            final FNV1a64Digester digester = new FNV1a64Digester();
            digester.update( bigBytes );
            final long checksumForSnippet = digester.getValue();

            final String big = new String( bigBytes, EIO.UTF8 );
            // length in chars of the decoded text
            final int beforeSize = big.length();
            // a "\r\n" terminator also ends with "\n", so one test covers both conventions.
            if ( !big.endsWith( "\n" ) )
                {
                err.println( "\007\nfile " + snippetName + " is missing a final line terminator. Please fix." );
                }

            // reuse same JPrepTokenizer object.
            final Token[] tokens = tokenizer.tokenize( big );

            // calculate how fat it will be to render this in HTML markup
            final Footprint footprint = new Footprint();
            // Footprint knows JToken.Geometry, but we do not.
            footprint.s1CalcHTMLLength( tokens );
            footprint.s2CalcPayloadFootprint( tokens, dummyPanelForFontMetrics );
            footprint.s3CalcFat( tokens );
            // don't need to calculate scrollable or Applet footprints

            // Write serialised tokens into a compressed sequential file.
            final long checksumForSer = buildAndSaveSer( ser, footprint, tokens, snippetFile );
            final long afterSize = ser.length();
            final long checksumForIFrame = buildAndSaveIframe( tokens, snippetDir, snippetName );
            showBeforeAndAfter( footprint, beforeSize, afterSize );
            saveChecksums( snippetDir, snippetName, checksumForSnippet, checksumForSer, checksumForIFrame );
            }
        catch ( IOException e )
            {
            err.println();
            e.printStackTrace( err );
            err.println( "\007Problem processing snippet " + snippetName + "." );
            err.println();
            // we don't stop, HTML macros will be upset enough if snippet is missing.
            }
        }
    // /method

    /**
     * prepare tokens for one tokenizer and all files in one directory. It will handle all extensions that
     * one tokenizer is capable of, but no more.
     *
     * @param tokenizer  Tokenizer to handle one class of files.
     * @param snippetDir directory holding the snippets; generated files go in its subdirectories.
     */
    private static void prepareSnippetsForOneTokenizerInOneDirectory( JPrepTokenizer tokenizer,
                                                                      final File snippetDir )
        {
        // get list of extensions we are prepared to handle.
        final String[] extensions = tokenizer.getExtensions();
        final FilenameFilter wantedFileFilter = new ExtensionListFilter( extensions );
        // get list of all *.xxx files in snippet directory
        final String[] files = snippetDir.list( wantedFileFilter );
        if ( files == null )
            {
            // File.list gives null, not an empty array, when the directory cannot be read.
            err.println( "\007Can't list snippet directory: " + EIO.getCanOrAbsPath( snippetDir ) );
            return;
            }
        // process all *.xxx files, creating corresponding *.ser
        for ( String filename : files )
            {
            prepareOneSnippet( tokenizer, snippetDir, filename );
            }
        }
    // /method

    /**
     * save three checksums in a separate xxx.checksum file in the same dir as the snippet's *.ser.
     * The file holds three big-endian longs in the order: snippet, ser, iframe.
     *
     * @param snippetDir         dir where snippets are
     * @param snippetName        name of snippet with extension
     * @param checksumForSnippet checksum for snippet itself
     * @param checksumForSer     checksum for snippet *.ser
     * @param checksumForIframe  checksum for snippet iframe *.htm
     *
     * @throws IOException if cannot write file
     */
    private static void saveChecksums( final File snippetDir,
                                       final String snippetName,
                                       final long checksumForSnippet,
                                       final long checksumForSer,
                                       final long checksumForIframe ) throws IOException
        {
        // save three checksums in xxxx.checksum
        final File checksum = new File( snippetDir, "ser/" + snippetName + ".checksum" );
        // so small, no need for buffering. Closed even if a write fails.
        try ( DataOutputStream dos =
                      new DataOutputStream( new FileOutputStream( checksum, false /* append */ ) ) )
            {
            dos.writeLong( checksumForSnippet );
            dos.writeLong( checksumForSer );
            dos.writeLong( checksumForIframe );
            }
        }
    // /method

    /**
     * Show sizes before and after tokenizing compression.
     *
     * @param footprint  snippet geometry.
     * @param beforeSize size of the original document, as passed by the caller (length of the decoded text).
     * @param afterSize  size in bytes of the *.ser file.
     */
    private static void showBeforeAndAfter( Footprint footprint, int beforeSize, long afterSize )
        {
        // full footprint display. Will see only when snippet changes.
        out.println( "    size:"
                     + footprint.payloadWidth
                     + "x"
                     + footprint.payloadHeight
                     + ":"
                     + footprint.lineNumberWidthInPixels
                     + " lines:"
                     + footprint.totalLines
                     + " before:"
                     + beforeSize /* original document */
                     + " after:"
                     + afterSize /* compressed *.ser */
                     + " html:" /* pretty html */
                     + footprint.htmlLengthInChars /* do not mult by 2, since html is 8-bit */ );
        }
    // /method

    /**
     * main method. Loads the named configuration, then prepares the snippets in
     * webroot/dir/snippet for every dir the configuration lists.
     *
     * @param args args[0] is the simple name of a JPrepConfiguration class living in package
     *             com.mindprod.htmlmacros.support. The snippet directories come from that configuration.
     */
    public static void main( String[] args )
        {
        if ( args.length < 1 )
            {
            throw new IllegalArgumentException(
                    "Usage: JPrep configurationName, where configurationName is a JPrepConfiguration class in "
                    + "com.mindprod.htmlmacros.support" );
            }
        final String configurationName = args[ 0 ];
        final JPrepConfiguration c;
        try
            {
            final String binaryClassName = "com.mindprod.htmlmacros.support." + configurationName;
            // Make sure the class we dynamically load implements the Configuration interface.
            final Class<? extends JPrepConfiguration> configurationClass =
                    Class.forName( binaryClassName ).asSubclass( JPrepConfiguration.class );
            c = configurationClass.getDeclaredConstructor().newInstance();
            }
        catch ( ClassCastException e )
            {
            // configuration exists but is screwed up, but the code exists.
            final ClassCastException wrapped = new ClassCastException( "Coding bug: The code to process configuration "
                                                                       + configurationName
                                                                       + " refused access. It needs a public no-arg constructor." );
            // ClassCastException has no cause-taking constructor.
            wrapped.initCause( e );
            throw wrapped;
            }
        catch ( Exception e )
            {
            // might have been ClassNotFoundException or a reflective instantiation failure.
            // Any problem is a failure.
            throw new IllegalArgumentException( "Fatal error: JPrepConfiguration "
                                                + configurationName
                                                + " not found", e );
            }

        final List<String> sdirsList = c.getDirsContainingSnippets();
        final String webroot = c.getLocalWebrootWithSlashes();
        MiniTools.setWebroot( webroot );

        for ( String dir : sdirsList )
            {
            final String sdir = webroot + "/" + dir + "/snippet";
            out.println( "Preparing snippets for: " + sdir );
            final File sdirFile = new File( sdir );
            if ( !sdirFile.isDirectory() )
                {
                err.println( "\007Can't find snippet directory: " + EIO.getCanOrAbsPath( sdirFile ) );
                exit( 1 );
                }
            else
                {
                prepareAllSnippetsForOneDirectory( sdirFile );
                }
            }
        out.println( "Snippets prepared" );
        Misc.trackLastThread();
        System.exit( 0 );
        }
    // /method
    // /methods
    }