/* * [ExtractBookMacros.java] * * Summary: One shot to extract Book macro on a page, splitting them out one to a file in the book directory. * * Copyright: (c) 2012-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2012-02-11 initial version */ package com.mindprod.stores; import com.mindprod.commandline.CommandLine; import com.mindprod.common18.EIO; import com.mindprod.common18.Misc; import com.mindprod.csv.CSVWriter; import com.mindprod.fastcat.FastCat; import com.mindprod.filter.AllButSVNDirectoriesFilter; import com.mindprod.filter.ExtensionListFilter; import com.mindprod.htmlmacros.macro.Global; import com.mindprod.htmlmacros.support.ConfigurationForMindprod; import com.mindprod.hunkio.HunkIO; import java.io.File; import java.io.IOException; import static java.lang.System.*; /** * One shot to extract Book macro on a page, splitting them out one to a file in the book directory. *

* When run with -dry, it is used to create a summary of books. *

* Do not run on the book dir, unless using -dry. -dry suppresses replacement of Book macro with Insert. * Exports CSV file C:/temp/bookmacros.csv with isbn, title, author * * @author Roedy Green, Canadian Mind Products * @version 1.0 2012-02-11 initial version * @since 2012-02-11 */ public class ExtractBookMacros { /** * true if want extra debugging information */ private static final boolean DEBUGGING = false; /** * max number of Book macros per page to expect */ private static final int MAX_BOOKS_PER_PAGE = 100; private static final String USAGE = "\nExtractBookMacros.exe Configuration -dry (dry run) -s (subdirs) dir file.html"; /** * where Book files are kept */ private static File bookDir; /** * replace the Book macro with a stripped-down Insert macro * * @param isbn13 EAN to use in Insert macro * * @return generated Insert macro */ private static String buildInsertMacro( final String isbn13, final String title ) { final FastCat sb = new FastCat( 5 ); sb.append( "" ); return sb.toString(); } /** * extract one Book macro as a separate file. * * @param oneMacro text of the macro * @param isbn13 isbn13 * @param title title of the book * * @throws IOException if problems reading/writing file containing Book macros. */ private static void createBookFile( final String oneMacro, final String isbn13, final String title ) throws IOException { // generate separate file to hold just one book macro. 
final FastCat sb = new FastCat( 6 ); sb.append( "\n" ); sb.append( oneMacro ); sb.append( "\n\n" ); final String bookFileContents = sb.toString(); String bookFilename = isbn13 + ".html"; // without this line compiler worries might not init File bookFile = new File( bookDir, bookFilename ); // avoid overwriting existing book file for ( char letter = 'a'; letter < 'z'; letter++ ) { bookFile = new File( bookDir, bookFilename ); if ( !bookFile.exists() ) { break; } else { bookFilename = isbn13 + letter + ".html"; } } HunkIO.writeEntireFile( bookFile, bookFileContents, HunkIO.UTF8 ); } /** * log a Book macro we found. * * @param w where to log to * @param isbn13 isbn13 of book without dashes. just one value. * @param title title of book entified * @param author author of book entified */ private static void logBookFound( final CSVWriter w, final String isbn13, final String title, final String author ) { w.put( isbn13 ); w.put( title ); w.put( author ); w.nl(); } /** * find values of parm of the form xxx="..." or xxx={...} * * @param parmName name of param to search for, case-sensitive * @param searchIn text in which to search for parms, one Book macro. * * @return value of the parm. null if missing, "" if empty. 
*/ private static String parseOneParm( String parmName, String searchIn ) { int start = searchIn.indexOf( parmName + "=" ); if ( start < 0 ) { return null; // indicate no such parm } final int size = searchIn.length(); start += parmName.length() + 1; if ( start >= size ) { throw new IllegalArgumentException( "Truncated parm= : " + parmName ); } final char c = searchIn.charAt( start ); final int end; switch ( c ) { case ' ': start++; if ( start >= size ) { throw new IllegalArgumentException( "Truncated parm= : " + parmName ); } end = searchIn.indexOf( ' ', start ); if ( end == start ) { throw new IllegalArgumentException( "Malformed parm= : " + parmName ); } break; case '\"': start++; if ( start >= size ) { throw new IllegalArgumentException( "Truncated parm= : " + parmName ); } end = searchIn.indexOf( '\"', start ); break; case '{': start++; if ( start >= size ) { throw new IllegalArgumentException( "Truncated parm= : " + parmName ); } end = searchIn.indexOf( '}', start ); break; default: // e.g. birth=1948-02-04 end = searchIn.indexOf( ' ', start ); break; } if ( end < 0 ) { throw new IllegalArgumentException( "Malformed parm= missing terminator : " + parmName ); } return searchIn.substring( start, end ); } /** * @param args names of files to extract book macros from, -s etc.. * * @throws java.io.IOException if trouble reading or writing files containing Book macros. */ public static void main( String[] args ) throws IOException { Global.installConfiguration( new ConfigurationForMindprod() ); final File webrootDir = new File( Global.configuration.getLocalWebrootWithSlashes() ); bookDir = new File( webrootDir, "book" ); out.println( "Gathering html files to extract..." 
); final boolean dry; if ( args.length > 1 && args[ 1 ].equals( "-dry" ) ) { args[ 1 ] = null; dry = true; } else { dry = false; } CommandLine commandLine = new CommandLine( args, new AllButSVNDirectoriesFilter(), new ExtensionListFilter( "html" ) ); if ( commandLine.size() == 0 ) { throw new IllegalArgumentException( "No files found to process\n" + USAGE ); } final CSVWriter w = new CSVWriter( EIO.getPrintWriter( new File( "C:/temp/bookmacros.csv" ), 8 * 1024, EIO.UTF8 ) ); for ( File sourceFile : commandLine ) { final String big = HunkIO.readEntireFile( sourceFile, HunkIO.UTF8 ); // compose shrunken file, with ", start + "" ); } end += " -->".length(); try { final String oneMacro = big.substring( start, end ).replaceAll( "\\s+", " " ); final String isbn13 = parseOneParm( "isbn", oneMacro ); // not a list final String title = parseOneParm( "title", oneMacro ); // we do not reflow, // we want all on one line for CSV final String author = parseOneParm( "author", oneMacro ); if ( isbn13 == null || title == null || author == null ) { throw new IllegalArgumentException( "missing mandatory parm" ); } if ( DEBUGGING && dry ) { out.println( "--- isbn={" + isbn13 + "} title={" + title + "} author={" + author + "}" ); } logBookFound( w, isbn13, title, author ); if ( !dry ) { // create a separate file createBookFile( oneMacro, isbn13, title ); // replace Book macro with an Insert macro // text ahead of