/* * [TidyTscribe.java] * * Summary: tidies tscribe.csv list of transcription software. * * Copyright: (c) 2016-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2016-05-21 initial version */ package com.mindprod.repair; import com.mindprod.common18.EIO; import com.mindprod.common18.ST; import com.mindprod.csv.CSV; import com.mindprod.csv.CSVCondense; import com.mindprod.csv.CSVReader; import com.mindprod.csv.CSVSort; import com.mindprod.csv.CSVWriter; import com.mindprod.hunkio.HunkIO; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.EOFException; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.Arrays; import java.util.HashSet; import static java.lang.System.*; /** * tidies tscribe.csv list of transcription software. *

* E:\com\mindprod\repair\tscribe.csv * has format * package name * url * cost in usd * list of file formats supported * * @author Roedy Green, Canadian Mind Products * @version 1.0 2016-05-21 initial version * @since 2016-05-21 */ public class TidyTscribe { /** * footer for the HTML table form of the data * * @param prw where to emit */ private static void emitHTMLFooter( final PrintWriter prw ) { prw.println( "" ); prw.println( "" ); } /** * header for the HTML table form of the data * * @param prw where to emit */ private static void emitHTMLHeader( final PrintWriter prw ) { prw.println( "" ); prw.println( "" ); prw.println( "" ); prw.println( "" ); prw.print( "" ); for ( ScribeFormat f : ScribeFormat.values() ) { prw.print( "" ); } prw.println( "" ); prw.println( "" ); prw.println( "" ); } /** * emit item about one package to the html version * * @param prw where to print * @param packageName name of program * @param url url of produc pace * @param cost cost US$ * @param formats Which formats it supports. */ private static void emitHTMLItem( final PrintWriter prw, final String packageName, final String url, final String cost, final HashSet formats ) { prw.print( "" ); if ( cost.equals( "0" ) || cost.equals( "" ) || cost.equals( "?" ) ) { prw.print( "" ); } else { prw.print( "" ); } for ( ScribeFormat format : ScribeFormat.values() ) { final String cssclass = ( format.ordinal() & 1 ) != 0 ? "stripe2" : "stripe1"; if ( formats.contains( format.name() ) ) { prw.print( "" ); } else { prw.print( "" ); } } prw.println( "" ); } private static void writeTidiedCSVItem( final CSVWriter w, final String packageName, final String url, final String cost, final HashSet formats ) { // sort list of formats. String[] formatArray = formats.toArray( new String[ formats.size() ] ); Arrays.sort( formatArray ); w.put( packageName ); w.put( url ); w.put( cost ); w.put( formatArray ); // dups mean this list might be shorter than it was w.nl(); } /** * prepare java source files by replacing the first header comment with a standard. * When proofing, logs to old.txt and new.txt in the current directory. * * @param args word proof or tidy followed by a list of files and directories to process. * -s mean means process subdirs of subsequently mentioned dirs, comes first. * * @throws IOException if cannot read or write source files. */ public static void main( final String[] args ) throws IOException { File tscribe = new File( "E:/com/mindprod/repair/tscribe.csv" ); // C O N D E N S E try { // file, separatorChar, quoteChar, commentChar, cols that can be different new CSVCondense( tscribe, ',', '\"', '#', CSV.UTF8 ); } catch ( IOException e ) { err.println(); e.printStackTrace( err ); err.println( "CSVCondense failed to condense " + EIO.getCanOrAbsPath( tscribe ) ); System.exit( 2 ); } // S O R T try { // file, cols, types, directions, separatorChar, quoteChar, commentChar new CSVSort( tscribe, new int[] { 0 }, new char[] { 'i' }, new boolean[] { true }, ',', '\"', '#', CSV.UTF8 ); } catch ( IOException e ) { err.println(); e.printStackTrace( err ); err.println( "CSVSort failed to sort " + EIO.getCanOrAbsPath( tscribe ) ); System.exit( 2 ); } // R E A D // trimUnquoted allowMultipleLineFields CSVReader r = new CSVReader( new BufferedReader( new FileReader( tscribe ) ), ',', /* separatorChar between fields */ '\"', /* quoteChar to surround fields containing commas */ "#", /* char that starts comments */ true /* hide comments */, true /* trimQuoted */, true /* trimUnquoted */, true /* allow multiline fields */ ); // tidied file final File tempFile = HunkIO.createTempFile( "temp_", ".tmp", tscribe ); CSVWriter w = new CSVWriter( EIO.getPrintWriter( tempFile, 10 * 1024, EIO.UTF8 ), -1, /* quotelevel * -1 = like 0, but add an extra space after each separator/comma, * 0 = minimal quotes, only around fields containing quotes or separators. * 1 = quotes also around fields containing spaces. * 2 = quotes around all fields, whether or not they contain commas, quotes or spaces. */ ',', /* separatorChar between fields */ '\"', /* quoteChar to surround fields containing commas */ '#', /* char that starts comments */ true /* trim fields of lead and trailing blanks */ ); // HTML version final FileWriter fw = new FileWriter( "E:/mindprod/jgloss/include/tscribe.htmlfrag", false /* append */ ); final BufferedWriter bw = new BufferedWriter( fw, 16_384 /* 32K bytes/16K chars, 50% of 64K byte allocation is optimal */ ); final PrintWriter prw = new PrintWriter( bw, false /* auto flush on println */ ); int errors = 0; try { try { // nl will prepend a # w.nl( " Transcription Software" ); w.nl( "# package,url,cost,formats" ); emitHTMLHeader( prw ); while ( true ) { // read the existing tscribe.csv file final String[] fields = r.getAllFieldsInLine(); final String packageName = fields[ 0 ]; if ( fields.length == 2 ) { err.println( "missing cost for " + packageName + " line: " + r.lineCount() ); errors++; } final String url; if ( fields.length > 1 ) { url = fields[ 1 ]; } else { url = ""; } final String cost; if ( fields.length > 2 ) { cost = fields[ 2 ]; } else { cost = ""; } if ( fields.length > 3 ) { // sort and dedup formats HashSet formats = new HashSet<>( 200 ); for ( int i = 3; i < fields.length; i++ ) { final String format = fields[ i ].toUpperCase(); if ( ST.isEmpty( format ) ) { // just ignore empty fields continue; } try { ScribeFormat.valueOf( format ); } catch ( IllegalArgumentException e ) { err.println( "unknown file format: " + format + " for " + packageName + " line: " + r.lineCount() ); errors++; } formats.add( format ); } writeTidiedCSVItem( w, packageName, url, cost, formats ); // only put items with complete info into HTML emitHTMLItem( prw, packageName, url, cost, formats ); } else { // end shart line w.nl( fields, false ); } } // end while } catch ( EOFException e ) { // normal termination. r.close(); w.close(); if ( errors == 0 ) { HunkIO.deleteAndRename( tempFile, tscribe ); emitHTMLFooter( prw ); prw.close(); } else { err.println( errors + " errors. tscript.csv left as is." ); } } } catch ( IOException e ) { err.println(); e.printStackTrace( err ); System.exit( 2 ); } } }
" + "Transcription Software vendors" + "
PackageCost
" + f.getDecorativeName() + "
" + packageName + "" + cost + "