/* * [FixCSSClasses.java] * * Summary: Correct HTML Class of a link to CMP conventions. * * Copyright: (c) 1999-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2010-11-21 initial version */ package com.mindprod.repair; import com.mindprod.commandline.CommandLine; import com.mindprod.common18.EIO; import com.mindprod.fastcat.FastCat; import com.mindprod.filter.OnlyDirectoriesFilter; import com.mindprod.htmlmacros.macro.Global; import com.mindprod.htmlmacros.support.AssignCSSClasses; import com.mindprod.hunkio.HunkIO; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.FilenameFilter; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.lang.System.*; /** * Correct HTML Class of a link to CMP conventions. *

* Also puts links on single line with canonical spacing. *

* * @author Roedy Green, Canadian Mind Products * @version 1.0 2010-11-21 initial version. * @noinspection WeakerAccess * @since 2010-11-21 */ public final class FixCSSClasses { // declarations /** * true, updates classes in files. false just displays potential changes. */ private static final boolean CHANGE_FILES = true; /** * true if want extra debugging output */ private static final boolean DEBUGGING = false; private static final int FIRST_COPYRIGHT_YEAR = 1999; /** * undisplayed copyright notice. * * @noinspection UnusedDeclaration */ private static final String EMBEDDED_COPYRIGHT = "Copyright: (c) 1999-2017 Roedy Green, Canadian Mind Products, http://mindprod.com"; /** * piece of pattern uses do find class="xxx" */ private static final String FRAGMENT_CLASS = "class\\s*=\\s*\"([\\p{Lower}\\-]+)\""; /** * piece of pattern used to get href="url". extract just part in quotes. We want the whole thing, including http://www.mindprod.com/jgloss/jgloss.html */ private static final String FRAGMENT_HREF = "href=\"([^\"]+)\""; /** * date this version released. * * @noinspection UnusedDeclaration */ private static final String RELEASE_DATE = "2010-11-21"; /** * embedded version string. * * @noinspection UnusedDeclaration */ private static final String VERSION_STRING = "1.0"; /** * Regex pattern to look for links without a class */ private static final Pattern PATTERN_WITHOUT_CLASS = Pattern.compile( " \$ and \ to \\ return sb.toString(); } // /method /** * fix classes in one file. * * @param fileBeingProcessed the file currently being processed. * * @throws java.io.IOException if can't read file * @noinspection SameParameterValue, WeakerAccess */ private static void fixHTMLClassesInFile( File fileBeingProcessed ) throws IOException { String big = HunkIO.readEntireFile( fileBeingProcessed ); String result = fixHTMLClassesInString( big, fileBeingProcessed ); if ( result.equals( big ) ) { // nothing changed. No need to write results. 
// out.println( "- " + fileBeingProcessed.getDecoratedStoreName() ); return; } // generate output into a temporary file until we are sure all is ok. // create a temp file in the same directory as filename // it changed out.println( "* " + EIO.getCanOrAbsPath( fileBeingProcessed ) ); if ( CHANGE_FILES ) { final File tempFile = HunkIO.createTempFile( "temp.html", ".tmp.html", fileBeingProcessed ); FileWriter emit = new FileWriter( tempFile ); emit.write( result ); emit.close(); HunkIO.deleteAndRename( tempFile, fileBeingProcessed ); } } // /method /** * fix classes in contents of one file. * * @param contents file contents to repair. * @param fileBeingProcessed file we are repairing. We don't touch file, just use its name to compute css classes. * * @return improved contents of one entire webpage, all link classes updated. * @noinspection SameParameterValue, WeakerAccess */ private static String fixHTMLClassesInString( final String contents, File fileBeingProcessed ) { // we don't modify the head or tail. final int headEndsAt = contents.indexOf( " " + ( betterClass != null ? betterClass : "none" ) + " " + url ); } } final String replacement = composeReplacement( betterClass, url ); mw.appendReplacement( sb, Matcher.quoteReplacement( replacement ) ); // also appends junk between hits } mw.appendTail( sb ); // ||||||||||||||||||||||||||||||||||||||||||||||||||||| // do it all over, this time looking for links without any class. body = sb.toString(); sb.setLength( 0 ); final Matcher mc = PATTERN_WITHOUT_CLASS.matcher( body ); // Matchers are used both for matching and finding. while ( mc.find() ) { if ( DEBUGGING ) { out.println( "found: class:- url:" + mc.group( 1 ) ); } assert mc.groupCount() == 1 : "bad match: " + mc.group( 0 ); final String url = mc.group( 1 ); // might have http:// prefix and any extension. 
final String betterClass; betterClass = Global.assignCSSClasses.assignCSSClass( url, fileBeingProcessed ); if ( betterClass != null ) { out.println( " --> " + betterClass + " " + url ); } // works with null betterClass too. final String replacement = composeReplacement( betterClass, url ); mc.appendReplacement( sb, Matcher.quoteReplacement( replacement ) ); // also appends junk between hits. } mc.appendTail( sb ); // at this point, the sb contains the body with all classless links repaired. body = sb.toString(); return body; } // /method /** * fixes classes in HTML files. * * @param args configuration then names of files to process, dirs, files, -s, *.*, no wildcards. */ public static void main( final String[] args ) { Global.installConfiguration( args ); if ( DEBUGGING ) { out.println( PATTERN_WITH_CLASS ); out.println( PATTERN_WITHOUT_CLASS ); } // gather all the files mentioned on the command line. // either directories, files, with -s and subdirs option. // warning. Windows expands any wildcards in a nasty way. // do not use wildcards. // See http://mindprod.com/jgloss/wildcard.html out.println( "Gathering html files to tidy the CSS classes on links..." ); // command line lists dirs and files to process, pruned by following filters. CommandLine commandLine = new CommandLine( args, new OnlyDirectoriesFilter(), new ModifiableFilter( Global.assignCSSClasses ) ); for ( File file : commandLine ) { try { // -q gives no output at all, otherwise just files that changed. fixHTMLClassesInFile( file ); } catch ( FileNotFoundException e ) { err.println( "Error: " + EIO.getCanOrAbsPath( file ) + " not found." ); } catch ( Exception e ) { err.println(); e.printStackTrace( err ); err.println( e.getClass().toString() + e.getMessage() + " in file " + EIO.getCanOrAbsPath( file ) ); err.println(); } } // end for System.exit( 0 ); } // end main } // /method // /methods /** * Selects files we are permitted to touch, to upgrade their CSS classes. 
* The decision is delegated to the AssignCSSClasses configuration handed to the constructor.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2011-01-09 - initial version
* @since 2011-01-09
*/
final class ModifiableFilter implements FilenameFilter {
    /**
     * configuration consulted to decide whether a file needs its CSS classes repaired.
     */
    private final AssignCSSClasses assignCSSClasses;

    /**
     * constructor
     *
     * @param assignCSSClasses AssignCSSClasses configuration. It is stored as-is (no null check)
     *                         and consulted on every call to accept.
     */
    ModifiableFilter( AssignCSSClasses assignCSSClasses ) {
        this.assignCSSClasses = assignCSSClasses;
    } // /method

    /**
     * Select only files that pass muster, i.e. those the configuration reports as needing CSS repair.
     *
     * @param dir  the directory in which the file was found.
     * @param name the name of the file
     *
     * @return true if and only if the name should be included in the file list; false otherwise.
     */
    @Override
    public boolean accept( File dir, String name ) {
        // combine directory and simple name into the candidate file, then let the configuration decide.
        return assignCSSClasses.needsCSSRepair( new File( dir, name ) );
    } // /method
} // end ModifiableFilter