/*
 * [ProofreadAcronyms.java]
 *
 * Summary: Check that acronym expansions are plausible.
 *
 * Copyright: (c) 2012-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2012-02-26 initial version
 */
package com.mindprod.acronym;

import com.mindprod.common18.Build;
import com.mindprod.common18.Misc;
import com.mindprod.csv.CSVReader;
import com.mindprod.entities.DeEntifyStrings;

import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Locale;

import static java.lang.System.*;

/**
 * Check that acronym expansions are plausible.
 * <p>
 * Reads embellishment/acronyms.csv under the mindprod webroot. For each line it derives a
 * "signature" from the acronym (field 0) and another from its meaning (field 1), and reports
 * every line where the two signatures differ.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2012-02-26 initial version
 * @since 2012-02-26
 */
public class ProofreadAcronyms
    {
    /**
     * Get emphasised letters in acronym.
     * <p>
     * The acronym is de-entified, stripped of noise characters, upper-cased, and then truncated
     * at the first underscore, if any.
     *
     * @param acro the acronym, possibly with trailing _n
     *
     * @return letters that should have emphasis, as caps
     */
    private static String calcAcroSignature( final String acro )
        {
        // presumably deEntifyHTML replaces HTML entities with plain characters -- confirm against DeEntifyStrings.
        // Locale.ROOT keeps the case mapping independent of the default locale, consistent with the
        // locale-independent Character.toUpperCase used for the meaning signature
        // (a Turkish default locale would otherwise map i to dotted capital I).
        final String flat =
                stripNoise( DeEntifyStrings.deEntifyHTML( acro, ' ' ) ).toUpperCase( Locale.ROOT );
        final int place = flat.indexOf( '_' );
        if ( place < 0 )
            {
            return flat;
            }
        else
            {
            // drop the _n style suffix
            return flat.substring( 0, place );
            }
        }

    /**
     * Get emphasised letters in meaning expansion of acronym.
     * <p>
     * After de-entifying and stripping noise, the meaning is scanned one char at a time:
     * <ul>
     * <li>an upper case letter (or a digit that survived stripNoise) is kept as is;</li>
     * <li>a ~ inverts the treatment of the char that follows it: a char that would normally be
     * dropped is kept, converted to upper case, and a char that would normally be kept is dropped;</li>
     * <li>every other char is dropped.</li>
     * </ul>
     *
     * @param meaning the meaning, possibly with embedded entities, ~ and spaces
     *
     * @return letters that should have emphasis, as caps
     */
    private static String calcMeaningSignature( final String meaning )
        {
        final String flat = stripNoise( DeEntifyStrings.deEntifyHTML( meaning, ' ' ) );
        final StringBuilder sb = new StringBuilder( flat.length() );
        // true when the previous char was ~, so the current char gets inverted treatment.
        boolean toggle = false;
        for ( int i = 0; i < flat.length(); i++ )
            {
            final char c = flat.charAt( i );
            if ( c == '~' )
                {
                toggle = true; // Treat next cap as lc, or vice versa
                }
            else
                {
                // stripNoise has already removed ASCII digits, so isDigit can only match other digits.
                final boolean naturallyEmphasised = Character.isUpperCase( c ) || Character.isDigit( c );
                if ( toggle ^ naturallyEmphasised )
                    {
                    // When toggled, c is not upper case, so force it to caps.
                    sb.append( toggle ? Character.toUpperCase( c ) : c );
                    }
                // a ~ affects only the char immediately after it.
                toggle = false;
                }
            } // end for
        return sb.toString();
        }

    /**
     * Decide whether a char is noise to be discarded by stripNoise.
     *
     * @param c char to test.
     *
     * @return true for ASCII digits, space, and ASCII punctuation other than ~ _ `
     */
    private static boolean isNoise( final char c )
        {
        return '0' <= c && c <= '9'         // digits
               || ' ' <= c && c <= '/'      // space ! " # $ % & ' ( ) * + , - . /
               || ':' <= c && c <= '@'      // : ; < = > ? @
               || '[' <= c && c <= '^'      // [ \ ] ^   (stops short of _ and `)
               || '{' <= c && c <= '}';     // { | }     (stops short of ~)
        }

    /**
     * Strip punctuation, numbers, spaces from string, except ~ _ `.
     * A + is not stripped; it is converted to P.
     *
     * @param s string to strip.
     *
     * @return string with punctuation removed.
     */
    private static String stripNoise( String s )
        {
        final StringBuilder sb = new StringBuilder( s.length() );
        for ( int i = 0; i < s.length(); i++ )
            {
            final char c = s.charAt( i );
            // convert + to P since the two are interchangeable.
            // Must be tested before isNoise, which would otherwise discard the +.
            if ( c == '+' )
                {
                sb.append( 'P' );
                }
            else if ( !isNoise( c ) )
                {
                sb.append( c );
                }
            }
        return sb.toString();
        }

    /**
     * Proofread embellishment/acronyms.csv in the mindprod webroot.
     * <p>
     * Prints a "questionable meaning" line on System.out for every acronym whose signature does
     * not match the signature of its meaning. Exits with code 2 at the first line having fewer
     * than two fields, otherwise with code 0 when the end of the file is reached.
     *
     * @param args not used.
     *
     * @throws IOException if the CSV file cannot be opened, read or closed.
     */
    public static void main( String[] args ) throws IOException
        {
        final File webrootDir = new File( Build.MINDPROD_WEBROOT );
        final CSVReader r =
                new CSVReader( new FileReader( new File( webrootDir, "embellishment/acronyms.csv" ) ) );
        boolean shortLine = false;
        try
            {
            while ( true )
                {
                // read acronym, means, [url]
                final String[] fields = r.getAllFieldsInLine();
                if ( fields.length < 2 )
                    {
                    err.println( "short line " + r.lineCount() );
                    // defer the exit until after the reader is closed; System.exit skips finally blocks.
                    shortLine = true;
                    break;
                    }
                final String acro = fields[ 0 ];
                final String meaning = fields[ 1 ];
                final String acroSignature = calcAcroSignature( acro );
                final String meaningSignature = calcMeaningSignature( meaning );
                if ( !acroSignature.equals( meaningSignature ) )
                    {
                    out.println( "questionable meaning "
                                 + acro
                                 + " ["
                                 + acroSignature
                                 + "] -> "
                                 + " ["
                                 + meaningSignature
                                 + "] "
                                 + meaning );
                    }
                } // end while
            }
        catch ( EOFException e )
            {
            // the loop only ends normally via EOFException from getAllFieldsInLine; nothing more to do.
            }
        finally
            {
            // close on every path, including an unexpected IOException part way through the file.
            r.close();
            }
        if ( shortLine )
            {
            System.exit( 2 );
            }
        Misc.trackLastThread();
        System.exit( 0 );
        }
    }