/* * [MSWordAutocorrects.java] * * Summary: MS Word autocorrects. * * Copyright: (c) 2010-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.7+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2010-03-03 initial version */ package com.mindprod.autocorrect; import com.mindprod.common17.EIO; import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.EOFException; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.Map; /** * MS Word autocorrects. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2010-03-03 initial version * @since 2010-03-03 */ class MSWordAutocorrects extends Autocorrects { /** * MS Word 5-bytes of unknown purpose, we leave them alone. */ private byte[] mystery; /** * MS Word big-endian 4-bytes of option bytes as to how autocorrects should be applied. We do not modify them. */ private int options; /** * MS word big-endian 4-byte signature at start of file */ private int signature; /** * Test harness. * * @param args args[0]=C:/temp/MSO1033.acl name of MSWord autocorrects file. * * @throws IOException if cannot read or write autocorrects */ public static void main( String[] args ) throws IOException { MSWordAutocorrects m = new MSWordAutocorrects(); m.load( new File( args[ 0 ] ) ); m.dump(); m.save(); } /** * calculate the total length of the file, if we were to write it * * @return return length of file including 17-byte header and two trailing spacers. */ int calcFileLength() { int length = 4 + 4 + 4 + 5; for ( Map.Entry e : map.entrySet() ) { final String abbreviation = e.getKey(); final String expansion = e.getValue(); length += 8 + ( abbreviation.length() + expansion.length() ) * 2; } length += 4; // two trailing spacers. return length; } @Override void load( final File source ) throws IOException { backingFile = source; // O P E N final DataInputStream dis = EIO.getDataInputStream( source, 64 * 1024 ); // 4 bytes : signature 04 01 96 00 signature = dis.readInt(); if ( signature != 0x04019600 ) { throw new IOException( "Wrong signature. " + EIO.getCanOrAbsPath( source ) + " is not an MS Word " + "autocorrects " + "file." ); } // 4 bytes : option e.g. 22 c0 ef 05 options = dis.readInt(); // bypass 4 bytes: little endian length of file in bytes //noinspection ResultOfMethodCallIgnored dis.skip( 4 ); // 5 bytes mystery e.g 93 03 00 00 b9 or 9f 03 00 00 53 mystery = new byte[ 5 ]; if ( dis.read( mystery ) != 5 ) { throw new IOException( "trouble reading mystery bytes" ); } map.clear(); try { outer: while ( true ) { // bypass possibly multiple spacer 0s. usually 1. // Why bother with spacers? probably originally to make the file more acceptable to C++ // which likes 0-terminators on its strings. // Why variable numbers of them? Probably just to mess with the minds of // people attempting to export the data. int lena = 0; int spacers = 0; while ( lena == 0 || lena == 0x2000 || lena == 0x2100 ) { // read length or spacer lena = dis.readShort(); if ( lena == 0 ) { spacers++; if ( spacers >= 2 ) { // end marked by two 16-bit zeros in a row break outer; } } } // read abbreviation UTF-16BE if ( !( 1 <= lena && lena <= 1024 ) ) { throw new IllegalArgumentException( "corrupt file abbreviation length: " + lena ); } final char[] abbr = new char[ lena ]; for ( int i = 0; i < lena; i++ ) { abbr[ i ] = dis.readChar(); } // bypass possibly multiple spacer 0s. Usually 1. int lene = 0; while ( lene == 0 || lene == 0x2000 || lene == 0x2100 ) { // read length or spacer lene = dis.readShort(); } // read expansion UTF-16BE if ( !( 1 <= lene && lene <= 1024 ) ) { throw new IllegalArgumentException( "corrupt file expansion length: " + lene ); } final char[] expansion = new char[ lene ]; for ( int i = 0; i < lene; i++ ) { expansion[ i ] = dis.readChar(); } map.put( new String( abbr ), new String( expansion ) ); } } catch ( EOFException e ) { dis.close(); } } @Override void save() throws IOException { // O P E N final FileOutputStream fos = new FileOutputStream( backingFile ); final BufferedOutputStream bos = new BufferedOutputStream( fos, 65536 /* 64K bytes */ ); final DataOutputStream dos = new DataOutputStream( bos ); // 4 bytes : signature 04 01 96 00 dos.writeInt( signature ); // 4 bytes : option e.g. 22 c0 ef 05 dos.writeInt( options ); // 4 bytes: little endian length of file in bytes dos.writeInt( Integer.reverseBytes( calcFileLength() ) ); // 5 bytes mystery e.g 93 03 00 00 b9 or 9f 03 00 00 53 dos.write( mystery ); for ( Map.Entry e : map.entrySet() ) { final String abbreviation = e.getKey(); final String expansion = e.getValue(); dos.writeShort( 0 ); dos.writeShort( abbreviation.length() ); for ( int i = 0; i < abbreviation.length(); i++ ) { dos.writeChar( abbreviation.charAt( i ) ); } dos.writeShort( 0 ); dos.writeShort( expansion.length() ); for ( int i = 0; i < expansion.length(); i++ ) { dos.writeChar( expansion.charAt( i ) ); } } // two dummy spacers on the end dos.writeShort( 0 ); dos.writeShort( 0 ); dos.close(); } }