/*
 * [TestUTF8Diff.java]
 *
 * Summary: Discover how a UTF-8 and ISO_8859-1 differ in the encoding of char x0000..0xffff.
 *
 * Copyright: (c) 2009-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2011-01-14
 */
package com.mindprod.test;

import com.mindprod.common18.EIO;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;

/**
 * Discover how a UTF-8 and ISO_8859-1 differ in the encoding of chars.
 * <p>
 * The program writes the same sequence of chars to two files, one per encoding, so that the
 * resulting bytes can be compared with an external tool. Note that the sample written covers
 * only the first 4096 chars (0x0000..0x0fff), not the entire 0x0000..0xffff range.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2011-01-14
 * @since 2011-01-14
 */
public final class TestUTF8Diff
    {
    /**
     * encoding for iso-8859-1
     */
    public static final Charset ISO88591Charset = Charset.forName( "ISO-8859-1" );

    /**
     * how many chars, starting at char 0, are written to each sample file
     */
    private static final int SAMPLE_CHAR_COUNT = 4096;

    /**
     * how many sample chars are written between line breaks
     */
    private static final int CHARS_PER_LINE = 64;

    /**
     * Write a sample file in the given encoding.
     * <p>
     * The file contains chars 0 .. 4095 in ascending order, with a '\n' inserted in front of
     * every group of 64 chars (including the very first group). An existing file of the same
     * name is overwritten, not appended to.
     *
     * @param filename name of the file to create or overwrite.
     * @param encoding charset used to convert the chars to bytes.
     *
     * @throws java.io.IOException on I/O failure.
     */
    private static void writeSampleFile( String filename, Charset encoding ) throws IOException
        {
        // O P E N for write.
        // try-with-resources guarantees both streams are closed even when a write fails part way,
        // so the file handle cannot leak.
        try ( FileOutputStream fos = new FileOutputStream( new File( filename ), false/* no append */ );
              OutputStreamWriter osw = new OutputStreamWriter( fos, encoding ) )
            {
            // W R I T E
            for ( char c = 0; c < SAMPLE_CHAR_COUNT; c++ )
                {
                if ( c % CHARS_PER_LINE == 0 )
                    {
                    // start a fresh line for each group of chars
                    osw.write( '\n' );
                    }
                // the char widens to int, selecting Writer.write( int ), which emits that single char
                osw.write( c );
                }
            }
        // C L O S E happens automatically on leaving the try block; closing the writer flushes it.
        }

    /**
     * Write the two sample files, one encoded in UTF-8 and one in ISO-8859-1, for comparison.
     * The output file names are hard-coded under C:/temp.
     *
     * @param args not used
     *
     * @throws java.io.IOException on I/O failure
     */
    public static void main( String[] args ) throws IOException
        {
        writeSampleFile( "C:/temp/temputf8.txt", EIO.UTF8 );
        writeSampleFile( "C:/temp/tempiso88591", ISO88591Charset );
        // If you compare the output files, you see UTF-8 and ISO-8859-1 encode 7 bit characters identically,
        // 0x00 .. 0x7f,
        // but after that they are quite different.
        }
    }