/* * [JavaRegex.java] * * Summary: Java Regex Search strings reserve chars for regex use. * * Copyright: (c) 2002-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 4.5 2009-02-26 add both Java string quoting and plain for Java search/regexes. */ package com.mindprod.quoter; import com.mindprod.fastcat.FastCat; /** * Java Regex Search strings reserve chars for regex use. *
* These must be quoted with a leading \. - + * ? ( ) [ ] \ | $ ^ < = * * @author Roedy Green, Canadian Mind Products * @version 4.5 2009-02-26 add both Java string quoting and plain for Java search/regexes. * @since 2002-06-19 */ class JavaRegex extends Translator { /** * translate for chars outside [] */ protected static final String[] outside; /** * translate for chars inside [] */ protected static final String[] inside; /** * quoted string when used as replace string. */ protected static final String[] rep; static { // initialize the table array outside = new String[ 256 ]; for ( int i = 0; i < 32; i++ ) { // to hex \0xhh outside[ i ] = "\\0x" + Integer.toHexString( i + 0x100 ).substring( 1, 3 ); } for ( int i = 32; i < 127; i++ ) { // leave char unmolested. outside[ i ] = String.valueOf( ( char ) i ).intern(); } for ( int i = 127; i < 256; i++ ) { // to hex \0xhh outside[ i ] = "\\0x" + Integer.toHexString( i + 0x100 ).substring( 1, 3 ); } // override with chars that have special meaning in search regex representations // ! - < > = , are ok // $ ( ) * + . ? [ \ ] ^ { | } need quoting outside [] outside[ 7 ] = "\\a";/* Bell characters */ outside[ 8 ] = "\\b";/* Backspace character */ outside[ 9 ] = "\\t";/* TAB */ outside[ 10 ] = "\\n";/* Line Feed clipboard strips cr in crlf */ outside[ 11 ] = "\\v";/* Vertical TAB */ outside[ 12 ] = "\\f";/* Form Feed */ outside[ 13 ] = "\\r";/* cr */ // outside[ '!' ] nothing special // outside[ '\"' ] // outside[ '#' ] outside[ '$' ] = "\\$";/* $ */ outside[ '(' ] = "\\(";/* ( */ outside[ ')' ] = "\\)";/* ) */ outside[ '*' ] = "\\*";/* * */ outside[ '+' ] = "\\+";/* + */ outside[ '-' ] = "-";/* - */ outside[ '.' ] = "\\.";/* - */ // outside[ '/ ] does not need to be quoted // outside[ '<' ] // outside[ '>' ] // outside[ '=' ] outside[ '?' ] = "\\?";/* ? */ outside[ '[' ] = "\\[";/* [ */ outside[ '\\' ] = "\\\\";/* \ */ outside[ ']' ] = "\\]";/* ] */ outside[ '^' ] = "\\^";/* ^ */ outside[ '{' ] = "\\{";/* { */ outside[ '|' ] = "\\|";/* | */ outside[ '}' ] = "\\}";/* } */ } // end static init static { // initialize the table array inside = new String[ 256 ]; for ( int i = 0; i < 32; i++ ) { // to hex \0xhh inside[ i ] = "\\0x" + Integer.toHexString( i + 0x100 ).substring( 1, 3 ); } for ( int i = 32; i < 127; i++ ) { // leave char unmolested. inside[ i ] = String.valueOf( ( char ) i ).intern(); } for ( int i = 127; i < 256; i++ ) { // to hex \0xhh inside[ i ] = "\\0x" + Integer.toHexString( i + 0x100 ).substring( 1, 3 ); } // override with chars that have special meaning in search regex representations // - ^ [ \ ] need quoting inside [] inside[ 7 ] = "\\a";/* Bell characters */ inside[ 8 ] = "\\b";/* Backspace character */ inside[ 9 ] = "\\t";/* TAB */ inside[ 10 ] = "\\n";/* Line Feed clipboard strips cr in crlf */ inside[ 11 ] = "\\v";/* Vertical TAB */ inside[ 12 ] = "\\f";/* Form Feed */ inside[ 13 ] = "\\r";/* cr */ // outside[ '!' ] nothing special // outside[ '\"' ] // outside[ '#' ] inside[ '$' ] = "$";/* $ */ inside[ '(' ] = "(";/* ( */ inside[ ')' ] = ")";/* ) */ inside[ '*' ] = "*";/* * */ inside[ '+' ] = "+";/* + */ inside[ '-' ] = "\\-";/* - */ inside[ '.' ] = ".";/* - */ // outside[ '/ ] does not need to be quoted // outside[ '<' ] // outside[ '>' ] // outside[ '=' ] inside[ '?' ] = "?";/* ? */ inside[ '[' ] = "\\[";/* [ */ inside[ '\\' ] = "\\\\";/* \ */ inside[ ']' ] = "\\]";/* ] */ inside[ '^' ] = "\\^";/* ^ */ inside[ '{' ] = "{";/* { */ inside[ '|' ] = "|";/* | */ inside[ '}' ] = "}";/* } */ } // end static init static { // translation rep for char to Replace literal string form. rep = new String[ 256 ]; // initialize the latin1 array for ( int i = 0; i < 32; i++ ) { // to hex \0xhh rep[ i ] = "\\0x" + Integer.toHexString( i + 0x100 ).substring( 1, 3 ); } for ( int i = 32; i < 127; i++ ) { // leave char unmolested. rep[ i ] = String.valueOf( ( char ) i ).intern(); } for ( int i = 127; i < 256; i++ ) { // to hex \0xhh rep[ i ] = "\\0x" + Integer.toHexString( i + 0x100 ).substring( 1, 3 ); } // override with chars that have special replace representations // % \ < > must be preceded by \ rep[ 7 ] = "\\a"; rep[ 8 ] = "\\b";/* Backspace character */ rep[ 9 ] = "\\t";/* TAB */ rep[ 10 ] = "\\r\\n"; /* * Line Feed clipboard strips cr in crlf, * replace it */ rep[ 11 ] = "\\v";/* Vertical TAB */ rep[ 12 ] = "\\f";/* Form Feed */ rep[ 13 ] = "\\r";/* cr */ rep[ '\\' ] = "\\\\";/* \ */ rep[ '$' ] = "\\$";/* used to mark replace spot */ } // end static init /** * cook this raw string with the translate table */ public static String cook( String[] trt, String raw ) { if ( raw.length() == 0 ) { return ""; } StringBuilder cooked = new StringBuilder( raw.length() * 120 / 100 ); for ( int i = 0; i < raw.length(); i++ ) { char c = raw.charAt( i ); if ( c < 256 ) { cooked.append( trt[ c ] ); } else { // leave alone cooked.append( c ); } } // end for return cooked.toString(); } public static String cookInsideRegex( String raw ) { return cook( inside, raw ); } public static String cookOutsideRegex( String raw ) { return cook( outside, raw ); } public static String cookReplacementRegex( String raw ) { return cook( rep, raw ); } /** * add Java search regex quoting, then Java string literal quoting. * * @param raw text to be translated. * * @return String representing the cooked output, possibly null. */ public String process( String raw ) { assert raw != null : "TextProcessor.process raw must not be null"; final FastCat sb = new FastCat( 6 ); sb.append( "outside [ ]:\n", cookOutsideRegex( raw ) ); sb.append( "\n\ninside [ ]:\n", cookInsideRegex( raw ) ); sb.append( "\n\nas replacement:\n", cookReplacementRegex( raw ) ); return sb.toString(); } // end process }