/* * [JavaCharCategory.java] * * Summary: top level enum to define the categories of character for the BatTokenizer. * * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 3.1 2009-04-12 shorter style names, improved highlighting. */ package com.mindprod.jprep; /** * top level enum to define the categories of character for the BatTokenizer. * * @author Roedy Green, Canadian Mind Products * @version 3.1 2009-04-12 shorter style names, improved highlighting. * @since 2004-05-15 */ @SuppressWarnings( { "EnumeratedConstantNamingConvention" } ) public enum JavaCharCategory { /** * @ */ AT, /** * backslash, used in quoting in side quotes or ticks */ BACKSLASH, /** * digit 0-9 */ DIGIT, /** * decimal point, qualifying dot */ DOT, /** * End of line character */ EOL, /** * bracketting character e.g. ()[]{} */ FENCE, /** * ignore control chars */ IGNORE, /** * high ascii ` and chars not used in Java */ OTHER, /** * chars used in identifiers, except digits, $ */ PLAIN, /** * _ */ UNDERSCORE, /** * punctuation includes ; */ PUNCTUATION, /** * " */ QUOTE, /** * / */ SLASH, /** * space */ SPACE, /** * star */ STAR, /** * ' */ TICK; /** * Categorise one character * * @param theChar character to categorise * * @return category code, e.g. PLAIN QUOTE */ static JavaCharCategory categorise( char theChar ) { if ( 'a' <= theChar && theChar <= 'z' ) { return PLAIN; } if ( 'A' <= theChar && theChar <= 'Z' ) { return PLAIN; } if ( '0' <= theChar && theChar <= '9' ) { return DIGIT; } switch ( theChar ) { case ' ': case '\t': case 0xa0://   return SPACE; case '\n': return EOL; case '\r': case 127: case 0xfeff: /* bom */ case 0xfffd: /* replaced bom */ return IGNORE; case '$': case '_': return PLAIN; case '*': return STAR; case '/': return SLASH; case '\"': return QUOTE; case '\'': return TICK; case '(': case ')': case '[': case ']': case '{': case '}': return FENCE; case '@':// ok in comments return AT; case '.': return DOT; case '!': case '#':// ok in comments case '%': case '&': case '+': case ',': case '-': case ':': case ';':// treated as punctuation case '<': case '=': case '>': case '?': case '^': case '`':// ok in comments case '|': case '~': return PUNCTUATION; case '\\': return BACKSLASH; default: if ( 0 <= theChar && theChar <= 31 ) { return IGNORE; } else if ( 128 <= theChar ) { // treat high bit on as ordinary alpha return PLAIN; } else { return OTHER; } } // end switch } // end categorise } // end JavaCharCategory