/* * [HTMLTokenizer.java] * * Summary: Decides which extensions will be processed by the HTML finite state automaton parser. * * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2004-05-15 */ package com.mindprod.jprep; import com.mindprod.jtokens.Token; /** * Decides which extensions will be processed by the HTML finite state automaton parser. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2004-05-15 * @since 2004-05-15 */ public final class HTMLTokenizer implements JPrepTokenizer { /** * Constructor */ public HTMLTokenizer() { // we don't load the big HTMLState class until we actually first need to // parse. // Because HTMLState is all static, it will be created only once no // matter // how many // HTMLTokenizers you create. } /** * @inheritDoc */ public String[] getExtensions() { return new String[] { "html", "htmlfrag", "htm", "htmfrag", "dtd", "dtdfrag", "jnlp", "jnlpfrag", // js now handled by bali tokenizer "jsp", "jspfrag", "pom", /* maven xml */ "pomfrag", "tml", "tmlfrag", "xhtml", "xhtmlfrag", "xml", "xmlfrag", "xsd", "xsdfrag" }; } /** * Parse program and return array of tokens * * @param big The string of text to analyse, an entire file or fragment. * * @return tokenized equivalent that encodes the fonts and colours. */ public Token[] tokenize( String big ) { return HTMLState.parse( big ); } } // end HTMLTokenizer