/*
* [HTMLTokenizer.java]
*
* Summary: Decides which extensions will be processed by the HTML finite state automaton parser.
*
* Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2004-05-15
*/
package com.mindprod.jprep;
import com.mindprod.jtokens.Token;
/**
* Decides which extensions will be processed by the HTML finite state automaton parser.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2004-05-15
* @since 2004-05-15
*/
public final class HTMLTokenizer implements JPrepTokenizer
{
    /**
     * File extensions (without the leading dot) that this tokenizer claims.
     * Kept private; callers only ever receive a copy.
     */
    private static final String[] EXTENSIONS = {
            "html",
            "htmlfrag",
            "htm",
            "htmfrag",
            "dtd",
            "dtdfrag",
            "jnlp",
            "jnlpfrag",
            // js is not listed: it is now handled by the bali tokenizer
            "jsp",
            "jspfrag",
            "pom", // maven xml
            "pomfrag",
            "tml",
            "tmlfrag",
            "xhtml",
            "xhtmlfrag",
            "xml",
            "xmlfrag",
            "xsd",
            "xsdfrag"
    };

    /**
     * Creates a tokenizer. Deliberately does no work: nothing here touches
     * HTMLState, so that (large) class is not loaded until the first call
     * to {@link #tokenize(String)}. HTMLState is used only through its static
     * parse method, so creating many HTMLTokenizers does not create
     * additional parser instances.
     */
    public HTMLTokenizer()
    {
    }

    /**
     * {@inheritDoc}
     *
     * @return a newly allocated array on every call, listing the extensions
     *         handled by the HTML parser; the caller may modify it freely.
     */
    public String[] getExtensions()
    {
        // clone so that each caller gets its own array, as before
        return EXTENSIONS.clone();
    }

    /**
     * Parses the given text by delegating to {@code HTMLState.parse}.
     *
     * @param big The string of text to analyse, an entire file or fragment.
     *
     * @return tokenized equivalent that encodes the fonts and colours.
     */
    public Token[] tokenize( String big )
    {
        final Token[] tokens = HTMLState.parse( big );
        return tokens;
    }
} // end HTMLTokenizer