/*
 * [HTMLTokenizer.java]
 *
 * Summary: Decides which extensions will be processed by the HTML finite state automaton parser.
 *
 * Copyright: (c) 2004-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2004-05-15
 */
package com.mindprod.jprep;

import com.mindprod.jtokens.Token;

/**
 * Tokenizer front end for HTML-like files. It declares which file extensions are handed to the
 * HTML finite state automaton parser and forwards the actual parsing to {@code HTMLState}.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2004-05-15
 * @since 2004-05-15
 */
public final class HTMLTokenizer implements JPrepTokenizer
    {
    /**
     * Master list of extensions this tokenizer handles. Never handed out directly;
     * getExtensions returns a copy so that callers cannot alter this list.
     */
    private static final String[] EXTENSIONS = {
            "html",
            "htmlfrag",
            "htm",
            "htmfrag",
            "dtd",
            "dtdfrag",
            "jnlp",
            "jnlpfrag", // js now handled by bali tokenizer
            "jsp",
            "jspfrag",
            "pom",  /* maven xml */
            "pomfrag",
            "tml",
            "tmlfrag",
            "xhtml",
            "xhtmlfrag",
            "xml",
            "xmlfrag",
            "xsd",
            "xsdfrag" };

    /**
     * Constructor. Deliberately does nothing.
     */
    public HTMLTokenizer()
        {
        // Nothing is set up here, so the big HTMLState class is not touched
        // until tokenize is first called. Per the original author's note,
        // HTMLState is all static, so it is created only once no matter how
        // many HTMLTokenizers you create.
        }

    /**
     * {@inheritDoc}
     * <p>
     * A fresh array is returned on every call, so the caller is free to modify it.
     */
    public String[] getExtensions()
        {
        return EXTENSIONS.clone();
        }

    /**
     * Parse a program and return the resulting array of tokens.
     * All the work is delegated to {@code HTMLState.parse}.
     *
     * @param big The string of text to analyse, an entire file or fragment.
     *
     * @return tokenized equivalent that encodes the fonts and colours.
     */
    public Token[] tokenize( String big )
        {
        final Token[] tokens = HTMLState.parse( big );
        return tokens;
        }
    } // end HTMLTokenizer