/*
 * [AssignCSSClassesForMindprod.java]
 *
 * Summary: Determine the css class for a given URL for Mindprod.com.
 *
 * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2011-01-10 initial version
 *  1.1 2012-11-16 add isAffiliate to automatically mark affiliate links
 *  1.2 2015-09-27 support .asia and .travel domains
 */
package com.mindprod.htmlmacros.support;

import com.mindprod.common18.EIO;
import com.mindprod.common18.Misc;
import com.mindprod.common18.ST;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;

import static com.mindprod.common18.ST.trimLeading;
import static java.lang.System.*;

/**
 * Determine the css class for a given URL for Mindprod.com.
 * <p/>
 * Also puts links on single line with canonical spacing.
 * <p/>
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.2 2015-09-27 support .asia and .travel domains
 * @noinspection WeakerAccess
 * @see ConfigurationForMindprod
 * @since 2011-01-10
 */
public final class AssignCSSClassesForMindprod implements AssignCSSClasses
    {
    // todo: add .com#. , ip, treat ext links to mindprod.com as internal
    // declarations

    /**
     * Maps a directory path (e.g. "jgloss" or "feedback/money") to the name of the CSS class for that directory.
     * Created empty with an initial capacity of 50; the entries are added by a static initializer of this class.
     */
    static final HashMap<String, String> DIR_TO_CLASS =
            new HashMap<>( 50 );

    /**
     * Maps a domain including its TLD (e.g. "wikipedia.org") to the name of the CSS class for that domain.
     * Created empty with an initial capacity of 30; the entries are added by a static initializer of this class.
     */
    static final HashMap<String, String> DOMAIN_TO_CLASS =
            new HashMap<>( 30 );

    /**
     * Names of the index and glossary pages (master index, per-letter and per-digit index pages,
     * punctuation index, broken-links page, and the d/e/l glossaries).
     * <p/>
     * NOTE(review): the earlier wording of this comment said "without .html", but every entry here
     * carries the .html extension. Judging by the name and the trailing comment in the list, class
     * markers are suppressed for these files -- confirm against the code that consults this set.
     */
    static final HashSet<String> CLASSLESS_FILES =
            new HashSet<>( Arrays.asList( "brokenlinks.html",
                    "0.html", "1.html", "2.html", "3.html", "4.html", "5.html", "6.html", "7.html", "8.html", "9.html",
                    "0-9.html", "masterindex.html", "punct.html",
                    "a.html", "b.html", "c.html", "d.html", "e.html", "f.html", "g.html", "h.html",
                    "i.html", "j.html", "k.html", "l.html", "m.html", "n.html", "o.html", "p.html",
                    "q.html", "r.html", "s.html", "t.html", "u.html", "v.html", "w.html", "x.html", "y.html", "z.html",
                    "dgloss.html", "egloss.html", "lgloss.html"
                    // Overkill: this suppresses all class markers in both the index and the glossaries,
                    // although the real goal was only to suppress them in the index.
            ) );

    /**
     * The 2-char country TLD suffixes this class knows about, lower case, without the leading dot.
     * <p/>
     * Entries are laid out alphabetically, one row (or two) per initial letter, so that a missing or
     * repeated code is easy to spot. Each code appears exactly once; 245 codes in all.
     */
    static final HashSet<String> LEGAL_COUNTRIES =
            new HashSet<>( Arrays.asList(
                    "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "az",
                    "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm",
                    "bn", "bo", "br", "bs", "bt", "bv", "bw", "by", "bz",
                    "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl",
                    "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz",
                    "de", "dj", "dk", "dm", "do", "dz",
                    "ec", "ee", "eg", "eh", "er", "es", "et", "eu",
                    "fi", "fj", "fk", "fm", "fo", "fr",
                    "ga", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm",
                    "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy",
                    "hk", "hm", "hn", "hr", "ht", "hu",
                    "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it",
                    "je", "jm", "jo", "jp",
                    "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz",
                    "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly",
                    "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm", "mn", "mo",
                    "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz",
                    "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz",
                    "om",
                    "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py",
                    "qa",
                    "re", "ro", "rs", "ru", "rw",
                    "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk",
                    "sl", "sm", "sn", "so", "sr", "st", "sv", "sy", "sz",
                    "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tr", "tt",
                    "tv", // "tv", usually means Television not Tuvalu.
                    "tw", "tz",
                    "ua", "ug", "uk", "um", "us", "uy", "uz",
                    "va", "vc", "ve", "vg", "vi", "vn", "vu",
                    "wf",
                    "ws", // usually means website, not Western Samoa
                    "ye", "yt",
                    "za", "zm", "zw"
            ) );

    /**
     * Existing CSS classes we are allowed to update. We also update classless links.
     * <p/>
     * The "offsite-xx" entries mirror, one for one, the two-letter codes in LEGAL_COUNTRIES;
     * when a country code is added or removed there, do the same here.
     * Each name appears exactly once; 320 names in all.
     */
    static final HashSet<String> MODIFIABLE_CSS_CLASSES =
            new HashSet<>( Arrays.asList(
                    "affiliate", "amazon", "asia", "animalrights", "apache", "apple", "applet",
                    "bgloss", "biz", "blogger", "book",
                    "canada", "cbc", "cdrive", "com", "contact", "coop",
                    "deepthoughts", "download", "dvd",
                    "earth", "edu", "electronics", "encoding", "environment", "esper", "ethics",
                    "facebook", "feedback", "ftp",
                    "ggloss", "github", "google", "googlecode",
                    /* "googlesearch" deliberately left out */
                    "handbook", "humanrights",
                    "imdb", "info", "int",
                    "jdrive", "jgloss",
                    "kickstarter", "kook",
                    "ll",
                    "microsoft", "mil", "money",
                    "name", "net", "none", "nyt",
                    "offsite",
                    // offsite-xx: one per country code, same order as LEGAL_COUNTRIES
                    "offsite-ac", "offsite-ad", "offsite-ae", "offsite-af", "offsite-ag", "offsite-ai",
                    "offsite-al", "offsite-am", "offsite-an", "offsite-ao", "offsite-aq", "offsite-ar",
                    "offsite-as", "offsite-at", "offsite-au", "offsite-aw", "offsite-az",
                    "offsite-ba", "offsite-bb", "offsite-bd", "offsite-be", "offsite-bf", "offsite-bg",
                    "offsite-bh", "offsite-bi", "offsite-bj", "offsite-bm", "offsite-bn", "offsite-bo",
                    "offsite-br", "offsite-bs", "offsite-bt", "offsite-bv", "offsite-bw", "offsite-by",
                    "offsite-bz",
                    "offsite-ca", "offsite-cc", "offsite-cd", "offsite-cf", "offsite-cg", "offsite-ch",
                    "offsite-ci", "offsite-ck", "offsite-cl", "offsite-cm", "offsite-cn", "offsite-co",
                    "offsite-cr", "offsite-cu", "offsite-cv", "offsite-cx", "offsite-cy", "offsite-cz",
                    "offsite-de", "offsite-dj", "offsite-dk", "offsite-dm", "offsite-do", "offsite-dz",
                    "offsite-ec", "offsite-ee", "offsite-eg", "offsite-eh", "offsite-er", "offsite-es",
                    "offsite-et", "offsite-eu",
                    "offsite-fi", "offsite-fj", "offsite-fk", "offsite-fm", "offsite-fo", "offsite-fr",
                    "offsite-ga", "offsite-gd", "offsite-ge", "offsite-gf", "offsite-gg", "offsite-gh",
                    "offsite-gi", "offsite-gl", "offsite-gm", "offsite-gn", "offsite-gp", "offsite-gq",
                    "offsite-gr", "offsite-gs", "offsite-gt", "offsite-gu", "offsite-gw", "offsite-gy",
                    "offsite-hk", "offsite-hm", "offsite-hn", "offsite-hr", "offsite-ht", "offsite-hu",
                    "offsite-id", "offsite-ie", "offsite-il", "offsite-im", "offsite-in", "offsite-io",
                    "offsite-iq", "offsite-ir", "offsite-is", "offsite-it",
                    "offsite-je", "offsite-jm", "offsite-jo", "offsite-jp",
                    "offsite-ke", "offsite-kg", "offsite-kh", "offsite-ki", "offsite-km", "offsite-kn",
                    "offsite-kp", "offsite-kr", "offsite-kw", "offsite-ky", "offsite-kz",
                    "offsite-la", "offsite-lb", "offsite-lc", "offsite-li", "offsite-lk", "offsite-lr",
                    "offsite-ls", "offsite-lt", "offsite-lu", "offsite-lv", "offsite-ly",
                    "offsite-ma", "offsite-mc", "offsite-md", "offsite-me", "offsite-mg", "offsite-mh",
                    "offsite-mk", "offsite-ml", "offsite-mm", "offsite-mn", "offsite-mo", "offsite-mp",
                    "offsite-mq", "offsite-mr", "offsite-ms", "offsite-mt", "offsite-mu", "offsite-mv",
                    "offsite-mw", "offsite-mx", "offsite-my", "offsite-mz",
                    "offsite-na", "offsite-nc", "offsite-ne", "offsite-nf", "offsite-ng", "offsite-ni",
                    "offsite-nl", "offsite-no", "offsite-np", "offsite-nr", "offsite-nu", "offsite-nz",
                    "offsite-om",
                    "offsite-pa", "offsite-pe", "offsite-pf", "offsite-pg", "offsite-ph", "offsite-pk",
                    "offsite-pl", "offsite-pm", "offsite-pn", "offsite-pr", "offsite-ps", "offsite-pt",
                    "offsite-pw", "offsite-py",
                    "offsite-qa",
                    "offsite-re", "offsite-ro", "offsite-rs", "offsite-ru", "offsite-rw",
                    "offsite-sa", "offsite-sb", "offsite-sc", "offsite-sd", "offsite-se", "offsite-sg",
                    "offsite-sh", "offsite-si", "offsite-sj", "offsite-sk", "offsite-sl", "offsite-sm",
                    "offsite-sn", "offsite-so", "offsite-sr", "offsite-st", "offsite-sv", "offsite-sy",
                    "offsite-sz",
                    "offsite-tc", "offsite-td", "offsite-tf", "offsite-tg", "offsite-th", "offsite-tj",
                    "offsite-tk", "offsite-tl", "offsite-tm", "offsite-tn", "offsite-to", "offsite-tr",
                    "offsite-tt", "offsite-tv", "offsite-tw", "offsite-tz",
                    "offsite-ua", "offsite-ug", "offsite-uk", "offsite-um", "offsite-us", "offsite-uy",
                    "offsite-uz",
                    "offsite-va", "offsite-vc", "offsite-ve", "offsite-vg", "offsite-vi", "offsite-vn",
                    "offsite-vu",
                    "offsite-wf", "offsite-ws",
                    "offsite-ye", "offsite-yt",
                    "offsite-za", "offsite-zm", "offsite-zw",
                    "onpage", "oracle", "org",
                    "paypal", "pdf", "phone", "politics", "project",
                    "quote",
                    "religion", "reunion",
                    "sourceforge", "ssl", "subversion", "sun",
                    "tindie", "tortoisesubversion", "travel", "twitter",
                    "w3schools", "webstart", "wikipedia",
                    "xdrive"
                    // notably absent: rat, rec, onion, strawberry, raspberry
                    // NOTE(review): an older version of this comment also named "download" as absent,
                    // yet "download" is in the list above -- confirm which of the two is intended.
                    // not kjv, generated by KJV macro. Can leave alone.
            ) );

    /**
     * Domains (including TLD, e.g. "ebay.com") of vendors that, going by the name of this set,
     * accept PayPal. NOTE(review): how this set is consulted is not visible here -- confirm with the caller.
     * <p/>
     * Declared with an explicit element type so lookups and iteration are type-checked.
     */
    static final HashSet<String> TAKES_PAYPAL =
            new HashSet<>( Arrays.asList(
                    "1800headsets.ca",
                    "abelssoft.de",
                    "allposters.com",
                    "amazon.cn",
                    "angelfins.ca",
                    "aquariumguys.com",
                    "aquariumsupplies.ca",
                    "art.com",
                    "avshop.ca",
                    "axialis.com",
                    "barnesandnoble.com",
                    "bedbathandbeyond.ca",
                    "bedphones.com",
                    "bestbuy.ca",
                    "bestbuy.com",
                    "buylightfixtures.com",
                    "canadacomputers.com",
                    "cardswap.ca",
                    "chegg.com",
                    "corepad.com",
                    "creoly.com",
                    "decathlon.co.uk",
                    "drsfostersmith.com",
                    "dx.com",
                    "ebay.ca",
                    "ebay.com",
                    "eddiebauer.com",
                    "eshave.com",
                    "everbuying.com",
                    "everbuying.net",
                    "excelsiorjet.com",
                    "extreme-pc.ca",
                    "fastspring.com",
                    "fasttech.com",
                    "fendrihan.com",
                    "funduc.com",
                    "gouletpens.com",
                    "headsetsdepot.com",
                    "hyperglide.net",
                    "ikea.com",
                    "indigo.ca",
                    "ipenstore.com",
                    "jetpens.com",
                    "kobobooks.com",
                    "ksoftware.net",
                    "labelcity.com",
                    "limedomains.com",
                    "lsfgroup.ca",
                    "lunarpages.com",
                    "martelelectronics.com",
                    "musclefitbasics.com",
                    "ncix.ca",
                    "ncix.com",
                    "ncixus.com",
                    "newegg.ca",
                    "newegg.com",
                    "nike.com",
                    "notemaker.com.au",
                    "oo-software.com",
                    "paypal.com",
                    "pcpitstop.com",
                    "petmountain.com",
                    "petsandponds.com",
                    "posterburner.com",
                    "posterjack.com",
                    "powells.com",
                    "raxco.com",
                    "reefsupplies.ca",
                    "richarddawkins.net",
                    "sears.com",
                    "slicksurf.com",
                    "simons.ca",
                    "switchshirts.net",
                    "t-shirtwholesaler.com",
                    "thebay.com",
                    "thecopperhat.ca",
                    "tigerdirect.com",
                    "walmart.com",
                    "wonderpens.ca",
                    "wushnet.com"
            ) );

    /**
     * Generic (non-country) TLDs that have their own CSS class. The class name is the same as the TLD,
     * e.g. links to a .org site get class "org".
     * <p/>
     * NOTE(review): "gov" is listed here but has no matching entry in MODIFIABLE_CSS_CLASSES,
     * unlike every other TLD in this set -- confirm whether that is deliberate.
     */
    static final HashSet<String> TLD_WITH_CLASS =
            new HashSet<>( Arrays.asList(
                    "asia",
                    "biz",
                    "com",
                    "coop",
                    "earth",
                    "edu",
                    "gov",
                    "info",
                    "int",
                    "mil",
                    "name",
                    "net",
                    "org",
                    "travel" ) );

    /**
     * Domains that give us commissions, each written as domain plus TLD (e.g. "cj.com").
     * Consulted by isAffiliate. The entries are only roughly alphabetical; order does not matter
     * because they are stored in a HashSet.
     */
    private static final HashSet<String> AFFILIATE_DOMAINS =
            new HashSet<>( Arrays.asList(
                    "art.com",
                    "allposters.com",
                    "alt.com",
                    "bmtmicro.com",
                    "bookbyte.com",
                    "buddhagroove.com",
                    "cc-dt.com",
                    "ccbill.com",
                    "cj.com",
                    "clickbank.net",
                    "clicksor.com",
                    "clixgalore.com",
                    "webring.com",
                    "disktrix.com",
                    "doubleclick.net",
                    "dreamhost.com",
                    "element5.com",
                    "esellerate.net",
                    "friendfinder.com",
                    "gayfriendfinder.com",
                    "gayuniverse.com",
                    "greatcall.com",
                    "hide-the-ip.com",
                    "iconshock.com",
                    "jzip.com",
                    "linksynergy.com",
                    "lunarpages.com",
                    "mycommerce.com",
                    "myfonts.com",
                    "outpersonals.com",
                    "sextoyfun.com",
                    "oo-software.com",
                    "sitepal.com",
                    "spamnix.com",
                    "thehubpeople.com",
                    "weatherbuffs.com",
                    "xara.com",
                    "yourfonts.com" ) );

    /**
     * URL string remembered by getURL for its one-entry cache; null until getURL has been called.
     */
    private static String prevUrlString = null;

    /**
     * URL object remembered by getURL alongside prevUrlString, so that asking for the same
     * string twice in a row does not construct a second URL. Null until getURL has been called.
     */
    private static URL prevUrl = null;

    // /declarations
    static
        {
        // domains that have their own class
        DOMAIN_TO_CLASS.put( "amazon.com", "amazon" );
        DOMAIN_TO_CLASS.put( "amazonaws.com", "amazon" );
        DOMAIN_TO_CLASS.put( "apache.net", "apache" );
        DOMAIN_TO_CLASS.put( "apache.org", "apache" );
        DOMAIN_TO_CLASS.put( "apple.com", "apple" );
        DOMAIN_TO_CLASS.put( "blogspot.ca", "blogger" );
        DOMAIN_TO_CLASS.put( "blogspot.com", "blogger" );
        DOMAIN_TO_CLASS.put( "cbc.ca", "cbc" );
        DOMAIN_TO_CLASS.put( "facebook.com", "facebook" );
        DOMAIN_TO_CLASS.put( "imdb.com", "imdb" );
        DOMAIN_TO_CLASS.put( "invisionzone.com", "w3schools" );
        DOMAIN_TO_CLASS.put( "java.com", "oracle" );
        DOMAIN_TO_CLASS.put( "java.net", "javanet" );
        DOMAIN_TO_CLASS.put( "kenai.com", "javanet" );
        DOMAIN_TO_CLASS.put( "kickstarter.com", "kickstarter" );
        DOMAIN_TO_CLASS.put( "live.com", "microsoft" );
        DOMAIN_TO_CLASS.put( "microsoft.com", "microsoft" );
        DOMAIN_TO_CLASS.put( "windowsupdate.com", "microsoft" );
        DOMAIN_TO_CLASS.put( "nytimes.com", "nyt" );
        DOMAIN_TO_CLASS.put( "oracle.com", "oracle" );
        DOMAIN_TO_CLASS.put( "oracleimg.com", "oracle" );
        DOMAIN_TO_CLASS.put( "sourceforge.net", "sourceforge" );
        DOMAIN_TO_CLASS.put( "sun.com", "oracle" );
        DOMAIN_TO_CLASS.put( "tindie.com", "tindie" );
        DOMAIN_TO_CLASS.put( "twitter.com", "twitter" );
        DOMAIN_TO_CLASS.put( "w3.org", "w3c" );
        DOMAIN_TO_CLASS.put( "w3schools.com", "w3schools" );
        DOMAIN_TO_CLASS.put( "wikipedia.org", "wikipedia" );
        DOMAIN_TO_CLASS.put( "yahoo.org", "yahoo" );
        }

    static
        {
        // directories that have their own class , kjv does too, but we do not disturb it.
        DIR_TO_CLASS.put( "animalrights", "animalrights" );
        DIR_TO_CLASS.put( "applet", "applet" );
        DIR_TO_CLASS.put( "bgloss", "bgloss" );
        DIR_TO_CLASS.put( "book", "book" );
        DIR_TO_CLASS.put( "canada", "canada" );
        DIR_TO_CLASS.put( "contact", "contact" );
        DIR_TO_CLASS.put( "deepthoughts", "deepthoughts" );
        DIR_TO_CLASS.put( "dvd", "dvd" );
        DIR_TO_CLASS.put( "electronic", "electronic" );
        DIR_TO_CLASS.put( "environment", "environment" );
        DIR_TO_CLASS.put( "esperanto", "esper" );
        DIR_TO_CLASS.put( "ethics", "ethics" );
        DIR_TO_CLASS.put( "feedback", "feedback" );
        DIR_TO_CLASS.put( "feedback/animals", "feedback" );
        DIR_TO_CLASS.put( "feedback/environment", "feedback" );
        DIR_TO_CLASS.put( "feedback/god", "feedback" );
        DIR_TO_CLASS.put( "feedback/humanrights", "feedback" );
        DIR_TO_CLASS.put( "feedback/money", "feedback" );
        DIR_TO_CLASS.put( "feedback/peace", "feedback" );
        DIR_TO_CLASS.put( "feedback/porn", "feedback" );
        DIR_TO_CLASS.put( "feedback/unmain", "feedback" );
        DIR_TO_CLASS.put( "ggloss", "ggloss" );
        DIR_TO_CLASS.put( "humanrights", "humanrights" );
        DIR_TO_CLASS.put( "javacc", "javacc" );
        DIR_TO_CLASS.put( "jgloss", "jgloss" );
        DIR_TO_CLASS.put( "jgloss/encoding", "encoding" );
        DIR_TO_CLASS.put( "livinglove", "ll" );
        DIR_TO_CLASS.put( "livinglove/handbook", "handbook" );
        DIR_TO_CLASS.put( "livinglove/methods", "ll" );
        DIR_TO_CLASS.put( "money", "money" );
        DIR_TO_CLASS.put( "phone", "phone" );
        DIR_TO_CLASS.put( "politics", "politics" );
        DIR_TO_CLASS.put( "politics/laser", "politics" );
        DIR_TO_CLASS.put( "project", "project" );
        DIR_TO_CLASS.put( "quote", "quote" );
        DIR_TO_CLASS.put( "religion", "religion" );
        DIR_TO_CLASS.put( "reunion", "reunion" );
        DIR_TO_CLASS.put( "webstart", "webstart" );
        }
    // methods

    /**
     * Get the URL corresponding to a string, remembering the most recent result.
     * <p/>
     * Only one entry is cached: asking for the same string twice in a row returns the same URL object
     * without parsing again. The cache is updated only after the string has parsed successfully, so a
     * malformed string throws every time it is presented and never leaves the cache pairing that
     * string with a stale (or null) URL. The cache fields are plain statics with no synchronization.
     *
     * @param urlString String we want to convert to an URL. Must not be null.
     *
     * @return corresponding URL.
     * @throws MalformedURLException if bad URL
     */
    private static URL getURL( String urlString ) throws MalformedURLException
        {
        if ( urlString.equals( prevUrlString ) )
            {
            return prevUrl;
            }
        // Parse first: if this throws, the previous cache entry must stay intact and consistent.
        final URL parsed = new URL( urlString );
        prevUrlString = urlString;
        prevUrl = parsed;
        return parsed;
        }// /method

    /**
     * Does this domain belong to one of our affiliates, i.e. one that gives us commissions?
     *
     * @param domain domain including tld e.g. linkshare.com
     *
     * @return true if the domain is listed in AFFILIATE_DOMAINS
     */
    private static boolean isAffiliate( final String domain )
        {
        final boolean paysCommission = AFFILIATE_DOMAINS.contains( domain );
        return paysCommission;
        }// /method

    /**
     * Does this url point at the local C: drive?
     * <p/>
     * Recognises the three file: spellings plus a bare drive letter. The comparison is
     * case-sensitive, so a lower-case "c:" is not recognised.
     *
     * @param url relative or absolute
     *
     * @return true if the url starts with one of the C: drive prefixes
     */
    private static boolean isCDrive( final String url )
        {
        final String[] cDrivePrefixes = { "file://localhost/C:", "file:/C:", "file:///C:", "C:" };
        for ( final String prefix : cDrivePrefixes )
            {
            if ( url.startsWith( prefix ) )
                {
                return true;
                }
            }
        return false;
        }// /method

    /**
     * is this url linking to one of the products?.html pages
     *
     * @param url relative or absolute
     *
     * @return true if links to products page or to *.zip product download,
     * but not downloads to other sites
     */
    private static boolean isDownload( final String url )
        {
        // Links in the root dir don't necessarily get the class.
        // trimLeading is given the characters "./"; per the original note this removes all
        // leading ../../ so the test below sees the bare path.
        // Only links to spots on the products page count, not the page in its entirety.
        return trimLeading( url, "./" ).startsWith( "products" );
        }// /method

    /**
     * is this url linking to the github code repository?
     *
     * @param url relative or absolute
     *
     * @return true if linking to github code repository
     */
    private static boolean isGitHub( final String url )
        {
        // only web links qualify
        final boolean web = url.startsWith( "http://" ) || url.startsWith( "https://" );
        if ( !web )
            {
            return false;
            }
        // either of the two github domains, anywhere in the url
        return url.contains( "github.com" ) || url.contains( "github.io" );
        }// /method

    /**
     * is this url linking to the google or android?  but not code or search
     *
     * @param url relative or absolute
     *
     * @return true if linking to google (other than google code)
     */
    private static boolean isGoogle( final String url )
        {
        // Only web links qualify. The protocol test is done first, on its own, so that it
        // guards EVERY domain below. Written as one expression, && binds tighter than ||,
        // which let non-web urls that merely contain android.com, goo.gl or tensorflow.org
        // slip through as google.
        if ( !( url.startsWith( "http://" ) || url.startsWith( "https://" ) ) )
            {
            return false;
            }
        // google itself, but not google code e.g. http://code.google.com/p/unitsofmeasure
        // and not search results.
        final boolean googleProper = url.contains( "google." )
                                     && !url.contains( "code.google.com/" )
                                     && !url.contains( "/search?" );
        // other domains that share the google class
        return googleProper
               || url.contains( "android.com" )
               || url.contains( "goo.gl" )
               || url.contains( "tensorflow.org" );
        }// /method

    /**
     * is this url linking to the google code repository?
     *
     * @param url relative or absolute
     *
     * @return true if linking to google code repository
     */
    private static boolean isGoogleCode( final String url )
        {
        // non-web links never count
        if ( !url.startsWith( "http://" ) && !url.startsWith( "https://" ) )
            {
            return false;
            }
        return url.contains( "code.google.com/" );
        }// /method

    /**
     * is this url linking to Jdrive?
     *
     * @param url relative or absolute
     *
     * @return true if linking to Jdrive
     */
    private static boolean isJDrive( final String url )
        {
        // bare drive letter form, e.g. J:/mindprod/products.html
        if ( url.startsWith( "J:" ) )
            {
            return true;
            }
        // everything else we recognise is a file: url, e.g. file://localhost/J:/mindprod/products.html
        if ( !url.startsWith( "file:" ) )
            {
            return false;
            }
        final String afterScheme = url.substring( "file:".length() );
        return afterScheme.startsWith( "//localhost/J:" )
               || afterScheme.startsWith( "/J:" )
               || afterScheme.startsWith( "///J:" );
        }// /method

    /**
     * is this url linking to a kook conspiracy website
     *
     * @param url relative or absolute
     *
     * @return true if linking to kook conspiracy website
     */
    private static boolean isKook( final String url )
        {
        // only web links are ever classed this way
        if ( !( url.startsWith( "http://" ) || url.startsWith( "https://" ) ) )
            {
            return false;
            }
        // domains given this class; a match anywhere in the url counts
        final String[] kookDomains = {
                "infowars.com",
                "breitbart.com",
                "spectator.org",
                "newsmax.com",
                "foxnews.com",
                "marklevinshow.com",
                "rushlimbaugh.com",
        };
        for ( final String kookDomain : kookDomains )
            {
            if ( url.contains( kookDomain ) )
                {
                return true;
                }
            }
        return false;
        }// /method

    /**
     * is this url linking offsite, not one of more specialised pages?
     *
     * @param url relative or absolute
     *
     * @return true if linking offsite
     */
    private static boolean isOffsite( final String url )
        {
        // strip the web scheme; anything that is not http/https is not an offsite web link.
        final String afterScheme;
        if ( url.startsWith( "http://" ) )
            {
            afterScheme = url.substring( "http://".length() );
            }
        else if ( url.startsWith( "https://" ) )
            {
            afterScheme = url.substring( "https://".length() );
            }
        else
            {
            return false;
            }
        // Our own site is not offsite, whether reached by http or https, and whether or not
        // anything follows the host. Insisting on the "/" (or end of string) after the host
        // keeps lookalikes such as mindprod.com.example.org offsite.
        final boolean ownSite = afterScheme.equals( "mindprod.com" ) || afterScheme.startsWith( "mindprod.com/" );
        return !ownSite;
        }// /method

    /**
     * is this url linking to a spot on the same page?
     *
     * @param url relative or absolute
     *
     * @return true if on same page.
     */
    private static boolean isOnPage( final String url )
        {
        // a link that is nothing but a fragment begins with #
        return !url.isEmpty() && url.charAt( 0 ) == '#';
        }// /method

    /**
     * is this url linking to an Adobe PDF/FDF download?
     *
     * @param url relative or absolute
     *
     * @return true if linking to pdf
     */
    private static boolean isPDF( final String url )
        {
        // decided purely by the (case-sensitive) tail of the url
        if ( url.endsWith( ".pdf" ) )
            {
            return true;
            }
        return url.endsWith( ".fdf" );
        }// /method

    /**
     * is this url linking to wush.net websvn?
     *
     * @param url relative or absolute
     *
     * @return true if linking to wush web subversion, potentially others.
     */
    private static boolean isSubversion( final String url )
        {
        // the websvn area of wush.net, reached by plain http ...
        if ( url.startsWith( "http://wush.net/websvn/" ) )
            {
            return true;
            }
        // ... or by https
        return url.startsWith( "https://wush.net/websvn/" );
        }// /method

    /**
     * is this url linking to wush net tortoise subversion
     *
     * @param url relative or absolute
     *
     * @return true if linking to wush.net tortoise subversion
     */
    private static boolean isTortoise( final String url )
        {
        // original note: https://wush.net/websvn/ is treated as ssl
        // the svn area of wush.net, reached by plain http ...
        if ( url.startsWith( "http://wush.net/svn/" ) )
            {
            return true;
            }
        // ... or by https
        return url.startsWith( "https://wush.net/svn/" );
        }// /method

    /**
     * is this url linking to Xdrive?
     *
     * @param url relative or absolute
     *
     * @return true if linking to Xdrive
     */
    private static boolean isXDrive( final String url )
        {
        // e.g. file://localhost/X:/Program Files/...
        if ( url.startsWith( "file://localhost/X:" ) )
            {
            return true;
            }
        if ( url.startsWith( "file:/X:" ) )
            {
            return true;
            }
        if ( url.startsWith( "file:///X:" ) )
            {
            return true;
            }
        // bare drive letter form
        return url.startsWith( "X:" );
        }// /method

    /**
     * assign a CSS class to a local link
     *
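     * @param url                  of link
     * @param fileBeingDistributed file where the link is embedded; relative urls are resolved against its parent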
     *
     * @return css class
     */
    private String assignCSSClassLocal( final String url, File fileBeingDistributed )
        {
        // classifications that need no directory lookup, tried in priority order
        if ( isPDF( url ) )
            {
            return "pdf";
            }
        else if ( isClassLess( url ) )
            {
            // links we deliberately leave without a class
            return null;
            }
        else if ( isOnPage( url ) )
            {
            return "onpage";
            }
        else if ( isDownload( url ) )
            {
            return "download";
            }
        // everything else is classed by the directory the link lands in.
        final String landingDir;
        if ( url.startsWith( "/" ) )
            {
            // lead / : webroot relative url
            landingDir = Tools.uPathParentWithSlashes( Tools.toFileFromUPath( url ) );
            }
        else
            {
            // relative url: resolve against the directory of the file embedding the link
            assert fileBeingDistributed != null : "null fileBeingDistributed";
            assert url != null : "null url";
            final File embeddingDir = fileBeingDistributed.getParentFile();
            assert embeddingDir != null : "null parent";
            landingDir = Tools.uPathParentWithSlashes( new File( embeddingDir, url ) );
            }
        // drop a trailing /include before the lookup; the lookup result may be null.
        return DIR_TO_CLASS.get( ST.chopTrailingString( landingDir, "/include" ) );
        }// /method

    /**
     * assign a CSS class to a link with a domain
     *
     * @param url of link
     *
     * @return css class
     */
    private String assignCSSClassWithDomain( final String url )
        {
        // extract the domain; an url we cannot parse gets no class.
        final String domain;
        try
            {
            domain = Misc.getDomain( getURL( url ) );
            }
        catch ( MalformedURLException ignored )
            {
            return null;
            }
        // 1. table-driven answers keyed on the exact domain
        if ( TAKES_PAYPAL.contains( domain ) )
            {
            return "paypal";
            }
        final String mapped = DOMAIN_TO_CLASS.get( domain );
        if ( !ST.isEmpty( mapped ) )
            {
            return mapped;
            }
        // 2. answers decided by the shape of the whole url; first match wins, later tests are not run.
        final String byShape =
                url.contains( "shitharperdid.ca.nyud.net" ) ? "offsite-ca"
                : isSubversion( url ) ? "subversion"
                : isTortoise( url ) ? "tortoisesubversion"
                : isGoogleCode( url ) ? "googlecode"
                : isGitHub( url ) ? "github"
                : isGoogle( url ) ? "google"
                : isKook( url ) ? "kook"
                : null;
        if ( byShape != null )
            {
            return byShape;
            }
        // 3. answers decided by the domain
        if ( isAffiliate( domain ) )
            {
            // might be http: or https:
            return "affiliate";
            }
        if ( domain.endsWith( "gov.ca" ) )
            {
            return "gov";
            }
        if ( domain.endsWith( "dnd.ca" ) )
            {
            return "mil";
            }
        // 4. answers decided by the top level domain, the part after the last dot
        final int lastDot = domain.lastIndexOf( '.' );
        final String tld;
        if ( lastDot > 0 )
            {
            tld = domain.substring( lastDot + 1 );
            }
        else
            {
            tld = "";
            }
        if ( TLD_WITH_CLASS.contains( tld ) )
            {
            // this handles the usual cases .com .org .net ...
            return tld;
            }
        if ( LEGAL_COUNTRIES.contains( tld ) )
            {
            // e.g. .ca .us .fr .uk .de
            return "offsite-" + tld;
            }
        // 5. last resort. http://mindprod.com, a link to the website that is not local, has no class,
        // e.g. the reference in the footer to the website.
        return isOffsite( url ) ? "offsite" : null;
        }// /method

    /**
     * assign a CSS class to a link containing a : (with protocol or C:)
     *
     * @param url of link
     *
     * @return css class
     */
    private String assignCSSClassWithProtocol( final String url )
        {
        // single exit: the first matching test decides the class; null when none applies.
        final String cssClass;
        if ( url.startsWith( "mailto:" ) )
            {
            cssClass = "mailto";
            }
        else if ( url.startsWith( "ftp://" ) )
            {
            cssClass = "ftp";
            }
        else if ( isCDrive( url ) )
            {
            cssClass = "cdrive";
            }
        else if ( isJDrive( url ) )
            {
            cssClass = "jdrive";
            }
        else if ( isXDrive( url ) )
            {
            cssClass = "xdrive";
            }
        else if ( url.startsWith( "file://" ) )
            {
            // a file url that is not on one of the drives tested above
            cssClass = "localfile";
            }
        else
            {
            cssClass = null;
            }
        return cssClass;
        }// /method

    /**
     * is this url one without a class?
     *
     * @param url relative or absolute
     *
     * @return true if the url is one we leave without a class, i.e. isURLModifiable is false for it
     */
    private boolean isClassLess( final String url )
        {
        // classless is defined as the exact opposite of modifiable
        final boolean modifiable = isURLModifiable( url );
        return !modifiable;
        }// /method

    /**
     * debugging harness
     *
     * @param args first parm is URL to get css class  for
     */
    public static void main( final String[] args )
        {
        // without an url there is nothing to classify; explain rather than die with
        // an ArrayIndexOutOfBoundsException on args[ 0 ].
        if ( args == null || args.length < 1 )
            {
            System.err.println( "usage: AssignCSSClassesForMindprod url" );
            return;
            }
        final AssignCSSClasses c = new AssignCSSClassesForMindprod();
        // the embedding file is a fixed dummy at the root of the website
        out.println( c.assignCSSClass( args[ 0 ], new File( "E:/mindprod/index.html" ) ) );
        }

    /**
     * compute a better htmlClass given the URL
     *
     * @param url                  URL of glink, external http: absolute or relative, possibly with a path.
     *                             lead / means webroot relative.
     *                             includes things like ../bgloss/x.html
     *                             abc.html#XYZ
     *                             #XYZ
     *                             http://abc.com/x.html
     *                             mailto:
     *                             https://abc.com/x.html
     * @param fileBeingDistributed where link is embedded. Needed to recognise class of local links without any dir
     *                             names. Normally CSS class does not depend on where link is embedded.
     *
     * @return css class for this link, null for none
     */
    public String assignCSSClass( final String url, File fileBeingDistributed )
        {
        // no url, no class
        if ( url == null )
            {
            return null;
            }
        if ( isOnPage( url ) )
            {
            return "onpage";
            }
        if ( !url.contains( ":" ) )
            {
            // link without a protocol: class depends on where it lands relative to the embedding file.
            // When the caller gives no embedding file, use a dummy at the root of the website.
            File context = ( fileBeingDistributed != null )
                           ? fileBeingDistributed
                           : new File( "E:/mindprod/index.html" );
            final String dir = Tools.dirWithSlashes( context );
            if ( dir.endsWith( "/include" ) )
                {
                // for includes, treat as if context were one level out, where include will probably be inserted.
                final String outerDir = ST.chopTrailingString( dir, "/include" );
                context = Tools.toFileFromUPath( outerDir + "/" + EIO.getCoreName( context ) + ".html" );
                }
            return assignCSSClassLocal( url, context );
            }
        // link with a colon: first try to class it by protocol alone ...
        final String byProtocol = assignCSSClassWithProtocol( url );
        // ... otherwise it has both a protocol and a domain.
        return ( byProtocol != null ) ? byProtocol : assignCSSClassWithDomain( url );
        }// /method

    /**
     * do we consider disturbing this class of link?
     *
     * @param htmlClass class of URL we are considering disturbing
     *
     * @return true if ok to disturb
     */
    public boolean isCSSClassModifiable( final String htmlClass )
        {
        // plain membership test against the table of modifiable classes
        final boolean listed = MODIFIABLE_CSS_CLASSES.contains( htmlClass );
        return listed;
        }// /method

    /**
     * do we consider disturbing this class of link?
     *
     * @param url URL we are considering disturbing, as it appears embedded in the html, e.g. ../jgloss/x.html x.html
     *
     * @return true if ok to disturb
     */
    public boolean isURLModifiable( final String url )
        {
        // web links are always fair game
        if ( url.startsWith( "http://" ) || url.startsWith( "https://" ) )
            {
            return true;
            }
        // isolate what follows the last slash; the whole url when there is no slash.
        final int lastSlash = url.lastIndexOf( '/' );
        final String fn = ( lastSlash >= 0 ) ? url.substring( lastSlash + 1 ) : url;
        if ( CLASSLESS_FILES.contains( fn ) )
            {
            return false;
            }
        // leave alone links whose second last char is #, i.e. a one-character fragment,
        // and links to the #MASTERINDEX and #PUNCT fragments.
        final int length = fn.length();
        final boolean oneCharFragment = length >= 2 && fn.charAt( length - 2 ) == '#';
        return !oneCharFragment
               && !fn.endsWith( "#MASTERINDEX" )
               && !fn.endsWith( "#PUNCT" );
        }// /method

    /**
     * does this file need its CSS classes repaired?
     *
     * @param fileBeingProcessed file in question
     *
     * @return true if the file should have its CSS classes repaired, false to leave it alone
     */
    public boolean needsCSSRepair( File fileBeingProcessed )
        {
        // directories themselves are never repaired
        if ( fileBeingProcessed.isDirectory() )
            {
            return false;
            }
        final String fileName = fileBeingProcessed.getName();
        // we don't usually repair includes. Their relative URLs are out of whack
        // we generate *.htmlfrag with classes already in place.
        switch ( Tools.dirWithSlashes( fileBeingProcessed ) )
            {
            // directories where everything but one named file is repaired
            case "religion":
                return !fileName.equals( "dgloss.html" );
            case "livinglove/methods":
                return !fileName.equals( "lgloss.html" );
            case "environment":
                return !fileName.equals( "egloss.html" );
            case "jgloss/include":
                // only these fragments are repaired.
                // no relative addresses, so it is ok docs are not in final resting place.
                return fileName.equals( "appvisorguts.htmlfrag" )
                       || fileName.equals( "candidateguts.htmlfrag" )
                       || fileName.equals( "deadguts.htmlfrag" )
                       || fileName.equals( "hassleguts.htmlfrag" )
                       || fileName.equals( "nohassleguts.htmlfrag" )
                       || fileName.equals( "tscribe.htmlfrag" );
            // ordinary directories
            case "applet":
            case "application":
            case "bgloss":
            case "book":
            case "canada":
            case "carol":
            case "contact":
            case "deepthoughts":
            case "dvd":
            case "electronic":
            case "esperanto":
            case "ethics":
            case "feedback":
            case "feedback/animals":
            case "feedback/environment":
            case "feedback/god":
            case "feedback/humanrights":
            case "feedback/money":
            case "feedback/peace":
            case "feedback/porn":
            case "feedback/unmain":
            case "ggloss":
            case "ggloss/blurb":
            case "humanrights":
                // case "include":
            case "jgloss":
            case "jgloss/encoding":
            case "jgloss/sqlvendors":
            case "livinglove":
            case "livinglove/handbook":
            case "livinglove/methods/songlyrics":
            case "money":
                // case "phone":   leave alone
            case "politics":
            case "politics/laser":
            case "project":
            case "quote":
            case "reunion":
            case "utility":
            case "webstart":
                // only *.html files qualify
                if ( !EIO.getExtension( fileBeingProcessed ).equals( "html" ) )
                    {
                    return false;
                    }
                // we want to avoid processing generated index files:
                // one-character names (index letters), masterindex, punct and 0-9.
                final String core = EIO.getCoreName( fileBeingProcessed );
                return core.length() != 1
                       && !core.equals( "masterindex" )
                       && !core.equals( "punct" )
                       && !core.equals( "0-9" );
            default:
                // no snippet, foot
                return false;
            }
        }// /method
    }