/*
* [AssignCSSClassesForMindprodCa.java]
*
* Summary: Determine the css class for a given URL for Mindprod.ca.
*
* Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2011-01-10 initial version
* 1.1 2012-11-16 add isAffiliate to automatically mark affiliate links
* 1.2 2015-09-27 support .asia and .travel domains
*/
package com.mindprod.htmlmacros.support;
import com.mindprod.common18.EIO;
import com.mindprod.common18.Misc;
import com.mindprod.common18.ST;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import static com.mindprod.common18.ST.trimLeading;
/**
* Determine the css class for a given URL for Mindprod.ca.
*
* Also puts links on single line with canonical spacing.
*
*
* @author Roedy Green, Canadian Mind Products
* @version 1.2 2015-09-27 support .asia and .travel domains
* @noinspection WeakerAccess
* @see ConfigurationForMindprod
* @since 2011-01-10
*/
public final class AssignCSSClassesForMindprodCa implements AssignCSSClasses
{
// todo: add .com#. , ip, treat ext links to mindprod.com as internal
// declarations
/**
 * Maps a directory path (forward slashes, e.g. "feedback/god") to the CSS class used for
 * local links into that directory. Filled by a static initialiser of this class and read by
 * assignCSSClassLocal, which returns the looked-up value as a String.
 */
static final HashMap<String, String> DIR_TO_CLASS =
        new HashMap<>( 50 );
/**
 * Maps a domain including its TLD (e.g. "wikipedia.org") to the CSS class reserved for links
 * to that domain. Filled by a static initialiser of this class and read by
 * assignCSSClassWithDomain, which assigns the looked-up value to a String.
 */
static final HashMap<String, String> DOMAIN_TO_CLASS =
        new HashMap<>( 30 );
/**
 * File names, including the .html extension, of index and glossary pages.
 * isURLModifiable returns false for any non-http(s) link whose last path segment is one of
 * these names, so such links are left without a class.
 */
static final HashSet<String> CLASSLESS_FILES =
        new HashSet<>( Arrays.asList( "brokenlinks.html",
                "0.html", "1.html", "2.html", "3.html", "4.html", "5.html", "6.html", "7.html", "8.html", "9.html",
                "0-9.html", "masterindex.html", "punct.html",
                "a.html", "b.html", "c.html", "d.html", "e.html", "f.html", "g.html", "h.html",
                "i.html", "j.html", "k.html", "l.html", "m.html", "n.html", "o.html", "p.html",
                "q.html", "r.html", "s.html", "t.html", "u.html", "v.html", "w.html", "x.html", "y.html", "z.html",
                "dgloss.html", "egloss.html", "lgloss.html"
                // overkill. We suppress all class markers in index and glossary. We really wanted just to suppress index
        ) );
/**
 * Two-letter country-code TLD suffixes. assignCSSClassWithDomain gives a link whose TLD is in
 * this set the class "offsite-" followed by the TLD, e.g. "offsite-ca".
 * One row per initial letter.
 */
static final HashSet<String> LEGAL_COUNTRIES =
        new HashSet<>( Arrays.asList(
                "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "az",
                "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", "bv", "bw", "by", "bz",
                "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy", "cz",
                "de", "dj", "dk", "dm", "do", "dz",
                "ec", "ee", "eg", "eh", "er", "es", "et", "eu",
                "fi", "fj", "fk", "fm", "fo", "fr",
                "ga", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy",
                "hk", "hm", "hn", "hr", "ht", "hu",
                "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it",
                "je", "jm", "jo", "jp",
                "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz",
                "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly",
                "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz",
                "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz",
                "om",
                "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py",
                "qa",
                "re", "ro", "rs", "ru", "rw",
                "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "st", "sv", "sy", "sz",
                "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tr", "tt",
                "tv", // usually means television, not Tuvalu.
                "tw", "tz",
                "ua", "ug", "uk", "um", "us", "uy", "uz",
                "va", "vc", "ve", "vg", "vi", "vn", "vu",
                "wf",
                "ws", // usually means website, not Western Samoa
                "ye", "yt",
                "za", "zm", "zw"
        ) );
/**
 * Existing CSS classes we are willing to replace; isCSSClassModifiable tests membership here.
 * We also update classless links. The "offsite-xx" rows hold one entry per two-letter
 * country code, one row group per initial letter.
 */
static final HashSet<String> MODIFIABLE_CSS_CLASSES =
        new HashSet<>( Arrays.asList(
                "affiliate", "amazon", "asia", "apache", "apple", "applet",
                "bgloss", "biz", "blogger", "book",
                "canada", "cbc", "cdrive", "com", "contact",
                "deepthoughts", "download", "dvd",
                "earth", "edu", "electronics", "encoding", "environment", "esper", "ethics",
                "feedback", "ftp",
                "ggloss", "github", "google", "googlecode",
                /* "googlesearch" */
                "handbook", "humanrights",
                "info", "int",
                "jdrive", "jgloss",
                "kickstarter",
                "ll",
                "microsoft", "mil", "money",
                "name", "net", "none", "nyt",
                "offsite",
                "offsite-ac", "offsite-ad", "offsite-ae", "offsite-af", "offsite-ag", "offsite-ai", "offsite-al", "offsite-am",
                "offsite-an", "offsite-ao", "offsite-aq", "offsite-ar", "offsite-as", "offsite-at", "offsite-au", "offsite-aw",
                "offsite-az",
                "offsite-ba", "offsite-bb", "offsite-bd", "offsite-be", "offsite-bf", "offsite-bg", "offsite-bh", "offsite-bi",
                "offsite-bj", "offsite-bm", "offsite-bn", "offsite-bo", "offsite-br", "offsite-bs", "offsite-bt", "offsite-bv",
                "offsite-bw", "offsite-by", "offsite-bz",
                "offsite-ca", "offsite-cc", "offsite-cd", "offsite-cf", "offsite-cg", "offsite-ch", "offsite-ci", "offsite-ck",
                "offsite-cl", "offsite-cm", "offsite-cn", "offsite-co", "offsite-cr", "offsite-cu", "offsite-cv", "offsite-cx",
                "offsite-cy", "offsite-cz",
                "offsite-de", "offsite-dj", "offsite-dk", "offsite-dm", "offsite-do", "offsite-dz",
                "offsite-ec", "offsite-ee", "offsite-eg", "offsite-eh", "offsite-er", "offsite-es", "offsite-et", "offsite-eu",
                "offsite-fi", "offsite-fj", "offsite-fk", "offsite-fm", "offsite-fo", "offsite-fr",
                "offsite-ga", "offsite-gd", "offsite-ge", "offsite-gf", "offsite-gg", "offsite-gh", "offsite-gi", "offsite-gl",
                "offsite-gm", "offsite-gn", "offsite-gp", "offsite-gq", "offsite-gr", "offsite-gs", "offsite-gt", "offsite-gu",
                "offsite-gw", "offsite-gy",
                "offsite-hk", "offsite-hm", "offsite-hn", "offsite-hr", "offsite-ht", "offsite-hu",
                "offsite-id", "offsite-ie", "offsite-il", "offsite-im", "offsite-in", "offsite-io", "offsite-iq", "offsite-ir",
                "offsite-is", "offsite-it",
                "offsite-je", "offsite-jm", "offsite-jo", "offsite-jp",
                "offsite-ke", "offsite-kg", "offsite-kh", "offsite-ki", "offsite-km", "offsite-kn", "offsite-kp", "offsite-kr",
                "offsite-kw", "offsite-ky", "offsite-kz",
                "offsite-la", "offsite-lb", "offsite-lc", "offsite-li", "offsite-lk", "offsite-lr", "offsite-ls", "offsite-lt",
                "offsite-lu", "offsite-lv", "offsite-ly",
                "offsite-ma", "offsite-mc", "offsite-md", "offsite-me", "offsite-mg", "offsite-mh", "offsite-mk", "offsite-ml",
                "offsite-mm", "offsite-mn", "offsite-mo", "offsite-mp", "offsite-mq", "offsite-mr", "offsite-ms", "offsite-mt",
                "offsite-mu", "offsite-mv", "offsite-mw", "offsite-mx", "offsite-my", "offsite-mz",
                "offsite-na", "offsite-nc", "offsite-ne", "offsite-nf", "offsite-ng", "offsite-ni", "offsite-nl", "offsite-no",
                "offsite-np", "offsite-nr", "offsite-nu", "offsite-nz",
                "offsite-om",
                "offsite-pa", "offsite-pe", "offsite-pf", "offsite-pg", "offsite-ph", "offsite-pk", "offsite-pl", "offsite-pm",
                "offsite-pn", "offsite-pr", "offsite-ps", "offsite-pt", "offsite-pw", "offsite-py",
                "offsite-qa",
                "offsite-re", "offsite-ro", "offsite-rs", "offsite-ru", "offsite-rw",
                "offsite-sa", "offsite-sb", "offsite-sc", "offsite-sd", "offsite-se", "offsite-sg", "offsite-sh", "offsite-si",
                "offsite-sj", "offsite-sk", "offsite-sl", "offsite-sm", "offsite-sn", "offsite-so", "offsite-sr", "offsite-st",
                "offsite-sv", "offsite-sy", "offsite-sz",
                "offsite-tc", "offsite-td", "offsite-tf", "offsite-tg", "offsite-th", "offsite-tj", "offsite-tk", "offsite-tl",
                "offsite-tm", "offsite-tn", "offsite-to", "offsite-tr", "offsite-tt", "offsite-tv", "offsite-tw", "offsite-tz",
                "offsite-ua", "offsite-ug", "offsite-uk", "offsite-um", "offsite-us", "offsite-uy", "offsite-uz",
                "offsite-va", "offsite-vc", "offsite-ve", "offsite-vg", "offsite-vi", "offsite-vn", "offsite-vu",
                "offsite-wf", "offsite-ws",
                "offsite-ye", "offsite-yt",
                "offsite-za", "offsite-zm", "offsite-zw",
                "onpage", "oracle", "org",
                "paypal", "pdf", "phone", "politics", "project",
                "quote",
                "religion", "reunion",
                "sourceforge", "ssl", "subversion", "sun",
                "tindie", "tortoisesubversion", "travel",
                "w3schools", "webstart", "wikipedia",
                "xdrive"
                // notably absent rat, rec, onion, strawberry, raspberry
                // not kjv, generated by KJV macro. Can leave alone.
        ) );
/**
 * Domains (including TLD) whose links get the "paypal" class.
 * assignCSSClassWithDomain checks this set before every other domain rule, so a domain that
 * is also in AFFILIATE_DOMAINS is classed "paypal".
 */
static final HashSet<String> TAKES_PAYPAL =
        new HashSet<>( Arrays.asList(
                "1800headsets.ca",
                "allposters.com",
                "amazon.cn",
                "art.com",
                "axialis.com",
                "barnesandnoble.com",
                "bestbuy.ca",
                "bestbuy.com",
                "buylightfixtures.com",
                "canadacomputers.com",
                "chegg.com",
                "decathlon.co.uk",
                "ebay.ca",
                "ebay.com",
                "eddiebauer.com",
                "everbuying.com",
                "excelsiorjet.com",
                "extreme-pc.ca",
                "fastspring.com",
                "funduc.com",
                "futureshop.ca",
                "headsetsdepot.com",
                "hyperglide.net",
                "kobobooks.com",
                "ksoftware.net",
                "labelcity.com",
                "limedomains.com",
                "lunarpages.com",
                "musclefitbasics.com",
                "ncix.com",
                "ncixus.com",
                "ncix.ca",
                "newegg.ca",
                "newegg.com",
                "nike.com",
                "oo-software.com",
                "paypal.com",
                "posterburner.com",
                "posterjack.com",
                "powells.com",
                "raxco.com",
                "sears.com",
                "slicksurf.com",
                "switchshirts.net",
                "t-shirtwholesaler.com",
                "tigerdirect.ca",
                "tigerdirect.com",
                "walmart.com",
                "wushnet.com"
        ) );
/**
 * Top-level domains that have a CSS class of their own; the class name is the TLD itself.
 */
static final HashSet<String> TLD_WITH_CLASS =
        new HashSet<>( Arrays.asList(
                "asia",
                "biz",
                "com",
                "earth",
                "edu",
                "gov",
                "info",
                "int",
                "mil",
                "name",
                "net",
                "org",
                "travel" ) );
/**
 * Domains (including TLD) that give us commissions; isAffiliate tests membership here.
 */
private static final HashSet<String> AFFILIATE_DOMAINS =
        new HashSet<>( Arrays.asList(
                "art.com",
                "allposters.com",
                "alt.com",
                "bmtmicro.com",
                "bookbyte.com",
                "buddhagroove.com",
                "cc-dt.com",
                "ccbill.com",
                "cj.com",
                "clickbank.net",
                "clicksor.com",
                "clixgalore.com",
                "webring.com",
                "disktrix.com",
                "doubleclick.net",
                "dreamhost.com",
                "element5.com",
                "esellerate.net",
                "friendfinder.com",
                "gayfriendfinder.com",
                "gayuniverse.com",
                "greatcall.com",
                "hide-the-ip.com",
                "iconshock.com",
                "jzip.com",
                "linksynergy.com",
                "lunarpages.com",
                "mycommerce.com",
                "myfonts.com",
                "outpersonals.com",
                "sextoyfun.com",
                "oo-software.com",
                "sitepal.com",
                "spamnix.com",
                "thehubpeople.com",
                "weatherbuffs.com",
                "xara.com",
                "yourfonts.com" ) );
/**
 * Text of the URL most recently remembered by getURL; null before the first call.
 */
private static String prevUrlString;
/**
 * URL object remembered by getURL so that asking again for the same text skips re-parsing;
 * null before the first call.
 */
private static URL prevUrl;
// /declarations
static
{
// domains that have their own class
DOMAIN_TO_CLASS.put( "amazon.com", "amazon" );
DOMAIN_TO_CLASS.put( "amazonaws.com", "amazon" );
DOMAIN_TO_CLASS.put( "apache.net", "apache" );
DOMAIN_TO_CLASS.put( "apache.org", "apache" );
DOMAIN_TO_CLASS.put( "apple.com", "apple" );
DOMAIN_TO_CLASS.put( "blogspot.ca", "blogger" );
DOMAIN_TO_CLASS.put( "blogspot.com", "blogger" );
DOMAIN_TO_CLASS.put( "cbc.ca", "cbc" );
DOMAIN_TO_CLASS.put( "invisionzone.com", "w3schools" );
DOMAIN_TO_CLASS.put( "java.com", "oracle" );
DOMAIN_TO_CLASS.put( "java.net", "javanet" );
DOMAIN_TO_CLASS.put( "kenai.com", "javanet" );
DOMAIN_TO_CLASS.put( "kickstarter.com", "kickstarter" );
DOMAIN_TO_CLASS.put( "live.com", "microsoft" );
DOMAIN_TO_CLASS.put( "microsoft.com", "microsoft" );
DOMAIN_TO_CLASS.put( "nytimes.com", "nyt" );
DOMAIN_TO_CLASS.put( "oracle.com", "oracle" );
DOMAIN_TO_CLASS.put( "oracleimg.com", "oracle" );
DOMAIN_TO_CLASS.put( "sourceforge.net", "sourceforge" );
DOMAIN_TO_CLASS.put( "sun.com", "oracle" );
DOMAIN_TO_CLASS.put( "tindie.com", "tindie" );
DOMAIN_TO_CLASS.put( "w3.org", "w3c" );
DOMAIN_TO_CLASS.put( "w3schools.com", "w3schools" );
DOMAIN_TO_CLASS.put( "wikipedia.org", "wikipedia" );
DOMAIN_TO_CLASS.put( "yahoo.org", "yahoo" );
}
static
{
// directories that have their own class , kjv does too, but we do not disturb it.
DIR_TO_CLASS.put( "animalrights", "animalrights" );
DIR_TO_CLASS.put( "bgloss", "bgloss" );
DIR_TO_CLASS.put( "book", "book" );
DIR_TO_CLASS.put( "canada", "canada" );
DIR_TO_CLASS.put( "contact", "contact" );
DIR_TO_CLASS.put( "deepthoughts", "deepthoughts" );
DIR_TO_CLASS.put( "dvd", "dvd" );
DIR_TO_CLASS.put( "electronic", "electronic" );
DIR_TO_CLASS.put( "environment", "environment" );
DIR_TO_CLASS.put( "esperanto", "esper" );
DIR_TO_CLASS.put( "ethics", "ethics" );
DIR_TO_CLASS.put( "feedback", "feedback" );
DIR_TO_CLASS.put( "feedback/animals", "feedback" );
DIR_TO_CLASS.put( "feedback/environment", "feedback" );
DIR_TO_CLASS.put( "feedback/god", "feedback" );
DIR_TO_CLASS.put( "feedback/humanrights", "feedback" );
DIR_TO_CLASS.put( "feedback/money", "feedback" );
DIR_TO_CLASS.put( "feedback/peace", "feedback" );
DIR_TO_CLASS.put( "feedback/porn", "feedback" );
DIR_TO_CLASS.put( "ggloss", "ggloss" );
DIR_TO_CLASS.put( "humanrights", "humanrights" );
DIR_TO_CLASS.put( "javacc", "javacc" );
DIR_TO_CLASS.put( "livinglove", "ll" );
DIR_TO_CLASS.put( "livinglove/handbook", "handbook" );
DIR_TO_CLASS.put( "livinglove/methods", "ll" );
DIR_TO_CLASS.put( "money", "money" );
DIR_TO_CLASS.put( "phone", "phone" );
DIR_TO_CLASS.put( "politics", "politics" );
DIR_TO_CLASS.put( "politics/laser", "politics" );
DIR_TO_CLASS.put( "project", "project" );
DIR_TO_CLASS.put( "quote", "quote" );
DIR_TO_CLASS.put( "religion", "religion" );
DIR_TO_CLASS.put( "reunion", "reunion" );
}
// methods
/**
 * Get the URL corresponding to a string, remembering the most recent result.
 * <p>
 * Only the last successfully parsed string is cached. The string is parsed before the cache
 * is touched, so a malformed string never replaces the cached pair and throws every time it
 * is presented, rather than returning the URL cached for some earlier string.
 *
 * @param urlString String we want to convert to an URL. Must not be null.
 *
 * @return corresponding URL.
 * @throws MalformedURLException if bad URL
 */
private static URL getURL( String urlString ) throws MalformedURLException
{
    if ( urlString.equals( prevUrlString ) )
    {
        return prevUrl;
    }
    // may throw; do this before updating either cache field so they always stay a matched pair.
    final URL parsed = new URL( urlString );
    prevUrlString = urlString;
    prevUrl = parsed;
    return parsed;
}// /method
/**
 * Does this domain pay us commissions?
 *
 * @param domain domain including tld e.g. linkshare.com
 *
 * @return true if the domain is listed in AFFILIATE_DOMAINS
 */
private static boolean isAffiliate( final String domain )
{
    final boolean paysCommission = AFFILIATE_DOMAINS.contains( domain );
    return paysCommission;
}// /method
/**
 * Does this link point at the local C: drive?
 *
 * @param url link text, relative or absolute
 *
 * @return true if url begins with one of the recognised C: drive prefixes
 */
private static boolean isCDrive( final String url )
{
    final String[] cDrivePrefixes = { "file://localhost/C:", "file:/C:", "file:///C:", "C:" };
    for ( String prefix : cDrivePrefixes )
    {
        if ( url.startsWith( prefix ) )
        {
            return true;
        }
    }
    return false;
}// /method
/**
 * Does this local link lead to one of the products pages?
 *
 * @param url relative or absolute
 *
 * @return true if, after trimLeading( url, "./" ), the remaining text starts with "products"
 */
private static boolean isDownload( final String url )
{
    // links in the root dir don't necessarily get the class.
    // trimLeading is expected to remove any run of leading ../ or ./ - confirm against ST.trimLeading.
    return trimLeading( url, "./" ).startsWith( "products" );
}// /method
/**
 * Is this url linking to the GitHub code repository?
 *
 * @param url relative or absolute
 *
 * @return true if url is an http: or https: link whose text contains github.com or github.io
 */
private static boolean isGitHub( final String url )
{
    final boolean overWeb = url.startsWith( "http://" ) || url.startsWith( "https://" );
    if ( !overWeb )
    {
        return false;
    }
    return url.contains( "github.com" ) || url.contains( "github.io" );
}// /method
/**
 * Is this url linking to Google or Android, but not to Google code or a Google search?
 * <p>
 * Any url containing android.com counts, whatever its scheme. Otherwise the url must be an
 * http: or https: link containing "google." and neither "code.google.com/" nor "/search?".
 *
 * @param url relative or absolute
 *
 * @return true if linking to google (other than google code or search) or to android.com
 */
private static boolean isGoogle( final String url )
{
    if ( url.contains( "android.com" ) )
    {
        return true;
    }
    final boolean overWeb = url.startsWith( "http://" ) || url.startsWith( "https://" );
    // e.g. http://code.google.com/p/unitsofmeasure is google code, not plain google
    final boolean plainGoogle = url.contains( "google." )
                                && !url.contains( "code.google.com/" )
                                && !url.contains( "/search?" );
    return overWeb && plainGoogle;
}// /method
/**
 * Is this url linking to the Google code repository?
 *
 * @param url relative or absolute
 *
 * @return true if url is an http: or https: link containing code.google.com/
 */
private static boolean isGoogleCode( final String url )
{
    if ( !url.contains( "code.google.com/" ) )
    {
        return false;
    }
    return url.startsWith( "http://" ) || url.startsWith( "https://" );
}// /method
/**
 * Does this link point at the local J: drive?
 *
 * @param url link text, relative or absolute, e.g. file://localhost/J:/mindprod/products.html
 *
 * @return true if url begins with one of the recognised J: drive prefixes
 */
private static boolean isJDrive( final String url )
{
    if ( url.startsWith( "file://localhost/J:" ) )
    {
        return true;
    }
    if ( url.startsWith( "file:/J:" ) )
    {
        return true;
    }
    if ( url.startsWith( "file:///J:" ) )
    {
        return true;
    }
    return url.startsWith( "J:" );
}// /method
/**
 * Is this url linking offsite, i.e. an http: or https: link to somewhere other than
 * mindprod.com?
 * <p>
 * Links to mindprod.com are treated as on-site under either scheme, with or without a path.
 * Only the exact host mindprod.com is recognised; other host names, such as
 * mindprod.com.example.org, remain offsite.
 *
 * @param url relative or absolute
 *
 * @return true if linking offsite
 */
private static boolean isOffsite( final String url )
{
    final String afterScheme;
    if ( url.startsWith( "http://" ) )
    {
        afterScheme = url.substring( "http://".length() );
    }
    else if ( url.startsWith( "https://" ) )
    {
        afterScheme = url.substring( "https://".length() );
    }
    else
    {
        // relative links and other protocols are never classed as offsite here.
        return false;
    }
    final boolean ownSite = afterScheme.equals( "mindprod.com" ) || afterScheme.startsWith( "mindprod.com/" );
    return !ownSite;
}// /method
/**
 * Is this url a bare anchor, i.e. a link to a spot on the same page?
 *
 * @param url relative or absolute
 *
 * @return true if url begins with #
 */
private static boolean isOnPage( final String url )
{
    return !url.isEmpty() && url.charAt( 0 ) == '#';
}// /method
/**
 * Is this url linking to an Adobe PDF/FDF download?
 * The comparison is case-sensitive and looks only at the last four characters of the url.
 *
 * @param url relative or absolute
 *
 * @return true if url ends with .pdf or .fdf
 */
private static boolean isPDF( final String url )
{
    final int tailStart = url.length() - 4;
    if ( tailStart < 0 )
    {
        // too short to carry a four-character extension.
        return false;
    }
    final String tail = url.substring( tailStart );
    return ".pdf".equals( tail ) || ".fdf".equals( tail );
}// /method
/**
 * Is this url linking to the wush.net web Subversion viewer?
 *
 * @param url relative or absolute
 *
 * @return true if url is an http: or https: link into wush.net/websvn/
 */
private static boolean isSubversion( final String url )
{
    final String afterScheme;
    if ( url.startsWith( "https://" ) )
    {
        afterScheme = url.substring( 8 );
    }
    else if ( url.startsWith( "http://" ) )
    {
        afterScheme = url.substring( 7 );
    }
    else
    {
        return false;
    }
    return afterScheme.startsWith( "wush.net/websvn/" );
}// /method
/**
 * Is this url linking to the wush.net Subversion repository itself (the /svn/ tree used by
 * Tortoise Subversion)?
 *
 * @param url relative or absolute
 *
 * @return true if url is an http: or https: link into wush.net/svn/
 */
private static boolean isTortoise( final String url )
{
    final String[] repositoryRoots = { "http://wush.net/svn/", "https://wush.net/svn/" };
    for ( String root : repositoryRoots )
    {
        if ( url.startsWith( root ) )
        {
            return true;
        }
    }
    return false;
}// /method
/**
 * Does this link point at the local X: drive?
 *
 * @param url link text, relative or absolute, e.g. file://localhost/X:/Program Files/...
 *
 * @return true if url begins with one of the recognised X: drive prefixes
 */
private static boolean isXDrive( final String url )
{
    return Arrays.asList( "file://localhost/X:", "file:/X:", "file:///X:", "X:" )
            .stream()
            .anyMatch( url::startsWith );
}// /method
/**
 * Assign a CSS class to a local link, i.e. one without a protocol.
 * <p>
 * Checks are applied in this order: pdf, classless (no class), onpage, download, and finally
 * a look-up of the directory the link points into in DIR_TO_CLASS.
 *
 * @param url                  of link
 * @param fileBeingDistributed file the link is embedded in; used to resolve relative links
 *
 * @return css class, or null if the link gets none
 */
private String assignCSSClassLocal( final String url, File fileBeingDistributed )
{
    if ( isPDF( url ) )
    {
        return "pdf";
    }
    if ( isClassLess( url ) )
    {
        return null;
    }
    if ( isOnPage( url ) )
    {
        return "onpage";
    }
    if ( isDownload( url ) )
    {
        return "download";
    }
    // work out which directory the link lands in.
    final String targetDir;
    if ( !url.startsWith( "/" ) )
    {
        // relative url: resolve against the directory of the embedding file.
        assert fileBeingDistributed != null : "null fileBeingDistributed";
        assert url != null : "null url";
        final File embeddingDir = fileBeingDistributed.getParentFile();
        assert embeddingDir != null : "null parent";
        targetDir = Tools.uPathParentWithSlashes( new File( embeddingDir, url ) );
    }
    else
    {
        // lead / means webroot-relative url.
        targetDir = Tools.uPathParentWithSlashes( Tools.toFileFromUPath( url ) );
    }
    // an include directory takes the class of the directory that holds it. Result might be null.
    return DIR_TO_CLASS.get( ST.chopTrailingString( targetDir, "/include" ) );
}// /method
/**
 * Assign a CSS class to a link that has a domain.
 * <p>
 * The first matching rule wins, in this order: paypal merchants, domains with their own
 * class, a few rules on the raw url text (subversion, tortoise, google code, github, google),
 * affiliates, gov.ca and dnd.ca, TLDs with their own class, country-code TLDs, and finally
 * plain offsite.
 *
 * @param url of link
 *
 * @return css class, or null if the url cannot be parsed or no rule matches
 */
private String assignCSSClassWithDomain( final String url )
{
    final String domain;
    try
    {
        domain = Misc.getDomain( getURL( url ) );
    }
    catch ( MalformedURLException unparsable )
    {
        // nothing we can classify.
        return null;
    }
    // --- rules keyed on the exact domain
    if ( TAKES_PAYPAL.contains( domain ) )
    {
        return "paypal";
    }
    final String ownClass = DOMAIN_TO_CLASS.get( domain );
    if ( !ST.isEmpty( ownClass ) )
    {
        return ownClass;
    }
    // --- rules keyed on the text of the url
    if ( url.contains( "shitharperdid.ca.nyud.net" ) )
    {
        return "offsite-ca";
    }
    if ( isSubversion( url ) )
    {
        return "subversion";
    }
    if ( isTortoise( url ) )
    {
        return "tortoisesubversion";
    }
    if ( isGoogleCode( url ) )
    {
        return "googlecode";
    }
    if ( isGitHub( url ) )
    {
        return "github";
    }
    if ( isGoogle( url ) )
    {
        return "google";
    }
    // --- remaining domain rules; the link might be http: or https:
    if ( isAffiliate( domain ) )
    {
        return "affiliate";
    }
    if ( domain.endsWith( "gov.ca" ) )
    {
        return "gov";
    }
    if ( domain.endsWith( "dnd.ca" ) )
    {
        return "mil";
    }
    // --- rules keyed on the top level domain
    final int lastDot = domain.lastIndexOf( '.' );
    final String tld = ( lastDot <= 0 ) ? "" : domain.substring( lastDot + 1 );
    if ( TLD_WITH_CLASS.contains( tld ) )
    {
        // the usual cases .com .org .net ...
        return tld;
    }
    if ( LEGAL_COUNTRIES.contains( tld ) )
    {
        // e.g. .ca .us .fr .uk .de
        return "offsite-" + tld;
    }
    // a link to our own website, rather than a local link, has no class, e.g. reference in footer.
    return isOffsite( url ) ? "offsite" : null;
}// /method
/**
 * Assign a CSS class to a link containing a : when the protocol or drive letter alone decides
 * the class.
 *
 * @param url of link
 *
 * @return "mailto", "ftp", "cdrive", "jdrive" or "xdrive"; null if none of those apply
 */
private String assignCSSClassWithProtocol( final String url )
{
    if ( url.startsWith( "mailto:" ) )
    {
        return "mailto";
    }
    if ( url.startsWith( "ftp://" ) )
    {
        return "ftp";
    }
    // local drives, tested in the order C, J, X.
    return isCDrive( url ) ? "cdrive"
                           : isJDrive( url ) ? "jdrive"
                                             : isXDrive( url ) ? "xdrive"
                                                               : null;
}// /method
/**
 * Is this url one that should be left without a class?
 * This is the exact opposite of isURLModifiable.
 *
 * @param url relative or absolute
 *
 * @return true if the link must not be given a class
 */
private boolean isClassLess( final String url )
{
    final boolean modifiable = isURLModifiable( url );
    return !modifiable;
}// /method
/**
 * Compute a better htmlClass given the URL.
 *
 * @param url                  URL of link, external http: absolute or relative, possibly with a path.
 *                             lead / means webroot relative.
 *                             includes things like ../bgloss/x.html
 *                             abc.html#XYZ
 *                             #XYZ
 *                             http://abc.com/x.html
 *                             mailto:
 *                             https://abc.com/x.html
 * @param fileBeingDistributed where link is embedded. Needed to recognise class of local links without any dir
 *                             names. Normally CSS class does not depend on where link is embedded.
 *                             If null, a dummy index.html is used instead.
 *
 * @return css class for this link, null for none (also null when url is null)
 */
public String assignCSSClass( final String url, File fileBeingDistributed )
{
    if ( url == null )
    {
        return null;
    }
    if ( isOnPage( url ) )
    {
        return "onpage";
    }
    if ( url.contains( ":" ) )
    {
        // has a protocol or drive letter: try protocol rules first, then domain rules.
        final String byProtocol = assignCSSClassWithProtocol( url );
        return ( byProtocol != null ) ? byProtocol : assignCSSClassWithDomain( url );
    }
    // local link: the class can depend on where the link is embedded.
    File context = ( fileBeingDistributed == null )
                   ? new File( "E:/mindprod/index.html" ) // dummy
                   : fileBeingDistributed;
    final String contextDir = Tools.dirWithSlashes( context );
    if ( contextDir.endsWith( "/include" ) )
    {
        // for includes, treat as if context were one level out, where include will probably be inserted.
        final String outerDir = ST.chopTrailingString( contextDir, "/include" );
        context = Tools.toFileFromUPath( outerDir + "/" + EIO.getCoreName( context ) + ".html" );
    }
    return assignCSSClassLocal( url, context );
}// /method
/**
 * Is a link carrying this CSS class one we are willing to re-class?
 *
 * @param htmlClass class of URL we are considering disturbing
 *
 * @return true if htmlClass is listed in MODIFIABLE_CSS_CLASSES, i.e. ok to disturb
 */
public boolean isCSSClassModifiable( final String htmlClass )
{
    final boolean okToDisturb = MODIFIABLE_CSS_CLASSES.contains( htmlClass );
    return okToDisturb;
}// /method
/**
 * Is a link to this URL one we are willing to re-class?
 * <p>
 * http: and https: links are always modifiable. Any other link is left alone when its last
 * path segment is listed in CLASSLESS_FILES, when that segment has # as its second-last
 * character (an anchor followed by a single character), or when it ends with #MASTERINDEX
 * or #PUNCT.
 *
 * @param url URL we are considering disturbing, as it appears embedded in the html, e.g. ../jgloss/x.html x.html
 *
 * @return true if ok to disturb
 */
public boolean isURLModifiable( final String url )
{
    if ( url.startsWith( "http://" ) || url.startsWith( "https://" ) )
    {
        return true;
    }
    // keep just what follows the last /, or everything if there is no /
    final String lastSegment = url.substring( url.lastIndexOf( '/' ) + 1 );
    if ( CLASSLESS_FILES.contains( lastSegment ) )
    {
        return false;
    }
    final int hashSlot = lastSegment.length() - 2;
    if ( hashSlot >= 0 && lastSegment.charAt( hashSlot ) == '#' )
    {
        return false;
    }
    return !lastSegment.endsWith( "#MASTERINDEX" ) && !lastSegment.endsWith( "#PUNCT" );
}// /method
/**
 * Does this file need its CSS classes repaired?
 * <p>
 * The answer depends on the directory the file is in, as reported by Tools.dirWithSlashes.
 * Directories never need repair, and neither does anything in a directory not listed below.
 *
 * @param fileBeingProcessed file in question
 *
 * @return true if the file should have its link classes repaired
 */
public boolean needsCSSRepair( File fileBeingProcessed )
{
    if ( fileBeingProcessed.isDirectory() )
    {
        return false;
    }
    // we don't repair includes. Their relative URLs are out of whack
    // we generate *.htmlfrag with classes already in place.
    final String dir = Tools.dirWithSlashes( fileBeingProcessed );
    final String fileName = fileBeingProcessed.getName();
    switch ( dir )
    {
        // directories where everything but the glossary page is repaired
        case "religion":
            return !"dgloss.html".equals( fileName );
        case "livinglove/methods":
            return !"lgloss.html".equals( fileName );
        case "environment":
            return !"egloss.html".equals( fileName );
        case "jgloss/include":
            // only these fragments; no relative addresses, so ok they are not in place.
            return fileName.equals( "appvisorguts.htmlfrag" )
                   || fileName.equals( "candidateguts.htmlfrag" )
                   || fileName.equals( "deadguts.htmlfrag" )
                   || fileName.equals( "hassleguts.htmlfrag" )
                   || fileName.equals( "nohassleguts.htmlfrag" );
        case "applet":
        case "application":
        case "bgloss":
        case "book":
        case "canada":
        case "carol":
        case "contact":
        case "deepthoughts":
        case "dvd":
        case "electronic":
        case "esperanto":
        case "ethics":
        case "feedback":
        case "feedback/animals":
        case "feedback/environment":
        case "feedback/god":
        case "feedback/humanrights":
        case "feedback/money":
        case "feedback/peace":
        case "feedback/porn":
        case "feedback/unmain":
        case "ggloss":
        case "ggloss/blurb":
        case "humanrights":
            // case "include":
        case "jgloss":
        case "jgloss/encoding":
        case "jgloss/sqlvendors":
        case "livinglove":
        case "livinglove/handbook":
        case "livinglove/methods/songlyrics":
        case "money":
            // case "phone": leave alone
        case "politics":
        case "politics/laser":
        case "project":
        case "quote":
        case "reunion":
        case "utility":
        case "webstart":
            // only *.html files qualify
            if ( !EIO.getExtension( fileBeingProcessed ).equals( "html" ) )
            {
                return false;
            }
            // skip the generated index pages: single-letter names, masterindex, punct and 0-9.
            final String core = EIO.getCoreName( fileBeingProcessed );
            return core.length() != 1
                   && !core.equals( "masterindex" )
                   && !core.equals( "punct" )
                   && !core.equals( "0-9" );
        default:
            // no snippet, foot
            return false;
    }
}// /method
// /methods
}