/*
 * [AssignCSSClassesForMindprod.java]
 *
 * Summary: Determine the css class for a given URL for Mindprod.com.
 *
 * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2011-01-10 initial version
 *  1.1 2012-11-16 add isAffiliate to automatically mark affiliate links
 *  1.2 2015-09-27 support .asia and .travel domains
 */
package com.mindprod.htmlmacros.support;

import com.mindprod.common18.EIO;
import com.mindprod.common18.Misc;
import com.mindprod.common18.ST;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;

import static com.mindprod.common18.ST.trimLeading;
import static java.lang.System.*;

/**
 * Determine the css class for a given URL for Mindprod.com.
 * <p>
 * Also puts links on single line with canonical spacing.
 * <p>
 * The URL cache used by this class is unsynchronized static state.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.1 2012-11-16 add isAffiliate to automatically mark affiliate links
 * @noinspection WeakerAccess
 * @see ConfigurationForMindprod
 * @since 2011-01-10
 */
public final class AssignCSSClassesForMindprod implements AssignCSSClasses {
    // todo: add .com#. , ip, treat ext links to mindprod.com as internal
    // declarations

    /**
     * dir to class. Key is the directory branch of a local link, value is the css class.
     */
    static final HashMap<String, String> DIR_TO_CLASS = new HashMap<>( 50 );

    /**
     * domain to class. Key is a domain including tld, value is the css class.
     */
    static final HashMap<String, String> DOMAIN_TO_CLASS = new HashMap<>( 30 );

    /**
     * existing files whose links never get a class: the index and glossary pages.
     * Entries include the .html extension and are compared with the last path segment of a url.
     */
    static final HashSet<String> CLASSLESS_FILES = new HashSet<>( Arrays.asList(
            "brokenlinks.html",
            "0.html", "1.html", "2.html", "3.html", "4.html",
            "5.html", "6.html", "7.html", "8.html", "9.html",
            "0-9.html",
            "masterindex.html",
            "punct.html",
            "a.html", "b.html", "c.html", "d.html", "e.html", "f.html", "g.html",
            "h.html", "i.html", "j.html", "k.html", "l.html", "m.html", "n.html",
            "o.html", "p.html", "q.html", "r.html", "s.html", "t.html", "u.html",
            "v.html", "w.html", "x.html", "y.html", "z.html",
            "dgloss.html", "egloss.html", "lgloss.html"
            // overkill. We suppress all class markers in index and glossary.
            // we really wanted just to suppress index
    ) );

    /**
     * list of 2-char TLD country domain suffixes
     */
    static final HashSet<String> LEGAL_COUNTRIES = new HashSet<>( Arrays.asList(
            "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "az",
            "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "br", "bs", "bt", "bv", "bw",
            "by", "bz",
            "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cx", "cy",
            "cz",
            "de", "dj", "dk", "dm", "do", "dz",
            "ec", "ee", "eg", "eh", "er", "es", "et", "eu",
            "fi", "fj", "fk", "fm", "fo", "fr",
            "ga", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw",
            "gy",
            "hk", "hm", "hn", "hr", "ht", "hu",
            "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it",
            "je", "jm", "jo", "jp",
            "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz",
            "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly",
            "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu",
            "mv", "mw", "mx", "my", "mz",
            "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz",
            "om",
            "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py",
            "qa",
            "re", "ro", "rs", "ru", "rw",
            "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "st", "sv",
            "sy", "sz",
            "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tr", "tt",
            "tv", // usually means Television not Tuvalu.
            "tw", "tz",
            "ua", "ug", "uk", "um", "us", "uy", "uz",
            "va", "vc", "ve", "vg", "vi", "vn", "vu",
            "wf",
            "ws", // usually means website, not Western Samoa
            "ye", "yt",
            "za", "zm", "zw"
    ) );

    /**
     * existing class we update. We also update classless links.
     * Holds the fixed class names plus one "offsite-xx" class for every entry of LEGAL_COUNTRIES.
     */
    static final HashSet<String> MODIFIABLE_CSS_CLASSES = buildModifiableCssClasses();

    /**
     * domains that get the "paypal" class.
     */
    static final HashSet<String> TAKES_PAYPAL = new HashSet<>( Arrays.asList(
            "1800headsets.ca",
            "abelssoft.de",
            "allposters.com",
            "amazon.cn",
            "angelfins.ca",
            "aquariumguys.com",
            "aquariumsupplies.ca",
            "art.com",
            "avshop.ca",
            "axialis.com",
            "barnesandnoble.com",
            "bedbathandbeyond.ca",
            "bedphones.com",
            "bestbuy.ca",
            "bestbuy.com",
            "buylightfixtures.com",
            "canadacomputers.com",
            "cardswap.ca",
            "chegg.com",
            "corepad.com",
            "creoly.com",
            "decathlon.co.uk",
            "drsfostersmith.com",
            "dx.com",
            "ebay.ca",
            "ebay.com",
            "eddiebauer.com",
            "eshave.com",
            "everbuying.com",
            "everbuying.net",
            "excelsiorjet.com",
            "extreme-pc.ca",
            "fastspring.com",
            "fasttech.com",
            "fendrihan.com",
            "funduc.com",
            "gouletpens.com",
            "headsetsdepot.com",
            "hyperglide.net",
            "ikea.com",
            "indigo.ca",
            "ipenstore.com",
            "jetpens.com",
            "kobobooks.com",
            "ksoftware.net",
            "labelcity.com",
            "limedomains.com",
            "lsfgroup.ca",
            "lunarpages.com",
            "martelelectronics.com",
            "musclefitbasics.com",
            "ncix.ca",
            "ncix.com",
            "ncixus.com",
            "newegg.ca",
            "newegg.com",
            "nike.com",
            "notemaker.com.au",
            "oo-software.com",
            "paypal.com",
            "pcpitstop.com",
            "petmountain.com",
            "petsandponds.com",
            "posterburner.com",
            "posterjack.com",
            "powells.com",
            "raxco.com",
            "reefsupplies.ca",
            "richarddawkins.net",
            "sears.com",
            "slicksurf.com",
            "simons.ca",
            "switchshirts.net",
            "t-shirtwholesaler.com",
            "thebay.com",
            "thecopperhat.ca",
            "tigerdirect.com",
            "walmart.com",
            "wonderpens.ca",
            "wushnet.com"
    ) );

    /**
     * TLD with their own class. Class is the same as the tld.
     */
    static final HashSet<String> TLD_WITH_CLASS = new HashSet<>( Arrays.asList(
            "asia",
            "biz",
            "com",
            "coop",
            "earth",
            "edu",
            "gov",
            "info",
            "int",
            "mil",
            "name",
            "net",
            "org",
            "travel"
    ) );

    /**
     * domains that give us commissions.
     */
    private static final HashSet<String> AFFILIATE_DOMAINS = new HashSet<>( Arrays.asList(
            "art.com",
            "allposters.com",
            "alt.com",
            "bmtmicro.com",
            "bookbyte.com",
            "buddhagroove.com",
            "cc-dt.com",
            "ccbill.com",
            "cj.com",
            "clickbank.net",
            "clicksor.com",
            "clixgalore.com",
            "webring.com",
            "disktrix.com",
            "doubleclick.net",
            "dreamhost.com",
            "element5.com",
            "esellerate.net",
            "friendfinder.com",
            "gayfriendfinder.com",
            "gayuniverse.com",
            "greatcall.com",
            "hide-the-ip.com",
            "iconshock.com",
            "jzip.com",
            "linksynergy.com",
            "lunarpages.com",
            "mycommerce.com",
            "myfonts.com",
            "outpersonals.com",
            "sextoyfun.com",
            "oo-software.com",
            "sitepal.com",
            "spamnix.com",
            "thehubpeople.com",
            "weatherbuffs.com",
            "xara.com",
            "yourfonts.com"
    ) );

    /**
     * substrings that mark an http(s) url as a kook conspiracy website
     */
    private static final String[] KOOK_DOMAINS = {
            "infowars.com",
            "breitbart.com",
            "spectator.org",
            "newsmax.com",
            "foxnews.com",
            "marklevinshow.com",
            "rushlimbaugh.com"
    };

    /**
     * cached previous URL string
     */
    private static String prevUrlString = null;

    /**
     * previous URL we computed, cached
     */
    private static URL prevUrl = null;
    // /declarations

    static {
        // domains that have their own class
        DOMAIN_TO_CLASS.put( "amazon.com", "amazon" );
        DOMAIN_TO_CLASS.put( "amazonaws.com", "amazon" );
        DOMAIN_TO_CLASS.put( "apache.net", "apache" );
        DOMAIN_TO_CLASS.put( "apache.org", "apache" );
        DOMAIN_TO_CLASS.put( "apple.com", "apple" );
        DOMAIN_TO_CLASS.put( "blogspot.ca", "blogger" );
        DOMAIN_TO_CLASS.put( "blogspot.com", "blogger" );
        DOMAIN_TO_CLASS.put( "cbc.ca", "cbc" );
        DOMAIN_TO_CLASS.put( "facebook.com", "facebook" );
        DOMAIN_TO_CLASS.put( "imdb.com", "imdb" );
        DOMAIN_TO_CLASS.put( "invisionzone.com", "w3schools" );
        DOMAIN_TO_CLASS.put( "java.com", "oracle" );
        DOMAIN_TO_CLASS.put( "java.net", "javanet" );
        DOMAIN_TO_CLASS.put( "kenai.com", "javanet" );
        DOMAIN_TO_CLASS.put( "kickstarter.com", "kickstarter" );
        DOMAIN_TO_CLASS.put( "live.com", "microsoft" );
        DOMAIN_TO_CLASS.put( "microsoft.com", "microsoft" );
        DOMAIN_TO_CLASS.put( "windowsupdate.com", "microsoft" );
        DOMAIN_TO_CLASS.put( "nytimes.com", "nyt" );
        DOMAIN_TO_CLASS.put( "oracle.com", "oracle" );
        DOMAIN_TO_CLASS.put( "oracleimg.com", "oracle" );
        DOMAIN_TO_CLASS.put( "sourceforge.net", "sourceforge" );
        DOMAIN_TO_CLASS.put( "sun.com", "oracle" );
        DOMAIN_TO_CLASS.put( "tindie.com", "tindie" );
        DOMAIN_TO_CLASS.put( "twitter.com", "twitter" );
        DOMAIN_TO_CLASS.put( "w3.org", "w3c" );
        DOMAIN_TO_CLASS.put( "w3schools.com", "w3schools" );
        DOMAIN_TO_CLASS.put( "wikipedia.org", "wikipedia" );
        DOMAIN_TO_CLASS.put( "yahoo.org", "yahoo" );
    }

    static {
        // directories that have their own class , kjv does too, but we do not disturb it.
        DIR_TO_CLASS.put( "animalrights", "animalrights" );
        DIR_TO_CLASS.put( "applet", "applet" );
        DIR_TO_CLASS.put( "bgloss", "bgloss" );
        DIR_TO_CLASS.put( "book", "book" );
        DIR_TO_CLASS.put( "canada", "canada" );
        DIR_TO_CLASS.put( "contact", "contact" );
        DIR_TO_CLASS.put( "deepthoughts", "deepthoughts" );
        DIR_TO_CLASS.put( "dvd", "dvd" );
        // NOTE(review): MODIFIABLE_CSS_CLASSES lists "electronics", not "electronic" -- confirm which is intended.
        DIR_TO_CLASS.put( "electronic", "electronic" );
        DIR_TO_CLASS.put( "environment", "environment" );
        DIR_TO_CLASS.put( "esperanto", "esper" );
        DIR_TO_CLASS.put( "ethics", "ethics" );
        DIR_TO_CLASS.put( "feedback", "feedback" );
        DIR_TO_CLASS.put( "feedback/animals", "feedback" );
        DIR_TO_CLASS.put( "feedback/environment", "feedback" );
        DIR_TO_CLASS.put( "feedback/god", "feedback" );
        DIR_TO_CLASS.put( "feedback/humanrights", "feedback" );
        DIR_TO_CLASS.put( "feedback/money", "feedback" );
        DIR_TO_CLASS.put( "feedback/peace", "feedback" );
        DIR_TO_CLASS.put( "feedback/porn", "feedback" );
        DIR_TO_CLASS.put( "feedback/unmain", "feedback" );
        DIR_TO_CLASS.put( "ggloss", "ggloss" );
        DIR_TO_CLASS.put( "humanrights", "humanrights" );
        DIR_TO_CLASS.put( "javacc", "javacc" );
        DIR_TO_CLASS.put( "jgloss", "jgloss" );
        DIR_TO_CLASS.put( "jgloss/encoding", "encoding" );
        DIR_TO_CLASS.put( "livinglove", "ll" );
        DIR_TO_CLASS.put( "livinglove/handbook", "handbook" );
        DIR_TO_CLASS.put( "livinglove/methods", "ll" );
        DIR_TO_CLASS.put( "money", "money" );
        DIR_TO_CLASS.put( "phone", "phone" );
        DIR_TO_CLASS.put( "politics", "politics" );
        DIR_TO_CLASS.put( "politics/laser", "politics" );
        DIR_TO_CLASS.put( "project", "project" );
        DIR_TO_CLASS.put( "quote", "quote" );
        DIR_TO_CLASS.put( "religion", "religion" );
        DIR_TO_CLASS.put( "reunion", "reunion" );
        DIR_TO_CLASS.put( "webstart", "webstart" );
    }

    // methods

    /**
     * build the set of css classes we are willing to overwrite.
     * The "offsite-xx" country classes are derived from LEGAL_COUNTRIES so the two cannot drift apart.
     *
     * @return set of modifiable class names
     */
    private static HashSet<String> buildModifiableCssClasses() {
        final HashSet<String> classes = new HashSet<>( Arrays.asList(
                "affiliate",
                "amazon",
                "asia",
                "animalrights",
                "apache",
                "apple",
                "applet",
                "bgloss",
                "biz",
                "blogger",
                "book",
                "canada",
                "cbc",
                "cdrive",
                "com",
                "contact",
                "coop",
                "deepthoughts",
                "download",
                "dvd",
                "earth",
                "edu",
                "electronics",
                "encoding",
                "environment",
                "esper",
                "ethics",
                "facebook",
                "feedback",
                "ftp",
                "ggloss",
                "github",
                "google",
                "googlecode",
                /* "googlesearch" */
                "handbook",
                "humanrights",
                "imdb",
                "info",
                "int",
                "jdrive",
                "jgloss",
                "kickstarter",
                "kook",
                "ll",
                "microsoft",
                "mil",
                "money",
                "name",
                "net",
                "none",
                "nyt",
                "offsite",
                "onpage",
                "oracle",
                "org",
                "paypal",
                "pdf",
                "phone",
                "politics",
                "project",
                "quote",
                "religion",
                "reunion",
                "sourceforge",
                "ssl",
                "subversion",
                "sun",
                "tindie",
                "tortoisesubversion",
                "travel",
                "twitter",
                "w3schools",
                "webstart",
                "wikipedia",
                "xdrive"
                // notably absent rat, rec, onion, strawberry, raspberry
                // not kjv, generated by KJV macro. Can leave alone.
        ) );
        // offsite-ac ... offsite-zw, one per country tld
        for ( final String country : LEGAL_COUNTRIES ) {
            classes.add( "offsite-" + country );
        }
        return classes;
    }// /method

    /**
     * get url corresponding to string. caches result.
     * The cache is only updated after the string parsed successfully, so a malformed string
     * fails again on the next call rather than returning the previously cached URL.
     *
     * @param urlString String we want to convert to an URL.
     *
     * @return corresponding URL.
     * @throws MalformedURLException if bad URL
     */
    private static URL getURL( String urlString ) throws MalformedURLException {
        if ( !urlString.equals( prevUrlString ) ) {
            // parse first: if this throws, the cache must stay consistent.
            final URL parsed = new URL( urlString );
            prevUrlString = urlString;
            prevUrl = parsed;
        }
        return prevUrl;
    }// /method

    /**
     * is this url one that gives us commissions?
     *
     * @param domain domain including tld e.g. linkshare.com
     *
     * @return true if linking to an affiliate
     */
    private static boolean isAffiliate( final String domain ) {
        return AFFILIATE_DOMAINS.contains( domain );
    }// /method

    /**
     * is this url linking to Cdrive?
     *
     * @param url relative or absolute
     *
     * @return true if linking to Cdrive
     */
    private static boolean isCDrive( final String url ) {
        return url.startsWith( "file://localhost/C:" )
               || url.startsWith( "file:/C:" )
               || url.startsWith( "file:///C:" )
               || url.startsWith( "C:" );
    }// /method

    /**
     * is this url linking to one of the products?.html pages
     *
     * @param url relative or absolute
     *
     * @return true if links to products page or to *.zip product download,
     * but not downloads to other sites
     */
    private static boolean isDownload( final String url ) {
        // links in the root dir don't necessarily get the class.
        final String bare = trimLeading( url, "./" ); // removes all ../../
        // only count links to spots on page, not the page in its entirety
        return bare.startsWith( "products" );
    }// /method

    /**
     * is this url linking to the github code repository?
     *
     * @param url relative or absolute
     *
     * @return true if linking to github code repository
     */
    private static boolean isGitHub( final String url ) {
        return isWebUrl( url )
               && ( url.contains( "github.com" ) || url.contains( "github.io" ) );
    }// /method

    /**
     * is this url linking to the google or android? but not code or search
     * Only http: and https: urls qualify, as with the other site tests in this class.
     *
     * @param url relative or absolute
     *
     * @return true if linking to google (other than google code)
     */
    private static boolean isGoogle( final String url ) {
        if ( !isWebUrl( url ) ) {
            return false;
        }
        // excluded: http://code.google.com/p/unitsofmeasure and google searches
        final boolean googleProper = url.contains( "google." )
                                     && !url.contains( "code.google.com/" )
                                     && !url.contains( "/search?" );
        return googleProper
               || url.contains( "android.com" )
               || url.contains( "goo.gl" )
               || url.contains( "tensorflow.org" );
    }// /method

    /**
     * is this url linking to the google code repository?
     *
     * @param url relative or absolute
     *
     * @return true if linking to google code repository
     */
    private static boolean isGoogleCode( final String url ) {
        return isWebUrl( url ) && url.contains( "code.google.com/" );
    }// /method

    /**
     * is this url linking to Jdrive?
     *
     * @param url relative or absolute
     *
     * @return true if linking to Jdrive
     */
    private static boolean isJDrive( final String url ) {
        // file://localhost/J:/mindprod/products.html
        return url.startsWith( "file://localhost/J:" )
               || url.startsWith( "file:/J:" )
               || url.startsWith( "file:///J:" )
               || url.startsWith( "J:" );
    }// /method

    /**
     * is this url linking to a kook conspiracy website
     *
     * @param url relative or absolute
     *
     * @return true if linking to kook conspiracy website
     */
    private static boolean isKook( final String url ) {
        if ( !isWebUrl( url ) ) {
            return false;
        }
        for ( final String kook : KOOK_DOMAINS ) {
            if ( url.contains( kook ) ) {
                return true;
            }
        }
        return false;
    }// /method

    /**
     * is this url linking offsite, not one of more specialised pages?
     *
     * @param url relative or absolute
     *
     * @return true if linking offsite
     */
    private static boolean isOffsite( final String url ) {
        return isWebUrl( url ) && !url.startsWith( "http://mindprod.com/" );
    }// /method

    /**
     * is this url linking to a spot on the same page?
     *
     * @param url relative or absolute
     *
     * @return true if on same page.
     */
    private static boolean isOnPage( final String url ) {
        return url.startsWith( "#" );
    }// /method

    /**
     * is this url linking to an Adobe PDF/FDF download?
     *
     * @param url relative or absolute
     *
     * @return true if linking to pdf
     */
    private static boolean isPDF( final String url ) {
        return url.endsWith( ".pdf" ) || url.endsWith( ".fdf" );
    }// /method

    /**
     * is this url linking to wush web subversion
     *
     * @param url relative or absolute
     *
     * @return true if linking to wush web subversion, potentially others.
     */
    private static boolean isSubversion( final String url ) {
        return url.startsWith( "http://wush.net/websvn/" )
               || url.startsWith( "https://wush.net/websvn/" );
    }// /method

    /**
     * is this url linking to wush net tortoise subversion
     *
     * @param url relative or absolute
     *
     * @return true if linking to wush.net tortoise subversion
     */
    private static boolean isTortoise( final String url ) {
        return url.startsWith( "http://wush.net/svn/" )
               || url.startsWith( "https://wush.net/svn/" );
    }// /method

    /**
     * does this url start with the http: or https: protocol?
     *
     * @param url relative or absolute
     *
     * @return true if url starts with http:// or https://
     */
    private static boolean isWebUrl( final String url ) {
        return url.startsWith( "http://" ) || url.startsWith( "https://" );
    }// /method

    /**
     * is this url linking to Xdrive?
     *
     * @param url relative or absolute
     *
     * @return true if linking to Xdrive
     */
    private static boolean isXDrive( final String url ) {
        // file://localhost/X:/Program Files/...
        return url.startsWith( "file://localhost/X:" )
               || url.startsWith( "file:/X:" )
               || url.startsWith( "file:///X:" )
               || url.startsWith( "X:" );
    }// /method

    /**
     * assign a CSS class to a local link
     *
     * @param url                  of link
     * @param fileBeingDistributed file the link is embedded in, used to resolve relative urls
     *
     * @return css class, null if none
     */
    private String assignCSSClassLocal( final String url, File fileBeingDistributed ) {
        assert url != null : "null url";
        if ( isPDF( url ) ) {
            return "pdf";
        }
        // local urls
        if ( isClassLess( url ) ) {
            return null;
        }
        // assignCSSClass has already dealt with on-page links; kept in case of other callers.
        if ( isOnPage( url ) ) {
            return "onpage";
        }
        if ( isDownload( url ) ) {
            return "download";
        }
        String branch;
        if ( url.startsWith( "/" ) ) {
            // absolute (webroot relative) url
            branch = Tools.uPathParentWithSlashes( Tools.toFileFromUPath( url ) );
        } else {
            // relative urls, resolved against the dir of the embedding file
            assert fileBeingDistributed != null : "null fileBeingDistributed";
            final File parent = fileBeingDistributed.getParentFile();
            assert parent != null : "null parent";
            branch = Tools.uPathParentWithSlashes( new File( parent, url ) );
        }
        branch = ST.chopTrailingString( branch, "/include" );
        // might be null.
        return DIR_TO_CLASS.get( branch );
    }// /method

    /**
     * assign a CSS class to a link with a domain.
     * Tests are applied in order; the first match wins.
     *
     * @param url of link
     *
     * @return css class, null if none or if the url is malformed
     */
    private String assignCSSClassWithDomain( final String url ) {
        // various offsite links
        final String domain;
        try {
            domain = Misc.getDomain( getURL( url ) );
        } catch ( MalformedURLException e ) {
            // deliberately best effort: an unparseable link simply gets no class
            return null;
        }
        if ( TAKES_PAYPAL.contains( domain ) ) {
            return "paypal";
        }
        final String candidate = DOMAIN_TO_CLASS.get( domain );
        if ( !ST.isEmpty( candidate ) ) {
            return candidate;
        }
        if ( url.contains( "shitharperdid.ca.nyud.net" ) ) {
            return "offsite-ca";
        }
        if ( isSubversion( url ) ) {
            return "subversion";
        }
        if ( isTortoise( url ) ) {
            return "tortoisesubversion";
        }
        if ( isGoogleCode( url ) ) {
            return "googlecode";
        }
        if ( isGitHub( url ) ) {
            return "github";
        }
        if ( isGoogle( url ) ) {
            return "google";
        }
        if ( isKook( url ) ) {
            return "kook";
        }
        if ( isAffiliate( domain ) ) {
            // might be http: or https:
            return "affiliate";
        }
        if ( domain.endsWith( "gov.ca" ) ) {
            return "gov";
        }
        if ( domain.endsWith( "dnd.ca" ) ) {
            return "mil";
        }
        final int dotPlace = domain.lastIndexOf( "." );
        final String tld = ( dotPlace > 0 ) ? domain.substring( dotPlace + 1 ) : "";
        if ( TLD_WITH_CLASS.contains( tld ) ) {
            return tld; // this handles usual cases .com .org .net ...
        }
        if ( LEGAL_COUNTRIES.contains( tld ) ) {
            // e.g. .ca .us .fr .uk .de
            return "offsite-" + tld;
        }
        if ( isOffsite( url ) ) {
            return "offsite";
        }
        // not http(s), or a link into http://mindprod.com/ itself, with an unrecognised tld: no class.
        return null;
    }// /method

    /**
     * assign a CSS class to a link containing a : (with protocol or C:)
     *
     * @param url of link
     *
     * @return css class, null if the protocol alone does not decide the class
     */
    private String assignCSSClassWithProtocol( final String url ) {
        if ( url.startsWith( "mailto:" ) ) {
            return "mailto";
        }
        if ( url.startsWith( "ftp://" ) ) {
            return "ftp";
        }
        if ( isCDrive( url ) ) {
            return "cdrive";
        }
        if ( isJDrive( url ) ) {
            return "jdrive";
        }
        if ( isXDrive( url ) ) {
            return "xdrive";
        }
        if ( url.startsWith( "file://" ) ) {
            return "localfile";
        }
        return null;
    }// /method

    /**
     * is this url one without a class?
     *
     * @param url relative or absolute
     *
     * @return true if links to this url should carry no class, i.e. isURLModifiable says no.
     */
    private boolean isClassLess( final String url ) {
        return !isURLModifiable( url );
    }// /method

    /**
     * debugging harness
     *
     * @param args first parm is URL to get css class for
     */
    public static void main( final String[] args ) {
        if ( args.length < 1 ) {
            err.println( "usage: AssignCSSClassesForMindprod url" );
            return;
        }
        final AssignCSSClasses c = new AssignCSSClassesForMindprod();
        out.println( c.assignCSSClass( args[ 0 ], new File( "E:/mindprod/index.html" ) ) );
    }

    /**
     * compute a better htmlClass given the URL
     *
     * @param url                  URL of link, external http: absolute or relative, possibly with a path.
     *                             lead / means webroot relative.
     *                             includes things like ../bgloss/x.html
     *                             abc.html#XYZ
     *                             #XYZ
     *                             http://abc.com/x.html
     *                             mailto:
     *                             https://abc.com/x.html
     * @param fileBeingDistributed where link is embedded. Needed to recognise class of local links without any dir
     *                             names. Normally CSS class does not depend on where link is embedded.
     *                             If null, a dummy index.html in the webroot is assumed.
     *
     * @return css class for this link, null for none
     */
    public String assignCSSClass( final String url, File fileBeingDistributed ) {
        if ( url == null ) {
            return null;
        }
        if ( isOnPage( url ) ) {
            return "onpage";
        }
        if ( fileBeingDistributed == null ) {
            fileBeingDistributed = new File( "E:/mindprod/index.html" ); // dummy
        }
        if ( url.contains( ":" ) ) {
            // choose class based on protocol
            final String candidate = assignCSSClassWithProtocol( url );
            if ( candidate != null ) {
                return candidate;
            }
            // has both a protocol and domain.
            return assignCSSClassWithDomain( url );
        }
        // for includes, treat as if context were one level out, where include will probably be inserted.
        String dir = Tools.dirWithSlashes( fileBeingDistributed );
        if ( dir.endsWith( "/include" ) ) {
            dir = ST.chopTrailingString( dir, "/include" );
            fileBeingDistributed =
                    Tools.toFileFromUPath( dir + "/" + EIO.getCoreName( fileBeingDistributed ) + ".html" );
        }
        // assign a class to link without a protocol.
        return assignCSSClassLocal( url, fileBeingDistributed );
    }// /method

    /**
     * do we consider disturbing this class of link?
     *
     * @param htmlClass class of URL we are considering disturbing
     *
     * @return true if ok to disturb
     */
    public boolean isCSSClassModifiable( final String htmlClass ) {
        return MODIFIABLE_CSS_CLASSES.contains( htmlClass );
    }// /method

    /**
     * do we consider disturbing this class of link?
     *
     * @param url URL we are considering disturbing, as it appears embedded in the html, e.g.
     *            ../jgloss/x.html x.html
     *
     * @return true if ok to disturb
     */
    public boolean isURLModifiable( final String url ) {
        if ( isWebUrl( url ) ) {
            return true;
        }
        // last path segment, including any extension and #fragment
        final int place = url.lastIndexOf( '/' );
        final String fn = ( place >= 0 ) ? url.substring( place + 1 ) : url;
        if ( CLASSLESS_FILES.contains( fn ) ) {
            return false;
        }
        // links ending in a one-char fragment, e.g. #A, or in the named index anchors are left alone.
        final int secondLast = fn.length() - 2;
        final boolean singleCharFragment = secondLast >= 0 && fn.charAt( secondLast ) == '#';
        return !( singleCharFragment
                  || fn.endsWith( "#MASTERINDEX" )
                  || fn.endsWith( "#PUNCT" ) );
    }// /method

    /**
     * does this file need its CSS classes repaired?
     *
     * @param fileBeingProcessed file in question
     *
     * @return true if the file's link classes should be repaired; always false for directories
     * and for files in dirs not listed here.
     */
    public boolean needsCSSRepair( File fileBeingProcessed ) {
        if ( fileBeingProcessed.isDirectory() ) {
            return false;
        }
        // we don't usually repair includes. Their relative URLs are out of whack
        // we generate *.htmlfrag with classes already in place.
        final String dir = Tools.dirWithSlashes( fileBeingProcessed );
        switch ( dir ) {
            case "religion":
                return !fileBeingProcessed.getName().equals( "dgloss.html" );

            case "livinglove/methods":
                return !fileBeingProcessed.getName().equals( "lgloss.html" );

            case "environment":
                return !fileBeingProcessed.getName().equals( "egloss.html" );

            case "jgloss/include":
                switch ( fileBeingProcessed.getName() ) {
                    case "appvisorguts.htmlfrag":
                    case "candidateguts.htmlfrag":
                    case "deadguts.htmlfrag":
                    case "hassleguts.htmlfrag":
                    case "nohassleguts.htmlfrag":
                    case "tscribe.htmlfrag":
                        // no relative addresses, so it is ok docs are not in final resting place.
                        return true;

                    default:
                        return false;
                }

            case "applet":
            case "application":
            case "bgloss":
            case "book":
            case "canada":
            case "carol":
            case "contact":
            case "deepthoughts":
            case "dvd":
            case "electronic":
            case "esperanto":
            case "ethics":
            case "feedback":
            case "feedback/animals":
            case "feedback/environment":
            case "feedback/god":
            case "feedback/humanrights":
            case "feedback/money":
            case "feedback/peace":
            case "feedback/porn":
            case "feedback/unmain":
            case "ggloss":
            case "ggloss/blurb":
            case "humanrights":
                // case "include":
            case "jgloss":
            case "jgloss/encoding":
            case "jgloss/sqlvendors":
            case "livinglove":
            case "livinglove/handbook":
            case "livinglove/methods/songlyrics":
            case "money":
                // case "phone": leave alone
            case "politics":
            case "politics/laser":
            case "project":
            case "quote":
            case "reunion":
            case "utility":
            case "webstart":
                final String ext = EIO.getExtension( fileBeingProcessed );
                if ( !ext.equals( "html" ) ) {
                    return false;
                }
                // we want to avoid processing generated index files: single letter names and the master indexes.
                final String core = EIO.getCoreName( fileBeingProcessed );
                return !( core.length() == 1
                          || core.equals( "masterindex" )
                          || core.equals( "punct" )
                          || core.equals( "0-9" ) );

            default:
                // no snippet, foot
                return false;
        }
    }// /method
}