/* * [AssignSiteNames.java] * * Summary: Assign site names to candidates. Overrides existing names. * * Copyright: (c) 2010-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2010-01-10 initial version */ package com.mindprod.submitter; import com.mindprod.common18.Build; import com.mindprod.common18.EIO; import com.mindprod.csv.CSVReader; import com.mindprod.csv.CSVWriter; import com.mindprod.hunkio.HunkIO; import java.io.EOFException; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; import java.util.Comparator; import java.util.regex.Pattern; import static java.lang.System.*; /** * Assign site names to candidates. Overrides existing names. *
* Input: candidates.csv output: candidates.csv, 1 to 5 fields. url in second column * Designed primarily for Roedy's use to research new sites. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2010-01-10 initial version * @since 2010 */ public class AssignSiteNames { /** * list of words often used in domain names. All must be lower case. */ private static final String[] camelWords = { // list will later be automatically sorted longest first. "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "about", "active", "add", "all", "allied", "alpha", "alt", "alternative", "alternatives", "android", "and", "arcade", "arcades", "arch", "arches", "bank", "best", "big", "bigger", "blue", "box", "bronze", "bulletin", "bulletins", "buy", "buyer", "buyers", "calculator", "calculators", "cellular", "central", "chief", "completely", "consult", "consulting", "content", "contents", "cool", "cope", "corner", "corners", "data", "deep", "depot", "depots", "designer", "designers", "design", "digital", "dir", "direct", "directories", "directory", "discoveries", "discovery", "doc", "dog", "donkey", "download", "downloads", "dump", "easy", "exchange", "faq", "field", "file", "files", "filez", "find", "fire", "flow", "for", "forge", "forum", "free", "freeware", "freewares", "frog", "fusion", "game", "games", "garage", "germany", "get", "go", "gold", "green", "group", "guide", "have", "horse", "host", "hot", "how", "hub", "ice", "info", "information", "installer", "installers", "internet", "jet", "key", "keys", "king", "knight", "knowledge", "landmark", "libraries", "library", "link", "links", "liquid", "list", "lists", "load", "loads", "machine", "machines", "master", "matrix", "media", "mega", "million", "mini", "mobile", "money", "monster", "move", "mp3", "must", "my", "nano", "net", "nets", "news", "newz", "new", "note", "notes", "now", "o2p", "office", "one", "online", "pad", "pads", "page", "pages", "paradise", "pc", "pcs", "pedia", "pedias", "phone", "phones", "pick", "picks", "pile", "piles", "ping", "platinum", "popular", "press", "prime", "project", "projects", "purely", "quality", "rank", "red", "research", "resource", "resources", "rocket", "rockets", "saver", "savers", "sea", "search", "searches", "seek", "seeks", "selling", "shareware", "sharewares", "simple", "site", "slash", "smooth", "soft", "softs", "software", "softwares", "source", "sources", "space", "spaces", "spider", "spiders", "standard", "standards", "stock", "stop", "store", "stores", "storm", "storms", "submit", "submits", "suggest", "suggestion", "suggestions", "suggests", "super", "tail", "tails", "tech", "techs", "the", "titanium", "transform", "transforms", "trial", "trials", "url", "urls", "video", "videos", "videoz", "viz", "ware", "wares", "warez", "water", "web", "webs", "which", "wide", "wiki", "wikis", "window", "windows", "wing", "wings", "wire", "wires", "wise", "wizard", "wizards", "work", "works", "working", "world", "write", "xp", "vista", "zone", "zones", }; private static final Pattern SPLIT_ON_DOT = Pattern.compile( "\\." ); static { // put longest strings first so will find longest match first Arrays.sort( camelWords, new LongestFirst() ); } /** * guess a site name for this URL * * @param submissionURL URL of pad submission site. * * @return a guess for a camel caps name for the site */ private static String guessSiteName( String submissionURL ) { if ( !submissionURL.startsWith( "http://" ) ) { submissionURL = "http://" + submissionURL; } String host; try { final URL u = new URL( submissionURL ); host = u.getHost(); } catch ( MalformedURLException e ) { err.println( "MalformedURL" + submissionURL ); host = submissionURL; } // split at dots final String[] pieces = SPLIT_ON_DOT.split( host ); final int count = pieces.length; if ( count < 2 ) { err.println( "URL without TLD" + submissionURL ); return submissionURL; } else { if ( pieces[ count - 2 ].equals( "co" ) && count >= 3 ) { return toCamel( pieces[ count - 3 ] ); } else { return toCamel( pieces[ count - 2 ] ); } } } /** * capitalise common words embedded in name, and remove dashes, marking the spots with caps. * * @param name name to camel-capitalise * * @return name with caps. */ private static String toCamel( String name ) { return toCamelCaps( toCamelDashes( name ) ); } /** * capitalise embedded words, Camel case style * * @param name name to capitalise. * * @return name with embedded words capitalised. */ private static String toCamelCaps( String name ) { // capitalise any embedded words final StringBuilder sb = new StringBuilder( name.length() ); boolean capitaliseNextLetter = true; outer: for ( int i = 0; i < name.length(); i++ ) { // see if any toCamel words start at this position in name for ( String word : camelWords ) { if ( name.substring( i ).startsWith( word ) ) { // capitalise this word and the next. if ( i != 0 ) { sb.append( "-" ); } sb.append( Character.toUpperCase( name.charAt( i ) ) ); sb.append( word.substring( 1 ) ); i += word.length() - 1; capitaliseNextLetter = true; continue outer; } } // end inner loop without finding any matches. if ( capitaliseNextLetter ) { if ( i != 0 ) { sb.append( "-" ); } sb.append( Character.toUpperCase( name.charAt( i ) ) ); capitaliseNextLetter = false; } else { sb.append( name.charAt( i ) ); } } // end outer loop return sb.toString(); // add