/* * [Crack.java] * * Summary: Analyses a submit form to extract information needed to automate submission. * * Copyright: (c) 2009-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 4.3 2009-04-11 add AivSoft, FilesGuard, FreeFileSeek, GetAllSoft, HotFileDownload, * Seek4Software, SharewarePower, Soft-db, Softake, Softholm, SoftLookup * 4.4 2009-05-11 remove DL Daddy, add Softwarelode, Digimodes, Download-by * 4.5 2009-05-19 remove TrialFiles, add Publish-Me, AlphaDownloads , DownloadChoice, SoftwareArchiveIsGreat, * Download5000, DownloadArsivi, DownloadShareware, DownloadStation, * EliasSoftDownloads, FreeSoftwareSharewareDownloads, * FreeShareWeb, FreewareArchiv, Freeware1, * FreeSoftwareApps, Goooggle, SafeFreeDownloads, SafeFreeSoftware, SafeFreeSoftwareDowload * 4.6 2009-05-19 remove WebAcclaim, add PadfileInfo, PadFM, PeachSeed, ShareApple, Vandino, Webzf * 4.7 2009-06-06 add Geeez, GeneralShareware, Newsoft2006, pc24hours, * ResourceDB, RetailerDeals, Share32, SharewareBay, SharewareKing, SoftAllWare, BobSoft, SoftLow * 4.8 2009-07-11 remove BobSoft, EnterHelp, Softake * add Top4Download, SoftwareMass, SoftwareSizzle, SuperDownloads, TrialWare, TryingBuying, WestDownload * 4.9 2009-07-30 remove BestSoftware, add SubmitPadFile, ProgrammersHeaven * 5.0 2009-09-22 remove DownloadWast, Seek4Software. CSV files to track hassle and hassle-free. * Prober discards sites already processed. * add GeneralFreeware, Enterhelp, Bobsoft, SoftCrown, Softmerge * 5.1 2009-10-25 remove Techwoods, add SuperWebHunt, ABDownloads, Downloads2K, Soft321. * 5.2 2009-11-03 remove Sharewareville. 
Add Windows7Download, SoftwareDownloads * 5.3 2009-11-21 remove http://www.allapp.com/Submit-Software/ * add http://www.5moons.net/submit.php * http://www.8844download.com/submit.htm * http://www.affiliate-referrals.net/submit.php (DLDaddy) * http://www.goodownload.com/submit.html * http://www.resourcefill.com/submit.php * http://www.uniqueidea.net/download/submit.asp * http://www.acidfiles.com/submit.html * 5.4 2009-12-02 remove FileVolution, add Download11, SoftCab, AfDown, DeltaLoad, DesktopShareware, 12buzz, * GetSharewareForFree, FreePadDatabase, FreewareTown * 5.5 2009-12-11 add 11 sites: DownloadDir, EuSoftNet, FreeSoftwarePrograms, IMfreeware, dlTube, * MySoftwareList, NewDownload, SoftwareCrown, PeterBurgess, SearchSomeSoft, SearchAllSoft * 5.6 2009-12-13 add 10 sites: 4software2Download, SharewareCheap, SoftwareRatings, * ShopLagom, SmallFreeware, Soft4Sale, SoftMobile, SoftwareMatrix, SystemUtils, WorldSoftwareArchive * 5.7 2009-12-17 add 16 sites: 123Freesoft, 4software2Download, DownloadYourSoftware, EzySoft, FastShareware, * FileEdge, FilePicks, FileProfile, LoadFree, , ObtainSoft, ReviewWorld, Download4a, * DownloadExpo, DownloadHeaven, DownloadPile, EasyFileDownloads * 5.8 2009-12-18 add 10 sites: FilePile, FilesStore, FindBestSoft, FineDownloads, * FreewareDump, FreewareSoft, MetaDownloads, PocketPCSoftwareDownloads, Run2, SafeFreeSoftware * 5.9 2009-12-19 add 15 sites: SharewareDump, Sharewareville, Smilestone, SoftDir, SoftwareDetails, * SoftwareKB, SoftwarePyramid, SoftWeb, TechWoods, Telecharger, TopSharewareDownloads, * VideoSoftwareDirect, * WinColors, WindFile, BigSoftwareBox */ package com.mindprod.submitter; import com.mindprod.common18.ST; import com.mindprod.entities.DeEntifyStrings; import com.mindprod.http.Get; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.lang.System.*; /** * Analyses a submit form to extract 
information needed to automate submission. *

* Rough and ready. * Designed primarily for Roedy's use to research new sites. * Scans the submit page and generates most of the code needed to add support for * yet another submission site. * Takes siteName, home and submitURL with our without trailing commas. Space separated * * @author Roedy Green, Canadian Mind Products * @version 1.0 2009-12-19 original * @since 2009 */ public class Crack { //todo: avoid /submit/submit when base ends in ./submit sometimes. /** * true if want additional output to help figure out why a site won't analyse. */ private static final boolean DEBUGGING = false; /** * regex to go inside [] to match all punctuation chars, not counting space *

* !"#$%&'()*+,-./:;<=>?[\]^_`{|}~ regeq quoted */ private static final String ALL_PUNCT = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?\\[\\\\\\]\\^_`\\{\\|\\}~"; /** * regex to go inside [] for all punct chars but > */ private static final String ALL_PUNCT_BUT_GT = ALL_PUNCT + "&&[^>]"; /** * finds parm in form , * e.g. * * * * * * *    *

* allow anything inside except a > */ private static final Pattern PARM_FINDER = Pattern.compile( "<(?:input|textarea|select)[\\s\\p{Alnum}" + ALL_PUNCT_BUT_GT + "]+>", Pattern.CASE_INSENSITIVE ); /** * regex to go inside [ ] for all chars but " and ' */ private static final String ALL_PUNCT_BUT_QUOTES = ALL_PUNCT + "&&[^'\"]"; /** * finds value in parm * all punct except "' , ideally should eat Russian values etc. too. */ private static final Pattern VALUE_FINDER = Pattern.compile( "value=['\"]([ \\p{Alnum}" + ALL_PUNCT_BUT_QUOTES + "]+)['\"]", Pattern.CASE_INSENSITIVE ); /** * words in the action field that indicate we definitely do not the submit form */ private static final String[] BAD_ACTION_INDICATORS = { // must be pure lower case to compare "google", "search", "show", "wp-comments", }; /** * words in the action field that indicate we have the submit form */ private static final String[] GOOD_ACTION_INDICATORS = { // must be pure lower case to compare "", "add_software", "cgi-sys/formmail.pl", "check-pad", "checkadd", "edt_pad_location", "insertpad", "pad-check", "submit", "submit-pad-file.html", "submit_pad_file.php", "submit-software.php", "www.downbroad.com/com/soft/admin_ex/index.php", }; /** * words in parameter name that indicate this is the padURL param */ private static final String[] PAD_URL_INDICATORS = { // must be pure lower case for compare. "cale_pad", "edt_pad_location", "frm[url]", "manualsubmissionurl", "manyurls", "notes", "p", "pad", "pad_file", "pad-file", "pad_file_box", "pad[url]", "pad-url", "pad_url", "padfile", "padfileurl", "pad_file_url", "padfilename", "padlocation", "padurl", "padurls", "paffile", "so", "submit_pad[url]", "submitpadfileform[padfileurl]", "url", "url_pad", "url_pad_file", "textboxurl", "validateurl", "web", "xmlpad", "xmlpath", "xmlurl" }; //

/**
     * usual x=y pair parms.
     * NOTE(review): element type presumed to be String, in line with the String[]
     * auxParmPairs field -- confirm against the code that fills this list.
     */
    private static final ArrayList<String> parmPairs = new ArrayList<>( 11 );

    /**
     * find action post/get
     * e.g. action="http://freesafesoft.com/search.php?pindex=1&showimage=on"> or just plain action>
     * Group 1 is the action value. When only the bare-word alternative (plain action)
     * matches, group 1 is null: callers must null-check it, since Matcher.groupCount()
     * reports the number of groups in the pattern (1) whichever alternative matched.
     */
    private static final Pattern ACTION_FINDER =
            Pattern.compile( "action=[\"']?([\\p{Alnum}&\\-\\./:=\\?_~]*)[\"']?|action",
                    Pattern.CASE_INSENSITIVE );

    /**
     * finds end of form on the page, i.e. the closing form tag.
     * The tag is spelled out because an empty regex would match at once at the start of
     * the search region, leaving no room between form start and form end to scan for parms.
     */
    private static final Pattern END_FORM_FINDER =
            Pattern.compile( "</form\\s*>", Pattern.CASE_INSENSITIVE );

    /**
     * finds form on the page and extracts the action.
     * e.g.
     *
* * *

* allow anything inside except a > */ private static final Pattern FORM_FINDER = Pattern.compile( "", Pattern.CASE_INSENSITIVE ); /** * finds method in */ private static final Pattern NAME_FINDER = Pattern.compile( "name=['\"]([\\p{Alnum}\\$\\._\\-\\[/\\]]*)[\"']", Pattern.CASE_INSENSITIVE ); /** * primitive split of encoded pairs of parms */ private static final Pattern PARM_SPLITTER = Pattern.compile( "[\\?=&]" ); /** * action with lead / and possible path. */ private static String absoluteAction; /** * aux parms from the url */ private static String[] auxParmPairs; /** * camel case human name for site */ private static String humanName; /** * true if method="POST" false if "GET" */ private static boolean isViaPost; /** * url of the manual submission page */ private static URL manualSubmissionURL; /** * contents of web page for manual submit that we analyse */ private static String page; /** * find and extract information needed to fake a post on the web page * * @throws java.net.MalformedURLException if bad action url */ private static void analyse() throws MalformedURLException { if ( DEBUGGING ) { out.println( page ); } int totalFormsFound = 0; int plausibleFormsFound = 0; final Matcher startFormFinder = FORM_FINDER.matcher( page ); while ( startFormFinder.find() ) { totalFormsFound++; final String formContents = startFormFinder.group( 1 ); final Matcher af = ACTION_FINDER.matcher( formContents ); if ( af.find() ) { String action = af.groupCount() > 0 ? 
af.group( 1 ) : ""; if ( DEBUGGING ) { out.println(); out.println( "------------------------------------------------------------------------------------------" ); out.println(); out.println( "found form" ); } final String lcAction = analyseAction( action ); analyseMethod( formContents ); if ( isActionOfInterest( lcAction ) ) { plausibleFormsFound++; analyseForm( startFormFinder.end() ); } else if ( DEBUGGING ) { out.println( "ignoring form " + lcAction ); } } else { out.println( "no action found, presuming default " ); analyseAction( "" ); analyseMethod( formContents ); plausibleFormsFound++; analyseForm( startFormFinder.end() ); } } // end form-finder loop out.println( totalFormsFound + " total tag */ private static void analyseForm( final int startOffset ) { final Matcher endFormFinder = END_FORM_FINDER.matcher( page ); endFormFinder.region( startOffset, page.length() ); if ( !endFormFinder.find() ) { out.println( "can't find " ); } else { final Matcher parmFinder = PARM_FINDER.matcher( page ); parmFinder.region( startOffset, endFormFinder.start() ); // parmFinder is limited to form bounds analyseParms( parmFinder ); generateJavaCode(); } } /** * analyse the method parm of the