/* * [BulkProber.java] * * Summary: Do multithread probes to find out if products (books, dvds, electronics) are in stock. * * Copyright: (c) 2014-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2014-07-19 initial version */ package com.mindprod.stores; import com.mindprod.aws.ProbeViaAWS; import com.mindprod.common18.Misc; import com.mindprod.common18.Progress; import com.mindprod.common18.ST; import com.mindprod.fastcat.FastCat; import com.mindprod.htmlmacros.macro.Global; import com.mindprod.htmlmacros.support.Configuration; import com.mindprod.htmlmacros.support.ConfigurationForMindprod; import com.mindprod.htmlmacros.support.Shutdown; import com.mindprod.http.Get; import com.mindprod.hunkio.HunkIO; import org.jetbrains.annotations.NotNull; import sun.util.logging.PlatformLogger; import sun.util.logging.PlatformLogger.Level; import javax.xml.ws.WebServiceException; import java.io.File; import java.io.IOException; import java.net.CookieHandler; import java.net.CookieManager; import java.net.CookiePolicy; import java.net.MalformedURLException; import java.net.URL; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import static com.mindprod.stores.StockStatus.*; import static java.lang.System.*; /** * Do multithread probes to find out if products (books, dvds, electronics) are in stock. 
*
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2014-07-19 initial version
 * @since 2014-07-19
 */
public class BulkProber {
    /**
     * Lead text prepended to each console line by wrap and probeErr.
     * NOTE(review): the wrap javadoc speaks of two lead spaces; confirm the literal below is what is intended.
     */
    public static final String INDENT = " ";

    /**
     * access the global configuration object
     */
    static final Configuration CONFIGURATION = Global.configuration;

    /*
    how it works:
    We figure out which bookstores need to be reprobed, based on environment SET variables.
    We create a queue for each store, and enqueue all the products it carries to be probed.
    We start off one thread per store.
    We analyse the result to see if this means in stock or out of stock, and update the stock record.
    We use clues peculiar to each store to interpret the results.
    For Amazon stores we find status by using AWS lookup of the ASIN.
    The custom probe method for each bookstore determines if it probes via Amazon or the clue system or something else.
    When we are done, we expand Book, Electronic and DVD macros.
    We look up the status with bookStore.getProductStatus( product ).
    In turn it asks StockStatus getProductStatus( final String product, int storeOrdinal ).
    If it does not have it in RAM, via getBundle, it schedules a probe with
    BulkProber.bulkProbe1OneProduct( stores, product );
    Then Replacer expands macros and probes new products.
    */

    // declarations

    /**
     * true if should detect stall. Turn off for single step debug.
     * Read from environment variable "detectstall"; defaults to true.
     */
    private static final boolean DETECT_STALL = Misc.parseBoolean( System.getenv( "detectstall" ), true );

    /**
     * if no progress for 5 minutes, consider as stalled. In millis.
     */
    private static final int STALL_TIMEOUT = ( int ) TimeUnit.MINUTES.toMillis( 5 );

    /**
     * 3 report clues on everything including INSTOCK
     * 2 report clues on OUTOFSTOCK
     * 1 report clues only on errors.
     * 0 no clue reporting.
     * Read from environment variable "reportingLevel"; defaults to 1.
     */
    private static final int REPORTING_LEVEL = Misc.parseInt( System.getenv( "reportingLevel" ), 1 );

    /**
     * true if want extra output.
     */
    private static final boolean DEBUGGING = true;

    /**
     * how long to wait for site to connect in millis. Same for all bookstores.
Interprobe time is configurable though. */ private static final int CONNECT_TIMEOUT = ( int ) TimeUnit.SECONDS.toMillis( 45 ); /** * how long to wait for site to respond in millis while reading. Same for all bookstores. */ private static final int READ_TIMEOUT = ( int ) TimeUnit.SECONDS.toMillis( 30 ); /** * force a + sign */ private static final DecimalFormat DF = new DecimalFormat( "'+'0;'-'0" ); /** * used to maintain progress counter */ private static Progress progress; // /declarations // methods /** * analyse response code to decide what to do with the book * * @param store the store we probed * @param url url of book we probed. * @param responseCode responseCode from probe, guaranteed not = 200. * * @return StockStatus enum */ private static StockStatus analyseResponseCode( final OnlineStore store, final String url, final int responseCode ) { if ( 400 <= responseCode && responseCode <= 407 ) { // no error message. This is a common way of indicating the book is not carried. return NOTCARRIED; } if ( responseCode == 408 || responseCode == 504 ) { out.println( "Timeout " + responseCode + " while probing " + url + store ); return UNKNOWN; } if ( 409 <= responseCode && responseCode <= 411 ) { // no error message. This is a common way of indicating the book is not carried. return NOTCARRIED; } else if ( ( 500 <= responseCode && responseCode <= 503 ) ) { // refusing probes if ( responseCode == 503 ) { out.println( "not responding " + url + " " + store ); } else { out.println( responseCode + " while probing " + url + " " + store ); } return REFUSINGPROBES; } else if ( responseCode == 301 || responseCode == 302 ) { out.println( probeErr( store, "Protocol changed. redirected URL in checking which products are in stock. 
Likely should have used https: to access site ", responseCode, url ) ); return UNKNOWN; } else if ( responseCode != 200 ) { out.println( probeErr( store, "strange responseCode in checking which products are in stock", responseCode, url ) ); return UNKNOWN; } assert false : "program bug. should never get here"; return UNKNOWN; }// /method /** * Do a multi-threaded probe of all products in the specified stores * and record the statuses. Probes to the same store are usually * delayed. This method will not return until all the probes are completed. * Called with a new product to probe in Stock.getStockStatus * * @param storesToProbe which bookstores, dvd stores, electronic stores to probe. * * @see Stock#getProductStatus(String, int) */ private static void bulkProbe1AllProducts( final OnlineStore[] storesToProbe ) { // clear stats for ( OnlineStore store : storesToProbe ) { store.clearStats(); } // prepare list of products for each store bulkProbe2GetProbesForStoresXProducts( storesToProbe, null ); // process product lists, each on its own thread. if ( storesToProbe.length == 1 ) { bulkProbe3BuildProbeWithoutThreads( storesToProbe, false /* not stragglers */ ); } else { bulkProbe3BuildProbeThreads( storesToProbe ); } }// /method /** * check which bookstores/electronics stores/dvd stores carry each product listed in the lookup map. * * @param storesToProbe stores to probe (might be bookstores, EStores or DVD stores) * @param productToProbe null means probe all products that store lists, otherwise just the one product * null used to reprobe all products. Product # used to probe a new product just encountered. * List of probes created in OnlineStore.enqueueProductToProbe. 
*/ private static void bulkProbe2GetProbesForStoresXProducts( final OnlineStore[] storesToProbe, String productToProbe ) { /* count total number of probes store x product we are about to do, ALL stores combined */ int probesAllStores = 0; for ( OnlineStore store : storesToProbe ) { int probesForStore = 0; int recentlyProbed = 0; if ( productToProbe == null /* probe all products */ ) { final String[] allProducts = store.getAllSortedProducts(); for ( String product : allProducts ) { if ( !store.wasRecentlyProbed( product ) ) { // avoid reprobing if probed in a recently stalled run store.enqueueProductToProbe( product ); probesForStore++; } else { recentlyProbed++; } } } else { // probe just a single product // we always do it, even if done recently store.enqueueProductToProbe( productToProbe ); probesForStore++; } if ( recentlyProbed != 0 ) { out.println( store.productCategory() + " " + store.getEnumName() + " : " + recentlyProbed + " products recently probed, " + probesForStore + " probes to go" ); } probesAllStores += probesForStore; } progress = new Progress( probesAllStores, 1 /* modulus */ ); }// /method /** * Set up one thread per store to probe products. All products for a gives store are probed on the same thread. * * @param storesToProbe list stores to probe */ private static void bulkProbe3BuildProbeThreads( final OnlineStore[] storesToProbe ) { out.println(); int threadsDispatched = 0; // loop through stores and start a thread for each one. Might be mix of books, dvd, estore. final ExecutorService es = Executors.newFixedThreadPool( storesToProbe.length ); for ( OnlineStore store : storesToProbe ) { final int count = store.getCountOfProductsYetToProbe(); if ( count == 0 ) { continue; } else if ( count >= 1 ) { out.println( store.productCategory() + " " + store.getEnumName() + " : " + count + " total probes for store" ); } threadsDispatched++; // rem must be final to use as param to anonymous class. 
final OnlineStore finalStore = store; // same thread handles all products from that store. // define thread for store. es.submit( () -> bulkProbe4AllProductsInStore( finalStore, false /* sorted bulk, not stragglers */ ) ); } // end store loop // launch detectStall on a parallel thread if ( DETECT_STALL ) { // one thread for all stores new Thread( () -> detectStall( storesToProbe, es ) ).start(); } try { // we start the shutdown right away, before any probes done. es.shutdown(); es.awaitTermination( 10, TimeUnit.HOURS ); // sleep till all probes done. } catch ( InterruptedException e ) { out.println( "BulkProber bug: probing interrupted." ); } // we do not return until all the threads have completed. for ( OnlineStore store : storesToProbe ) { // unless a store has frozen and stopped doing probes, its queue // should be empty final int unfinishedProbes = store.getCountOfProductsYetToProbe(); if ( ( unfinishedProbes == 0 || !store.isAcceptingProbes() ) ) { continue; } out.println( "Program bug: " + unfinishedProbes + " unfinished probes in " + store.productCategory() + " " + store + " Should be 0." ); } }// /method /** * Picking up stragglers. * Set up one thread per store to probe products. All products for a gives store are probed on the same thread. * * @param storesToProbe list stores to probe */ private static void bulkProbe3BuildProbeWithoutThreads( final OnlineStore[] storesToProbe, final boolean stragglers ) { for ( OnlineStore store : storesToProbe ) { final int togo = store.getCountOfProductsYetToProbe(); if ( togo == 0 ) { continue; } // rem must be final to use as param to anonymous class. 
if ( togo > 1 ) { out.println( store.productCategory() + " " + store.getEnumName() + " : " + togo + " total probes to do" ); } // probe each store in turn bulkProbe4AllProductsInStore( store, stragglers /* stragglers */ ); } // end store loop // we have finished all the probes for ( OnlineStore store : storesToProbe ) { // unless a store has frozen and stopped doing probes, its queue // should be empty final int unfinishedProbes = store.getCountOfProductsYetToProbe(); if ( ( unfinishedProbes == 0 || !store.isAcceptingProbes() ) ) { continue; } out.println( "Program bug: " + unfinishedProbes + " unfinished probes in " + store.productCategory() + " " + store + " Should be 0." ); } }// /method /** * probe all products for this store. Usually run on its own thread to handle all products in one store, in parallel. * * @param store online store * @param straggler true if processing random straggler, false if processing sorted bulk products. */ private static void bulkProbe4AllProductsInStore( final OnlineStore store, final boolean straggler ) { // at this point store has its own thread. if ( store.suppressCookieErrors() ) { // Kobo emits faulty cookies. Ignore the error messages. // needs javac -XDignore.symbol.file to let us get at the unofficial PlatformLogger class in rt.jar // Turn off Java logging even SEVERE to hide faulty cookie messages from Kobo // ZZZ arranges this with the Forgive.USES_RT option. final PlatformLogger logger = PlatformLogger.getLogger( "java.net.CookieManager" ); logger.setLevel( Level.OFF ); // instead we might try configuring logging.properties or logging in Java control panel. 
CookieHandler.setDefault( new CookieManager( null /* in ram store */, CookiePolicy.ACCEPT_ORIGINAL_SERVER ) ); } final String[] productsToProbe = store.getSortedProductsToProbe(); assert productsToProbe != null && productsToProbe.length > 0; // should be filtered out by caller for ( String product : productsToProbe ) { if ( !store.isAcceptingProbes() || Global.gracefulStop ) { out.println( store + " not accepting probes" ); progress.progress( out ); // progress after probe and analysis out.println(); // separate output from next probe break; } // probe all products on same store on same thread. Also analyse probe result // Removes product from enquedProbesSet bulkProbe5PhysicalProbeOneProduct( store, product, straggler ); // give up if too many rejected probes in a row, or graceful stop requested progress.progress( out ); // progress after probe and analysis out.println(); // separate output from next prob } // end for // // all product probes for store are complete at this point // print SUMMARY of products added/dropped. DETAIL printed as we go. // elements are removed as probed. 
final int added = store.getProductsAddedCount(); if ( added > 1 ) { out.println( ">>> " + store.productCategory() + " " + store + " probes done with " + ST.things( added, store.productCategory() ) + " added" ); out.flush(); } // it is possible to have both additions and dropped final int dropped = store.getProductsDroppedCount(); if ( dropped > 1 ) { out.println( ">>> " + store.productCategory() + " " + store + " probes done with " + ST.things( dropped, store.productCategory() ) + " dropped" ); out.flush(); } if ( added == 0 && dropped == 0 && !straggler ) { out.println( ">>> " + store.productCategory() + " " + store + " probes done without changes" ); out.flush(); } final int unfinishedProbes = store.getCountOfProductsYetToProbe(); if ( unfinishedProbes != 0 ) { out.println( "Program bug: " + unfinishedProbes + " unfinished probes in " + store.productCategory() + " " + store + " Should be 0." ); } }// /method /** * probe one product at one store. Mark probe completed in Store * * @param store which store * @param product product isbn13, asin, upc * @param straggler true if we processing random stragglers, false if sorted bulk * * @see com.mindprod.stores.BulkProber#whatToProbe() to look at env sets to decide what to probe */ private static void bulkProbe5PhysicalProbeOneProduct( final OnlineStore store, final String product, final boolean straggler ) { store.setProbeCompletionStage( 0 ); // make sure we do not swamp store website. Delay sufficiently since last probe completed. // some of delay likely already done. 
long choke = Math.min( store.getInterProbeTime() - ( System.currentTimeMillis() - store.getTimeLastProbeCompleted() ), TimeUnit.SECONDS.toMillis( 10 ) ); if ( choke > 10 /* millis */ ) { try { store.setProbeCompletionStage( 1 ); // nothing is locked during the sleep Thread.sleep( choke ); } catch ( InterruptedException e ) { } store.setProbeCompletionStage( 2 ); } store.setLastProductProbed( product ); // this does the probe over the net either via aws or manual // in stock. // NOTCARRIED, // OUTOFSTOCK, // INSTOCK, // UNKNOWN, // REFUSINGPROBES final StockStatus nowStatus; if ( store.isAlive() ) { final StockStatus override = CONFIGURATION.stockStatusOverride( store, product ); if ( override != null ) { nowStatus = override; } else { // special cases where AWS or clues give wrong status // T H E G U T S, use custom store probe. store.setLastProductProbed( product ); nowStatus = store.probe( product ); // probe will call BulkProber#screenScrapeProbe or BulkProber#awsProbe // BulkProber#screenScrapeProbe will call BulkProber#tallyClueEffects which may display the Clues and hits. // BulkProber#screenScrapeProbe handles the added, dropped and still messages. // can override this status with Configure.stockStatusOverride in case AWS database is wrong } } else { // store is down. 
Treat all products as not carried stock nowStatus = StockStatus.NOTCARRIED; } final StockStatus wasStatus = store.getProductStatus( product, false /* no reprobe to get old status */ ); store.setProductStockStatus( product, nowStatus ); String message = null; store.noteCompletedProbe( product, straggler ); switch ( nowStatus ) { case NOTCARRIED: case OUTOFSTOCK: store.forgiveRefusals(); if ( wasStatus == StockStatus.INSTOCK ) { message = store.productCategory() + " " + store.getEnumName() + " " + product + " dropped as " + nowStatus.name(); store.noteProductDropped(); } else { message = store.productCategory() + " " + store.getEnumName() + " " + product + " still " + nowStatus.name(); } break; case INSTOCK: store.forgiveRefusals(); if ( wasStatus != INSTOCK ) { message = store.productCategory() + " " + store.getEnumName() + " " + product + " added as " + nowStatus.name(); store.noteProductAdded(); } else { message = store.productCategory() + " " + store.getEnumName() + " " + product + " still " + nowStatus.name(); } break; case UNKNOWN: // indeterminate, handled elsewhere break; case REFUSINGPROBES: // we will not try again for another week store.noteRefusedProbe( product ); break; default: throw new IllegalArgumentException( "Invalid result from probe." ); } if ( message != null ) { out.println( wrap( message ) ); out.flush(); } }// /method /** * ensure probes have not stalled * storesToProbe store that are probing * es used to shutdown threads doing probing. 
*/
    private static void detectStall( final OnlineStore[] storesToProbe, final ExecutorService es ) {
        // probes-to-go per store as seen on the previous cycle
        final int[] old = new int[ storesToProbe.length ];
        for ( int i = 0; i < storesToProbe.length; i++ ) {
            old[ i ] = storesToProbe[ i ].getCountOfProductsYetToProbe();
        }
        boolean completed;
        do /* until every queue is empty */ {
            try {
                // nothing is locked during the sleep
                Thread.sleep( STALL_TIMEOUT );
            } catch ( InterruptedException ignored ) {
                // deliberately not re-interrupting: with the flag set every later sleep would return at once
                // and this loop would spin.
            }
            // terminated with stall
            boolean hopelesslyStalled = false;
            // assume all work completed
            completed = true;
            for ( int i = 0; i < storesToProbe.length; i++ ) {
                final OnlineStore store = storesToProbe[ i ];
                final int togo = store.getCountOfProductsYetToProbe();
                if ( togo > 0 ) {
                    completed = false;
                }
                if ( togo > 0 && togo == old[ i ] ) {
                    // no progress, and still work to do
                    // we have a stall.
                    out.println( "|||| "
                                 + store.productCategory()
                                 + " "
                                 + store.getEnumName()
                                 + "["
                                 + store.getOrdinal()
                                 + "] "
                                 + store.getLastProductProbed()
                                 + " stalled with "
                                 + togo
                                 + " probes to go. "
                                 + " stage: "
                                 + store.getProbeCompletionStage() );
                    store.noteStall();
                    if ( store.isHopelesslyStalled() ) {
                        hopelesslyStalled = true;
                        // keep going to report other stalled stores
                    } else {
                        // kill the connection, hoping to restart
                        final Get g = store.getGetInProgress();
                        if ( g != null ) {
                            g.disconnect();
                        }
                    }
                } else {
                    // this store is progressing
                    old[ i ] = togo;
                }
            } // end for
            // we have done one cycle through the stores
            if ( hopelesslyStalled ) {
                for ( OnlineStore store : storesToProbe ) {
                    final int togo = store.getCountOfProductsYetToProbe();
                    if ( togo > 0 ) {
                        out.println( store.productCategory()
                                     + " "
                                     + store.getEnumName()
                                     + "["
                                     + store.getOrdinal()
                                     + "] "
                                     + togo
                                     + " probes to go." );
                    }
                }
                if ( false ) {
                    // debugging aid, normally switched off
                    out.println( "stack traces for all threads" );
                    final Map<Thread, StackTraceElement[]> traces = Thread.getAllStackTraces();
                    final Collection<StackTraceElement[]> coll = traces.values();
                    for ( StackTraceElement[] tes : coll ) {
                        out.println( "\nstack trace" );
                        for ( StackTraceElement te : tes ) {
                            out.println( te.toString() );
                        }
                    }
                }
                es.shutdownNow();
                Shutdown.shutdown();
                System.exit( 2 );
            }
        } while ( !completed );
    }

    /**
     * compose an error message
     *
     * @param store    store that caused the trouble.
     * @param message  error message
     * @param retCode  return code
     * @param probeUrl url we sent
     *
     * @return three wrapped lines: store and message, retcode, probe url.
     */
    private static String probeErr( final OnlineStore store, final String message, final int retCode, final String probeUrl ) {
        final String what = wrap( INDENT + store + " " + message );
        // 301 means should have been done with https:
        final String code = wrap( INDENT + "retcode: " + retCode );
        final String where = wrap( INDENT + "probe url: " + probeUrl );
        return what + "\n" + code + "\n" + where;
    }// /method

    /**
     * dump out a UTF-8 file so can proofread that accented letters are correct and no dups.
     *
     * @param stores list of stores to proofread,
     * @param dump   file to dump the report to.
     */
    private static void proofread( final OnlineStore[] stores, final File dump ) {
        // loops nested 3 deep
        // make sure no dups.
        for ( OnlineStore store : stores ) {
            // only dups within a store matter.
            final Clue[] dups = store.getClues();
            for ( Clue a : dups ) {
                final String aMarker = a.getMarker().getRawMarker();
                for ( Clue b : dups ) {
                    final String bMarker = b.getMarker().getRawMarker();
                    if ( a != b /* not equals! */ && aMarker.contains( bMarker ) ) {
                        out.println( "Duplicate (superset) marker for " + store + " : " + aMarker + " : " + bMarker );
                    }
                }
            }
        }
        final FastCat sb = new FastCat( stores.length * ( 20 * 6 + 2 ) );
        for ( OnlineStore store : stores ) {
            sb.append( store );
            sb.append( "\n" );
            for ( Clue clue : store.getClues() ) {
                sb.append( clue.getHint(), " ", clue.getMarker().toString(), "\n" );
            }
        }
        try {
            HunkIO.writeEntireFile( dump, sb.toString(), HunkIO.UTF8 );
        } catch ( IOException e ) {
            out.println( "proofread failed" );
        }
    }// /method

    /**
     * print the clue hits for a product on the console, without the surrounding error message.
     *
     * @param store          the store
     * @param product        the product number
     * @param originalResult the resulting text from the probe
     * @param trimmedResult  the resulting text from the probe after CHOP_TAIL hints have trimmed it.
     */
    private static void reportAbbreviatedMarkers( final OnlineStore store,
                                                  final String product,
                                                  final String originalResult,
                                                  final String trimmedResult ) {
        // We recompute just for the debugging detail.
        out.print( reportClueHits( store, product, originalResult, trimmedResult ) );
        /* already had trailing \n */
    }// /method

    /**
     * when markers are ambiguous in deciding if a book is in stock, we display an error message,
     * and save both the message and the raw response in C:\temp for later study.
     *
     * @param store          the store
     * @param product        the product number
     * @param probeUrl       the url to probe the product
     * @param responseCode   the response code we got from the probe
     * @param originalResult the resulting text from the probe
     * @param trimmedResult  the resulting text from the probe after CHOP_TAIL hints have trimmed it.
     */
    private static void reportAmbiguousMarkers( final OnlineStore store,
                                                final String product,
                                                final String probeUrl,
                                                final int responseCode,
                                                final String originalResult,
                                                final String trimmedResult ) {
        // we make actual decision in screenScrapeProbe
        // We recompute just for the error message here because we want more detail.
        // we are just scanning products, not expanding pages, so we do not know the pageUrl.
        final String responseDumpFile = "C:\\temp\\"
                                        + store.productCategory().toUpperCase()
                                        + "_"
                                        + store.getEnumName()
                                        + "_"
                                        + product
                                        + ".probe.html";
        final String responseProblemFile = "C:\\temp\\"
                                           + store.productCategory().toUpperCase()
                                           + "_"
                                           + store.getEnumName()
                                           + "_"
                                           + product
                                           + ".ambiguous";
        final FastCat sb = new FastCat( 14 + store.getClues().length * 2 + 7 );
        sb.append( "A M B I G U O U S\n" );
        sb.append( probeErr( store,
                "website unclear on whether " + store.productCategory() + " [" + product + "] is in stock.",
                responseCode,
                probeUrl ) );
        sb.append( "\n" + INDENT + "contents: ", responseDumpFile, "\n" );
        // report already has trailing \n
        sb.append( reportClueHits( store, product, originalResult, trimmedResult ) );
        final String problem = sb.toString();
        out.println( problem );
        try {
            HunkIO.writeEntireFile( new File( responseProblemFile ), problem, HunkIO.UTF8 );
            // dump problematic html response for later study
            HunkIO.writeEntireFile( new File( responseDumpFile ), originalResult, HunkIO.UTF8 );
        } catch ( IOException e ) {
            out.println( "Unable to capture .ambiguous file" );
        }
    }// /method

    /**
     * @param store          which store we are probing
     * @param product        product number we are probing right now
     * @param originalResult the raw result from the online store
     * @param trimmedResult  result with head and tail trimmed via CHOP_LEAD and CHOP_TAIL
     *
     * @return report computed from net effect of all clues on console.
*/ private static String reportClueHits( final OnlineStore store, final String product, final String originalResult, final String trimmedResult ) { int netInStock = 0; int netCarry = 0; final ArrayList lines = new ArrayList<>( store.getClues().length ); for ( Clue clue : store.getClues() ) { final Hint hint = clue.getHint(); switch ( hint ) { case CHOP_LEAD: case CHOP_TAIL: case FREEZE: case NEWER: case REFUSED: break; default: // search for marker in the result, and add to effect count. clue.setProduct( product ); int instockEffect = clue.getInStockEffect( trimmedResult ); netInStock += instockEffect; int carryEffect = clue.getCarryEffect(); netCarry += carryEffect; if ( instockEffect != 0 || carryEffect != 0 ) { // only report hit/miss that has overall effect final int offset = clue.indexAt( originalResult ); final FastCat sb2 = new FastCat( 21 ); sb2.append( ST.toLZ( offset, 7 ) ); // invisible. to use for sort sb2.append( ST.rightPad( hint.name(), 9, false ) ); if ( offset >= 0 ) { sb2.append( " hit " ); } else { sb2.append( " miss " ); } sb2.append( " inStockEffect:", DF.format( instockEffect ) ); sb2.append( " carryEffect:", DF.format( carryEffect ) ); sb2.append( " offset:", offset >= 0 ? Integer.toString( offset ) : "-" ); final String marker = clue.getMarker().toString(); sb2.append( " marker:", marker, "\n" ); lines.add( sb2.toString() ); // create one line to sort // don't return anything yet, we have not yet looked at all clues. } } // end switch } // end for all clues if ( lines.size() == 0 ) { lines.add( " no hits" ); } // sort by offset/type Collections.sort( lines ); // collect the bits together to put in the problem file for analysis final FastCat sb = new FastCat( lines.size() ); for ( String aline : lines ) { // strip off lead field used for sorting. 
sb.append( wrap( aline.substring( 7 ) ) ); } // leave off \n, so caller can do println // caller will have problem description waiting in sb return sb.toString(); } /** * when markers are ambiguous in deciding if a book is in stock, we display and error message * * @param store the store * @param product the product number * @param probeUrl the url of probe */ private static void reportNewerProduct( final OnlineStore store, final String product, final String probeUrl ) { // we are just probing products, not expanding pages, so we do not know the pageUrl final String responseProblemFile = "C:\\temp\\" + store.productCategory().toUpperCase() + "_" + store.getEnumName() + "_" + product + ".newer"; final FastCat sb = new FastCat( 9 ); sb.append( "N E W E R\n" ); sb.append( "There is a newer product available for ", product, " at ", store, "\n" ); sb.append( "probe url:", probeUrl, "\n" ); final String problem = sb.toString(); try { HunkIO.writeEntireFile( new File( responseProblemFile ), problem, HunkIO.UTF8 ); } catch ( IOException e ) { out.println( "Unable to write .newer file." ); } }// /method /** * Evaluate all clues to get a StockStatus * * @param store which store we are probing * @param product product number we are probing right now * @param probeUrl the url we are probing * @param responseCode http response code * @param originalResult the raw result from the online store * @param trimmedResult result with head and tail trimmed via CHOP_LEAD and CHOP_TAIL * * @return StockStatus computed from net effect of all clues. 
*/ @NotNull private static StockStatus tallyClueEffects( final OnlineStore store, final String product, final String probeUrl, final int responseCode, final String originalResult, final String trimmedResult ) { int netInStock = 0; int netCarry = 0; for ( Clue clue : store.getClues() ) { final Hint hint = clue.getHint(); switch ( hint ) { case CHOP_LEAD: case CHOP_TAIL: // chops handled separately earlier in trimEndMarkers break; case FREEZE: if ( clue.indexAt( trimmedResult ) >= 0 ) { return UNKNOWN /* don't change. Website is malfunctioning. */; } break; case NEWER: if ( REPORTING_LEVEL >= 1 && clue.indexAt( trimmedResult ) >= 0 && Global.configuration.possiblyNewer( product ) ) { reportNewerProduct( store, product, probeUrl ); // keep looking at other markers. } break; case REFUSED: if ( clue.indexAt( trimmedResult ) >= 0 ) { store.noteRefusedProbe( product ); return REFUSINGPROBES; } break; default: // search for marker in the result, and add to effect count. clue.setProduct( product ); int instockEffect = clue.getInStockEffect( trimmedResult ); netInStock += instockEffect; int carryEffect = clue.getCarryEffect(); netCarry += carryEffect; // don't return until all clues examined } // end switch } // end for all clues switch ( Misc.signum( netInStock ) ) { case 0: /* ambiguous */ if ( REPORTING_LEVEL >= 1 ) { reportAmbiguousMarkers( store, product, probeUrl, responseCode, originalResult, trimmedResult ); } return UNKNOWN; case 1: /* in stock */ if ( REPORTING_LEVEL >= 3 ) { reportAbbreviatedMarkers( store, product, originalResult, trimmedResult ); } return INSTOCK; // ignore the the carried effect. case -1: /* netInStock < 0 */ if ( REPORTING_LEVEL >= 2 ) { reportAbbreviatedMarkers( store, product, originalResult, trimmedResult ); } return netCarry > 0 ? 
OUTOFSTOCK : NOTCARRIED; default: throw new IllegalArgumentException( "invalid case" ); } } /** * trim off the end markers from the probe response * * @param store the store we are probing * @param originalResult the raw result back from probing the store for a given bok * * @return result with end markers chopped off */ private static String trimEndMarkers( final OnlineStore store, final String originalResult ) { int earliest = Integer.MIN_VALUE; int latest = Integer.MAX_VALUE; for ( Clue clue : store.getClues() ) { switch ( clue.getHint() ) { case CHOP_LEAD: int eo = clue.indexAt( originalResult ); if ( eo >= 0 ) { eo += clue.getMarker().getRawMarker().length(); if ( eo > earliest ) { earliest = eo; } } break; case CHOP_TAIL: final int lo = clue.indexAt( originalResult ); if ( lo >= 0 && lo < latest ) { latest = lo; } break; default: } } if ( earliest == Integer.MIN_VALUE ) { earliest = 0; } if ( latest == Integer.MAX_VALUE ) { latest = originalResult.length(); } if ( earliest >= latest ) { return ""; } else { return originalResult.substring( earliest, latest ); } }// /method /** * check that a string is a asin * * @param asin number */ private static void validateAsin( String asin ) { // relaxed testing. This could be book, electronic, dvd, kindle, audio etc. assert asin.length() == 10 : "bad asin [" + asin + "]"; }// /method /** * decide what to reprobe based on environment SET parameters. */ private static OnlineStore[] whatToProbe() { // Will be no duplicates because of way the list is built. 
final int totalStoreCount = BStore.values().length + DStore.values().length + EStore.values().length; final ArrayList toProbe = new ArrayList<>( totalStoreCount ); boolean checkabebookstores = Misc.parseBoolean( System.getenv( "checkabebookstores" ), false ); boolean checkbookstores = Misc.parseBoolean( System.getenv( "checkbookstores" ), false ); boolean checkdvdstores = Misc.parseBoolean( System.getenv( "checkdvdstores" ), false ); boolean checkelectronicsstores = Misc.parseBoolean( System.getenv( "checkelectronicsstores" ), false ); boolean checkstores = Misc.parseBoolean( System.getenv( "checkstores" ), false ); boolean checkamazonbookstores = Misc.parseBoolean( System.getenv( "checkamazonbookstores" ), false ); boolean checkotherbookstores = Misc.parseBoolean( System.getenv( "checkotherbookstores" ), false ); boolean checkamazondvdstores = Misc.parseBoolean( System.getenv( "checkamazondvdstores" ), false ); boolean checkamazonelectronicsstores = Misc.parseBoolean( System.getenv( "checkamazonelectronicsstores" ), false ); for ( BStore bookstore : BStore.values() ) { if ( ( bookstore.isAlive() && bookstore != BStore.ABEANZ && bookstore != BStore.ABECA ) && ( checkstores || checkbookstores // look for set checkamazonca || bookstore.shouldCheck() /* do a individual check for checkxxxx=yes */ || checkamazonbookstores && BStore.AMAZON_BOOKSTORES.contains( bookstore ) || checkotherbookstores && BStore.OTHER_BOOKSTORES.contains( bookstore ) || checkabebookstores && BStore.ABE_BOOKSTORES.contains( bookstore ) ) ) { toProbe.add( bookstore ); } } for ( DStore dvdstore : DStore.values() ) { if ( dvdstore.isAlive() && ( checkstores || checkdvdstores // look for set checkamazoncadvds || dvdstore.shouldCheck() /* do a individual check for checkxxxx=yes */ || checkamazondvdstores && DStore.AMAZON_DVDSTORES.contains( dvdstore ) ) ) { toProbe.add( dvdstore ); } } for ( EStore estore : EStore.values() ) { if ( estore.isAlive() && ( checkstores || checkelectronicsstores /* do a 
individual check for checkxxxx=yes */ // look for set checktigerelectronics checkamazoncaelectronics || estore.shouldCheck() || checkamazonelectronicsstores && EStore.AMAZON_ESTORES.contains( estore ) ) ) { toProbe.add( estore ); } } if ( toProbe.size() > 0 ) { // display stores we will probe in a n x 8 grid. out.println( "Bookstores to probe:" ); int j = 0; for ( OnlineStore store : toProbe ) { if ( store instanceof BStore ) { out.print( store.getEnumName() + " " ); j++; if ( j % 8 == 0 ) { out.println(); } } } if ( j > 0 ) { out.println(); out.flush(); } out.println( "DVD stores to probe:" ); j = 0; for ( OnlineStore store : toProbe ) { if ( store instanceof DStore ) { out.print( store.getEnumName() + " " ); j++; if ( j % 8 == 0 ) { out.println(); } } } if ( j > 0 ) { out.println(); out.flush(); } out.println( "eStores to probe:" ); j = 0; for ( OnlineStore store : toProbe ) { if ( store instanceof EStore ) { out.print( store.getEnumName() + " " ); j++; if ( j % 8 == 0 ) { out.println(); } } } if ( j > 0 ) { out.println(); out.flush(); } } return toProbe.toArray( new OnlineStore[ toProbe.size() ] ); }// /method /** * wrap strings, and indent them * * @param line string to wrap * * @return string with 2 (INDENT) lead spaces to indent, wrapped to fit on 80-char line with final \n */ private static String wrap( String line ) { final int charsPerLine = 80 - INDENT.length(); final FastCat sb = new FastCat( 20 ); while ( true ) { if ( line.length() == 0 ) { return sb.toString(); } else if ( line.length() <= charsPerLine ) { sb.append( INDENT, line ); return sb.toString(); } // need to split on multiple lines final int p = line.substring( 0, charsPerLine ).lastIndexOf( ' ' ); if ( p >= 0 ) { sb.append( INDENT, line.substring( 0, p ) ); // leave out the space. 
line = line.substring( p + 1 ); } else { sb.append( INDENT + line.substring( 0, charsPerLine ) ); line = line.substring( charsPerLine ); } if ( line.length() > 0 ) { sb.append( "\n" ); } // second line is ready to process, passibly split too. } } /** * does this Amazon carry this asin? * Used to compose probe methods for an Amazon store. * * @param store which store * @param asin product number * * @return StockStatus enum including REFUSINGPROBES etc. */ public static StockStatus awsProbe( final OnlineStore store, final String asin ) { if ( ST.isEmpty( asin ) ) { return NOTCARRIED; } validateAsin( asin ); try { return ProbeViaAWS.isASINInStockViaAWS( asin, store ); } catch ( WebServiceException e ) { // just report if something other than a garden variety 503 if ( !e.getMessage().contains( "503" ) ) { out.println( e.getMessage() + " while probing asin:" + asin + " " + store.productCategory() + " " + store.getEnumName() ); } return REFUSINGPROBES; } }// /method /** * PROBE A STRAGGLER * Do a single-threaded probe of one product in the specified stores * and record the statuses. * Called with a new product to probe in Stock.getBundle * * @param storesToProbe which bookstores, dvd stores, electronic stores to probe. * @param productToProbe product to probe, if null, probe them all. * * @see Stock#getProductStatus(String, int) */ public static void bulkProbe1OneProduct( final OnlineStore[] storesToProbe, final String productToProbe ) { // clear stats for ( OnlineStore store : storesToProbe ) { store.clearStats(); } // prepare list of products for each store bulkProbe2GetProbesForStoresXProducts( storesToProbe, productToProbe ); bulkProbe3BuildProbeWithoutThreads( storesToProbe, true /* stragglers */ ); }// /method /** * get started */ public static void fireup() { // we do our manual probes before we expand embedded macros, or probe for new products. 
if ( DEBUGGING ) { proofread( BStore.values(), new File( "C:/temp/proofreadbookstores.txt" ) ); proofread( DStore.values(), new File( "C:/temp/proofreaddvdstores.txt" ) ); proofread( EStore.values(), new File( "C:/temp/proofreadestores.txt" ) ); } final OnlineStore[] stores = whatToProbe(); if ( stores.length != 0 ) { // bulk probe all products bulkProbe1AllProducts( stores ); BStore.summary(); DStore.summary(); EStore.summary(); } }// /method /** * debug driver , test * * @param args not used */ public static void main( final String[] args ) { Global.installConfiguration( new ConfigurationForMindprod() ); String product = "9781783710942"; BStore b = BStore.NOOK; out.println( b.probe( product ) ); // probe and come up with INSTOCK enum. } /** * does this store this isbn, asin, upc in stock? * used to implement probe method in online stores. Does not use AWS, probes website. * Used by both bulk probe and single product probe. * * @param store which bookstore * @param product isbn of book to test, or electronic product * @param probeUrl url as string * * @return StockStatus enum * @see #awsProbe */ public static StockStatus screenScrapeProbe( final OnlineStore store, final String product, final String probeUrl ) { // we are just scanning products, not expanding pages so we don't know the pageURL try { store.setProbeCompletionStage( 3 ); final Get get = new Get(); get.setConnectTimeout( CONNECT_TIMEOUT ); get.setReadTimeout( READ_TIMEOUT ); get.setInstanceFollowRedirects( true ); try { // nothing is locked while we wait for the get to complete. 
final String originalResult = get.send( new URL( probeUrl ), Get.UTF8 ); store.setProbeCompletionStage( 4 ); final int responseCode = get.getResponseCode(); out.println( wrap( "probe: " + probeUrl ) ); // ideally do before probe, but then would get interleaved with other stores if ( responseCode != 200 && responseCode != -1 ) { return analyseResponseCode( store, probeUrl, responseCode ); } else if ( originalResult == null || originalResult.length() < store.getMinimumResponseLength() ) { final int length = originalResult == null ? 0 : originalResult.length(); out.println( probeErr( store, "empty or short response [" + length + "] when checking which products are in stock", responseCode, probeUrl ) ); return UNKNOWN; } // chop of tail of response so we will not be confused by markers about other books. final String trimmedResult = trimEndMarkers( store, originalResult ); // compute net effects of all the clues, no output, just calculation // as side effect may display clues and hits. TallyClue effects analyses REPORTING_LEVEL. return tallyClueEffects( store, product, probeUrl, responseCode, originalResult, trimmedResult ); } catch ( MalformedURLException e ) { // trouble in new URL near the top out.println( probeErr( store, "malformed url in checking which products are in stock", -1, probeUrl ) ); return NOTCARRIED; } } catch ( Exception e ) { out.print( "Fatal error" ); e.printStackTrace( err ); out.print( e.getMessage() ); System.exit( 2 ); return UNKNOWN; } }// /method // /methods }