/* * [LinkInfo.java] * * Summary: describes one link, with multiple from URLs. * * Copyright: (c) 2008-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.8+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2008-07-30 initial version, created by refactoring * 1.1 2011-11-22 store numeric status, but not statusWording */ package com.mindprod.brokenlinks; import com.mindprod.common18.EIO; import com.mindprod.common18.ST; import com.mindprod.fastcat.FastCat; import org.jetbrains.annotations.NotNull; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.GregorianCalendar; import java.util.List; import static java.lang.System.*; /** * describes one link, with multiple from URLs. * * @author Roedy Green, Canadian Mind Products * @version 1.1 2011-11-22 store numeric status, but not statusWording * @since 2008-07-30 */ class LinkInfo implements Comparable { // we don't have a SerialVersionID since we use DataOutputStream. /** * initial capacity for number of froms to each link */ private static final int FROMS_CAPACITY = 5; /** * max capacity for number of froms to each link, after that we discard froms. */ private static final int MAX_FROMS = 10; /** * mask for: ISO YYYY-MM-DD */ private static final SimpleDateFormat SDF = new SimpleDateFormat( "yyyy-MM-dd" ); /** * time this program was run. Same timestamp used for duration of run */ private static long now = System.currentTimeMillis(); /** * all the from URLs that point to this link, kept unique and sorted. */ private final ArrayList froms; /** * should we update badTimestamp on a fail, or false, wait for a later pass to determine it. */ private transient boolean shouldWeUpdateBadTimestamp; private StatusKind statusKind; /** * true if should use SNI SSL problems. false for non-SSL or non-SNI SSL **/ private boolean useSNI; /** * url the link points to, make be broken */ private String to; /** * was this URL permanently redirected? * Actually final, but compiler does not know that. */ private boolean isPermanentlyRedirected; /** * was this URL temporarily redirected? * Actually final, but compiler does not know that. */ private boolean isTemporarilyRedirected; /** * 0=use GET 1=use HEAD 2=use HEAD then GET, 3=use FETCH, 4=use File.exists() */ private int howProbe = 2; /** * status code eg. 200, not final, may change later. */ private int status; /** * when this link was last detected as bad */ private long badTimestamp; /** * when this link was last detected as good, Long.MIN_VALUE = never */ private long goodTimestamp; /** * no arg constructor, only for use in restore */ private LinkInfo() { this.froms = new ArrayList<>( FROMS_CAPACITY ); } /** * constructor * * @param status e.g. 200 for ok * @param to to link URL * @param froms possibly empty list of froms to add * * @ param from from link URL */ LinkInfo( final int status, final String to, String... froms ) { // force reprobe for a brand new link. If fails, report right away. goodTimestamp = now - Config.suspectForgivenessMillis; badTimestamp = now; this.to = to; this.froms = new ArrayList<>( FROMS_CAPACITY ); addFroms( froms ); this.setStatus( status ); } /** * read one link from DataInputStream, old format * * @param d stream to read from. * * @return Broken record read. * @throws java.io.IOException if trouble reading. */ static LinkInfo oldRead( DataInputStream d ) throws IOException { // this is not serialised // allocate a new LinkInfo object and fill in the fields. LinkInfo b = new LinkInfo(); b.to = d.readUTF(); b.setStatus( d.readInt() ); // will set b.statusKind b.isPermanentlyRedirected and b.isTemporarilyRedirected // override the timestamps setStatus just computed. b.goodTimestamp = d.readLong(); b.badTimestamp = d.readLong(); b.howProbe = d.readInt(); b.useSNI = false; // we don't restore the "from" history. Fresh data will soon come from Xenu. // or updateBadTimestamp return b; } /** * read one link from DataInputStream * * @param d stream to read from. * * @return Broken record read. * @throws java.io.IOException if trouble reading. */ static LinkInfo read( DataInputStream d ) throws IOException { // this is not serialised // allocate a new LinkInfo object and fill in the fields. LinkInfo b = new LinkInfo(); b.to = d.readUTF(); b.setStatus( d.readInt() ); // will set b.statusKind b.isPermanentlyRedirected and b.isTemporarilyRedirected // override the timestamps setStatus just computed. b.goodTimestamp = d.readLong(); b.badTimestamp = d.readLong(); b.howProbe = d.readInt(); b.useSNI = d.readBoolean(); // we don't restore the "from" history. Fresh data will soon come from Xenu. // or updateBadTimestamp return b; } /** * update now clock to use in timestamping transactions. */ static void setTime( long timeStamp ) { now = timeStamp; } /** * get timestamp as a localised ISO date YYYY-MM-DD * * @param timeStamp timestamp to display * * @return timestamp in yyyy-MM-DD format. */ private static String toISoDate( long timeStamp ) { GregorianCalendar g = new GregorianCalendar(); g.setTimeInMillis( timeStamp ); SDF.setCalendar( g ); return SDF.format( g.getTime() ); } /** * add URL of a from, or group of froms. * * @param froms list or array of froms to add */ void addFroms( final String... froms ) { for ( String from : froms ) { if ( this.froms.size() >= MAX_FROMS ) { break; // discard excess froms. } // get rid of leading file:/ or file:/// from = from.trim(); from = ST.chopLeadingString( from, Config.localWebsiteURL ); from = from.intern(); if ( !this.froms.contains( from ) ) { this.froms.add( from ); } } } /** * Get all the from URLs associated with this link. * * @return unmodifiable list of URLs, possibly empty. */ List getFroms() { return Collections.unmodifiableList( froms ); } /** * Find out how site/url can be probed. * * @return 0=use GET 1=use HEAD 2=use HEAD then GET, 3=use FETCH. */ int getHowProbe() { return howProbe; } /** * @param howProbe 0=use GET 1=use HEAD 2=use HEAD then GET, 3=use FETCH. */ void setHowProbe( final int howProbe ) { this.howProbe = howProbe; } /** * get the most recent http return code status for this link * * @return http code */ int getStatus() { return status; } /** * set the new status for this link, as side effect sets isPermanentlyRedirected, isTemporarilyRedirected, * goodTimestamp, badTimestamp. We don't store the statusMessage (synthesised or actual) or the StatusKind. * * @param status e.g. 200 */ void setStatus( final int status ) { this.status = status; // keep the most recent information. final StatusKind statusKind = StatusKind.categoriseStatus( status ); this.statusKind = statusKind; // we record status but not StatusKind in history file. switch ( statusKind ) { case GOOD: if ( now > goodTimestamp ) { goodTimestamp = now; } isPermanentlyRedirected = false; isTemporarilyRedirected = false; break; case PERM_REDIRECT: if ( now > badTimestamp ) { badTimestamp = now; } isPermanentlyRedirected = true; isTemporarilyRedirected = false; break; case TEMP_REDIRECT: if ( now > badTimestamp ) { badTimestamp = now; } isPermanentlyRedirected = false; isTemporarilyRedirected = true; break; case BAD: setUseSNI( !useSNI() ); // try changing useSNI next time if ( now > badTimestamp && shouldWeUpdateBadTimestamp ) { badTimestamp = now; } isPermanentlyRedirected = false; isTemporarilyRedirected = false; break; case IGNORE: isPermanentlyRedirected = false; isTemporarilyRedirected = false; break; case UNKNOWN: default: err.println( "\n<><>Warning<><> unknown status : " + getVerboseStatusMessage() + " when linking to " + to + ", assumed bad" ); if ( now > badTimestamp && shouldWeUpdateBadTimestamp ) { badTimestamp = now; } isPermanentlyRedirected = false; isTemporarilyRedirected = false; break; } } /** * get StatusKind, regenerated from status. * There is no setStatusKind. Happens as a side effect of setStatus. * * @return e.g. StatusKind.IGNORE StatusKind.GOOD */ StatusKind getStatusKind() { return statusKind; } /** * get status message from most recent probe, either by Xenu or reprobe * * @return e.g. "ok" */ String getTerseStatusMessage() { return StatusKind.terseStatusMessage( this.status ); } /** * Get the to URL * * @return to URL */ String getTo() { return to; } /** * get status message from most recent probe, either by Xenu or reprobe * * @return e.g. "ok" */ String getVerboseStatusMessage() { return StatusKind.verboseStatusMessage( this.status, null ); } /** * get status message from most recent probe, either by Xenu or reprobe * * @param actualStatusMessage actual status message from server, usually non-standard wording * * @return e.g. "ok" */ String getVerboseStatusMessage( final String actualStatusMessage ) { return StatusKind.verboseStatusMessage( this.status, actualStatusMessage ); } /** * return true if this link is no longer being used somewhere on the website. * * @return true if link is no longer being used on website. */ boolean isDead() { return froms.size() == 0; // we don't count IGNORE as dead. } /** * Is this a local link? * return true if this link is local, i.e. one to the local disk-based website mirror. * * @return true if this is a link to the local hard disk */ private boolean isLocal() { return to.startsWith( Config.localWebsiteURL ); } /** * return true if this link is in trouble, i.e. most recently failed a test, * usually by Xenu, but could also be by reprobe. * * @return true if link broken as of the the most recent probe by Xenu or reprobe. */ boolean isNowBroken() { return badTimestamp >= goodTimestamp && statusKind != StatusKind.IGNORE; } /** * Was this URL permanently redirected? * * @return true if this URL has been permanently redirected. */ boolean isPermanentlyRedirected() { return isPermanentlyRedirected; } /** * return true if this link is considered seriously broken in need of repair. * If it was good recently, (within brokenForgivenessMillis) it does not count as broken. * i.e. links that need to be reprobed, or links have been dead after several days. * redirected links don't count as solidly broken. * * @return true if link broken. */ boolean isSolidlyBroken() { // do we consider this link currently broken? if ( isPermanentlyRedirected || isTemporarilyRedirected || statusKind == StatusKind.IGNORE ) { return false; } else { if ( isLocal() ) { return badTimestamp >= goodTimestamp; } else { return now > goodTimestamp + Config.brokenForgivenessMillis; } } } /** * return true if this link is considered suspect, might be soon in need of repair. * If it was good very recently, (within suspectForgivenessMillis), it does not count as broken. * * @return true if link suspect. Redirected are not considered suspect. Ignored are not considered suspect. */ boolean isSuspect() { if ( isPermanentlyRedirected || isTemporarilyRedirected || statusKind == StatusKind.IGNORE ) { return false; } else { // do we consider this link currently broken? if ( isLocal() ) { return badTimestamp >= goodTimestamp;// no forgiveness for "temporarily" broken local links,s } else { return now > goodTimestamp + Config.suspectForgivenessMillis; } } } /** * was this link temporarily redirected * * @return true if temporarily redirected */ boolean isTemporarilyRedirected() { return isTemporarilyRedirected; } /** * should this URl be probed with SNI * * @param useSNI true if should use SNI */ void setUseSNI( boolean useSNI ) { this.useSNI = useSNI; } /** * sort the froms into alphabetical order */ void sort() { if ( this.froms.size() > 1 ) { Collections.sort( this.froms ); } } /** * probe this URL with SNI turned on. * * @return false for non-SSL, or SSL without SNI, true for SSL with SNI. */ boolean useSNI() { return useSNI; } /** * write out one link to a DataOutputStream * * @param d stream to write to. * * @throws java.io.IOException if trouble writing. */ void write( final DataOutputStream d ) throws IOException { // this is not serialised. // put out to first so will have for error messages on read. d.writeUTF( to ); d.writeInt( status ); // don't write out statusKind, isPermanentlyRedirected or isTemporarilyRedirected Regenerate on read. d.writeLong( goodTimestamp ); d.writeLong( badTimestamp ); d.writeInt( howProbe ); d.writeBoolean( useSNI ); // we don't need to save the from history // or updateBadTimestamp } /** * Sort by statusMessage with to link as tie-breaker. * Defines default the sort order for LinkInfo Objects. * Compare this LinkInfo with another LinkInfo. * Compares statusMessage then to. * Informally, returns (this-other) or +ve if this is more positive than other. * * @param other other LinkInfo to compare with this one * * @return +ve if this>other, 0 if this==other, -ve if this<other */ public final int compareTo( @NotNull LinkInfo other ) { int diff = this.status - other.status; if ( diff != 0 ) { return diff; } return this.to.compareToIgnoreCase( other.to ); } /** * set whether we should update badTimestamp on fail * * @param shouldWeUpdateBadTimestamp true if should update BadTimestamp */ public void setShouldWeUpdateBadTimestamp( final boolean shouldWeUpdateBadTimestamp ) { this.shouldWeUpdateBadTimestamp = shouldWeUpdateBadTimestamp; } /** * Human readable display * * @return contents of Broken object */ public String toString() { final FastCat sb = new FastCat( 9 + froms.size() * 2 ); sb.append( " Xenu: " ); sb.append( getVerboseStatusMessage() ); sb.append( "\n previously good: " ); sb.append( toISoDate( goodTimestamp ) ); sb.append( " previously bad: " ); sb.append( toISoDate( badTimestamp ) ); sb.append( "\n link to: " ); try { URL temp = new URL( to ); if ( temp.getProtocol().equals( "file" ) ) { sb.append( EIO.getCanOrAbsPath( new File( temp.getFile().substring( 1 ) ) ) ); } else { sb.append( to ); } } catch ( MalformedURLException e ) { sb.append( "malformed " ); sb.append( to ); } for ( String from : froms ) { sb.append( "\n from: " ); sb.append( from ); } return sb.toString(); } }