/*
 * [RedirectProbe.java]
 *
 * Summary: For thread to probe one site URL to see how it redirects.
 *
 * Copyright: (c) 2012-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2012-02-28 initial version
 */
package com.mindprod.brokenlinks;

import com.mindprod.csv.CSVWriter;
import com.mindprod.http.Chase;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.concurrent.Semaphore;

import static java.lang.System.*;

/**
 * For thread to probe one site URL to see how it redirects.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2012-02-26 initial version
 * @since 2012-02-28
 */
class RedirectProbe implements Runnable
    {
    /**
     * debugging turns off threads
     */
    static final boolean DEBUGGING = false;

    /**
     * number of probes to queue up ready to be executed
     */
    static final int TASK_POOL_SIZE = 32;

    /**
     * number of threads in pool to do store probes
     */
    static final int THREAD_POOL_SIZE = 30;

    /**
     * throttles how many probes in queue or executing.
     * A permit is taken in the constructor and handed back at the end of run().
     */
    private static final Semaphore semaphore = new Semaphore( TASK_POOL_SIZE );

    /**
     * URL to probe
     */
    final URL originalURL;

    /**
     * CSVWriter where exported redirects are logged. Also used as the lock guarding each exported row.
     */
    private final CSVWriter w;

    /**
     * LinkInfo where info about this link is stored, where we record status of link
     */
    private final LinkInfo b;

    /**
     * true if the constructor actually obtained a semaphore permit, so run() knows whether it owes one back.
     * Stays false when DEBUGGING, or when the constructing thread was interrupted while waiting.
     */
    private final boolean permitHeld;

    /**
     * to probe one url at one site, constructs packet later run() executed.
     * Blocks until a throttle permit is available (unless DEBUGGING). If the calling thread is interrupted
     * while waiting, the probe is still constructed, without a permit, and the thread's interrupt status
     * is restored so the caller can notice.
     *
     * @param originalURL URL to probe
     * @param b           LinkInfo where info about this link is stored, where we record status of link
     * @param w           CSVWriter where to log this transaction.
     */
    RedirectProbe( final URL originalURL, final LinkInfo b, final CSVWriter w )
        {
        this.originalURL = originalURL;
        this.b = b;
        this.w = w;
        // the pool will not block when full, so we do it manually.
        boolean acquired = false;
        try
            {
            if ( !DEBUGGING )
                {
                // blocks until a permit is free
                semaphore.acquire();
                acquired = true;
                }
            }
        catch ( InterruptedException e )
            {
            // We hold no permit. Remember that so run() will not release one it never took,
            // and put the interrupt flag back rather than swallowing it.
            Thread.currentThread().interrupt();
            }
        this.permitHeld = acquired;
        }

    /**
     * export the redirect
     *
     * @param w                CSVWriter to export to
     * @param originalLocation original URL as String
     * @param newLocation      redirected URL as String
     * @param b                LinkInfo object where we track this URL
     */
    private static void exportRedirect( CSVWriter w, String originalLocation, String newLocation, LinkInfo b )
        {
        // E C H O on console in one atomic burp.
        out.println( "    redirect " + originalLocation + "\n         --> " + newLocation + "\n" );
        // W R I T E
        // on one line the old and new URLs and the pages where they occur.  If occurs multiple times on a page,
        // will be mentioned only once. Only redirects we could determine, others are left,
        // hopefully fixed on subsequent runs.
        synchronized ( w )
            {
            w.put( originalLocation );
            w.put( newLocation );
            // b already locked by caller
            for ( String from : b.getFroms() )
                {
                w.put( Config.toFileWebsitePrefix + from );
                }
            w.nl();
            }
        }

    /**
     * Render a URL as a String, dropping an explicit port that merely restates the protocol default,
     * e.g. http://example.com:80/x becomes http://example.com/x.
     * Only http and https URLs are touched. A non-default port (e.g. https on :80) is significant and is kept.
     * The port is located via the URL's authority, so a ":80" appearing in userinfo, an IPv6 literal,
     * the path or the query is never mistaken for it.
     *
     * @param url URL to render
     *
     * @return external form of the URL without a redundant default port
     */
    private static String stripDefaultPort( final URL url )
        {
        final String external = url.toString();
        final String protocol = url.getProtocol();
        final boolean isWeb = "http".equalsIgnoreCase( protocol ) || "https".equalsIgnoreCase( protocol );
        final int port = url.getPort();
        if ( !isWeb || port == -1 || port != url.getDefaultPort() )
            {
            // no explicit port, or the port carries meaning
            return external;
            }
        final String authority = url.getAuthority();
        if ( authority == null )
            {
            return external;
            }
        // an explicit port is whatever follows the last colon of the authority. Covers oddities like :080 too.
        final int colonInAuthority = authority.lastIndexOf( ':' );
        final int authorityStart = external.indexOf( "//" ) + 2;
        if ( colonInAuthority < 0 || authorityStart < 2 || !external.startsWith( authority, authorityStart ) )
            {
            // cannot locate the port reliably, leave the URL untouched rather than risk mangling it.
            return external;
            }
        final int portStart = authorityStart + colonInAuthority;
        final int portEnd = authorityStart + authority.length();
        return external.substring( 0, portStart ) + external.substring( portEnd );
        }

    /**
     * method to run on separate thread. Probes to chase redirect.
     * If the link is still redirecting and the target can be worked out, exports the old/new pair.
     * Always hands back the throttle permit taken by the constructor, if one was taken.
     */
    @Override
    public void run()
        {
        try
            {
            String newLocation;
            final String originalLocation = originalURL.toString();
            // We find the new URL from the Location field of response header.
            // We just resolve one leg of a redirect chain.  Otherwise could get confused by mixture of temp & permanent
            final Chase chase = new Chase();
            final String relativeNewLocation = chase.send( originalURL );
            final int status = chase.getResponseCode();   // e.g. 301 still redirecting
            final StatusKind statusKind = StatusKind.categoriseStatus( status );
            switch ( statusKind )
                {
                case TEMP_REDIRECT:
                case PERM_REDIRECT:
                    break;
                case BAD:
                case GOOD:
                case IGNORE:
                case UNKNOWN:
                    // not redirected anymore
                    return;
                }
            // we don't lock b until the Chase is finished. Rest should be relatively quick.
            synchronized ( b )
                {
                // we don't change our records. We just export old/new and let user decide if should apply change.
                if ( relativeNewLocation == null )
                    {
                    err.println( "\n<><>Warning<><> redirect missing target (" + status + ")\n   " + b +
                                 "\n" );
                    return;
                    }
                else if ( relativeNewLocation.startsWith( "http://" ) || relativeNewLocation.startsWith( "https://" ) )
                    {
                    newLocation = relativeNewLocation; // replace old completely with new
                    if ( newLocation.equals( originalLocation ) )
                        {
                        err.println( "\n<><>Warning<><>  following link redirected to itself, " +
                                     "possible partial redirect, " +
                                     "(" + status + ")\n  " + b + "\n" );
                        return; //ignore it
                        }
                    // otherwise newLocation is the correct redirect
                    }
                else
                    {
                    // merge old and new to make newLocation fully qualified.
                    try
                        {
                        final URL newLocationURL = new URL( originalURL, relativeNewLocation );
                        // chop out nugatory default port, e.g. :80 on http.
                        newLocation = stripDefaultPort( newLocationURL );
                        // at this point newLocation is the correct redirect
                        // could also have used URI.resolve.
                        }
                    catch ( MalformedURLException e )
                        {
                        // merge failed
                        err.println( "\n<><>Warning<><> redirect [" + originalLocation + "] + [" + relativeNewLocation
                                     + "] failed to merge (" + status + ")\n  " + b + "\n" );
                        return; // ignore it
                        }
                    if ( newLocation.equals( originalLocation ) )
                        {
                        err.println( "\n<><>Warning<><> following link redirected to itself (" + status + ")\n  " + b
                                     + "\n" );
                        return; //ignore it
                        }
                    }
                // passed all tests. Keep the redirect.
                exportRedirect( w, originalLocation, newLocation, b );
                }
            }
        finally
            {
            // only give back what the constructor actually took, otherwise the throttle would grow.
            if ( permitHeld )
                {
                semaphore.release();
                }
            }
        }
    }