/*
* [PadSites.java]
*
* Summary: enum for various padsite csv collections. Used by DetectDupPadSites.
*
* Copyright: (c) 2016-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2016-08-06 initial version
*/
package com.mindprod.submitter;
import com.mindprod.common18.Build;
import com.mindprod.common18.Misc;
import com.mindprod.common18.ST;
import com.mindprod.csv.CSVReader;
import com.mindprod.entities.DeEntify;
import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashSet;
import static java.lang.System.*;
/**
* enum for various padsite csv collections. Used by DetectDupPadSites.
*
* Used to dedup both on the site name (condensed: lowercased, dashes and dots removed) and on the home domain.
* Designed primarily for Roedy's use to validate new sites.
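* <p>
* A minimal sketch of how a driver such as DetectDupPadSites might use this
* enum; the particular pairings below are illustrative assumptions, not
* necessarily the checks DetectDupPadSites actually performs:
* <pre>{@code
* for ( PadSites site : PadSites.values() )
*     {
*     site.load();
*     }
* PadSites.findDupsInNames( PadSites.CANDIDATES, PadSites.DEAD );
* PadSites.findDupsInHomes( PadSites.CANDIDATES, PadSites.DEAD );
* PadSites.mustContain( PadSites.NOHASSLE, PadSites.ALLSITES );
* if ( PadSites.errorCount() > 0 )
*     {
*     System.exit( 1 );
*     }
* }</pre>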
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2016-08-06 initial version
* @see DetectDupPadSites
* @since 2016-08-06
*/
public enum PadSites
{
ALLSITES( false /* .list */, true /* mandatory */ ),
APPVISOR( true /* .csv */, true /* mandatory */ ),
CANDIDATES( true /* .csv */, false /* optional */ ),
DEAD( true /* .csv */, true /* mandatory */ ),
HASSLE( true /* .csv */, true /* mandatory */ ),
NEWSITES( false /* .list */, true /* mandatory */ ),
NOHASSLE( true /* .csv */, true /* mandatory */ ),
POSSAPPVISOR( true /* .csv */, false /* optional */ );
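// backing files, derived from the constant name and the csv/.list flag:
// allsites.list, appvisor.csv, candidates.csv, dead.csv,
// hassle.csv, newsites.list, nohassle.csv, possappvisor.csv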
/**
 * the collections backed by multi-column csv files
 */
static final EnumSet<PadSites> CSVS = EnumSet.of( APPVISOR, CANDIDATES, DEAD, HASSLE, NOHASSLE, POSSAPPVISOR );
/**
 * home domains exempt from duplicate-home checking, since many different sites legitimately share them
 */
private static final HashSet<String> HOME_EXCEPTIONS = new HashSet<>( Arrays.asList(
"appvisor.com",
"cnet.com",
"download-lair.com",
"hs-lab.com.ua",
"softpedia.com",
"softlist.ws",
"z-down.com" ) );
/**
* count of errors found
*/
private static int errors = 0;
/**
* true if the backing file for this must exist
*/
private final boolean isMandatory;
/**
 * true if the backing file is a multi-column csv, false if a one-column .list
 */
private final boolean isMultiColumn;
/**
 * condensed site names collected so far from the backing file
 */
private final HashSet<String> names = new HashSet<>( 1000 );
/**
 * condensed home domains collected so far from the backing file
 */
private final HashSet<String> homes = new HashSet<>( 1000 );
/**
* constructor
*
* @param isMultiColumn true if the backing file is a csv, false if a one-column *.list
* @param isMandatory   true if backing file is mandatory
*/
PadSites( final boolean isMultiColumn, final boolean isMandatory )
{
this.isMultiColumn = isMultiColumn;
this.isMandatory = isMandatory;
}
/**
* how many errors found since the enum collections were loaded
*
* @return count of errors found
*/
static int errorCount()
{
return errors;
}
/**
* detect any home dups in these two PadSite lists
*
* @param a first list
* @param b second list
*/
static void findDupsInHomes( final PadSites a, final PadSites b )
{
for ( String item : a.homes )
{
if ( b.homes.contains( item ) )
{
err.println( "Home " + item + " duplicated in " + a.name().toLowerCase() + " and " + b.name().toLowerCase() + "\n" );
}
}
}
/**
* detect any site name dups in these two PadSite lists
*
* @param a first list
* @param b second list
*/
static void findDupsInNames( final PadSites a, final PadSites b )
{
for ( String item : a.names )
{
if ( b.names.contains( item ) )
{
err.println( "Sitename " + item + " duplicated in " + a.name().toLowerCase() + " and " + b.name().toLowerCase() + "\n" );
}
}
}
/**
* Ensure all elements of one set are contained in another.
*
* @param contained the small list that must be contained in the big list
* @param container big list that must contain the small list
*/
static void mustContain( final PadSites contained, final PadSites container )
{
contained.names.stream().filter( item -> !container.names.contains( item ) ).forEach( item ->
{
err.println( "Sitename " + item + " in " + contained.name().toLowerCase() + " but not " + container.name().toLowerCase() + "\n" );
} );
}
/**
* load the names and homes HashSets from the backing csv or .list file.
*/
void load()
{
try
{
final File file = new File( Build.MINDPROD_SOURCE + "/submitter/" + name().toLowerCase() + ( isMultiColumn ? ".csv" : ".list" ) );
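// e.g. resolves to .../submitter/dead.csv or .../submitter/newsites.list under the source tree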
if ( !file.exists() )
{
if ( this.isMandatory() )
{
err.println( "missing file " + file );
System.exit( 1 );
}
else
{
return; /* empty file */
}
}
final CSVReader r = new CSVReader( new FileReader( file ) );
try
{
// detect dups within each PadSite list.
while ( true )
{
// get padsite name
final String siteName = r.get();
if ( siteName == null )
{
err.println( "null sitename in " + file + " at line " + r.lineCount() + "\n" );
break;
}
String condensedSiteName = DeEntify.stripHTMLTags( siteName ).toLowerCase();
condensedSiteName = condensedSiteName.replace( "-", "" );
condensedSiteName = condensedSiteName.replace( ".", "" );
if ( names.contains( condensedSiteName ) )
{
errors++;
err.println( "Error: duplicate siteName " + siteName +
" in " + file + " (case-sensitive) at line " + r.lineCount() + "\n" );
}
else
{
names.add( condensedSiteName );
}
// get padsite home domain
if ( isMultiColumn )
{
final String home = r.get();
if ( home == null )
{
err.println( "missing home url in " + file + " at line " + r.lineCount() + "\n" );
break;
}
final String condensedHome = Misc.getDomain( new URL( home ) ).toLowerCase();
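// condensedHome is the bare domain, e.g. "mindprod.com" for "http://mindprod.com/jgloss/pad.html" (illustrative url; exact form depends on Misc.getDomain)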
if ( homes.contains( condensedHome ) && !HOME_EXCEPTIONS.contains( condensedHome ) )
{
errors++;
err.println( "Error: duplicate home " + home + " in " + file + " (case-sensitive) at line " + r.lineCount() + "\n" );
}
else
{
homes.add( condensedHome );
}
// check Site name and home URL are consistent
String protoSite = ST.chopLeadingString( ST.chopLeadingString( home.toLowerCase(), "http://" ), "https://" );
protoSite = protoSite.replace( "-", "" );
protoSite = protoSite.replace( ".", "" );
if ( !protoSite.contains( ST.trimTrailing( condensedSiteName, "dash" ) ) )
{
errors++;
err.println( "Error: site name " + siteName + " does not match home url " + home + " in " + file + " at line " + r.lineCount() + "\n" );
}
}
r.skipToNextLine();
}
}
catch ( EOFException e )
{
// normal termination: CSVReader signals end of file by throwing EOFException
}
finally
{
r.close();
}
}
catch ( IOException e )
{
err.println( "Error: reading " + name().toLowerCase() + " : " + e.getMessage() + "\n" );
}
}
/**
* return true if the backing file for this must exist
*
* @return true if file must exist
*/
public boolean isMandatory()
{
return isMandatory;
}
// /methods
}