/*
* [Scrape.java]
*
* Summary: Screenscrape information about sands from various websites.
*
* Copyright: (c) 2014-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.8+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2014-08-14 initial version
*/
package com.mindprod.sanddepth;
import com.mindprod.common18.EIO;
import com.mindprod.common18.ST;
import com.mindprod.fastcat.FastCat;
import com.mindprod.filetransfer.FileTransfer;
import com.mindprod.hunkio.HunkIO;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DecimalFormat;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.lang.System.*;
/**
* Screenscrape information about sands from various websites.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2014-08-14 initial version
* @since 2014-08-14
*/
public class Scrape
{
// declarations
/**
* Formatter with exactly one decimal place; extract uses it for the low and high values.
*/
private static final DecimalFormat DF1 = new DecimalFormat( "0.0" );
/**
* Formatter with exactly two decimal places; extract uses it for the computed sg value.
*/
private static final DecimalFormat DF2 = new DecimalFormat( "0.00" );
// /declarations
// methods
/**
* Get info about CaribSea Arag-alive sands.
* <p>
* Downloads the product page to E:/temp/aragalive.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void aragalive() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_marine_substrates_aragalive.html
// preloaded to E:/temp/aragalive.html
out.println( "aragalive" );
final String url = "http://www.caribsea.com/caribsea/itempage_marine_substrates_aragalive.html";
final String type = "Marine Arag-Alive";
final File page = new File( "E:/temp/aragalive.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea cichlid sands.
* <p>
* Downloads the product page to E:/temp/cichlid.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void cichlid() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_freshwater_substrates_africancichlidmix.html
// preloaded to E:/temp/cichlid.html
out.println( "cichlid" );
final String url = "http://www.caribsea.com/caribsea/itempage_freshwater_substrates_africancichlidmix.html";
final String type = "African Cichlid Mix";
final File page = new File( "E:/temp/cichlid.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea coraline sands.
* <p>
* Downloads the product page to E:/temp/coraline.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void coraline() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_marine_substrates_coraline.html
// preloaded to E:/temp/coraline.html
out.println( "coraline" );
final String url = "http://www.caribsea.com/caribsea/itempage_marine_substrates_coraline.html";
final String type = "Marine, Coraline";
final File page = new File( "E:/temp/coraline.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea dry aragonite sands.
* <p>
* Downloads the product page to E:/temp/dryaragonite.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void dryAragonite() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_marine_substrates_aragonite.html
// preloaded to E:/temp/dryaragonite.html
out.println( "dryAragonite" );
final String url = "http://www.caribsea.com/caribsea/itempage_marine_substrates_aragonite.html";
final String type = "Marine, dry Aragonite";
final File page = new File( "E:/temp/dryaragonite.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea eco sands.
* <p>
* Downloads the product page to E:/temp/eco.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void eco() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_freshwater_substrates_Ecoplanted.html
// preloaded to E:/temp/eco.html
out.println( "eco" );
final String url = "http://www.caribsea.com/caribsea/itempage_freshwater_substrates_Ecoplanted.html";
final String type = "Eco Complete Planted";
final File page = new File( "E:/temp/eco.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Scan a downloaded product page and, for every regex match, print one line of text
* to System.out and fetch the matching product image.
*
* @param url address the page came from. Not referenced anywhere in this method body.
* @param type product-line label inserted into each printed line.
* @param big entire text of the page to scan.
*
* @throws MalformedURLException declared because fetchImage, called for each match, declares it.
*/
private static void extract( final String url, final String type, final String big ) throws MalformedURLException
{
// NOTE(review): the pattern below looks damaged in this listing. One string literal is split
// across two lines, and only four capture groups are visible although groups 1 to 5 are read
// further down. Presumably HTML markup inside the literals was lost; confirm against the original.
final Pattern pattern = Pattern.compile( "" +
"
\\s*" +
"([ A-Za-z0-9\\-_]+)\\s*" +
"\\(([0-9\\.]+)\\s*\\-\\s*([0-9\\.]+)mm\\s*\\|\\s*([0-9]+)#/ft3\\)" );
final Matcher m = pattern.matcher( big );
while ( m.find() )
{
// group 1 becomes the stem of the image file name.
final String lc = m.group( 1 );
final String remoteImageName = lc + ".png";
final String localImageName = tidyLocalImageName( remoteImageName );
// group 2 is the text printed after the type label.
String camel = m.group( 2 );
// remove duplicate of type
camel = ST.chopLeadingString( camel, "Eco-Complete™ Planted " );
// NOTE(review): the comment below speaks of a "tm entity", so these literals may originally
// have held an HTML entity rather than the single trademark character; confirm.
int place = camel.indexOf( "™" );
if ( place >= 0 )
{
// cut out the tm entity
camel = camel.substring( 0, place ) + camel.substring( place + "™".length() );
}
final double low = Double.parseDouble( m.group( 3 ) );
final double high = Double.parseDouble( m.group( 4 ) );
final double wt = Double.parseDouble( m.group( 5 ) );
// NOTE(review): presumably converts lb/ft3 (the pattern ends in #/ft3) to specific gravity.
// The usual factor is about 0.0160185; this constant is roughly 0.4% larger; verify.
final double sg = wt * 0.016_084_6d;
FastCat sb = new FastCat( 25 );
sb.append( "CaribSea, " );
sb.append( type, ", ", camel, " | " );
sb.append( "" + DF2.format( sg ) + " | " );
sb.append( "" + DF1.format( low ) + " - " + DF1.format( high ) + " | " );
sb.append( " | " );
// NOTE(review): the last string literal of the next statement is also split across two lines
// in this listing; presumably the same damage as in the pattern; confirm.
sb.append( "", wouldFloat( sg ), " |
" );
out.println( sb.toString() );
fetchImage( remoteImageName, localImageName );
}
}
/**
* Download the image from Carib and save it on the local hard disk in E:\mindprod\image\sand,
* unless a file of that name is already there.
*
* @param remoteImageName name at Carib with .png, no dir
* @param localImageName name on Mindprod with .png, no dir
*
* @throws MalformedURLException if the remote image URL cannot be formed.
*/
private static void fetchImage( final String remoteImageName, final String localImageName ) throws MalformedURLException
{
final File target = new File( "E:/mindprod/image/sand/" + localImageName );
if ( target.exists() )
{
// nothing to fetch; just say so.
out.println( EIO.getCanOrAbsPath( target ) + " already exists" );
return;
}
final URL source = new URL( "http://caribsea.com/images/substrate_" + remoteImageName );
if ( !new FileTransfer().download( source, target, false ) )
{
out.println( "download of " + source + " failed." );
}
}
/**
* Get info about CaribSea floramax sands.
* <p>
* Downloads the product page to E:/temp/floramax.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void floramax() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_freshwater_substrates_floramax.html
// preloaded to E:/temp/floramax.html
out.println( "floramax" );
final String url = "http://www.caribsea.com/caribsea/itempage_freshwater_substrates_floramax.html";
final String type = "Floramax";
final File page = new File( "E:/temp/floramax.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea instant aquarium sands.
* <p>
* Downloads the product page to E:/temp/instant.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void instant() throws IOException
{
out.println( "instant" );
// http://www.caribsea.com/caribsea/itempage_freshwater_substrates_instantaquarium.html
// preloaded into E:/temp/instant.html
final String url = "http://www.caribsea.com/caribsea/itempage_freshwater_substrates_instantaquarium.html";
final String type = "Instant Aquarium";
final File page = new File( "E:/temp/instant.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea ocean direct sands.
* <p>
* Downloads the product page to E:/temp/oceandirect.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void oceanDirect() throws IOException
{
// http://www.caribsea.com/caribsea/itempage_marine_substrates_ocean%20direct.html
// preloaded to E:/temp/oceandirect.html
out.println( "oceanDirect" );
final String url = "http://www.caribsea.com/caribsea/itempage_marine_substrates_ocean%20direct.html";
final String type = "Marine, Ocean Direct";
final File page = new File( "E:/temp/oceandirect.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Get info about CaribSea super natural sands.
* <p>
* Downloads the product page to E:/temp/supernatural.html, reads that file back as UTF-8
* and hands the text to extract.
*
* @throws IOException if the URL is malformed; presumably also when the saved page cannot be read.
*/
private static void superNatural() throws IOException
{
//http://www.caribsea.com/caribsea/itempage_freshwater_substrates_supernaturals.html
// preloaded to E:/temp/supernatural.html
out.println( "supernatural" );
final String url = "http://www.caribsea.com/caribsea/itempage_freshwater_substrates_supernaturals.html";
final String type = "Super Natural";
final File page = new File( "E:/temp/supernatural.html" );
final boolean success = new FileTransfer().download( new URL( url ), page, false );
if ( !success )
{
// report it the same way fetchImage does, then carry on: a previously saved copy may still be readable.
out.println( "download of " + url + " failed." );
}
final String big = HunkIO.readEntireFile( page, EIO.UTF8 );
extract( url, type, big );
}// /method
/**
* Build the local image name: lower-case the remote name, then pass it through
* ST.stripNaughtyCharacters with "-_ " as the character list.
*
* @param imageName eg. This_sand.png -> thissand.png
*
* @return tidied name
*/
private static String tidyLocalImageName( final String imageName )
{
// Locale.ROOT keeps the lower-casing independent of the machine's default locale,
// e.g. under a Turkish locale a plain toLowerCase() maps I to a dotless i.
return ST.stripNaughtyCharacters( imageName.toLowerCase( Locale.ROOT ), "-_ " );
}// /method
/**
* Produce a warning for a specific gravity at or below 1.0.
*
* @param sg specific gravity to test.
*
* @return the warning text when {@code sg <= 1.0}, otherwise the empty string.
*/
private static String wouldFloat( double sg )
{
return sg <= 1.0d ? "This sand would float. Perhaps it sinks after you let it soak." : "";
}// /method
/**
* Entry point: run every CaribSea page scraper, one after another.
*
* @param args not used
*
* @throws IOException propagated from whichever scraper fails first.
*/
public static void main( String[] args ) throws IOException
{
// marine and freshwater product pages, in the established order.
aragalive();
cichlid();
coraline();
dryAragonite();
eco();
floramax();
instant();
oceanDirect();
superNatural();
}// /method
// /methods
}