/*
* [ImportECS.java]
*
* Summary: Import all data about motherboards from the ECS Website.
*
* Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.7+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2011-03-30 initial version
*/
package com.mindprod.mother;
import com.mindprod.common17.BigDate;
import com.mindprod.csv.CSVReader;
import com.mindprod.htmlmacros.support.Configuration;
import com.mindprod.htmlmacros.support.ConfigurationForMindprod;
import com.mindprod.http.Get;
import com.mindprod.hunkio.HunkIO;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.lang.System.*;
/**
* Import all data about motherboards from the ECS Website.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2011-03-30 initial version
* @since 2011-03-30
*/
public class ImportECS extends ImportManufacturer
{
/**
* Index pages on the ECS USA website that together form the master list of ECS motherboards.
* Each entry differs in its TypeID and MenuID query parameters.
* fetchAllMBs extracts the MenuID from each URL (it must be followed by another
* parameter to be recognised) and terminates the program if none is found.
*/
private static final String[] INDEX_URLS = {
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model" +
".aspx?CategoryID=1&TypeID=1&MenuID=18&childid=M_7&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=22&MenuID=22&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model" +
".aspx?CategoryID=1&TypeID=32&MenuID=16&childid=M_7&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=35&MenuID=21&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=43&MenuID=20&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model" +
".aspx?CategoryID=1&TypeID=46&MenuID=17&childid=M_7&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model" +
".aspx?CategoryID=1&TypeID=65&MenuID=14&childid=M_7&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=68&MenuID=19&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model" +
".aspx?CategoryID=1&TypeID=72&MenuID=15&childid=M_7&LanID=0",
"http://www.ecsusa.com/ECSWebSite/Product/Product_Model" +
".aspx?CategoryID=1&TypeID=86&MenuID=103&childid=M_7&LanID=0",
// type 88 on tw site but not US site
};
/**
* mindprod configuration
*/
private static final Configuration configuration;
private static final File sourceDir;
/**
* looks for Audio channels
*/
private static final Pattern AUDIO_CHANNEL_FINDER = Pattern.compile(
"([\\.\\d]+)[\\- ]ch", Pattern.CASE_INSENSITIVE );
/**
* looks for info about the form factor
*/
private static final Pattern FORM_FACTOR_FINDER = Pattern.compile(
" ([\\p{Alnum}\\- ]+?)(?: Size| Form Factor)?[, \\(]*(\\d+)(?:mm)?[\\*x](\\d+)(?:mm)",
Pattern.CASE_INSENSITIVE );
/**
* looks for count of IDE ports
*/
private static final Pattern IDE_FINDER = Pattern.compile(
"(\\d{1,2}) x [E]*IDE", Pattern.CASE_INSENSITIVE );
/**
* looks for max ram capacity
*/
private static final Pattern MAX_GIG_FINDER = Pattern.compile(
"support up to (\\d{1,2})\\s*GB", Pattern.CASE_INSENSITIVE );
/**
* find links to mb pages.
*/
private static final Pattern MB_FINDER = Pattern.compile(
"title=\"(\\d+)\" [\\p{Print}&&[^>]]+>([\\p{Alnum}\\-/ ]+?)\\ \\;\\(V([\\p{Alnum}\\.]+)\\)",
Pattern.CASE_INSENSITIVE );
/**
* looks for info about RAM
*/
private static final Pattern MEMORY_TYPE_FINDER = Pattern.compile(
"(?:channel|pin) (DDR2|DDR3|DDR )", Pattern.CASE_INSENSITIVE );
/**
* find menu id in URL for index page
*/
private static final Pattern MENU_ID_FINDER = Pattern.compile( "&MenuID=(\\d+)&" );
/**
* looks for ram speed
*/
private static final Pattern RAM_SPEED_MHZ_FINDER = Pattern.compile(
"(?:)?DDR[23]*(?:)?\\s*(?:up to )?((\\s*/\\s*|\\d{3,4}\\(OC\\)|\\d{3,4}\\(XMP\\)|\\d{3," +
"4}O\\.C\\.|OC\\d{3,4}+|||\\d{3,4})+)",
Pattern.CASE_INSENSITIVE
);
private static final Pattern RAM_SPEED_MHZ_SPLITTER = Pattern.compile(
"\\s*/\\s*||", Pattern.CASE_INSENSITIVE );
/**
* looks for SATA2 ports capacity
*/
private static final Pattern SATA2_FINDER_1 = Pattern.compile(
"(\\d{1,2})[x ]*Serial ATA(?:II| 3Gb/s)?(?:device|connector)?", Pattern.CASE_INSENSITIVE );
/**
* looks for SATA2 ports capacity
*/
private static final Pattern SATA2_FINDER_2 = Pattern.compile(
"(\\d{1,2})[x ]*SATA 3Gb/s connectors", Pattern.CASE_INSENSITIVE );
/**
* looks for SATA3 ports capacity
*/
private static final Pattern SATA3_FINDER = Pattern.compile(
"(\\d{1,2})[x ]*Serial ATA 6.0Gb/s devices", Pattern.CASE_INSENSITIVE );
/**
* looks for socket
*/
private static final Pattern SOCKET_FINDER = Pattern.compile(
">socket\\s([\\+ \\p{Alnum}]+)[\\(<]", Pattern.CASE_INSENSITIVE );
/**
* looks for USB2Internal ports capacity
*/
private static final Pattern USB2_INTERNAL_FINDER = Pattern.compile(
"(\\d{1,2})[x ]*USB (?:2\\.0 )?headers", Pattern.CASE_INSENSITIVE );
/**
* looks for USB2Rear ports capacity
*/
private static final Pattern USB2_REAR_FINDER = Pattern.compile(
"(\\d{1,2})[x ]*USB (?:2\\.0 )?ports", Pattern.CASE_INSENSITIVE );
/**
* looks for USB3Internal ports capacity
*/
private static final Pattern USB3_INTERNAL_FINDER = Pattern.compile(
"(\\d{1,2})[x ]*USB 3\\.0 headers", Pattern.CASE_INSENSITIVE );
/**
* looks for USB3Rear ports capacity
*/
private static final Pattern USB3_REAR_FINDER = Pattern.compile(
"(\\d{1,2})[x ]*USB 3\\.0 ports", Pattern.CASE_INSENSITIVE );
/**
* looks for integrated Video
*/
private static final Pattern VIDEO_FINDER = Pattern.compile(
"GRAPHICS
º (?:|On Chip|Integrated|)" +
"*[\\( ]*(\\p{Alnum}+)",
Pattern.CASE_INSENSITIVE
);
/**
* looks for the word GRAPHICS; passed to validateVideo alongside the raw specs
*/
private static final Pattern VIDEO_VALIDATOR = Pattern.compile( "GRAPHICS" );
// class initialisation: create the mindprod configuration, then derive the source directory from it.
static
{
configuration = new ConfigurationForMindprod();
// directory against which relative files such as mother/ecs.csv are resolved
sourceDir = new File( configuration.getSourceDirWithSlashes() );
}
/**
 * Collect all motherboard specs from the ECS website.
 * <p>
 * For every index page in INDEX_URLS this finds each motherboard link, downloads that board's
 * spec page, saves the raw HTML under E:/mb/ecs/ and inserts a skeleton row into the mboards table.
 * A missing MenuID or a failure to fetch an index page terminates the program via System.exit;
 * a failure to fetch an individual motherboard page is reported on stderr and that board is skipped.
 * <p>
 * The insert statement and the database connection are released even when an exception
 * interrupts the import.
 *
 * @throws java.io.IOException   if urls malformed or I/O trouble
 * @throws java.sql.SQLException if the database cannot be reached or a row cannot be inserted
 */
public static void fetchAllMBs() throws IOException, SQLException
{
    manufacturer = Manufacturer.ECS;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try ( PreparedStatement inserter = conn.prepareStatement(
            "INSERT INTO mboards( manufacturer, model, manufacturerPartNo, revision, lastUpdated) "
            + "VALUES(?,?,?,?,?);" ) )
    {
        for ( String indexURL : INDEX_URLS )
        {
            // the MenuID of the index page becomes the prefix of every part number found on that page
            final Matcher m1 = MENU_ID_FINDER.matcher( indexURL );
            if ( !m1.find() )
            {
                err.println( " no menuID in " + indexURL );
                System.exit( 1 );
            }
            final String menuID = m1.group( 1 );

            final Get getIndex = new Get();
            final String indexContents = getIndex.send( new URL( indexURL ), Get.UTF8 );
            final int responseCode = getIndex.getResponseCode();
            if ( responseCode >= 300 || indexContents == null )
            {
                err.println( "response: " + responseCode );
                err.println( "Could not fetch a motherboard index ECS page " + indexURL );
                System.exit( 1 );
            }

            // each hit on the index page describes one motherboard: id, model name, revision
            final Matcher m2 = MB_FINDER.matcher( indexContents );
            while ( m2.find() )
            {
                manufacturerPartNo = menuID + "-" + m2.group( 1 );
                model = m2.group( 2 ).trim();
                revision = m2.group( 3 );

                final Get getMiningMB = new Get();
                final String specificURL = manufacturer.miningURL( manufacturerPartNo );
                final String mbSpecsContents = getMiningMB.send( new URL( specificURL ), Get.UTF8 );
                final int responseCode2 = getMiningMB.getResponseCode();
                if ( responseCode2 >= 300 || mbSpecsContents == null )
                {
                    err.println( "response: " + responseCode2 );
                    err.println( "Could not fetch specific ECS motherboard page " + model + " " + specificURL );
                    continue;
                }

                // '/' cannot appear in a file name, so it is stored as '$'; extractSpecs applies the same mapping
                HunkIO.writeEntireFile(
                        new File( "E:/mb/ecs/" + model.replace( '/', '$' ) + "_" + revision + ".html" ),
                        mbSpecsContents,
                        HunkIO.UTF8 );

                // record our findings in SQL
                out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                             " false, " + lastUpdated );
                inserter.setInt( 1, manufacturer.ordinal() );
                inserter.setString( 2, model );
                inserter.setString( 3, manufacturerPartNo );
                inserter.setString( 4, revision );
                inserter.setInt( 5, lastUpdated.ordinal() );
                inserter.executeUpdate();
            } // end find loop
        }
    }
    finally
    {
        // runs after the statement has been closed, on success and on failure alike
        conn.close();
    }
}
/**
 * Put the skeleton info into the DB.
 * <p>
 * Reads mother/ecs.csv under the configured source directory and inserts one mboards row per
 * CSV line, holding manufacturer, model, part number, revision and today's date.
 * The CSV reader signals the end of the file by throwing EOFException, which is the normal
 * way out of the read loop. The reader, the insert statement and the connection are released
 * whether the import finishes or fails part way.
 *
 * @throws IOException           if the CSV file cannot be opened or read
 * @throws java.sql.SQLException if cannot write to database
 */
public static void skeleton() throws IOException, SQLException
{
    manufacturer = Manufacturer.ECS;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try ( PreparedStatement inserter = conn.prepareStatement(
            "INSERT INTO mboards( manufacturer, model, manufacturerPartNo, revision, lastUpdated) "
            + "VALUES(?,?,?,?,?);" ) )
    {
        final CSVReader r =
                new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/ecs.csv" ) ) ) );
        try
        {
            while ( true )
            {
                // manufacturer, model, revision, mfr no, verified, last-update
                r.skip( 1 ); // mfr
                model = r.get();
                revision = r.get();
                manufacturerPartNo = r.get();
                r.skipToNextLine();
                // record our findings in SQL
                out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                             " false, " + lastUpdated );
                inserter.setInt( 1, manufacturer.ordinal() );
                inserter.setString( 2, model );
                inserter.setString( 3, manufacturerPartNo );
                inserter.setString( 4, revision );
                inserter.setInt( 5, lastUpdated.ordinal() );
                inserter.executeUpdate();
            }
        }
        catch ( EOFException expected )
        {
            // end of the CSV file: every row has been processed
        }
        finally
        {
            r.close();
        }
    }
    finally
    {
        // runs after the statement has been closed, on success and on failure alike
        conn.close();
    }
}
/**
 * Extract specs from previously downloaded raw specs.
 * <p>
 * For every row of mother/ecs.csv this loads the matching HTML file from E:/mb/ecs/
 * (named model_revision.html, with '/' in the model replaced by '$'), runs each field
 * extractor over it, dumps the results and updates the database.
 * The CSV reader signals the end of the file by throwing EOFException, which is the normal
 * way out of the read loop. The reader and the database are closed whether the run
 * finishes or fails part way, for example on a missing HTML file.
 *
 * @throws java.sql.SQLException if the database cannot be updated
 * @throws java.io.IOException   if the CSV file or a saved spec page cannot be read
 */
private static void extractSpecs() throws SQLException, IOException
{
    manufacturer = Manufacturer.ECS;
    lastUpdated = BigDate.localToday();
    initDatabase();
    try
    {
        final File dir = new File( "E:/mb/ecs/" );
        final CSVReader r =
                new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/ecs.csv" ) ) ) );
        try
        {
            while ( true )
            {
                r.skip( 1 ); // mfr
                model = r.get();
                revision = r.get();
                manufacturerPartNo = r.get();
                r.skipToNextLine();
                // same file naming as used when the page was downloaded
                final String rawMBSpecs = HunkIO.readEntireFile(
                        new File( dir, model.replace( '/', '$' ) + "_" + revision + ".html" ),
                        HunkIO.UTF8 );
                clearMBSpecs();
                extractAudioChannels( rawMBSpecs, new Pattern[] { AUDIO_CHANNEL_FINDER } );
                // NOTE(review): the meaning of the .1 argument is not visible in this file — confirm in ImportManufacturer
                extractFormFactor( rawMBSpecs, FORM_FACTOR_FINDER, .1 );
                extractIde( rawMBSpecs, new Pattern[] { IDE_FINDER } );
                extractMemoryType( rawMBSpecs, MEMORY_TYPE_FINDER );
                extractMaxGig( rawMBSpecs, MAX_GIG_FINDER );
                extractRamSpeedMHz( rawMBSpecs, RAM_SPEED_MHZ_FINDER, RAM_SPEED_MHZ_SPLITTER );
                extractSata2( rawMBSpecs, new Pattern[] { SATA2_FINDER_1, SATA2_FINDER_2 } );
                extractSata3( rawMBSpecs, new Pattern[] { SATA3_FINDER } );
                extractSocket( rawMBSpecs, new Pattern[] { SOCKET_FINDER } );
                extractUSB( rawMBSpecs,
                        new Pattern[ 0 ],
                        new Pattern[] { USB2_REAR_FINDER },
                        new Pattern[] { USB2_INTERNAL_FINDER },
                        new Pattern[ 0 ],
                        new Pattern[] { USB3_REAR_FINDER },
                        new Pattern[] { USB3_INTERNAL_FINDER } );
                extractVideo( rawMBSpecs, new Pattern[] { VIDEO_FINDER } );
                validateVideo( rawMBSpecs, VIDEO_VALIDATOR );
                // no extractWatts since no info available.
                dumpExtracts();
                updateMBFields();
            }
        }
        catch ( EOFException expected )
        {
            // end of the CSV file: report the tally
            out.println( incomplete + " incomplete records" );
        }
        finally
        {
            r.close();
        }
    }
    finally
    {
        closeDatabase();
    }
}
/**
 * Handle oddities, typos on website etc.
 * <p>
 * Currently applies a single manual correction: maxGig is forced to 8 for the ECS model IC41T-A.
 * No socket corrections are applied yet.
 * The statement and the connection are released even if the update fails.
 *
 * @throws SQLException if the database cannot be reached or the update fails
 */
private static void oddDucks() throws SQLException
{
    manufacturer = Manufacturer.ECS;
    lastUpdated = BigDate.localToday();
    conn = connect();
    // Parameterised so the clauses cannot run together, as the concatenated "maxGig=8WHERE" did.
    try ( PreparedStatement updater = conn.prepareStatement(
            "UPDATE mboards SET maxGig=? WHERE manufacturer=? AND model=?" ) )
    {
        updater.setInt( 1, 8 );
        updater.setInt( 2, manufacturer.ordinal() );
        updater.setString( 3, "IC41T-A" );
        updater.executeUpdate();
    }
    finally
    {
        // runs after the statement has been closed, on success and on failure alike
        conn.close();
    }
}
/**
 * Scratch harness for trying a RAM-speed regex against one sample line of spec text.
 * Prints every capture group of the first match, or "no find" when nothing matches.
 */
private static void test()
{
    final String sample = " Support DDR2 1066/800/667/533/400 DDR2 SDRAM ";
    final Pattern probe = Pattern.compile( "(?:)?(?:DDR2|DDR3|DDR)(?:)?\\s*((\\d+|\\(OC\\)|/||)+)",
            Pattern.CASE_INSENSITIVE );
    final Matcher matcher = probe.matcher( sample );
    if ( !matcher.find() )
    {
        out.println( "no find" );
        return;
    }
    // group 0 is the whole match; the numbered capture groups follow
    final int lastGroup = matcher.groupCount();
    for ( int g = 0; g <= lastGroup; g++ )
    {
        out.println( g + " [" + matcher.group( g ) + "]" );
    }
}
/**
 * Extract specs from previously downloaded ECS web pages, then apply the manual corrections.
 * <p>
 * Only extractSpecs and oddDucks run. The calls to test, fetchAllMBs and skeleton are
 * commented out below.
 *
 * @param args not used
 *
 * @throws java.io.IOException   if the CSV file or a saved spec page cannot be read
 * @throws java.sql.SQLException if the database cannot be reached or updated
 */
public static void main( final String[] args ) throws IOException, SQLException
{
    // disabled steps:
    // test();
    // fetchAllMBs();
    // skeleton();
    extractSpecs();
    oddDucks();
}
//todo screenscrape the following fields. Export to database.
//video
// socket
}
|