/*
* [ImportGigabyte.java]
*
* Summary: Import all data about motherboards from the Gigabyte Website.
*
* Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.7+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2011-02-14 initial version
*/
package com.mindprod.mother;
import com.mindprod.common17.BigDate;
import com.mindprod.csv.CSVReader;
import com.mindprod.htmlmacros.support.Configuration;
import com.mindprod.htmlmacros.support.ConfigurationForMindprod;
import com.mindprod.http.Get;
import com.mindprod.hunkio.HunkIO;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.lang.System.*;
/**
* Import all data about motherboards from the Gigabyte Website.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2011-02-14 initial version
* @since 2011-02-14
*/
public class ImportGigabyte extends ImportManufacturer
{
/**
* where master list of all Gigabyte motherboards are.
* The array is a flattened list of pairs: each even index holds a socket alias (e.g. "AMD AM3")
* and the odd index after it holds the URL of one page of the motherboard list for that socket.
* applySockets hands the alias to SocketType.valueOfAlias; fetchAllMBs reads only the URLs.
* NOTE(review): two of the "AMD AM2" entries start their query string with "?&p=" where every
* other entry uses "?p=" -- presumably harmless to the server, but confirm.
*/
private static final String[] INDEX_URLS = {
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=2&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=3&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=4&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=5&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=6&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=7&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=8&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM3", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=9&par=2&val=2&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=2&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=3&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=4&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=5&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=6&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2+", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=7&par=2&val=3&lgc=&tp=0&ss=0",
"AMD AM2", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?&p=1&par=2&val=4&lgc=&tp=0&ss=0",
"AMD AM2", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=2&par=2&val=4&lgc=&tp=0&ss=0",
"AMD AM2", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=3&par=2&val=4&lgc=&tp=0&ss=0",
"AMD AM2", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=4&par=2&val=4&lgc=&tp=0&ss=0",
"AMD AM2", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=5&par=2&val=4&lgc=&tp=0&ss=0",
"AMD AM2", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?&p=6&par=2&val=4&lgc=&tp=0&ss=0",
"AMD BGA FT1", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=25&lgc=&tp=0&ss=0",
"Intel 1366", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=1&lgc=&tp=0&ss=0",
"Intel 1366", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=2&par=2&val=1&lgc=&tp=0&ss=0",
"Intel 1155", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=24&lgc=&tp=0&ss=0",
"Intel 1155", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=2&par=2&val=24&lgc=&tp=0&ss=0",
"Intel 1156", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=11&lgc=&tp=0&ss=0",
"Intel 1156", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=2&par=2&val=11&lgc=&tp=0&ss=0",
"Intel 1156", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=3&par=2&val=11&lgc=&tp=0&ss=0",
"Intel 1156", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=4&par=2&val=11&lgc=&tp=0&ss=0",
"Intel 1156", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=5&par=2&val=11&lgc=&tp=0&ss=0",
"Intel 1156", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=6&par=2&val=11&lgc=&tp=0&ss=0",
"Intel BGA 559", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=23&lgc=&tp=0&ss=0",
"Intel 479/437", "http://www.gigabyte.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=8&lgc=&tp=0&ss=0",
};
/**
* mindprod configuration; assigned once in the static initializer.
*/
private static final Configuration configuration;
/**
* directory built from configuration.getSourceDirWithSlashes() in the static initializer;
* mother/gigabyte.csv is read relative to it.
*/
private static final File sourceDir;
/**
* looks for Audio channels, e.g. "7.1-channel" or "8 channel".
* Group 1 is the run of digits and dots in front of "channel".
*/
private static final Pattern AUDIO_CHANNEL_FINDER = Pattern.compile(
"([\\.\\d]+)[\\- ]channel", Pattern.CASE_INSENSITIVE );
/**
* looks for info about the form factor.
* Group 1 is the form factor name; groups 2 and 3 are the two board dimensions in cm.
* NOTE(review): the first string literal is split across two lines in this copy, which is not
* legal Java. Markup that followed "&gt;Form Factor" appears to have been lost -- restore it from
* the original source before compiling.
*/
private static final Pattern FORM_FACTOR_FINDER = Pattern.compile(
">Form Factor
(?:\\s*- )?([\\p{Alnum}\\- ]+)[;:] " +
"([0-9\\.]+)\\s*cm x ([0-9\\.]+)\\s*cm",
Pattern.CASE_INSENSITIVE
);
/**
* looks for count of IDE ports.
* Group 1 is the one or two digit count in front of " x IDE".
*/
private static final Pattern IDE_FINDER = Pattern.compile(
"(\\d{1,2}) x IDE[E]*", Pattern.CASE_INSENSITIVE );
// >Up to 12 USB 2.0/1.1 ports (8 on the back panel, 4 via the USB brackets connected to the internal USB headers)
/**
* looks for max ram capacity.
* Group 1 is the number in front of "GB".
* NOTE(review): the string literal is split across two lines in this copy, which is not legal
* Java. Markup that followed "&gt;Memory" appears to have been lost -- restore it from the original.
*/
private static final Pattern MAX_GIG_FINDER = Pattern.compile(
">Memory
| [\\p{Print}\\s]+?([0-9]+)\\s*GB",
Pattern.CASE_INSENSITIVE );
/**
* look for links like this:
* "name": "GA-880GMA-UD2H (rev. 2.0)",
* "value": "3424",
* Group 1 is the model, group 2 the revision, group 3 the "value" digits, which callers store
* as manufacturerPartNo.
*/
private static final Pattern MB_FINDER = Pattern.compile(
"\"name\": \"([\\p{Alnum} \\-]+) \\(rev\\. ([\\p{Digit}\\.]+)\\)\"," +
"\\s+\"value\": \"(\\p{Digit}+)\"",
Pattern.CASE_INSENSITIVE
);
/**
* looks for info about RAM.
* Group 1 is DDR2 or DDR3.
* NOTE(review): the " | " and "- " fragments look like remnants of lost HTML markup, as in the
* other "&gt;Memory" patterns -- confirm against the original source.
*/
private static final Pattern MEMORY_TYPE_FINDER = Pattern.compile(
">Memory | \\s*- \\p{Print}+?(DDR2|DDR3)",
Pattern.CASE_INSENSITIVE );
/**
* looks for ram speed.
* Group 1 is the slash-separated list of 3 or 4 digit speeds that follows DDR2 or DDR3; entries
* may carry "(O.C.)", "(OC)" or "(Note n)" annotations.
* NOTE(review): the first string literal is split across two lines in this copy, which is not
* legal Java. Markup that followed "&gt;Memory" appears to have been lost -- restore it from the
* original source.
*/
private static final Pattern RAM_SPEED_MHZ_FINDER = Pattern.compile(
">Memory
| [\\p{Print}\\s]+?(?:DDR2|DDR3) ((\\(Note " +
"\\d\\)|\\d{3,4}\\s*\\(O\\.C\\.\\)|\\d{3,4}\\s*\\(OC\\)|\\d{3,4}|\\s*/\\s*)+)",
Pattern.CASE_INSENSITIVE
);
/**
* splits a list of ram speeds on "/" with optional surrounding whitespace.
*/
private static final Pattern RAM_SPEED_MHZ_SPLITTER = Pattern.compile(
"\\s*/\\s*", Pattern.CASE_INSENSITIVE );
/**
* looks for count of SATA 3Gb/s connectors; group 1 is the count.
*/
private static final Pattern SATA2_FINDER_1 = Pattern.compile(
"(\\d{1,2})[ x]*SATA 3Gb/s connector", Pattern.CASE_INSENSITIVE );
/**
* alternate wording with no speed given: "n SATA connector"; group 1 is the count.
*/
private static final Pattern SATA2_FINDER_2 = Pattern.compile(
"(\\d{1,2}) SATA connector", Pattern.CASE_INSENSITIVE );
/**
* looks for count of SATA 6Gb/s connectors; group 1 is the count.
*/
private static final Pattern SATA3_FINDER = Pattern.compile(
"(\\d{1,2})[ x]*SATA 6Gb/s connector", Pattern.CASE_INSENSITIVE );
/**
* looks for USB2Internal ports capacity.
* Group 1 is the count in front of "via the USB".
* NOTE(review): the dot in "USB 2.0" is not escaped, so it matches any character -- probably
* harmless, but inconsistent with the other USB patterns.
*/
private static final Pattern USB2_INTERNAL_FINDER_1 = Pattern.compile(
"USB 2.0\\p{Print}+?(\\d+) via the USB", Pattern.CASE_INSENSITIVE );
/**
* looks for USB2Internal ports capacity, alternate wording:
* "additional n USB 2.0/1.1 ports by cables". Group 1 is the count.
*/
private static final Pattern USB2_INTERNAL_FINDER_2 = Pattern.compile(
"additional (\\d{1,2}) USB 2\\.0/(?:1\\.1)? ports by cables", Pattern.CASE_INSENSITIVE );
// Up to 8 USB 3.0/2.0 ports (4 on the back panel, 4 via the USB brackets
/**
* looks for USB2Rear ports capacity.
* Group 1 is the count that follows "(" and precedes "on the back panel".
*/
private static final Pattern USB2_REAR_FINDER_1 = Pattern.compile(
"USB 2\\.0\\p{Print}+?\\((\\d+)(?: ports)? on the back panel", Pattern.CASE_INSENSITIVE );
/**
* looks for USB2Rear ports capacity, alternate wording. Group 1 is the count.
* NOTE(review): "(|:/1.1)?" looks like a typo for "(?:/1\\.1)?"; as written it is a capturing
* group matching either nothing or ":/1.1". The pattern also requires the digits to touch
* "USB" with no space. Confirm intent before changing, since the group count would change.
*/
private static final Pattern USB2_REAR_FINDER_2 = Pattern.compile(
"(\\d{1,2})USB 2\\.0(|:/1.1)? ports", Pattern.CASE_INSENSITIVE );
/**
* looks for USB3Internal ports capacity.
* Group 1 is the count in front of "via the USB".
*/
private static final Pattern USB3_INTERNAL_FINDER = Pattern.compile(
"USB 3\\.0\\p{Print}+?(\\d+) via the USB", Pattern.CASE_INSENSITIVE );
/**
* looks for USB3Rear ports capacity.
* Group 1 is the count in front of "on the back panel".
*/
private static final Pattern USB3_REAR_FINDER = Pattern.compile(
"USB 3\\.0\\p{Print}+?(\\d+)(?: ports)? on the back panel", Pattern.CASE_INSENSITIVE );
/**
* looks for USB3 ports capacity.
* Group 1 is the count in front of "USB 3.0", optionally separated by spaces and/or "x".
*/
private static final Pattern USB3_TOTAL_FINDER = Pattern.compile(
"(\\d+)[x ]*USB 3\\.0", Pattern.CASE_INSENSITIVE );
/**
* looks for integrated Video.
* Group 1 is the first alphanumeric word after "GRAPHICS" and an optional
* "On Chip" / "Integrated" prefix.
* NOTE(review): the character after GRAPHICS may be a mis-encoded character (possibly a
* non-breaking space), and the alternation "(?:|On Chip|Integrated|)*" has two empty
* branches -- verify against the original source.
*/
private static final Pattern VIDEO_FINDER_1 = Pattern.compile(
"GRAPHICSº (?:|On Chip|Integrated|)" +
"*[\\( ]*(\\p{Alnum}+)",
Pattern.CASE_INSENSITIVE
);
/**
* looks for integrated Video by chipset name followed by "Express".
* Group 1 is the chipset name.
*/
private static final Pattern VIDEO_FINDER_2 = Pattern.compile(
"(H55|H57|H61|H67|945GC) Express", Pattern.CASE_INSENSITIVE );
/**
* looks for integrated Video by a fixed list of chipset / GPU names.
* Group 1 is the name found.
*/
private static final Pattern VIDEO_FINDER_3 = Pattern.compile(
"(630a|690G|740G|760G|780G|785G|790GX|880G|890GX|GeForce 8100|GeForce 8200|nForce 750a|AMD Radeon HD 6310)",
Pattern.CASE_INSENSITIVE );
/**
* looks for integrated Video via the phrase "Multi-Graphics Technology".
* Group 1 is that phrase.
*/
private static final Pattern VIDEO_FINDER_4 = Pattern.compile(
"(Multi-Graphics Technology)", Pattern.CASE_INSENSITIVE );
/**
* passed to validateVideo: matches "graphics" when not followed by " technology", " slot" or
* " card", or any of "onboard graphics", "video", "HDMI".
*/
private static final Pattern VIDEO_VALIDATOR = Pattern.compile(
"(?!graphics (technology|slot|card))graphics|onboard graphics|video|HDMI",
Pattern.CASE_INSENSITIVE );
static
{
configuration = new ConfigurationForMindprod();
// combine dirsWithMacros and dirsWithIncludes into dirsToProcess;
sourceDir = new File( configuration.getSourceDirWithSlashes() );
}
/**
* Collect all motherboard specs from the Gigabyte website.
*
* For every index page in INDEX_URLS this finds each motherboard with MB_FINDER, prints a
* CSV-style line describing it, downloads that board's spec page and saves the raw page as
* E:/mb/gigabyte/ + model + .html. The JVM exits with status 1 if any page cannot be fetched.
*
* @throws java.io.IOException if urls malformed or I/O trouble
* @throws java.sql.SQLException if cannot connect to, or close, the database
*/
public static void fetchAllMBs() throws IOException, SQLException
{
    manufacturer = Manufacturer.GIGABYTE;
    // no parms needed
    conn = connect();
    try
    {
        // INDEX_URLS is laid out as ( socket alias, url ) pairs; only the urls, at the odd indexes, matter here.
        for ( int i = 1; i < INDEX_URLS.length; i += 2 )
        {
            final String indexURL = INDEX_URLS[ i ];
            final Get get = new Get();
            final String htmlListOfGigabyteMBs = get.send( new URL( indexURL ), Get.UTF8 );
            final int responseCode = get.getResponseCode();
            if ( responseCode >= 300 || htmlListOfGigabyteMBs == null )
            {
                err.println( "Could not fetch a master index Gigabyte page " + indexURL );
                System.exit( 1 );
            }
            // Matchers are used both for matching and finding.
            final Matcher m = MB_FINDER.matcher( htmlListOfGigabyteMBs );
            while ( m.find() )
            {
                assert m.groupCount() == 3 : "bug in regex";
                model = m.group( 1 );
                revision = m.group( 2 );
                // links to individual mbs look like this
                // http://www.gigabyte.com/products/product-page.aspx?pid=3748#sp
                manufacturerPartNo = m.group( 3 );
                out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                             " false" );
                // learning URL
                // "http://www.gigabyte.com/products/product-page.aspx?pid=" + 3748 + "#sp";
                // mining URL
                // "http://www.gigabyte.com/products/product-page_ajax.aspx?&t=sp&pid=" + 3748 + "&dlt=&cg=2&ck=2";
                final Get miningPage = new Get();
                final String rawMBSpecs = miningPage.send( new URL( manufacturer.miningURL( manufacturerPartNo ) ),
                        Get.UTF8 );
                // Check the response to THIS request; consulting the index-page Get here would
                // let a failed spec-page fetch go unnoticed.
                final int responseCode2 = miningPage.getResponseCode();
                if ( responseCode2 >= 300 || rawMBSpecs == null )
                {
                    err.println( "Could not fetch Gigabyte motherboard page " + model );
                    System.exit( 1 );
                }
                HunkIO.writeEntireFile( new File( "E:/mb/gigabyte/" + model + ".html" ), rawMBSpecs,
                        HunkIO.UTF8 );
            } // end find loop
        }
    }
    finally
    {
        // release the connection opened above, as the other import steps do
        conn.close();
    }
}
/**
* Put the skeleton info into the DB.
*
* Reads mother/gigabyte.csv under sourceDir and inserts one mboards row per CSV line holding
* manufacturer, model, revision, manufacturerPartNo and lastUpdated. The reader, the prepared
* statement and the connection are released whether or not the import succeeds.
*
* @throws IOException if the CSV file cannot be read
* @throws java.sql.SQLException if cannot write to database
*/
public static void skeleton() throws IOException, SQLException
{
    manufacturer = Manufacturer.GIGABYTE;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try
    {
        final PreparedStatement inserter = conn.prepareStatement(
                "INSERT INTO mboards( manufacturer, model, revision, manufacturerPartNo, lastUpdated) VALUES(?,?,?,?," +
                "?);"
        );
        try
        {
            final CSVReader r = new CSVReader( new BufferedReader( new FileReader( new File( sourceDir,
                    "mother/gigabyte.csv" ) ) ) );
            try
            {
                while ( true )
                {
                    // manufacturer, model, revision, mfr no, verified, last-update
                    r.skip( 1 ); // mfr
                    model = r.get();
                    revision = r.get();
                    manufacturerPartNo = r.get();
                    r.skipToNextLine();
                    // record our findings in SQL
                    out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                                 " false, " + lastUpdated );
                    inserter.setInt( 1, manufacturer.ordinal() );
                    inserter.setString( 2, model );
                    inserter.setString( 3, revision );
                    inserter.setString( 4, manufacturerPartNo );
                    inserter.setInt( 5, lastUpdated.ordinal() );
                    inserter.executeUpdate();
                }
            }
            catch ( EOFException e )
            {
                // normal exit from the read loop: EOFException marks the end of the CSV file
            }
            finally
            {
                r.close();
            }
        }
        finally
        {
            inserter.close();
        }
    }
    finally
    {
        conn.close();
    }
}
/**
* Put the socket info into the DB.
*
* Walks INDEX_URLS as ( socket alias, index url ) pairs, fetches each index page and, for every
* motherboard MB_FINDER finds on it, sets that model's socket and lastUpdated in mboards.
* The JVM exits with status 1 if an index page cannot be fetched. The prepared statement and
* the connection are released whether or not the update succeeds.
*
* @throws IOException if urls malformed or I/O trouble
* @throws java.sql.SQLException if cannot write to database
*/
private static void applySockets() throws IOException, SQLException
{
    // The socket info is not on the spec page. It depends on the grouping of MBs.
    manufacturer = Manufacturer.GIGABYTE;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try
    {
        final PreparedStatement updater = conn.prepareStatement( "UPDATE mboards " +
                                                                 "SET socket=?, lastUpdated=? " +
                                                                 "WHERE manufacturer=? AND model=?;" );
        try
        {
            for ( int i = 0; i < INDEX_URLS.length; i += 2 )
            {
                socket = SocketType.valueOfAlias( INDEX_URLS[ i ] );
                final String indexURL = INDEX_URLS[ i + 1 ];
                final Get get = new Get();
                final String htmlListOfGigabyteMBs = get.send( new URL( indexURL ), Get.UTF8 );
                final int responseCode = get.getResponseCode();
                if ( responseCode >= 300 || htmlListOfGigabyteMBs == null )
                {
                    err.println( "Could not fetch a master index Gigabyte page " + indexURL );
                    System.exit( 1 );
                }
                // Matchers are used both for matching and finding.
                final Matcher m = MB_FINDER.matcher( htmlListOfGigabyteMBs );
                while ( m.find() )
                {
                    assert m.groupCount() == 3 : "bug in regex";
                    model = m.group( 1 );
                    revision = m.group( 2 );
                    // links to individual mbs look like this
                    // http://www.gigabyte.com/products/product-page.aspx?pid=3748#sp
                    manufacturerPartNo = m.group( 3 );
                    // record our findings in SQL
                    out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                                 " false, " + lastUpdated + ", " + socket );
                    updater.setInt( 1, socket.ordinal() );
                    updater.setInt( 2, lastUpdated.ordinal() );
                    updater.setInt( 3, manufacturer.ordinal() );
                    updater.setString( 4, model );
                    updater.executeUpdate();
                }
            }
        }
        finally
        {
            updater.close();
        }
    }
    finally
    {
        conn.close();
    }
}
/**
* Extract specs from previously downloaded raw specs.
*
* For each line of mother/gigabyte.csv this reads E:/mb/gigabyte/ + model + .html, runs the
* extract helpers over it with this class's patterns, looks up the board's socket in mboards,
* then calls dumpExtracts and updateMBFields. At end of file it prints the count of incomplete
* records. The reader, the prepared statement and the database are released whether or not
* the run succeeds.
*
* @throws java.sql.SQLException if cannot write to SQL, or a CSV board has no row in mboards
* @throws java.io.IOException if cannot read mb page.
*/
private static void extractSpecs() throws SQLException, IOException
{
    manufacturer = Manufacturer.GIGABYTE;
    lastUpdated = BigDate.localToday();
    initDatabase();
    try
    {
        final PreparedStatement socketFetcher = conn.prepareStatement(
                "SELECT socket FROM mboards WHERE model=? AND revision=?" );
        try
        {
            final File dir = new File( "E:/mb/gigabyte/" );
            final CSVReader r = new CSVReader( new BufferedReader( new FileReader( new File( sourceDir,
                    "mother/gigabyte.csv" ) ) ) );
            try
            {
                while ( true )
                {
                    r.skip( 1 ); // mfr
                    model = r.get();
                    revision = r.get();
                    manufacturerPartNo = r.get();
                    r.skipToNextLine();
                    final String rawMBSpecs = HunkIO.readEntireFile( new File( dir, model + ".html" ), HunkIO.UTF8 );
                    clearMBSpecs();
                    extractAudioChannels( rawMBSpecs, new Pattern[] { AUDIO_CHANNEL_FINDER } );
                    extractFormFactor( rawMBSpecs, FORM_FACTOR_FINDER, 1 );
                    extractIde( rawMBSpecs, new Pattern[] { IDE_FINDER } );
                    extractMemoryType( rawMBSpecs, MEMORY_TYPE_FINDER );
                    extractMaxGig( rawMBSpecs, MAX_GIG_FINDER );
                    extractRamSpeedMHz( rawMBSpecs, RAM_SPEED_MHZ_FINDER, RAM_SPEED_MHZ_SPLITTER );
                    extractSata2( rawMBSpecs, new Pattern[] { SATA2_FINDER_1, SATA2_FINDER_2 } );
                    extractSata3( rawMBSpecs, new Pattern[] { SATA3_FINDER } );
                    // get socket from database
                    socketFetcher.setString( 1, model );
                    socketFetcher.setString( 2, revision );
                    final ResultSet rs = socketFetcher.executeQuery();
                    try
                    {
                        // fail with a message naming the board instead of an opaque cursor error
                        if ( !rs.next() )
                        {
                            throw new SQLException(
                                    "No mboards row for model " + model + " revision " + revision );
                        }
                        socket = SocketType.values()[ rs.getInt( 1 ) ];
                    }
                    finally
                    {
                        rs.close();
                    }
                    extractUSB( rawMBSpecs,
                            new Pattern[ 0 ],
                            new Pattern[] { USB2_REAR_FINDER_1, USB2_REAR_FINDER_2 },
                            new Pattern[] { USB2_INTERNAL_FINDER_1, USB2_INTERNAL_FINDER_2 },
                            new Pattern[] { USB3_TOTAL_FINDER },
                            new Pattern[] { USB3_REAR_FINDER },
                            new Pattern[] { USB3_INTERNAL_FINDER } );
                    extractVideo( rawMBSpecs,
                            new Pattern[] { VIDEO_FINDER_1, VIDEO_FINDER_2, VIDEO_FINDER_3, VIDEO_FINDER_4 } );
                    validateVideo( rawMBSpecs, VIDEO_VALIDATOR );
                    // no extractWatts since no info available.
                    dumpExtracts();
                    updateMBFields();
                }
            }
            catch ( EOFException e )
            {
                // normal exit from the read loop: EOFException marks the end of the CSV file
                out.println( incomplete + " incomplete records" );
            }
            finally
            {
                r.close();
            }
        }
        finally
        {
            socketFetcher.close();
        }
    }
    finally
    {
        closeDatabase();
    }
}
/**
* Handle oddities, typos on website etc.
*
* Applies hand-coded corrections to mboards: sets the form factor and dimensions of
* GA-M750SLI-DS4, and sets the socket of GA-CG330UD and GA-CG230D to the "Intel Atom" alias.
* Both statements and the connection are released whether or not the updates succeed.
*
* @throws java.sql.SQLException if cannot write exceptions to database
*/
private static void oddDucks() throws SQLException
{
    manufacturer = Manufacturer.GIGABYTE;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try
    {
        final Statement updater = conn.createStatement();
        try
        {
            // only constants from this class are concatenated here, no external input
            updater.executeUpdate( "UPDATE mboards "
                                   + "SET formFactor="
                                   + FormFactor.ATX.ordinal()
                                   + ",widthInCm=30.5,heightInCm=22.4 "
                                   + "WHERE manufacturer="
                                   + manufacturer.ordinal()
                                   + " AND model='GA-M750SLI-DS4'" );
        }
        finally
        {
            updater.close();
        }
        final PreparedStatement socketUpdater = conn.prepareStatement( "UPDATE mboards " +
                                                                       "SET socket=?, lastUpdated=? " +
                                                                       "WHERE manufacturer=? AND model=?;" );
        try
        {
            // handle two socket exceptions.
            socket = SocketType.valueOfAlias( "Intel Atom" );
            socketUpdater.setInt( 1, socket.ordinal() );
            socketUpdater.setInt( 2, lastUpdated.ordinal() );
            socketUpdater.setInt( 3, manufacturer.ordinal() );
            socketUpdater.setString( 4, "GA-CG330UD" );
            socketUpdater.executeUpdate();
            // parameters 1 to 3 keep their values; only the model changes for the second board
            socketUpdater.setString( 4, "GA-CG230D" );
            socketUpdater.executeUpdate();
        }
        finally
        {
            socketUpdater.close();
        }
    }
    finally
    {
        conn.close();
    }
}
/**
* Run the Gigabyte import: extract specs from previously downloaded Gigabyte pages, then apply
* the hand-coded corrections in oddDucks.
*
* @param args not used
*
* @throws java.io.IOException if cannot read mb pages.
* @throws java.sql.SQLException if can't write to database
*/
public static void main( final String[] args ) throws IOException, SQLException
{
// The three earlier steps are left commented out; presumably they are enabled by hand when
// a fresh download or a rebuilt table is wanted.
// fetchAllMBs();
// skeleton();
// applySockets();
extractSpecs();
oddDucks();
}
//todo screenscrape the following fields. Export to database.
//video
}
| |