/*
* [ImportAsus.java]
*
* Summary: Import all data about motherboards from the Asus Website.
*
* Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
*
* Licence: This software may be copied and used freely for any purpose but military.
* http://mindprod.com/contact/nonmil.html
*
* Requires: JDK 1.7+
*
* Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
*
* Version History:
* 1.0 2011-02-14 initial version
*/
package com.mindprod.mother;
import com.mindprod.common17.BigDate;
import com.mindprod.csv.CSVReader;
import com.mindprod.htmlmacros.support.Configuration;
import com.mindprod.htmlmacros.support.ConfigurationForMindprod;
import com.mindprod.http.Get;
import com.mindprod.hunkio.HunkIO;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.lang.System.*;
/**
* Import all data about motherboards from the Asus Website.
*
* @author Roedy Green, Canadian Mind Products
* @version 1.0 2011-02-14 initial version
* @since 2011-02-14
*/
public class ImportAsus extends ImportManufacturer
{
/**
* Master index pages listing the Asus motherboards, stored as a flat list of pairs:
* even slots hold a socket alias (fed to SocketType.valueOfAlias), odd slots hold the
* URL of the index page for that socket.
*/
private static final String[] INDEX_URLS = {
"AMD AM3", "http://www.Asus.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=2&lgc=&tp=0&ss=0"
};
/**
* mindprod configuration, created once in the static initializer.
*/
private static final Configuration configuration;
/**
* Root source directory taken from the configuration; the mother/asus.csv board list
* is read relative to it.
*/
private static final File sourceDir;
/**
* Looks for the audio channel count: group 1 is the run of digits and dots immediately
* followed by a single hyphen or space and the word "channel" (case-insensitive).
*/
private static final Pattern AUDIO_CHANNEL_FINDER = Pattern.compile(
"([\\.\\d]+)[\\- ]channel", Pattern.CASE_INSENSITIVE );
/**
* looks for info about the form factor
*/
private static final Pattern FORM_FACTOR_FINDER = Pattern.compile(
"Form Factor
\\s*([\\p{Alnum}\\- ]+?)\\s*(?:Form Factor)?\\s* [inchx 0-9]*\\(\\s*([0-9\\.]+)" +
"\\s*cm x ([0-9\\.]+)\\s*cm",
Pattern.CASE_INSENSITIVE
);
// Form Factor | ATX Form Factor 12 inch x 8 inch ( 30.5 cm x 20.3 cm )
/**
* Looks for the count of IDE ports: group 1 is the one- or two-digit number in front
* of " x IDE" (any number of trailing E's is tolerated).
*/
private static final Pattern IDE_FINDER = Pattern.compile(
"(\\d{1,2}) x IDE[E]*", Pattern.CASE_INSENSITIVE );
// sample USB text (relevant to the USB patterns, not to IDE):
// >Up to 12 USB 2.0/1.1 ports (8 on the back panel, 4 via the USB brackets connected to the internal USB headers)
/**
* Looks for max ram capacity: group 1 is the one- to three-digit number of GB
* following "Max.".
*/
private static final Pattern MAX_GIG_FINDER = Pattern.compile(
"Max\\.\\s*(\\d{1,3})\\s*GB", Pattern.CASE_INSENSITIVE );
// samples:
// Max. 16GB
// Max. 8 GB
/**
* Finds motherboard entries in an index page. It looks for text like this:
* "name": "GA-880GMA-UD2H (rev. 2.0)",
* "value": "3424",
* Group 1 is the model, group 2 the revision and group 3 the numeric value, which the
* callers store as the manufacturer part number.
* NOTE(review): the sample model name may have been carried over from a different
* manufacturer's importer - confirm the Asus index really uses this format.
*/
private static final Pattern MB_FINDER = Pattern.compile(
"\"name\": \"([\\p{Alnum} \\-]+) \\(rev\\. ([\\p{Digit}\\.]+)\\)\"," +
"\\s+\"value\": \"(\\p{Digit}+)\"",
Pattern.CASE_INSENSITIVE
);
/**
* Looks for info about RAM: group 1 is DDR2 or DDR3, which must be surrounded by
* whitespace.
*/
private static final Pattern MEMORY_TYPE_FINDER = Pattern.compile(
"\\s+(DDR2|DDR3)\\s+", Pattern.CASE_INSENSITIVE );
/**
* Looks for ram speed: matches DDR2/DDR3 followed by a run of three- or four-digit
* speeds, each optionally marked "(O.C.)" or "*", separated by slashes. The single
* capturing group is repeated, so after a match it holds only the last piece consumed.
*/
private static final Pattern RAM_SPEED_MHZ_FINDER = Pattern.compile(
"\\s+(?:DDR2|DDR3)\\s+(\\d{3,4}\\s*\\(O\\.C\\.\\)|\\d{3,4}\\*|\\d{3,4}|\\s*/\\s*)+",
Pattern.CASE_INSENSITIVE );
// samples:
// DDR2 1066*/800/667 ECC
// DDR3 2000(O.C.)/1866(O.C.)/1800(O.C.)/1600(O.C.)/1333/1066 Hz
/**
* Separator between individual ram speeds: a slash with optional surrounding whitespace.
*/
private static final Pattern RAM_SPEED_MHZ_SPLITTER = Pattern.compile(
"\\s*/\\s*", Pattern.CASE_INSENSITIVE );
/**
* Looks for the count of ports described as "SATA 3 Gb/s": group 1 is the one- or
* two-digit count in front.
*/
private static final Pattern SATA2_FINDER = Pattern.compile(
"(\\d{1,2})[ x]*SATA\\s*3\\s*Gb/s", Pattern.CASE_INSENSITIVE );
/**
* Looks for the count of ports described as "SATA 6 Gb/s": group 1 is the one- or
* two-digit count in front.
*/
private static final Pattern SATA3_FINDER = Pattern.compile(
"(\\d{1,2})[ x]*SATA\\s*6\\s*Gb/s", Pattern.CASE_INSENSITIVE );
// sample: 10 x USB 2.0 port(s) (4 at Back Pannel, Black,6 at Mid-Board)
/**
* Looks for USB2 internal ports capacity: group 1 is the number directly in front of
* " via the USB", found after a "USB 2.0" mention.
* The dot in "2.0" is escaped so that it only matches a literal dot, in line with
* the other USB patterns of this class.
*/
private static final Pattern USB2_INTERNAL_FINDER_1 = Pattern.compile(
    "USB 2\\.0\\p{Print}+?(\\d+) via the USB", Pattern.CASE_INSENSITIVE );
/**
* Alternative wording for USB2 internal ports capacity: group 1 is the one- or
* two-digit number in "additional N USB 2.0/1.1 ports by cables" (the "1.1" part is
* optional).
*/
private static final Pattern USB2_INTERNAL_FINDER_2 = Pattern.compile(
    "additional (\\d{1,2}) USB 2\\.0/(?:1\\.1)? ports by cables", Pattern.CASE_INSENSITIVE );
// sample: Up to 8 USB 3.0/2.0 ports (4 on the back panel, 4 via the USB brackets
/**
* Looks for USB2 rear ports capacity: group 1 is the number right after the opening
* parenthesis in "(N on the back panel" or "(N ports on the back panel", found after
* a "USB 2.0" mention.
*/
private static final Pattern USB2_REAR_FINDER = Pattern.compile(
    "USB 2\\.0\\p{Print}+?\\((\\d+)(?: ports)? on the back panel", Pattern.CASE_INSENSITIVE );
/**
* Looks for total USB2 ports capacity: group 1 is the one- or two-digit count in front
* of "USB 2.0 ports". The dot is escaped so only a literal "2.0" is accepted.
*/
private static final Pattern USB2_TOTAL_FINDER = Pattern.compile(
    "(\\d{1,2})[ x]+USB 2\\.0 ports", Pattern.CASE_INSENSITIVE );
// ASUS MyLogo 2 | Back Panel I/O Ports | 1 x PS/2 Keyboard 1 x PS/2 Mouse
// 1 x LAN(RJ45) port 4 x USB 2.0/1.1 1 x COM port 6 -Channel Audio I/O 1 x Parallel
// Port |
// Internal I/O Connectors | 3 x USB connectors support additional 6 USB 2.0/1
// .1 ports 1 x IDE connector 4 x SATA connectors 1 x CPU Fan connector 1 x Speaker
// connector 1 x CD audio in connector
// 10 x USB 2.0 port(s) (4 at Back Pannel, Black,6 at Mid-Board)
/**
* Looks for USB3 internal ports capacity: group 1 is the number directly in front of
* " via the USB", found after a "USB 3.0" mention.
*/
private static final Pattern USB3_INTERNAL_FINDER = Pattern.compile(
"USB 3\\.0\\p{Print}+?(\\d+) via the USB", Pattern.CASE_INSENSITIVE );
/**
* Looks for USB3 rear ports capacity: group 1 is the number in front of
* " on the back panel" or " ports on the back panel", found after a "USB 3.0" mention.
*/
private static final Pattern USB3_REAR_FINDER = Pattern.compile(
"USB 3\\.0\\p{Print}+?(\\d+)(?: ports)? on the back panel", Pattern.CASE_INSENSITIVE );
/**
* Looks for total USB3 ports capacity: group 1 is the number in front of "USB 3.0",
* optionally separated from it by spaces and x's.
*/
private static final Pattern USB3_TOTAL_FINDER = Pattern.compile(
"(\\d+)[x ]*USB 3\\.0", Pattern.CASE_INSENSITIVE );
/**
* Looks for integrated Video, first form.
* As written, the unescaped top-level bar splits the regex into two alternatives: the
* literal "GRAPHICS " on its own, or the remainder, which is the only part containing
* capturing group 1.
* NOTE(review): the bare bar and the ordinal sign look like residue of markup that was
* lost from the literal - confirm the intended pattern against a real spec page.
*/
private static final Pattern VIDEO_FINDER_1 = Pattern.compile(
"GRAPHICS | º (?:|On Chip|Integrated|)" +
"*[\\( ]*(\\p{Alnum}+)",
Pattern.CASE_INSENSITIVE
);
/**
* Looks for integrated Video, second form: group 1 is one of the listed chipset names
* followed by " Express".
*/
private static final Pattern VIDEO_FINDER_2 = Pattern.compile(
"(H55|H57|H61|H67|945GC) Express", Pattern.CASE_INSENSITIVE );
/**
* Looks for integrated Video, third form: group 1 is one of a fixed list of chipset or
* graphics names.
*/
private static final Pattern VIDEO_FINDER_3 = Pattern.compile(
"(630a|690G|740G|760G|780G|785G|790GX|880G|890GX|GeForce 8100|GeForce 8200|nForce 750a|AMD Radeon HD 6310)",
Pattern.CASE_INSENSITIVE );
/**
* Looks for integrated Video, fourth form: group 1 is the phrase
* "Multi-Graphics Technology".
*/
private static final Pattern VIDEO_FINDER_4 = Pattern.compile(
"(Multi-Graphics Technology)", Pattern.CASE_INSENSITIVE );
/**
* Pattern handed to validateVideo. It matches "graphics" unless it is directly followed
* by " technology", " slot" or " card", and it also matches "onboard graphics", "video"
* or "HDMI". The negative lookahead guards the first alternative only.
*/
private static final Pattern VIDEO_VALIDATOR = Pattern.compile(
"(?!graphics (technology|slot|card))graphics|onboard graphics|video|HDMI",
Pattern.CASE_INSENSITIVE );
static
{
configuration = new ConfigurationForMindprod();
// combine dirsWithMacros and dirsWithIncludes into dirsToProcess;
sourceDir = new File( configuration.getSourceDirWithSlashes() );
}
/**
* Collect all motherboard specs from the Asus website.
* <p>
* Every index page in INDEX_URLS is fetched and scanned with MB_FINDER. For each board
* found, a line (manufacturer ordinal, model, revision, part number, false) is echoed
* to stdout, the board's raw spec page is downloaded and it is saved as
* E:/mb/asus/ + model + .html. If an index page or a spec page cannot be fetched, a
* message goes to stderr and the JVM exits with status 1.
*
* @throws java.io.IOException if urls malformed or I/O trouble
* @throws java.sql.SQLException if cannot connect to or disconnect from the database
*/
public static void fetchAllMBs() throws IOException, SQLException
{
    manufacturer = Manufacturer.ASUS;
    // no parms needed
    conn = connect();
    try
    {
        // INDEX_URLS is a flat list of (socket alias, url) pairs; only the urls, which
        // sit in the odd slots, are needed here.
        for ( int i = 1; i < INDEX_URLS.length; i += 2 )
        {
            final String indexURL = INDEX_URLS[ i ];
            final Get get = new Get();
            final String htmlListOfAsusMBs = get.send( new URL( indexURL ), Get.UTF8 );
            final int responseCode = get.getResponseCode();
            if ( responseCode >= 300 || htmlListOfAsusMBs == null )
            {
                err.println( "Could not fetch a master index Asus page " + indexURL );
                System.exit( 1 );
            }
            final Matcher m = MB_FINDER.matcher( htmlListOfAsusMBs );
            while ( m.find() )
            {
                assert m.groupCount() == 3 : "bug in regex";
                model = m.group( 1 );
                revision = m.group( 2 );
                // links to individual mbs look like this
                // http://www.Asus.com/products/product-page.aspx?pid=3748#sp
                manufacturerPartNo = m.group( 3 );
                out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                             " false" );
                // learning URL
                // "http://www.Asus.com/products/product-page.aspx?pid=" + 3748 + "#sp";
                // mining URL
                // "http://www.Asus.com/products/product-page_ajax.aspx?&t=sp&pid=" + 3748 + "&dlt=&cg=2&ck=2";
                final Get miningPage = new Get();
                final String rawMBSpecs = miningPage.send( new URL( manufacturer.miningURL( manufacturerPartNo ) ),
                        Get.UTF8 );
                // Must query the request that fetched the spec page. Asking the index
                // request again would just repeat its already-checked response code.
                final int responseCode2 = miningPage.getResponseCode();
                if ( responseCode2 >= 300 || rawMBSpecs == null )
                {
                    err.println( "Could not fetch Asus motherboard page " + model );
                    System.exit( 1 );
                }
                HunkIO.writeEntireFile( new File( "E:/mb/asus/" + model + ".html" ), rawMBSpecs, HunkIO.UTF8 );
            } // end find loop
        }
    }
    finally
    {
        // this step never uses the database, so do not leave the connection dangling
        conn.close();
    }
}
/**
* Put the skeleton info into the DB.
* <p>
* Reads mother/asus.csv under the source directory and inserts one mboards row per
* line (manufacturer, model, revision, manufacturerPartNo, lastUpdated), echoing each
* row to stdout. The statement, the reader and the connection are released on every
* exit path, not only at normal end of file.
*
* @throws java.io.IOException if the csv file cannot be read
* @throws java.sql.SQLException if cannot write to database
*/
public static void skeleton() throws IOException, SQLException
{
    manufacturer = Manufacturer.ASUS;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try
    {
        final PreparedStatement inserter = conn.prepareStatement(
                "INSERT INTO mboards( manufacturer, model, revision, manufacturerPartNo, lastUpdated) VALUES(?,?,?,?," +
                "?);"
        );
        try
        {
            final CSVReader r =
                    new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/asus.csv" ) ) ) );
            try
            {
                while ( true )
                {
                    // manufacturer, model, revision, mfr no, verified, last-update
                    r.skip( 1 ); // mfr
                    model = r.get();
                    revision = r.get();
                    manufacturerPartNo = r.get();
                    r.skipToNextLine();
                    // record our findings in SQL
                    out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                                 " false, " + lastUpdated );
                    inserter.setInt( 1, manufacturer.ordinal() );
                    inserter.setString( 2, model );
                    inserter.setString( 3, revision );
                    inserter.setString( 4, manufacturerPartNo );
                    inserter.setInt( 5, lastUpdated.ordinal() );
                    inserter.executeUpdate();
                }
            }
            catch ( EOFException e )
            {
                // expected: the read loop has no other exit, end of the csv file ends it
            }
            finally
            {
                r.close();
            }
        }
        finally
        {
            inserter.close();
        }
    }
    finally
    {
        conn.close();
    }
}
/**
* Put the socket info into the DB.
* <p>
* The socket is not on a board's spec page; it follows from which index page lists the
* board. So for every (socket alias, url) pair in INDEX_URLS the index page is fetched,
* and each board found on it gets its socket and lastUpdated columns updated. If an
* index page cannot be fetched, a message goes to stderr and the JVM exits with
* status 1.
*
* @throws java.io.IOException if urls malformed or I/O trouble
* @throws java.sql.SQLException if cannot write to database
*/
private static void applySockets() throws IOException, SQLException
{
    manufacturer = Manufacturer.ASUS;
    lastUpdated = BigDate.localToday();
    conn = connect();
    try
    {
        final PreparedStatement updater = conn.prepareStatement( "UPDATE mboards " +
                                                                 "SET socket=?, lastUpdated=? " +
                                                                 "WHERE manufacturer=? AND model=?;" );
        try
        {
            // walk the (socket alias, url) pairs
            for ( int i = 0; i < INDEX_URLS.length; i += 2 )
            {
                socket = SocketType.valueOfAlias( INDEX_URLS[ i ] );
                final String indexURL = INDEX_URLS[ i + 1 ];
                final Get get = new Get();
                final String htmlListOfAsusMBs = get.send( new URL( indexURL ), Get.UTF8 );
                final int responseCode = get.getResponseCode();
                if ( responseCode >= 300 || htmlListOfAsusMBs == null )
                {
                    err.println( "Could not fetch a master index Asus page " + indexURL );
                    System.exit( 1 );
                }
                final Matcher m = MB_FINDER.matcher( htmlListOfAsusMBs );
                while ( m.find() )
                {
                    assert m.groupCount() == 3 : "bug in regex";
                    model = m.group( 1 );
                    revision = m.group( 2 );
                    // links to individual mbs look like this
                    // http://www.Asus.com/products/product-page.aspx?pid=3748#sp
                    manufacturerPartNo = m.group( 3 );
                    // record our findings in SQL
                    out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," +
                                 " false, " + lastUpdated + ", " + socket );
                    updater.setInt( 1, socket.ordinal() );
                    updater.setInt( 2, lastUpdated.ordinal() );
                    updater.setInt( 3, manufacturer.ordinal() );
                    updater.setString( 4, model );
                    updater.executeUpdate();
                }
            }
        }
        finally
        {
            updater.close();
        }
    }
    finally
    {
        // release the connection even when a fetch or an update fails
        conn.close();
    }
}
/**
* Extract specs from previously downloaded raw specs.
* <p>
* For every board listed in mother/asus.csv, the saved page E:/mb/asus/ + model + .html
* is read, the individual extract methods are run over it with the patterns of this
* class, the board's socket is looked up in the mboards table, and the results are
* dumped and written back with updateMBFields. At end of file the count of incomplete
* records is printed. The reader, the statement and the database are released on every
* exit path.
*
* @throws java.sql.SQLException if cannot write to SQL, or a board in the csv has no
* row in mboards
* @throws java.io.IOException if cannot read mb page.
*/
private static void extractSpecs() throws SQLException, IOException
{
    manufacturer = Manufacturer.ASUS;
    lastUpdated = BigDate.localToday();
    initDatabase();
    try
    {
        final PreparedStatement socketFetcher = conn.prepareStatement(
                "SELECT socket FROM mboards WHERE model=? AND revision=?" );
        try
        {
            final File dir = new File( "E:/mb/asus/" );
            final CSVReader r =
                    new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/asus.csv" ) ) ) );
            try
            {
                while ( true )
                {
                    r.skip( 1 ); // mfr
                    model = r.get();
                    revision = r.get();
                    manufacturerPartNo = r.get();
                    r.skipToNextLine();
                    final String rawMBSpecs = HunkIO.readEntireFile( new File( dir, model + ".html" ), HunkIO.UTF8 );
                    clearMBSpecs();
                    extractAudioChannels( rawMBSpecs, new Pattern[] { AUDIO_CHANNEL_FINDER } );
                    extractFormFactor( rawMBSpecs, FORM_FACTOR_FINDER, 1 );
                    extractIde( rawMBSpecs, new Pattern[] { IDE_FINDER } );
                    extractMemoryType( rawMBSpecs, MEMORY_TYPE_FINDER );
                    extractMaxGig( rawMBSpecs, MAX_GIG_FINDER );
                    extractRamSpeedMHz( rawMBSpecs, RAM_SPEED_MHZ_FINDER, RAM_SPEED_MHZ_SPLITTER );
                    extractSata2( rawMBSpecs, new Pattern[] { SATA2_FINDER } );
                    extractSata3( rawMBSpecs, new Pattern[] { SATA3_FINDER } );
                    // get socket from database
                    socketFetcher.setString( 1, model );
                    socketFetcher.setString( 2, revision );
                    final ResultSet rs = socketFetcher.executeQuery();
                    try
                    {
                        // fail with a readable message rather than reading from an empty result
                        if ( !rs.next() )
                        {
                            throw new SQLException(
                                    "No mboards row for model " + model + " revision " + revision );
                        }
                        // the socket column holds the SocketType ordinal
                        socket = SocketType.values()[ rs.getInt( 1 ) ];
                    }
                    finally
                    {
                        rs.close();
                    }
                    extractUSB( rawMBSpecs,
                                new Pattern[] { USB2_TOTAL_FINDER },
                                new Pattern[] { USB2_REAR_FINDER },
                                new Pattern[] { USB2_INTERNAL_FINDER_1, USB2_INTERNAL_FINDER_2 },
                                new Pattern[] { USB3_TOTAL_FINDER },
                                new Pattern[] { USB3_REAR_FINDER },
                                new Pattern[] { USB3_INTERNAL_FINDER } );
                    extractVideo( rawMBSpecs,
                                  new Pattern[] { VIDEO_FINDER_1, VIDEO_FINDER_2, VIDEO_FINDER_3, VIDEO_FINDER_4 } );
                    validateVideo( rawMBSpecs, VIDEO_VALIDATOR );
                    // no extractWatts since no info available.
                    dumpExtracts();
                    updateMBFields();
                }
            }
            catch ( EOFException e )
            {
                // expected: the read loop has no other exit, end of the csv file ends it
                out.println( incomplete + " incomplete records" );
            }
            finally
            {
                r.close();
            }
        }
        finally
        {
            socketFetcher.close();
        }
    }
    finally
    {
        closeDatabase();
    }
}
/**
* Command-line entry point. As written it runs only extractSpecs(), which works on
* spec pages downloaded earlier. The other import steps are kept as commented-out
* calls so that a step can be enabled by hand.
*
* @param args not used
*
* @throws java.io.IOException if cannot read mb pages.
* @throws java.sql.SQLException if can't write to database
*/
public static void main( final String[] args ) throws IOException, SQLException
{
// steps of the import; uncomment the one to run
// fetchAllMBs();
// skeleton();
// applySockets();
extractSpecs();
// oddDucks();
}
}
|