/* * [ImportAsus.java] * * Summary: Import all data about motherboards from the Asus Website. * * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.7+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2011-02-14 initial version */ package com.mindprod.mother; import com.mindprod.common17.BigDate; import com.mindprod.csv.CSVReader; import com.mindprod.htmlmacros.support.Configuration; import com.mindprod.htmlmacros.support.ConfigurationForMindprod; import com.mindprod.http.Get; import com.mindprod.hunkio.HunkIO; import java.io.BufferedReader; import java.io.EOFException; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.net.URL; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.lang.System.*; /** * Import all data about motherboards from the Asus Website. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2011-02-14 initial version * @since 2011-02-14 */ public class ImportAsus extends ImportManufacturer { /** * where master list of all Asus motherboards are */ private static final String[] INDEX_URLS = { "AMD AM3", "http://www.Asus.com/products/pl-mb-list_ajax.aspx?p=1&par=2&val=2&lgc=&tp=0&ss=0" }; /** * mindprod configuration */ private static final Configuration configuration; private static final File sourceDir; /** * looks for Audio channels */ private static final Pattern AUDIO_CHANNEL_FINDER = Pattern.compile( "([\\.\\d]+)[\\- ]channel", Pattern.CASE_INSENSITIVE ); /** * looks for info about the form factor */ private static final Pattern FORM_FACTOR_FINDER = Pattern.compile( "Form Factor\\s*([\\p{Alnum}\\- ]+?)\\s*(?:Form Factor)?\\s*
[inchx 0-9]*\\(\\s*([0-9\\.]+)" + "\\s*cm x ([0-9\\.]+)\\s*cm", Pattern.CASE_INSENSITIVE ); // Form Factor ATX Form Factor
12 inch x 8 inch ( 30.5 cm x 20.3 cm ) /** * looks for count of IDE ports */ private static final Pattern IDE_FINDER = Pattern.compile( "(\\d{1,2}) x IDE[E]*", Pattern.CASE_INSENSITIVE ); // >Up to 12 USB 2.0/1.1 ports (8 on the back panel, 4 via the USB brackets connected to the internal USB headers) /** * looks for max ram capacity */ private static final Pattern MAX_GIG_FINDER = Pattern.compile( "Max\\.\\s*(\\d{1,3})\\s*GB", Pattern.CASE_INSENSITIVE ); // Max. 16GB // Max. 8 GB /** * look for links like this: * "name": "GA-880GMA-UD2H (rev. 2.0)", * "value": "3424", */ private static final Pattern MB_FINDER = Pattern.compile( "\"name\": \"([\\p{Alnum} \\-]+) \\(rev\\. ([\\p{Digit}\\.]+)\\)\"," + "\\s+\"value\": \"(\\p{Digit}+)\"", Pattern.CASE_INSENSITIVE ); /** * looks for info about RAM */ private static final Pattern MEMORY_TYPE_FINDER = Pattern.compile( "\\s+(DDR2|DDR3)\\s+", Pattern.CASE_INSENSITIVE ); /** * looks for ram speed */ private static final Pattern RAM_SPEED_MHZ_FINDER = Pattern.compile( "\\s+(?:DDR2|DDR3)\\s+(\\d{3,4}\\s*\\(O\\.C\\.\\)|\\d{3,4}\\*|\\d{3,4}|\\s*/\\s*)+", Pattern.CASE_INSENSITIVE ); // DDR2 1066*/800/667 ECC // DDR3 2000(O.C.)/1866(O.C.)/1800(O.C.)/1600(O.C.)/1333/1066 Hz private static final Pattern RAM_SPEED_MHZ_SPLITTER = Pattern.compile( "\\s*/\\s*", Pattern.CASE_INSENSITIVE ); private static final Pattern SATA2_FINDER = Pattern.compile( "(\\d{1,2})[ x]*SATA\\s*3\\s*Gb/s", Pattern.CASE_INSENSITIVE ); private static final Pattern SATA3_FINDER = Pattern.compile( "(\\d{1,2})[ x]*SATA\\s*6\\s*Gb/s", Pattern.CASE_INSENSITIVE ); // // 10 x USB 2.0 port(s) (4 at Back Pannel, Black,6 at Mid-Board) /** * looks for USB2Internal ports capacity */ private static final Pattern USB2_INTERNAL_FINDER_1 = Pattern.compile( "USB 2.0\\p{Print}+?(\\d+) via the USB", Pattern.CASE_INSENSITIVE ); /** * looks for USB2Internal ports capacity */ private static final Pattern USB2_INTERNAL_FINDER_2 = Pattern.compile( "additional (\\d{1,2}) USB 2\\.0/(?:1\\.1)? ports by cables", Pattern.CASE_INSENSITIVE ); // Up to 8 USB 3.0/2.0 ports (4 on the back panel, 4 via the USB brackets /** * looks for USB2Rear ports capacity */ private static final Pattern USB2_REAR_FINDER = Pattern.compile( "USB 2\\.0\\p{Print}+?\\((\\d+)(?: ports)? on the back panel", Pattern.CASE_INSENSITIVE ); /** * looks for total USB2 ports capacity */ private static final Pattern USB2_TOTAL_FINDER = Pattern.compile( "(\\d{1,2})[ x]+USB 2.0 ports", Pattern.CASE_INSENSITIVE ); //
ASUS MyLogo 2
Back Panel I/O Ports 1 x PS/2 Keyboard
1 x PS/2 Mouse //
1 x LAN(RJ45) port
4 x USB 2.0/1.1
1 x COM port
6 -Channel Audio I/O
1 x Parallel // Port Internal I/O Connectors 3 x USB connectors support additional 6 USB 2.0/1 // .1 ports
1 x IDE connector
4 x SATA connectors
1 x CPU Fan connector
1 x Speaker // connector
1 x CD audio in connector // 10 x USB 2.0 port(s) (4 at Back Pannel, Black,6 at Mid-Board) /** * looks for USB3Internal ports capacity */ private static final Pattern USB3_INTERNAL_FINDER = Pattern.compile( "USB 3\\.0\\p{Print}+?(\\d+) via the USB", Pattern.CASE_INSENSITIVE ); /** * looks for USB3Rear ports capacity */ private static final Pattern USB3_REAR_FINDER = Pattern.compile( "USB 3\\.0\\p{Print}+?(\\d+)(?: ports)? on the back panel", Pattern.CASE_INSENSITIVE ); /** * looks for USB3 ports capacity */ private static final Pattern USB3_TOTAL_FINDER = Pattern.compile( "(\\d+)[x ]*USB 3\\.0", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_FINDER_1 = Pattern.compile( "GRAPHICSº (?:|On Chip|Integrated|)" + "*[\\( ]*(\\p{Alnum}+)", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_FINDER_2 = Pattern.compile( "(H55|H57|H61|H67|945GC) Express", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_FINDER_3 = Pattern.compile( "(630a|690G|740G|760G|780G|785G|790GX|880G|890GX|GeForce 8100|GeForce 8200|nForce 750a|AMD Radeon HD 6310)", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_FINDER_4 = Pattern.compile( "(Multi-Graphics Technology)", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_VALIDATOR = Pattern.compile( "(?!graphics (technology|slot|card))graphics|onboard graphics|video|HDMI", Pattern.CASE_INSENSITIVE ); static { configuration = new ConfigurationForMindprod(); // combine dirsWithMacros and dirsWithIncludes into dirsToProcess; sourceDir = new File( configuration.getSourceDirWithSlashes() ); } /** * collect all motherboard specs from the Asus website * * @throws java.io.IOException if urls malformed or I/O trouble * @throws java.sql.SQLException if cannot write to database */ public static void fetchAllMBs() throws IOException, SQLException { manufacturer = Manufacturer.ASUS; // no parms needed conn = connect(); for ( int i = 1; i < INDEX_URLS.length; i += 2 ) { final String indexURL = INDEX_URLS[ i ]; final Get get = new Get(); final String htmlListOfAsusMBs = get.send( new URL( indexURL ), Get.UTF8 ); final int responseCode = get.getResponseCode(); if ( responseCode >= 300 || htmlListOfAsusMBs == null ) { err.println( "Could not fetch a master index Asus page " + indexURL ); System.exit( 1 ); } final Matcher m = MB_FINDER.matcher( htmlListOfAsusMBs ); // Matchers are used both for matching and // finding. while ( m.find() ) { assert m.groupCount() == 3 : "bug in regex"; model = m.group( 1 ); revision = m.group( 2 ); // links to individual mbs look like this // http://www.Asus.com/products/product-page.aspx?pid=3748#sp manufacturerPartNo = m.group( 3 ); // e.g. http://www.Asus.com/products/product-page.aspx?pid=2849#sp out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," + " false" ); // learning URL // "http://www.Asus.com/products/product-page.aspx?pid=" + 3748 + "#sp"; // mining URL // "http://www.Asus.com/products/product-page_ajax.aspx?&t=sp&pid=" + 3748 + "&dlt=&cg=2&ck=2"; final Get miningPage = new Get(); final String rawMBSpecs = miningPage.send( new URL( manufacturer.miningURL( manufacturerPartNo ) ), Get.UTF8 ); final int responseCode2 = get.getResponseCode(); if ( responseCode2 >= 300 || rawMBSpecs == null ) { err.println( "Could not fetch Asus motherboard page " + model ); System.exit( 1 ); } HunkIO.writeEntireFile( new File( "E:/mb/asus/" + model + ".html" ), rawMBSpecs, HunkIO.UTF8 ); } // end find loop } } /** * Put the skeleton info into the DB. * * @throws java.io.IOException if urls malformed or I/O trouble * @throws java.sql.SQLException if cannot write to database */ public static void skeleton() throws IOException, SQLException { manufacturer = Manufacturer.ASUS; lastUpdated = BigDate.localToday(); conn = connect(); final PreparedStatement inserter = conn.prepareStatement( "INSERT INTO mboards( manufacturer, model, revision, manufacturerPartNo, lastUpdated) VALUES(?,?,?,?," + "?);" ); CSVReader r = new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/asus.csv" ) ) ) ); try { while ( true ) { // manufacturer, model, revision, mfr no, verified, last-update r.skip( 1 ); // mfr model = r.get(); revision = r.get(); manufacturerPartNo = r.get(); r.skipToNextLine(); // record our findings in SQL out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," + " false, " + lastUpdated ); inserter.setInt( 1, manufacturer.ordinal() ); inserter.setString( 2, model ); inserter.setString( 3, revision ); inserter.setString( 4, manufacturerPartNo ); inserter.setInt( 5, lastUpdated.ordinal() ); inserter.executeUpdate(); } } catch ( EOFException e ) { r.close(); conn.close(); } } /** * Put the socket info into the the DB. * * @throws java.io.IOException if urls malformed or I/O trouble * @throws java.sql.SQLException if cannot write to database */ private static void applySockets() throws IOException, SQLException { // the socket info is not not the spec page. It it depends on grouping of MBs. manufacturer = Manufacturer.ASUS; lastUpdated = BigDate.localToday(); conn = connect(); final PreparedStatement updater = conn.prepareStatement( "UPDATE mboards " + "SET socket=?, lastUpdated=? " + "WHERE manufacturer=? AND model=?;" ); for ( int i = 0; i < INDEX_URLS.length; i += 2 ) { socket = SocketType.valueOfAlias( INDEX_URLS[ i ] ); final String indexURL = INDEX_URLS[ i + 1 ]; final Get get = new Get(); final String htmlListOfAsusMBs = get.send( new URL( indexURL ), Get.UTF8 ); final int responseCode = get.getResponseCode(); if ( responseCode >= 300 || htmlListOfAsusMBs == null ) { err.println( "Could not fetch a master index Asus page " + indexURL ); System.exit( 1 ); } final Matcher m = MB_FINDER.matcher( htmlListOfAsusMBs ); // Matchers are used both for matching and // finding. while ( m.find() ) { assert m.groupCount() == 3 : "bug in regex"; model = m.group( 1 ); revision = m.group( 2 ); // links to individual mbs look like this // http://www.Asus.com/products/product-page.aspx?pid=3748#sp manufacturerPartNo = m.group( 3 ); // e.g. http://www.Asus.com/products/product-page.aspx?pid=2849#sp // record our findings in SQL out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," + " false, " + lastUpdated + ", " + socket ); updater.setInt( 1, socket.ordinal() ); updater.setInt( 2, lastUpdated.ordinal() ); updater.setInt( 3, manufacturer.ordinal() ); updater.setString( 4, model ); updater.executeUpdate(); } } conn.close(); } /** * extract specs from previously downloaded raw specs * * @throws java.sql.SQLException if cannot write to SQL * @throws java.io.IOException if cannot read mb page. */ private static void extractSpecs() throws SQLException, IOException { manufacturer = Manufacturer.ASUS; lastUpdated = BigDate.localToday(); initDatabase(); final PreparedStatement socketFetcher = conn.prepareStatement( "SELECT socket FROM mboards WHERE model=? AND revision=?" ); final File dir = new File( "E:/mb/asus/" ); CSVReader r = new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/asus.csv" ) ) ) ); try { while ( true ) { r.skip( 1 ); // mfr model = r.get(); revision = r.get(); manufacturerPartNo = r.get(); r.skipToNextLine(); String rawMBSpecs = HunkIO.readEntireFile( new File( dir, model + ".html" ), HunkIO.UTF8 ); clearMBSpecs(); extractAudioChannels( rawMBSpecs, new Pattern[] { AUDIO_CHANNEL_FINDER } ); extractFormFactor( rawMBSpecs, FORM_FACTOR_FINDER, 1 ); extractIde( rawMBSpecs, new Pattern[] { IDE_FINDER } ); extractMemoryType( rawMBSpecs, MEMORY_TYPE_FINDER ); extractMaxGig( rawMBSpecs, MAX_GIG_FINDER ); extractRamSpeedMHz( rawMBSpecs, RAM_SPEED_MHZ_FINDER, RAM_SPEED_MHZ_SPLITTER ); extractSata2( rawMBSpecs, new Pattern[] { SATA2_FINDER } ); extractSata3( rawMBSpecs, new Pattern[] { SATA3_FINDER } ); // get socket from database socketFetcher.setString( 1, model ); socketFetcher.setString( 2, revision ); socketFetcher.executeQuery(); final ResultSet rs = socketFetcher.getResultSet(); rs.next(); socket = SocketType.values()[ rs.getInt( 1 ) ]; rs.close(); extractUSB( rawMBSpecs, new Pattern[] { USB2_TOTAL_FINDER }, new Pattern[] { USB2_REAR_FINDER }, new Pattern[] { USB2_INTERNAL_FINDER_1, USB2_INTERNAL_FINDER_2 }, new Pattern[] { USB3_TOTAL_FINDER }, new Pattern[] { USB3_REAR_FINDER }, new Pattern[] { USB3_INTERNAL_FINDER } ); extractVideo( rawMBSpecs, new Pattern[] { VIDEO_FINDER_1, VIDEO_FINDER_2, VIDEO_FINDER_3, VIDEO_FINDER_4 } ); validateVideo( rawMBSpecs, VIDEO_VALIDATOR ); // no extractWatts since no info available. dumpExtracts(); updateMBFields(); } } catch ( EOFException e ) { out.println( incomplete + " incomplete records" ); r.close(); closeDatabase(); } } /** * extract specs from Asus website previously downloaded. * * @param args not used * * @throws java.io.IOException if can read mb pages. * @throws java.sql.SQLException if can't write to database */ public static void main( final String[] args ) throws IOException, SQLException { // fetchAllMBs(); // skeleton(); // applySockets(); extractSpecs(); // oddDucks(); } }