/* * [ImportECS.java] * * Summary: Import all data about motherboards from the ECS Website. * * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.7+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2011-03-30 initial version */ package com.mindprod.mother; import com.mindprod.common17.BigDate; import com.mindprod.csv.CSVReader; import com.mindprod.htmlmacros.support.Configuration; import com.mindprod.htmlmacros.support.ConfigurationForMindprod; import com.mindprod.http.Get; import com.mindprod.hunkio.HunkIO; import java.io.BufferedReader; import java.io.EOFException; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.net.URL; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.lang.System.*; /** * Import all data about motherboards from the ECS Website. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2011-03-30 initial version * @since 2011-03-30 */ public class ImportECS extends ImportManufacturer { /** * where master list of all ECS motherboards are */ private static final String[] INDEX_URLS = { "http://www.ecsusa.com/ECSWebSite/Product/Product_Model" + ".aspx?CategoryID=1&TypeID=1&MenuID=18&childid=M_7&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=22&MenuID=22&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model" + ".aspx?CategoryID=1&TypeID=32&MenuID=16&childid=M_7&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=35&MenuID=21&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=43&MenuID=20&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model" + ".aspx?CategoryID=1&TypeID=46&MenuID=17&childid=M_7&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model" + ".aspx?CategoryID=1&TypeID=65&MenuID=14&childid=M_7&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model.aspx?CategoryID=1&TypeID=68&MenuID=19&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model" + ".aspx?CategoryID=1&TypeID=72&MenuID=15&childid=M_7&LanID=0", "http://www.ecsusa.com/ECSWebSite/Product/Product_Model" + ".aspx?CategoryID=1&TypeID=86&MenuID=103&childid=M_7&LanID=0", // type 88 on tw site but not US site }; /** * mindprod configuration */ private static final Configuration configuration; private static final File sourceDir; /** * looks for Audio channels */ private static final Pattern AUDIO_CHANNEL_FINDER = Pattern.compile( "([\\.\\d]+)[\\- ]ch", Pattern.CASE_INSENSITIVE ); /** * looks for info about the form factor */ private static final Pattern FORM_FACTOR_FINDER = Pattern.compile( " ([\\p{Alnum}\\- ]+?)(?: Size| Form Factor)?[, \\(]*(\\d+)(?:mm)?[\\*x](\\d+)(?:mm)", Pattern.CASE_INSENSITIVE ); /** * looks for count of IDE ports */ private static final Pattern IDE_FINDER = Pattern.compile( "(\\d{1,2}) x [E]*IDE", Pattern.CASE_INSENSITIVE ); /** * looks for max ram capacity */ private static final Pattern MAX_GIG_FINDER = Pattern.compile( "support up to (\\d{1,2})\\s*GB", Pattern.CASE_INSENSITIVE ); /** * find links to mb pages. */ private static final Pattern MB_FINDER = Pattern.compile( "title=\"(\\d+)\" [\\p{Print}&&[^>]]+>([\\p{Alnum}\\-/ ]+?)\\ \\;\\(V([\\p{Alnum}\\.]+)\\)", Pattern.CASE_INSENSITIVE ); /** * looks for info about RAM */ private static final Pattern MEMORY_TYPE_FINDER = Pattern.compile( "(?:channel|pin) (DDR2|DDR3|DDR )", Pattern.CASE_INSENSITIVE ); /** * find menu id in URL for index page */ private static final Pattern MENU_ID_FINDER = Pattern.compile( "&MenuID=(\\d+)&" ); /** * looks for ram speed */ private static final Pattern RAM_SPEED_MHZ_FINDER = Pattern.compile( "(?:)?DDR[23]*(?:)?\\s*(?:up to )?((\\s*/\\s*|\\d{3,4}\\(OC\\)|\\d{3,4}\\(XMP\\)|\\d{3," + "4}O\\.C\\.|OC\\d{3,4}+|||\\d{3,4})+)", Pattern.CASE_INSENSITIVE ); private static final Pattern RAM_SPEED_MHZ_SPLITTER = Pattern.compile( "\\s*/\\s*||", Pattern.CASE_INSENSITIVE ); /** * looks for SATA2 ports capacity */ private static final Pattern SATA2_FINDER_1 = Pattern.compile( "(\\d{1,2})[x ]*Serial ATA(?:II| 3Gb/s)?(?:device|connector)?", Pattern.CASE_INSENSITIVE ); /** * looks for SATA2 ports capacity */ private static final Pattern SATA2_FINDER_2 = Pattern.compile( "(\\d{1,2})[x ]*SATA 3Gb/s connectors", Pattern.CASE_INSENSITIVE ); /** * looks for SATA3 ports capacity */ private static final Pattern SATA3_FINDER = Pattern.compile( "(\\d{1,2})[x ]*Serial ATA 6.0Gb/s devices", Pattern.CASE_INSENSITIVE ); /** * looks for socket */ private static final Pattern SOCKET_FINDER = Pattern.compile( ">socket\\s([\\+ \\p{Alnum}]+)[\\(<]", Pattern.CASE_INSENSITIVE ); /** * looks for USB2Internal ports capacity */ private static final Pattern USB2_INTERNAL_FINDER = Pattern.compile( "(\\d{1,2})[x ]*USB (?:2\\.0 )?headers", Pattern.CASE_INSENSITIVE ); /** * looks for USB2Rear ports capacity */ private static final Pattern USB2_REAR_FINDER = Pattern.compile( "(\\d{1,2})[x ]*USB (?:2\\.0 )?ports", Pattern.CASE_INSENSITIVE ); /** * looks for USB2Internal ports capacity */ private static final Pattern USB3_INTERNAL_FINDER = Pattern.compile( "(\\d{1,2})[x ]*USB 3\\.0 headers", Pattern.CASE_INSENSITIVE ); /** * looks for USB3Rear ports capacity */ private static final Pattern USB3_REAR_FINDER = Pattern.compile( "(\\d{1,2})[x ]*USB 3\\.0 ports", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_FINDER = Pattern.compile( "GRAPHICSº (?:|On Chip|Integrated|)" + "*[\\( ]*(\\p{Alnum}+)", Pattern.CASE_INSENSITIVE ); /** * looks for integrated Video */ private static final Pattern VIDEO_VALIDATOR = Pattern.compile( "GRAPHICS" ); static { configuration = new ConfigurationForMindprod(); // combine dirsWithMacros and dirsWithIncludes into dirsToProcess; sourceDir = new File( configuration.getSourceDirWithSlashes() ); } /** * collect all motherboard specs from the ECS website * * @throws java.io.IOException if urls malformed or I/O trouble * @throws java.sql.SQLException */ public static void fetchAllMBs() throws IOException, SQLException { manufacturer = Manufacturer.ECS; lastUpdated = BigDate.localToday(); conn = connect(); final PreparedStatement inserter = conn.prepareStatement( "INSERT INTO mboards( manufacturer, model, manufacturerPartNo, revision, lastUpdated) VALUES(?,?,?,?," + "?);" ); for ( String indexURL : INDEX_URLS ) { final Matcher m1 = MENU_ID_FINDER.matcher( indexURL ); String menuID = null; if ( m1.find() ) { menuID = m1.group( 1 ); } else { err.println( " no menuID in " + indexURL ); System.exit( 1 ); } final Get getIndex = new Get(); final String indexContents = getIndex.send( new URL( indexURL ), Get.UTF8 ); final int responseCode = getIndex.getResponseCode(); if ( responseCode >= 300 || indexContents == null ) { err.println( "response: " + responseCode ); err.println( "Could not fetch a motherboard index ECS page " + indexURL ); System.exit( 1 ); } final Matcher m2 = MB_FINDER.matcher( indexContents ); // Matchers are used both for matching and finding. while ( m2.find() ) { manufacturerPartNo = menuID + "-" + m2.group( 1 ); model = m2.group( 2 ).trim(); revision = m2.group( 3 ); final Get getMiningMB = new Get(); final String specificURL = manufacturer.miningURL( manufacturerPartNo ); final String mbSpecsContents = getMiningMB.send( new URL( specificURL ), Get.UTF8 ); final int responseCode2 = getMiningMB.getResponseCode(); if ( responseCode2 >= 300 || mbSpecsContents == null ) { err.println( "response: " + responseCode2 ); err.println( "Could not fetch specific ECS motherboard page " + model + " " + specificURL ); continue; } HunkIO.writeEntireFile( new File( "E:/mb/ecs/" + model.replace( '/', '$' ) + "_" + revision + ".html" ), mbSpecsContents, HunkIO.UTF8 ); // record our findings in SQL out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," + " false, " + lastUpdated ); inserter.setInt( 1, manufacturer.ordinal() ); inserter.setString( 2, model ); inserter.setString( 3, manufacturerPartNo ); inserter.setString( 4, revision ); inserter.setInt( 5, lastUpdated.ordinal() ); inserter.executeUpdate(); } // end find loop } conn.close(); } /** * Put the skeleton info into the DB. * * @throws IOException if urls malformed or I/O trouble * @throws java.sql.SQLException if cannot write to database */ public static void skeleton() throws IOException, SQLException { manufacturer = Manufacturer.ECS; lastUpdated = BigDate.localToday(); conn = connect(); final PreparedStatement inserter = conn.prepareStatement( "INSERT INTO mboards( manufacturer, model, manufacturerPartNo, revision, lastUpdated) VALUES(?,?,?,?," + "?);" ); CSVReader r = new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/ecs.csv" ) ) ) ); try { while ( true ) { // manufacturer, model, revision, mfr no, verified, last-update r.skip( 1 ); // mfr model = r.get(); revision = r.get(); manufacturerPartNo = r.get(); r.skipToNextLine(); // record our findings in SQL out.println( manufacturer.ordinal() + ", " + model + ", " + revision + ", " + manufacturerPartNo + "," + " false, " + lastUpdated ); inserter.setInt( 1, manufacturer.ordinal() ); inserter.setString( 2, model ); inserter.setString( 3, manufacturerPartNo ); inserter.setString( 4, revision ); inserter.setInt( 5, lastUpdated.ordinal() ); inserter.executeUpdate(); } } catch ( EOFException e ) { r.close(); conn.close(); } } /** * extract specs from previously downloaded raw specs * * @throws java.sql.SQLException * @throws java.io.IOException */ private static void extractSpecs() throws SQLException, IOException { manufacturer = Manufacturer.ECS; lastUpdated = BigDate.localToday(); initDatabase(); final File dir = new File( "E:/mb/ecs/" ); CSVReader r = new CSVReader( new BufferedReader( new FileReader( new File( sourceDir, "mother/ecs.csv" ) ) ) ); try { while ( true ) { r.skip( 1 ); // mfr model = r.get(); revision = r.get(); manufacturerPartNo = r.get(); r.skipToNextLine(); String rawMBSpecs = HunkIO.readEntireFile( new File( dir, model.replace( '/', '$' ) + "_" + revision + ".html" ), HunkIO.UTF8 ); clearMBSpecs(); extractAudioChannels( rawMBSpecs, new Pattern[] { AUDIO_CHANNEL_FINDER } ); extractFormFactor( rawMBSpecs, FORM_FACTOR_FINDER, .1 ); extractIde( rawMBSpecs, new Pattern[] { IDE_FINDER } ); extractMemoryType( rawMBSpecs, MEMORY_TYPE_FINDER ); extractMaxGig( rawMBSpecs, MAX_GIG_FINDER ); extractRamSpeedMHz( rawMBSpecs, RAM_SPEED_MHZ_FINDER, RAM_SPEED_MHZ_SPLITTER ); extractSata2( rawMBSpecs, new Pattern[] { SATA2_FINDER_1, SATA2_FINDER_2 } ); extractSata3( rawMBSpecs, new Pattern[] { SATA3_FINDER } ); extractSocket( rawMBSpecs, new Pattern[] { SOCKET_FINDER } ); extractUSB( rawMBSpecs, new Pattern[ 0 ], new Pattern[] { USB2_REAR_FINDER }, new Pattern[] { USB2_INTERNAL_FINDER }, new Pattern[ 0 ], new Pattern[] { USB3_REAR_FINDER }, new Pattern[] { USB3_INTERNAL_FINDER } ); extractVideo( rawMBSpecs, new Pattern[] { VIDEO_FINDER } ); validateVideo( rawMBSpecs, VIDEO_VALIDATOR ); // no extractWatts since no info available. dumpExtracts(); updateMBFields(); } } catch ( EOFException e ) { out.println( incomplete + " incomplete records" ); r.close(); closeDatabase(); } } /** * handle oddities, typos on website etc. */ private static void oddDucks() throws SQLException { manufacturer = Manufacturer.ECS; lastUpdated = BigDate.localToday(); conn = connect(); final Statement updater = conn.createStatement(); updater.executeUpdate( "UPDATE mboards " + "SET maxGig=" + "8" + "WHERE manufacturer=" + manufacturer.ordinal() + " AND model='IC41T-A'" ); final PreparedStatement SocketUpdater = conn.prepareStatement( "UPDATE mboards " + "SET socket=?, lastUpdated=? " + "WHERE manufacturer=? AND model=?;" ); conn.close(); } private static void test() { String s = " Support DDR2 1066/800/667/533/400 DDR2 SDRAM "; Pattern TEST = Pattern.compile( "(?:)?(?:DDR2|DDR3|DDR)(?:)?\\s*((\\d+|\\(OC\\)|/||)+)", Pattern.CASE_INSENSITIVE ); Matcher m = TEST.matcher( s ); if ( m.find() ) { for ( int i = 0; i <= m.groupCount(); i++ ) { out.println( i + " [" + m.group( i ) + "]" ); } } else { out.println( "no find" ); } } /** * extract specs from Gigagbyte website previously downloaded. * * @param args not used * * @throws java.io.IOException * @throws java.sql.SQLException */ public static void main( final String[] args ) throws IOException, SQLException { // test(); // fetchAllMBs(); // skeleton(); extractSpecs(); oddDucks(); } //todo screenscrape the following fields. Export to database. //video // socket }