/* * [OldImportBioStar.java] * * Summary: Import all data about motherboards from the BioStar Website. * * Copyright: (c) 2011-2017 Roedy Green, Canadian Mind Products, http://mindprod.com * * Licence: This software may be copied and used freely for any purpose but military. * http://mindprod.com/contact/nonmil.html * * Requires: JDK 1.6+ * * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/ * * Version History: * 1.0 2011-02-14 initial version */ package com.mindprod.mother; import com.mindprod.common11.BigDate; import com.mindprod.common15.STA; import com.mindprod.csv.CSVReader; import com.mindprod.filter.ExtensionListFilter; import com.mindprod.http.Get; import com.mindprod.hunkio.HunkIO; import java.io.BufferedReader; import java.io.EOFException; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.net.URL; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; import java.util.regex.Matcher; import java.util.regex.Pattern; import static java.lang.System.err; import static java.lang.System.out; /** * Import all data about motherboards from the BioStar Website. * * @author Roedy Green, Canadian Mind Products * @version 1.0 2011-02-14 initial version * @since 2011-02-14 */ public class OldImportBioStar extends ImportManufacturer { // ------------------------------ CONSTANTS ------------------------------ /** * where master list of all Biostar motherboards is. */ private static final String INDEX_URL = "http://www.biostar-usa.com/app/en-us/mb/index.php"; /** * looks for max ram capacity */ private static final Pattern MAX_GIG_FINDER_S = Pattern.compile( "(?:Supports up to |DDR2:)([0-9]+)\\s*GB", Pattern.CASE_INSENSITIVE ); // Supports up to 2GB Memory /** * looks for info about RAM */ private static final Pattern MEMORY_TYPE_FINDER_S = Pattern.compile( ">(?:Support Dual Channel|Support Triple Channel|supported|Support|DIMM| x) (DDR2|DDR3|DDR|SDRAM)", Pattern.CASE_INSENSITIVE ); /** * looks for ram speed */ private static final Pattern RAM_SPEED_MHZ_FINDER_S = Pattern.compile( "(?:DDR2|DDR3|DDR|DIMM PC)\\s*([0-9/\\(\\)OC\\.]+)", Pattern.CASE_INSENSITIVE ); /** * split apart DDR ram speeds */ private static final Pattern SLASH_SPLITTER = Pattern.compile( "/" ); /** * looks for info about the form factor * >Micro ATX Form Factor Dimension: 23.5cm X 18.2cm */ private static Pattern FORM_FACTOR_FINDER_S = Pattern.compile( ">([\\p{Alnum}\\- ]+?) Form Factor Dimension: ([0-9\\.]+)\\s*(?:cm)*\\s*X\\s*([0-9\\.]+)\\s*(?:cm)*", Pattern.CASE_INSENSITIVE ); // ATX Form Factor Dimension: /** * look for links like this: * "name": "GA-880GMA-UD2H (rev. 2.0)", * "value": "3424", */ private static Pattern MB_FINDER_S = Pattern.compile( "\"name\": \"([\\p{Alnum} \\-]+) \\(rev\\. ([\\p{Digit}\\.]+)\\)\",\\s+\"value\": \"(\\p{Digit}+)\"" ); /** * found out what type of CPU socket, effectively types of CPU supported */ private static Pattern SOCKET_FINDER_S = Pattern.compile( "Support for Socket (AM3|AM2\\+|AM2|AMD BGA FT1|Intel 1366|Intel 1155|Intel 1156|Intel 775|Intel BGA 559|Intel 479|Intel 478|Intel 437|Intel IM|Intel 423|Intel 370) processors" ); // -------------------------- PUBLIC STATIC METHODS -------------------------- /** * collect all motherboard data from the Biostar website * * * * @throws java.io.IOException if urls malformed or I/O trouble */ public static void fetchAllMBs() throws IOException { manufacturer = Manufacturer.BIOSTAR; final Get get = new Get(); // no parms needed final String htmlListOfBiostarMBs = get.send( new URL( INDEX_URL ), "UTF-8" ); final int responseCode = get.getResponseCode(); if ( responseCode >= 300 || htmlListOfBiostarMBs == null ) { err.println( "Could not fetch master Biostar page " + INDEX_URL ); System.exit( 1 ); } final Matcher m = MB_FINDER_S.matcher( htmlListOfBiostarMBs ); // Matchers are used both for matching and finding. while ( m.find() ) { assert m.groupCount() == 3 : "bug in regex"; model = m.group( 3 ); manufacturerPartNo = m.group( 2 ); out.println( manufacturer + ", " + model + ", " + manufacturerPartNo ); final Get oneMB = new Get(); final String rawMBSpecs = oneMB.send( new URL( Manufacturer.BIOSTAR.miningURL( manufacturerPartNo ) ), "UTF-8" ); final int responseCode2 = oneMB.getResponseCode(); if ( responseCode2 >= 300 || rawMBSpecs == null ) { err.println( "Could not fetch Biostar motherboard page " + model ); System.exit( 1 ); } HunkIO.writeEntireFile( "E:/mb/biostar/" + model + ".html", rawMBSpecs, "UTF-8" ); } } /** * Put the skeleton info into the DB. * * @throws java.io.IOException if urls malformed or I/O trouble */ public static void skeleton() throws IOException, SQLException { manufacturer = Manufacturer.BIOSTAR; lastUpdated = BigDate.localToday(); conn = connect(); final PreparedStatement inserter = conn.prepareStatement( "INSERT INTO mboards( manufacturer, model, manufacturerPartNo, lastUpdated) VALUES(?,?,?,?);" ); CSVReader r = new CSVReader( new BufferedReader( new FileReader( "E:/com/mindprod/mother/biostar.csv" ) ) ); try { while ( true ) { r.skip( 1 ); model = r.get(); manufacturerPartNo = r.get(); r.skipToNextLine(); // record our findings in SQL out.println( manufacturer.ordinal() + " [" + model + "] [" + manufacturerPartNo + "] " + lastUpdated ); inserter.setInt( 1, manufacturer.ordinal() ); inserter.setString( 2, model ); inserter.setString( 3, manufacturerPartNo ); inserter.setInt( 4, lastUpdated.ordinal() ); inserter.executeUpdate(); } } catch ( EOFException e ) { r.close(); conn.close(); } } // -------------------------- STATIC METHODS -------------------------- /** * handle oddities, typos on website etc. */ private static void OddDucksS() throws SQLException { manufacturer = Manufacturer.BIOSTAR; lastUpdated = BigDate.localToday(); conn = connect(); final Statement updater = conn.createStatement(); updater.executeUpdate( "UPDATE mboards SET maxGig=8 WHERE manufacturer=" + manufacturer.ordinal() + " AND model='TP43E Combo';" ); updater.executeUpdate( "UPDATE mboards SET formFactor=" + FormFactor.ATX.ordinal() + " WHERE manufacturer=" + manufacturer.ordinal() + " AND model='U8568 Pro';" ); // missing info taken from http://www.biostar-usa.com/mbdetails.asp?model=P4TAW%20EXTREME updater.executeUpdate( "UPDATE mboards SET formFactor=" + FormFactor.ATX.ordinal() + ",widthInCm=24.5,heightInCm=29.3 WHERE manufacturer=" + manufacturer.ordinal() + " AND model='P4TAW Extreme';" ); // missing info taken from http://www.biostar-usa.com/mbdetails.asp?model=p4tdq+pro updater.executeUpdate( "UPDATE mboards SET maxGig=2,ramSpeedMHz=2700,memoryType=" + MemoryType.DDR.ordinal() + " WHERE manufacturer=" + manufacturer.ordinal() + " AND model='P4TDQ Pro';" ); // missing info taken from http://www.biostar-usa.com/mbdetails.asp?model=m7sxg // usually http://www.biostar-usa.com/app/en-us/mb/content.php?S_ID=334 updater.executeUpdate( "UPDATE mboards SET formFactor=" + FormFactor.ATX.ordinal() + ",widthInCm=24.4,heightInCm=24.4" + " WHERE manufacturer=" + manufacturer.ordinal() + " AND model='M7SXG';" ); } /** * extract information about the form factor * * @param mdData motherboard page data from Biostar website. * @param mbName name of motherboard model we are mining. */ private static void extractFormFactor( String mdData, String mbName ) { Matcher m = FORM_FACTOR_FINDER_S.matcher( mdData ); if ( m.find() ) { formFactor = FormFactor.valueOfAlias( m.group( 1 ) ); try { widthInCm = Double.parseDouble( STA.trimTrailing( m.group( 2 ), '.' ) ); heightInCm = Double.parseDouble( STA.trimTrailing( m.group( 3 ), '.' ) ); } catch ( NumberFormatException e ) { err.println( "malformed widthInCm [" + m.group( 2 ) + "] and heightInCm [" + m.group( 3 ) + "] for " + mbName ); } } else { err.println( "failed to find form factor fields for " + mbName ); } } /** * extract maxGig from Biostar motherboard page * * @param mdData motherboard page data from Biostar website. * @param mbName name of motherboard model we are mining. */ private static void extractMaxGigS( String mdData, String mbName ) { Matcher m = MAX_GIG_FINDER_S.matcher( mdData ); if ( m.find() ) { maxGig = Integer.parseInt( m.group( 1 ) ); } else { err.println( "regex failed to find maxGig field for " + mbName ); } } /** * extract memory type from Biostar motherboard page * * @param mdData motherboard page data from Biostar website. * @param mbName name of motherboard model we are mining. */ private static void extractMemoryTypeS( String mdData, String mbName ) { Matcher m = MEMORY_TYPE_FINDER_S.matcher( mdData ); if ( m.find() ) { memoryType = MemoryType.valueOfAlias( STA.trimTrailing( m.group( 1 ), '-' ) ); } else { err.println( "regex failed to find memoryType field for " + mbName ); } } /** * extract maxGig from Biostar motherboard page * * @param mdData motherboard page data from Biostar website. * @param mbName name of motherboard model we are mining. */ private static void extractRamSpeedMHzS( String mdData, String mbName ) { Matcher m = RAM_SPEED_MHZ_FINDER_S.matcher( mdData ); if ( m.find() ) { final String multi = m.group( 1 ); // out.println( "candidates [" + multi + "] " + mbName ); final String[] candidates = SLASH_SPLITTER.split( multi ); int bestSpeed = 0; for ( String candidate : candidates ) { // out.println( "candidate [" + candidate + "]" ); if ( candidate == null ) { continue; } candidate = candidate.toUpperCase(); // ignore overclocked if ( candidate.indexOf( "(" ) >= 0 ) { continue; } if ( candidate.indexOf( "O.C." ) >= 0 ) { continue; } if ( candidate.indexOf( "OC" ) >= 0 ) { continue; } try { int speed = Integer.parseInt( candidate ); if ( speed > bestSpeed ) { bestSpeed = speed; } } catch ( NumberFormatException e ) { err.println( "malformed entry [" + candidate + "] for ramSpeedMHz field for " + mbName ); } } ramSpeedMHz = bestSpeed; } else { err.println( "regex failed to find ramSpeedMHz field for " + mbName ); } } /** * extract information about the form factor * * @param mdData motherboard page data from Biostar website. * @param mbName name of motherboard model we are mining. */ private static void extractSocketS( String mdData, String mbName ) { Matcher m = SOCKET_FINDER_S.matcher( mdData ); if ( m.find() ) { socket = SocketType.valueOfAlias( m.group( 1 ) ); } else { err.println( "failed to find socket for " + mbName ); } } /** * extract specs from previously downloaded raw specs */ private static void extractSpecs() throws SQLException, IOException { manufacturer = Manufacturer.BIOSTAR; lastUpdated = BigDate.localToday(); conn = connect(); final PreparedStatement updater = conn.prepareStatement( "UPDATE mboards " + "SET formFactor=?, widthInCm=?, heightInCm=?, memoryType=?, maxGig=?, ramSpeedMHz=?, lastUpdated=? " + "WHERE manufacturer=? AND model=?;" ); File dir = new File( "E:/mb/biostar/" ); String[] mbs = dir.list( new ExtensionListFilter( "html" ) ); for ( String filename : mbs ) { String rawMBSpecs = HunkIO.readEntireFile( new File( dir, filename ), "UTF-8" ); String model = STA.chopTrailingString( filename, ".html" ); extractFormFactor( rawMBSpecs, model ); extractMemoryTypeS( rawMBSpecs, model ); extractMaxGigS( rawMBSpecs, model ); extractRamSpeedMHzS( rawMBSpecs, model ); out.println( manufacturer.ordinal() + " [" + model + "] " + formFactor + " " + widthInCm + "cm " + heightInCm + "cm " + memoryType + " " + maxGig + "GB " + ramSpeedMHz + "MHz" ); // record our findings in SQL updater.setInt( 1, formFactor.ordinal() ); updater.setDouble( 2, widthInCm ); updater.setDouble( 3, heightInCm ); updater.setInt( 4, memoryType.ordinal() ); updater.setInt( 5, maxGig ); updater.setInt( 6, ramSpeedMHz ); updater.setInt( 7, lastUpdated.ordinal() ); updater.setInt( 8, manufacturer.ordinal() ); updater.setString( 9, model ); updater.executeUpdate(); } OddDucksS(); conn.close(); } // --------------------------- main() method --------------------------- /** * collect all motherboard data from the Biostar website * * @param args not used * * @throws java.io.IOException if urls malformed or I/O trouble */ public static void main( final String[] args ) throws IOException, SQLException { // fetchAllMBs(); // skeleton(); extractSpecs(); } }