/*
 * [ScrapeTable.java]
 *
 * Summary: Scrapes HTML table to extract data from table.
 *
 * Copyright: (c) 1999-2017 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.8+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 2007-06-07
 */
package com.mindprod.americantax;

import org.w3c.dom.Document;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;

import static java.lang.System.*;

/**
 * Scrapes HTML table to extract data from table.
 * <p>
 * HTML should be conditioned with TagSoup and HTMLTidy first to balance tags properly.
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 1.0 2007-06-07
 * @since 2007-06-07
 */
public class ScrapeTable {
    /**
     * Reads one xhtml file and parses it, purely as XML, into a DOM tree.
     * <p>
     * The table-extraction step itself is not implemented yet; the parsed
     * document is currently unused. Parser configuration, parse and I/O
     * failures are reported on stderr as a stack trace.
     *
     * @param args args[0] is the name of the xhtml file to parse.
     *             If it is missing, a usage message is printed on stderr
     *             and the program exits with status 1.
     */
    public static void main( String[] args ) {
        // Report a missing argument plainly instead of dying with an
        // ArrayIndexOutOfBoundsException on args[ 0 ].
        if ( args.length < 1 ) {
            err.println( "Usage: java com.mindprod.americantax.ScrapeTable file.xhtml" );
            exit( 1 );
        }

        try {
            // NOTE(review): the factory is used with its default configuration.
            // If input can come from an untrusted source, consider restricting
            // external entity/DTD processing. XHTML normally carries a DOCTYPE,
            // so rejecting DOCTYPEs outright would likely refuse typical input.
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();

            // parse the xhtml file on the command line purely as XML and get a DOM tree representation.
            @SuppressWarnings( { "UnusedAssignment" } )
            Document document = parser.parse( new File( args[ 0 ] ) );

            // etc. -- extraction of the table data from the DOM tree goes here.
        } catch ( ParserConfigurationException | SAXException | IOException e ) {
            // All three failure kinds are reported identically:
            // a stack trace on stderr framed by blank lines.
            err.println();
            e.printStackTrace( err );
            err.println();
        }
    } // end main
} // end ScrapeTable