diff options
author | David A. Madore <david+git@madore.org> | 2011-08-28 01:14:40 +0200 |
---|---|---|
committer | David A. Madore <david+git@madore.org> | 2011-08-28 01:14:40 +0200 |
commit | 0003925549ea009bb0a99c225b5d1992ccd8e7af (patch) | |
tree | 62e6e5d30ba2d216a4d1a4e0e0b06aaf8aadc3ba /org | |
parent | dd50b6000a13e9010c7688fa4b328fdebd323783 (diff) | |
download | damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.tar.gz damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.tar.bz2 damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.zip |
Preliminary code to populate a PostgreSQL database with weblog entries.
Diffstat (limited to 'org')
-rw-r--r-- | org/madore/damlengine/DamlEngine.java | 28 | ||||
-rw-r--r-- | org/madore/damlengine/WeblogPopulate.java | 221 | ||||
-rw-r--r-- | org/madore/damlengine/cmdlines | 2 | ||||
-rw-r--r-- | org/madore/damlengine/weblog-database.sql | 20 |
4 files changed, 269 insertions, 2 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java
index 69403f7..0837bd3 100644
--- a/org/madore/damlengine/DamlEngine.java
+++ b/org/madore/damlengine/DamlEngine.java
@@ -1,6 +1,8 @@
 package org.madore.damlengine;
 
 import java.io.OutputStreamWriter;
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.DocumentBuilder;
 import org.w3c.dom.Document;
@@ -9,10 +11,34 @@ import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
 
 public final class DamlEngine {
 
-    public static final String XML_NS = "http://www.w3.org/XML/1998/namespace";
+    public static final String XML_NS = XMLConstants.XML_NS_URI;
     public static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
     public static final String DAML_NS = "http://www.madore.org/~david/NS/daml/";
 
+    public static final class DamlNSMapping implements NamespaceContext {
+        // This is used for XPath resolution (_not_ for parsing the document).
+        public String getNamespaceURI(String prefix) {
+            if ( prefix == null )
+                throw new IllegalArgumentException("getNamespaceURI() called with null prefix");
+            else if ( prefix.equals("") )
+                return XHTML_NS;
+            else if ( prefix.equals("d") )
+                return DAML_NS;
+            else if ( prefix.equals("xml") )
+                return XML_NS;
+            else if ( prefix.equals("xmlns") )
+                return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
+            else
+                return XMLConstants.NULL_NS_URI;
+        }
+        public String getPrefix(String uri) {
+            throw new UnsupportedOperationException("getPrefix() not implemented");
+        }
+        public java.util.Iterator getPrefixes(String uri) {
+            throw new UnsupportedOperationException("getPrefixes() not implemented");
+        }
+    }
+
     private DamlEngine() { // Forbid instantiation
         throw new AssertionError("DamlEngine cannot be instantiated");
     }
diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java
new file mode 100644
index 0000000..0cdc971
--- /dev/null
+++ b/org/madore/damlengine/WeblogPopulate.java
@@ -0,0 +1,221 @@
+package org.madore.damlengine;
+
+import java.util.Properties;
+import java.util.regex.Pattern;
+import java.io.OutputStreamWriter;
+import java.security.MessageDigest;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.xpath.*;
+import org.w3c.dom.*;
+import org.w3c.dom.ls.DOMImplementationLS;
+import org.w3c.dom.ls.LSSerializer;
+import org.w3c.dom.ls.LSOutput;
+// import org.xml.sax.EntityResolver;
+import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
+import org.apache.xerces.dom.DOMImplementationSourceImpl;
+import org.postgresql.Driver;
+
+/**
+ * Command-line tool that copies the weblog entries found in DAML files
+ * into the PostgreSQL tables <code>entries</code> and <code>incat</code>.
+ */
+public final class WeblogPopulate {
+
+    /** Required form of an entry's <code>number</code> attribute. */
+    private static final Pattern ID_PATTERN = Pattern.compile("^\\d{4}$");
+
+    /** Required form of an entry's <code>date</code> attribute. */
+    private static final Pattern DATE_PATTERN
+        = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$");
+
+    private WeblogPopulate() { // Forbid instantiation
+        throw new AssertionError("WeblogPopulate cannot be instantiated");
+    }
+
+    /**
+     * Formats a byte array as lowercase hexadecimal, two digits per byte.
+     *
+     * @param digest the bytes to format
+     * @return the hexadecimal form of <code>digest</code>
+     */
+    public static final String toHex(byte[] digest) {
+        StringBuilder sb = new StringBuilder(2 * digest.length);
+        for ( byte b : digest )
+            sb.append(String.format("%02x", b));
+        return sb.toString();
+    }
+
+    /**
+     * Parses each file named in <code>args</code> and inserts or updates
+     * every <code>d:weblog/d:entry</code> element found in it.  An entry
+     * whose stored SHA-1 digest equals the digest of its freshly
+     * serialized content is skipped; otherwise the entry row and its
+     * category rows are rewritten and committed together.
+     *
+     * @param args names of the DAML files to load
+     * @throws Exception on any parsing, validation or database failure
+     */
+    public static void main(String[] args)
+        throws Exception {
+
+        if ( args.length == 0 ) {
+            System.err.println("expecting filename as argument");
+            // Nothing to load: stop before opening a database connection.
+            System.exit(1);
+        }
+
+        final Resolver resolver = new Resolver();
+        final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl();
+        dbf.setNamespaceAware(true);
+        dbf.setValidating(false);
+        final DocumentBuilder db = dbf.newDocumentBuilder();
+        db.setEntityResolver(resolver);
+
+        final DOMImplementationSource domisrc
+            = new DOMImplementationSourceImpl();
+        final DOMImplementationLS domi
+            = (DOMImplementationLS)(domisrc.getDOMImplementation("LS"));
+        final LSSerializer ser = domi.createLSSerializer();
+        ser.getDomConfig().setParameter("xml-declaration", false);
+
+        final MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
+
+        // The XPath expressions are the same for every file, so they are
+        // compiled once instead of once per file.
+        final XPath xp = XPathFactory.newInstance().newXPath();
+        xp.setNamespaceContext(new DamlEngine.DamlNSMapping());
+        final XPathExpression expr = xp.compile("//d:weblog/d:entry");
+        final XPathExpression texpr = xp.compile("d:title");
+
+        // NOTE(review): credentials are hard-coded and the SSL factory
+        // named here is "NonValidatingFactory"; consider moving both to
+        // external configuration.
+        final String dbUrl = "jdbc:postgresql://localhost/weblog";
+        final Properties dbProps = new Properties();
+        dbProps.setProperty("user", "david");
+        dbProps.setProperty("password", "IHATETHISWHYCANTIUSEUNIXDOMAINSOCKETS");
+        dbProps.setProperty("ssl", "true");
+        dbProps.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory");
+        final Connection conn = (new Driver()).connect(dbUrl, dbProps);
+
+        try {
+            // Each entry is written and committed as one transaction.
+            conn.setAutoCommit(false);
+
+            final PreparedStatement checkSt
+                = conn.prepareStatement("SELECT sha1 FROM entries WHERE id=?");
+            final PreparedStatement insertSt
+                = conn.prepareStatement("INSERT INTO entries(id,edate,lang,title,title_xml,content,sha1) VALUES (?,?,?,?,?::xml,?::xml,?)");
+            final PreparedStatement updateSt
+                = conn.prepareStatement("UPDATE entries SET (edate,mdate,lang,title,title_xml,content,sha1)=(?,DEFAULT,?,?,?::xml,?::xml,?) WHERE id=?");
+            final PreparedStatement clearCatSt
+                = conn.prepareStatement("DELETE FROM incat WHERE id=?");
+            final PreparedStatement setCatSt
+                = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)");
+
+            for ( String fname : args ) {
+                Document doc = db.parse(fname);
+                NodeList entries = (NodeList)(expr.evaluate(doc, XPathConstants.NODESET));
+                for ( int i=0 ; i<entries.getLength() ; i++ ) {
+                    Element ent = (Element)(entries.item(i));
+                    String idStr = ent.getAttributeNS(null, "number");
+                    if ( ! ID_PATTERN.matcher(idStr).matches() )
+                        throw new IllegalArgumentException("entry number attribute must be of the form NNNN");
+                    int id = Integer.parseInt(idStr);
+                    String date = ent.getAttributeNS(null, "date");
+                    if ( ! DATE_PATTERN.matcher(date).matches() )
+                        throw new IllegalArgumentException("entry date attribute must be of the form YYYY-MM-DD");
+                    String catStr = ent.getAttributeNS(null, "cat");
+                    String[] catList = catStr.split("\\s+");
+                    String lang = LangHelper.getLangRec(ent);
+                    String content = ser.writeToString(ent);
+                    sha1.reset();
+                    String digest = toHex(sha1.digest(content.getBytes("UTF-8")));
+
+                    // Look up the digest currently stored for this id.
+                    checkSt.setInt(1, id);
+                    ResultSet checkRes = checkSt.executeQuery();
+                    boolean exists;
+                    boolean unchanged;
+                    try {
+                        exists = checkRes.next();
+                        unchanged = exists && checkRes.getString(1).equals(digest);
+                    } finally {
+                        checkRes.close();
+                    }
+                    if ( unchanged )
+                        continue;
+
+                    Node titleNode = (Node)(texpr.evaluate(ent, XPathConstants.NODE));
+                    // Without a d:title child both title values stay null;
+                    // the serializer is only called on an actual node.
+                    String titleTxt = null;
+                    String titleXml = null;
+                    if ( titleNode != null ) {
+                        titleTxt = titleNode.getTextContent();
+                        titleXml = ser.writeToString(titleNode);
+                    }
+
+                    if ( exists ) {
+                        System.err.println("Updating entry "+id);
+                        updateSt.setString(1, date);
+                        updateSt.setString(2, lang);
+                        updateSt.setString(3, titleTxt);
+                        updateSt.setString(4, titleXml);
+                        updateSt.setString(5, content);
+                        updateSt.setString(6, digest);
+                        // Seventh placeholder is the "WHERE id=?" key.
+                        updateSt.setInt(7, id);
+                        updateSt.executeUpdate();
+                    } else {
+                        System.err.println("Registering entry "+id);
+                        insertSt.setInt(1, id);
+                        insertSt.setString(2, date);
+                        insertSt.setString(3, lang);
+                        insertSt.setString(4, titleTxt);
+                        insertSt.setString(5, titleXml);
+                        insertSt.setString(6, content);
+                        insertSt.setString(7, digest);
+                        insertSt.executeUpdate();
+                    }
+
+                    // Replace the entry's category rows.
+                    clearCatSt.setInt(1, id);
+                    clearCatSt.executeUpdate();
+                    for ( String cat : catList ) {
+                        if ( ! cat.equals("") ) {
+                            setCatSt.setInt(1, id);
+                            setCatSt.setString(2, cat);
+                            setCatSt.executeUpdate();
+                        }
+                    }
+                    conn.commit();
+                }
+            }
+
+            checkSt.close();
+            insertSt.close();
+            updateSt.close();
+            clearCatSt.close();
+            setCatSt.close();
+        } catch ( Exception e ) {
+            // Undo the entry that was being written, then report the
+            // original failure even if the rollback itself fails.
+            try {
+                conn.rollback();
+            } catch ( SQLException ignored ) {
+                // Deliberately ignored so that e is what propagates.
+            }
+            throw e;
+        } finally {
+            conn.close();
+        }
+
+    }
+
+}
diff --git a/org/madore/damlengine/cmdlines b/org/madore/damlengine/cmdlines
index 4db1121..e5be081 100644
--- a/org/madore/damlengine/cmdlines
+++ b/org/madore/damlengine/cmdlines
@@ -1,2 +1,2 @@
-export CLASSPATH=$HOME/java/damlengine:/usr/share/java/xercesImpl.jar:/usr/share/java/xml-resolver-1.2.jar:/usr/share/java/xml-commons-resolver-1.1.jar
+export CLASSPATH=$HOME/java/damlengine:/usr/share/java/xercesImpl.jar:/usr/share/java/xml-resolver-1.2.jar:/usr/share/java/xml-commons-resolver-1.1.jar:/usr/share/java/postgresql-jdbc3.jar
 java org.madore.damlengine.DamlEngine some/file.daml
diff --git a/org/madore/damlengine/weblog-database.sql b/org/madore/damlengine/weblog-database.sql
new file mode 100644
index 0000000..ab4146d
--- /dev/null
+++ b/org/madore/damlengine/weblog-database.sql
@@ -0,0 +1,20 @@
+SET TIME ZONE 0;
+CREATE TABLE entries (
+  id integer PRIMARY KEY ,
+  edate text NOT NULL ,
+  cdate timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP ,
+  mdate timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP ,
+  lang text ,
+  title text ,
+  title_xml xml ,
+  content xml NOT NULL,
+  sha1 text NOT NULL
+) ;
+CREATE INDEX entries_edate_key ON entries ( edate ) ;
+CREATE TABLE incat (
+  id integer NOT NULL ,
+  code text NOT NULL ,
+  FOREIGN KEY ( id ) REFERENCES entries ( id ) ON DELETE CASCADE
+) ;
+CREATE INDEX incat_id_key ON incat ( id ) ;
+CREATE INDEX incat_code_key ON incat ( code ) ;