From 0003925549ea009bb0a99c225b5d1992ccd8e7af Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Sun, 28 Aug 2011 01:14:40 +0200 Subject: Preliminary code to populate a PostgreSQL database with weblog entries. --- org/madore/damlengine/WeblogPopulate.java | 152 ++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 org/madore/damlengine/WeblogPopulate.java (limited to 'org/madore/damlengine/WeblogPopulate.java') diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java new file mode 100644 index 0000000..0cdc971 --- /dev/null +++ b/org/madore/damlengine/WeblogPopulate.java @@ -0,0 +1,152 @@ +package org.madore.damlengine; + +import java.util.Properties; +import java.util.regex.Pattern; +import java.io.OutputStreamWriter; +import java.security.MessageDigest; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.xpath.*; +import org.w3c.dom.*; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSSerializer; +import org.w3c.dom.ls.LSOutput; +// import org.xml.sax.EntityResolver; +import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; +import org.apache.xerces.dom.DOMImplementationSourceImpl; +import org.postgresql.Driver; + +public final class WeblogPopulate { + /** Private constructor that always throws AssertionError, so no instance of this class can ever be created. */ + private WeblogPopulate() { // Forbid instantiation + throw new AssertionError("WeblogPopulate cannot be instantiated"); + } + /** Returns the bytes of digest rendered as one string of lowercase hexadecimal, two zero-padded digits per byte, in array order; an empty array gives an empty string. */ + public static final String toHex(byte[] digest) { + StringBuilder sb = new StringBuilder(); + for ( byte b : digest ) + sb.append(String.format("%02x", b)); + return sb.toString(); + } + + public static void main(String[] args) + throws Exception { + + final Resolver resolver = new Resolver(); + final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl(); + dbf.setNamespaceAware(true); + dbf.setValidating(false); + final DocumentBuilder 
db = dbf.newDocumentBuilder(); + db.setEntityResolver(resolver); + + if ( args.length == 0 ) { + System.err.println("expecting filename as argument"); + } + + final DOMImplementationSource domisrc + = new DOMImplementationSourceImpl(); + final DOMImplementationLS domi + = (DOMImplementationLS)(domisrc.getDOMImplementation("LS")); + LSSerializer ser = domi.createLSSerializer(); + ser.getDomConfig().setParameter("xml-declaration", false); + + MessageDigest sha1 = MessageDigest.getInstance("SHA-1"); + + final String dbUrl = "jdbc:postgresql://localhost/weblog"; + final Properties dbProps = new Properties(); + dbProps.setProperty("user", "david"); + dbProps.setProperty("password", "IHATETHISWHYCANTIUSEUNIXDOMAINSOCKETS"); + dbProps.setProperty("ssl", "true"); + dbProps.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); + final Connection conn = (new Driver()).connect(dbUrl, dbProps); + + final PreparedStatement checkSt + = conn.prepareStatement("SELECT sha1 FROM entries WHERE id=?"); + final PreparedStatement insertSt + = conn.prepareStatement("INSERT INTO entries(id,edate,lang,title,title_xml,content,sha1) VALUES (?,?,?,?,?::xml,?::xml,?)"); + final PreparedStatement updateSt + = conn.prepareStatement("UPDATE entries SET (edate,mdate,lang,title,title_xml,content,sha1)=(?,DEFAULT,?,?,?::xml,?::xml,?) WHERE id=?"); + final PreparedStatement clearCatSt + = conn.prepareStatement("DELETE FROM incat WHERE id=?"); + final PreparedStatement setCatSt + = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)"); + + for (String fname : args) { + Document doc = db.parse(fname); + XPathFactory xpf = XPathFactory.newInstance(); + XPath xp = xpf.newXPath(); + xp.setNamespaceContext(new DamlEngine.DamlNSMapping()); + XPathExpression expr = xp.compile("//d:weblog/d:entry"); + XPathExpression texpr = xp.compile("d:title"); + NodeList entries = (NodeList)(expr.evaluate(doc, XPathConstants.NODESET)); + for ( int i=0 ; i