From 0003925549ea009bb0a99c225b5d1992ccd8e7af Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Sun, 28 Aug 2011 01:14:40 +0200 Subject: Preliminary code to populate a PostgreSQL database with weblog entries. --- org/madore/damlengine/DamlEngine.java | 28 +++++- org/madore/damlengine/WeblogPopulate.java | 152 ++++++++++++++++++++++++++++++ org/madore/damlengine/cmdlines | 2 +- org/madore/damlengine/weblog-database.sql | 20 ++++ 4 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 org/madore/damlengine/WeblogPopulate.java create mode 100644 org/madore/damlengine/weblog-database.sql diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java index 69403f7..0837bd3 100644 --- a/org/madore/damlengine/DamlEngine.java +++ b/org/madore/damlengine/DamlEngine.java @@ -1,6 +1,8 @@ package org.madore.damlengine; import java.io.OutputStreamWriter; +import javax.xml.XMLConstants; +import javax.xml.namespace.NamespaceContext; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import org.w3c.dom.Document; @@ -9,10 +11,34 @@ import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; public final class DamlEngine { - public static final String XML_NS = "http://www.w3.org/XML/1998/namespace"; + public static final String XML_NS = XMLConstants.XML_NS_URI; public static final String XHTML_NS = "http://www.w3.org/1999/xhtml"; public static final String DAML_NS = "http://www.madore.org/~david/NS/daml/"; + public static final class DamlNSMapping implements NamespaceContext { + // This is used for XPath resolution (_not_ for parsing the document). + public String getNamespaceURI(String prefix) { + if ( prefix == null ) + throw new IllegalArgumentException("getNamespaceURI() called with null prefix"); + else if ( prefix.equals("") ) + return XHTML_NS; + else if ( prefix.equals("d") ) + return DAML_NS; + else if ( prefix.equals("xml") ) + return XML_NS; + else if ( prefix.equals("xmlns") ) + return XMLConstants.XMLNS_ATTRIBUTE_NS_URI; + else + return XMLConstants.NULL_NS_URI; + } + public String getPrefix(String uri) { + throw new UnsupportedOperationException("getPrefix() not implemented"); + } + public java.util.Iterator getPrefixes(String uri) { + throw new UnsupportedOperationException("getPrefixes() not implemented"); + } + } + private DamlEngine() { // Forbid instantiation throw new AssertionError("DamlEngine cannot be instantiated"); } diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java new file mode 100644 index 0000000..0cdc971 --- /dev/null +++ b/org/madore/damlengine/WeblogPopulate.java @@ -0,0 +1,152 @@ +package org.madore.damlengine; + +import java.util.Properties; +import java.util.regex.Pattern; +import java.io.OutputStreamWriter; +import java.security.MessageDigest; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.xpath.*; +import org.w3c.dom.*; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSSerializer; +import org.w3c.dom.ls.LSOutput; +// import org.xml.sax.EntityResolver; +import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; +import org.apache.xerces.dom.DOMImplementationSourceImpl; +import org.postgresql.Driver; + +public final class WeblogPopulate { + + private WeblogPopulate() { // Forbid instantiation + throw new AssertionError("WeblogPopulate cannot be instantiated"); + } + + public static final String toHex(byte[] digest) { + StringBuilder sb = new StringBuilder(); + for ( byte b : digest ) + sb.append(String.format("%02x", b)); + return sb.toString(); + } + + public static void main(String[] args) + throws Exception { + + final Resolver resolver = new Resolver(); + final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl(); + dbf.setNamespaceAware(true); + dbf.setValidating(false); + final DocumentBuilder db = dbf.newDocumentBuilder(); + db.setEntityResolver(resolver); + + if ( args.length == 0 ) { + System.err.println("expecting filename as argument"); + } + + final DOMImplementationSource domisrc + = new DOMImplementationSourceImpl(); + final DOMImplementationLS domi + = (DOMImplementationLS)(domisrc.getDOMImplementation("LS")); + LSSerializer ser = domi.createLSSerializer(); + ser.getDomConfig().setParameter("xml-declaration", false); + + MessageDigest sha1 = MessageDigest.getInstance("SHA-1"); + + final String dbUrl = "jdbc:postgresql://localhost/weblog"; + final Properties dbProps = new Properties(); + dbProps.setProperty("user", "david"); + dbProps.setProperty("password", "IHATETHISWHYCANTIUSEUNIXDOMAINSOCKETS"); + dbProps.setProperty("ssl", "true"); + dbProps.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory"); + final Connection conn = (new Driver()).connect(dbUrl, dbProps); + + final PreparedStatement checkSt + = conn.prepareStatement("SELECT sha1 FROM entries WHERE id=?"); + final PreparedStatement insertSt + = conn.prepareStatement("INSERT INTO entries(id,edate,lang,title,title_xml,content,sha1) VALUES (?,?,?,?,?::xml,?::xml,?)"); + final PreparedStatement updateSt + = conn.prepareStatement("UPDATE entries SET (edate,mdate,lang,title,title_xml,content,sha1)=(?,DEFAULT,?,?,?::xml,?::xml,?) WHERE id=?"); + final PreparedStatement clearCatSt + = conn.prepareStatement("DELETE FROM incat WHERE id=?"); + final PreparedStatement setCatSt + = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)"); + + for (String fname : args) { + Document doc = db.parse(fname); + XPathFactory xpf = XPathFactory.newInstance(); + XPath xp = xpf.newXPath(); + xp.setNamespaceContext(new DamlEngine.DamlNSMapping()); + XPathExpression expr = xp.compile("//d:weblog/d:entry"); + XPathExpression texpr = xp.compile("d:title"); + NodeList entries = (NodeList)(expr.evaluate(doc, XPathConstants.NODESET)); + for ( int i=0 ; i