summary refs log tree commit diff stats
path: root/org/madore/damlengine/WeblogPopulate.java
diff options
context:
space:
mode:
author: David A. Madore <david+git@madore.org> 2011-08-28 01:14:40 +0200
committer: David A. Madore <david+git@madore.org> 2011-08-28 01:14:40 +0200
commit: 0003925549ea009bb0a99c225b5d1992ccd8e7af (patch)
tree: 62e6e5d30ba2d216a4d1a4e0e0b06aaf8aadc3ba /org/madore/damlengine/WeblogPopulate.java
parent: dd50b6000a13e9010c7688fa4b328fdebd323783 (diff)
download: damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.tar.gz
damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.tar.bz2
damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.zip
Preliminary code to populate a PostgreSQL database with weblog entries.
Diffstat (limited to 'org/madore/damlengine/WeblogPopulate.java')
-rw-r--r-- org/madore/damlengine/WeblogPopulate.java 152
1 files changed, 152 insertions, 0 deletions
diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java
new file mode 100644
index 0000000..0cdc971
--- /dev/null
+++ b/org/madore/damlengine/WeblogPopulate.java
@@ -0,0 +1,152 @@
+package org.madore.damlengine;
+
+import java.util.Properties;
+import java.util.regex.Pattern;
+import java.io.OutputStreamWriter;
+import java.security.MessageDigest;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.xpath.*;
+import org.w3c.dom.*;
+import org.w3c.dom.ls.DOMImplementationLS;
+import org.w3c.dom.ls.LSSerializer;
+import org.w3c.dom.ls.LSOutput;
+// import org.xml.sax.EntityResolver;
+import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
+import org.apache.xerces.dom.DOMImplementationSourceImpl;
+import org.postgresql.Driver;
+
+/**
+ * Command-line tool that loads weblog entries from XML files into a
+ * PostgreSQL database.
+ *
+ * <p>Each file named on the command line is parsed, every
+ * {@code d:entry} element under a {@code d:weblog} element is serialized,
+ * and the corresponding row of the {@code entries} table is inserted or
+ * updated.  Entries whose stored SHA-1 digest equals the digest of the
+ * freshly serialized content are skipped.  The {@code incat} rows of each
+ * written entry are rebuilt from its {@code cat} attribute.</p>
+ */
+public final class WeblogPopulate {
+
+    /** Required form of an entry's {@code number} attribute: exactly four digits. */
+    private static final Pattern ENTRY_NUMBER_PATTERN
+        = Pattern.compile("^\\d{4}$");
+
+    /**
+     * Required form of an entry's {@code date} attribute: YYYY-MM-DD.
+     * Only the digit layout is checked, not whether the date exists.
+     */
+    private static final Pattern ENTRY_DATE_PATTERN
+        = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$");
+
+    private WeblogPopulate() { // Forbid instantiation
+        throw new AssertionError("WeblogPopulate cannot be instantiated");
+    }
+
+    /**
+     * Renders a byte array as lowercase hexadecimal, two digits per byte.
+     *
+     * @param digest the bytes to render (must not be null)
+     * @return a string of length {@code 2 * digest.length}
+     */
+    public static final String toHex(byte[] digest) {
+        StringBuilder sb = new StringBuilder(2 * digest.length);
+        for ( byte b : digest ) {
+            sb.append(String.format("%02x", b));
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Closes a statement, reporting (rather than propagating) any failure
+     * so that it cannot mask an exception already in flight.
+     */
+    private static void closeQuietly(PreparedStatement st) {
+        if ( st == null ) {
+            return;
+        }
+        try {
+            st.close();
+        } catch (java.sql.SQLException e) {
+            System.err.println("warning: failed to close statement: "+e);
+        }
+    }
+
+    /**
+     * Rolls back the current transaction, reporting (rather than
+     * propagating) any failure so that the original error is preserved.
+     */
+    private static void rollbackQuietly(Connection conn) {
+        try {
+            conn.rollback();
+        } catch (java.sql.SQLException e) {
+            System.err.println("warning: rollback failed: "+e);
+        }
+    }
+
+    /**
+     * Entry point: populates the database from the XML files given as
+     * arguments.
+     *
+     * @param args names of the XML files to read; at least one is expected
+     * @throws IllegalArgumentException if an entry's {@code number} or
+     *         {@code date} attribute does not have the required form
+     * @throws Exception on any parsing, XPath, digest or database failure
+     */
+    public static void main(String[] args)
+        throws Exception {
+
+        if ( args.length == 0 ) {
+            System.err.println("expecting filename as argument");
+            // Nothing to process: stop before opening a database connection.
+            return;
+        }
+
+        final Resolver resolver = new Resolver();
+        final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl();
+        dbf.setNamespaceAware(true);
+        dbf.setValidating(false);
+        final DocumentBuilder db = dbf.newDocumentBuilder();
+        db.setEntityResolver(resolver);
+
+        final DOMImplementationSource domisrc
+            = new DOMImplementationSourceImpl();
+        final DOMImplementationLS domi
+            = (DOMImplementationLS)(domisrc.getDOMImplementation("LS"));
+        final LSSerializer ser = domi.createLSSerializer();
+        ser.getDomConfig().setParameter("xml-declaration", false);
+
+        final MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
+
+        // The XPath expressions do not depend on the document, so they are
+        // compiled once and reused for every file and entry.
+        final XPath xp = XPathFactory.newInstance().newXPath();
+        xp.setNamespaceContext(new DamlEngine.DamlNSMapping());
+        final XPathExpression entryExpr = xp.compile("//d:weblog/d:entry");
+        final XPathExpression titleExpr = xp.compile("d:title");
+
+        final String dbUrl = "jdbc:postgresql://localhost/weblog";
+        final Properties dbProps = new Properties();
+        dbProps.setProperty("user", "david");
+        // NOTE(review): credentials are hard-coded in source; consider
+        // loading them from configuration instead.
+        dbProps.setProperty("password", "IHATETHISWHYCANTIUSEUNIXDOMAINSOCKETS");
+        dbProps.setProperty("ssl", "true");
+        // NOTE(review): the factory name suggests the server certificate is
+        // not validated -- confirm this is acceptable for this deployment.
+        dbProps.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory");
+        final Connection conn = (new Driver()).connect(dbUrl, dbProps);
+
+        PreparedStatement checkSt = null;
+        PreparedStatement insertSt = null;
+        PreparedStatement updateSt = null;
+        PreparedStatement clearCatSt = null;
+        PreparedStatement setCatSt = null;
+        try {
+            // Each entry (row plus its categories) is written as one
+            // transaction, committed explicitly below.
+            conn.setAutoCommit(false);
+
+            checkSt = conn.prepareStatement("SELECT sha1 FROM entries WHERE id=?");
+            insertSt = conn.prepareStatement("INSERT INTO entries(id,edate,lang,title,title_xml,content,sha1) VALUES (?,?,?,?,?::xml,?::xml,?)");
+            updateSt = conn.prepareStatement("UPDATE entries SET (edate,mdate,lang,title,title_xml,content,sha1)=(?,DEFAULT,?,?,?::xml,?::xml,?) WHERE id=?");
+            clearCatSt = conn.prepareStatement("DELETE FROM incat WHERE id=?");
+            setCatSt = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)");
+
+            for ( String fname : args ) {
+                Document doc = db.parse(fname);
+                NodeList entries = (NodeList)(entryExpr.evaluate(doc, XPathConstants.NODESET));
+                for ( int i=0 ; i<entries.getLength() ; i++ ) {
+                    Element ent = (Element)(entries.item(i));
+                    String idStr = ent.getAttributeNS(null, "number");
+                    if ( ! ENTRY_NUMBER_PATTERN.matcher(idStr).matches() ) {
+                        throw new IllegalArgumentException("entry number attribute must be of the form NNNN");
+                    }
+                    int id = Integer.parseInt(idStr);
+                    String date = ent.getAttributeNS(null, "date");
+                    if ( ! ENTRY_DATE_PATTERN.matcher(date).matches() ) {
+                        throw new IllegalArgumentException("entry date attribute must be of the form YYYY-MM-DD");
+                    }
+                    String catStr = ent.getAttributeNS(null, "cat");
+                    String[] catList = catStr.split("\\s+");
+                    String lang = LangHelper.getLangRec(ent);
+                    String content = ser.writeToString(ent);
+                    sha1.reset();
+                    String digest = toHex(sha1.digest(content.getBytes("UTF-8")));
+
+                    // Look up the stored digest to decide between skip,
+                    // update and insert.
+                    checkSt.setInt(1, id);
+                    boolean exists;
+                    boolean unchanged;
+                    ResultSet checkRes = checkSt.executeQuery();
+                    try {
+                        exists = checkRes.next();
+                        // digest is never null, so comparing in this
+                        // direction tolerates a NULL sha1 column.
+                        unchanged = exists && digest.equals(checkRes.getString(1));
+                    } finally {
+                        checkRes.close();
+                    }
+                    if ( unchanged ) {
+                        continue;
+                    }
+
+                    Node titleNode = (Node)(titleExpr.evaluate(ent, XPathConstants.NODE));
+                    // An entry without a d:title stores null for both the
+                    // plain-text and the XML form of the title.
+                    String titleTxt = (titleNode != null) ? titleNode.getTextContent() : null;
+                    String titleXml = (titleNode != null) ? ser.writeToString(titleNode) : null;
+
+                    boolean committed = false;
+                    try {
+                        if ( exists ) {
+                            System.err.println("Updating entry "+id);
+                            updateSt.setString(1, date);
+                            updateSt.setString(2, lang);
+                            updateSt.setString(3, titleTxt);
+                            updateSt.setString(4, titleXml);
+                            updateSt.setString(5, content);
+                            updateSt.setString(6, digest);
+                            // Seventh placeholder is the WHERE id=? clause.
+                            updateSt.setInt(7, id);
+                            updateSt.executeUpdate();
+                        } else {
+                            System.err.println("Registering entry "+id);
+                            insertSt.setInt(1, id);
+                            insertSt.setString(2, date);
+                            insertSt.setString(3, lang);
+                            insertSt.setString(4, titleTxt);
+                            insertSt.setString(5, titleXml);
+                            insertSt.setString(6, content);
+                            insertSt.setString(7, digest);
+                            insertSt.executeUpdate();
+                        }
+                        // Replace the entry's category links wholesale.
+                        clearCatSt.setInt(1, id);
+                        clearCatSt.executeUpdate();
+                        for ( String cat : catList ) {
+                            // split() yields an empty string for an empty
+                            // attribute or leading whitespace; skip it.
+                            if ( ! cat.equals("") ) {
+                                setCatSt.setInt(1, id);
+                                setCatSt.setString(2, cat);
+                                setCatSt.executeUpdate();
+                            }
+                        }
+                        conn.commit();
+                        committed = true;
+                    } finally {
+                        if ( ! committed ) {
+                            // Do not leave a half-written entry behind.
+                            rollbackQuietly(conn);
+                        }
+                    }
+                }
+            }
+        } finally {
+            closeQuietly(checkSt);
+            closeQuietly(insertSt);
+            closeQuietly(updateSt);
+            closeQuietly(clearCatSt);
+            closeQuietly(setCatSt);
+            conn.close();
+        }
+
+    }
+
+}