summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David A. Madore <david+git@madore.org> 2011-08-28 01:14:40 +0200
committer David A. Madore <david+git@madore.org> 2011-08-28 01:14:40 +0200
commit 0003925549ea009bb0a99c225b5d1992ccd8e7af (patch)
tree 62e6e5d30ba2d216a4d1a4e0e0b06aaf8aadc3ba
parent dd50b6000a13e9010c7688fa4b328fdebd323783 (diff)
download damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.tar.gz
damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.tar.bz2
damlengine-0003925549ea009bb0a99c225b5d1992ccd8e7af.zip
Preliminary code to populate a PostgreSQL database with weblog entries.
-rw-r--r-- org/madore/damlengine/DamlEngine.java 28
-rw-r--r-- org/madore/damlengine/WeblogPopulate.java 152
-rw-r--r-- org/madore/damlengine/cmdlines 2
-rw-r--r-- org/madore/damlengine/weblog-database.sql 20
4 files changed, 200 insertions, 2 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java
index 69403f7..0837bd3 100644
--- a/org/madore/damlengine/DamlEngine.java
+++ b/org/madore/damlengine/DamlEngine.java
@@ -1,6 +1,8 @@
package org.madore.damlengine;
import java.io.OutputStreamWriter;
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
@@ -9,10 +11,34 @@ import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
public final class DamlEngine {
- public static final String XML_NS = "http://www.w3.org/XML/1998/namespace";
+ public static final String XML_NS = XMLConstants.XML_NS_URI;
public static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
public static final String DAML_NS = "http://www.madore.org/~david/NS/daml/";
+    /**
+     * Fixed prefix-to-namespace table handed to the XPath engine.
+     *
+     * This mapping is used for XPath resolution only; it plays no part
+     * in parsing the document.  Only the prefix-to-URI direction is
+     * implemented: the two reverse lookups always throw
+     * {@link UnsupportedOperationException}.
+     */
+    public static final class DamlNSMapping implements NamespaceContext {
+
+        /**
+         * Returns the namespace URI bound to {@code prefix}.
+         *
+         * The empty prefix maps to XHTML, "d" to DAML, and "xml" and
+         * "xmlns" to their standard URIs; any other prefix yields
+         * {@link XMLConstants#NULL_NS_URI}.
+         *
+         * @throws IllegalArgumentException if {@code prefix} is null
+         */
+        public String getNamespaceURI(String prefix) {
+            if ( prefix == null )
+                throw new IllegalArgumentException("getNamespaceURI() called with null prefix");
+            if ( "".equals(prefix) )
+                return XHTML_NS;
+            if ( "d".equals(prefix) )
+                return DAML_NS;
+            if ( "xml".equals(prefix) )
+                return XML_NS;
+            if ( "xmlns".equals(prefix) )
+                return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
+            // Unknown prefix: report "no namespace".
+            return XMLConstants.NULL_NS_URI;
+        }
+
+        /** Reverse lookup is not supported; always throws. */
+        public String getPrefix(String uri) {
+            throw new UnsupportedOperationException("getPrefix() not implemented");
+        }
+
+        /** Reverse lookup is not supported; always throws. */
+        public java.util.Iterator getPrefixes(String uri) {
+            throw new UnsupportedOperationException("getPrefixes() not implemented");
+        }
+    }
+
private DamlEngine() { // Forbid instantiation
throw new AssertionError("DamlEngine cannot be instantiated");
}
diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java
new file mode 100644
index 0000000..0cdc971
--- /dev/null
+++ b/org/madore/damlengine/WeblogPopulate.java
@@ -0,0 +1,152 @@
+package org.madore.damlengine;
+
+import java.util.Properties;
+import java.util.regex.Pattern;
+import java.io.OutputStreamWriter;
+import java.security.MessageDigest;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.xpath.*;
+import org.w3c.dom.*;
+import org.w3c.dom.ls.DOMImplementationLS;
+import org.w3c.dom.ls.LSSerializer;
+import org.w3c.dom.ls.LSOutput;
+// import org.xml.sax.EntityResolver;
+import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
+import org.apache.xerces.dom.DOMImplementationSourceImpl;
+import org.postgresql.Driver;
+
+public final class WeblogPopulate {
+
+ private WeblogPopulate() { // Forbid instantiation
+ throw new AssertionError("WeblogPopulate cannot be instantiated");
+ }
+
+ public static final String toHex(byte[] digest) {
+ StringBuilder sb = new StringBuilder();
+ for ( byte b : digest )
+ sb.append(String.format("%02x", b));
+ return sb.toString();
+ }
+
+ public static void main(String[] args)
+ throws Exception {
+
+ final Resolver resolver = new Resolver();
+ final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl();
+ dbf.setNamespaceAware(true);
+ dbf.setValidating(false);
+ final DocumentBuilder db = dbf.newDocumentBuilder();
+ db.setEntityResolver(resolver);
+
+ if ( args.length == 0 ) {
+ System.err.println("expecting filename as argument");
+ }
+
+ final DOMImplementationSource domisrc
+ = new DOMImplementationSourceImpl();
+ final DOMImplementationLS domi
+ = (DOMImplementationLS)(domisrc.getDOMImplementation("LS"));
+ LSSerializer ser = domi.createLSSerializer();
+ ser.getDomConfig().setParameter("xml-declaration", false);
+
+ MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
+
+ final String dbUrl = "jdbc:postgresql://localhost/weblog";
+ final Properties dbProps = new Properties();
+ dbProps.setProperty("user", "david");
+ dbProps.setProperty("password", "IHATETHISWHYCANTIUSEUNIXDOMAINSOCKETS");
+ dbProps.setProperty("ssl", "true");
+ dbProps.setProperty("sslfactory", "org.postgresql.ssl.NonValidatingFactory");
+ final Connection conn = (new Driver()).connect(dbUrl, dbProps);
+
+ final PreparedStatement checkSt
+ = conn.prepareStatement("SELECT sha1 FROM entries WHERE id=?");
+ final PreparedStatement insertSt
+ = conn.prepareStatement("INSERT INTO entries(id,edate,lang,title,title_xml,content,sha1) VALUES (?,?,?,?,?::xml,?::xml,?)");
+ final PreparedStatement updateSt
+ = conn.prepareStatement("UPDATE entries SET (edate,mdate,lang,title,title_xml,content,sha1)=(?,DEFAULT,?,?,?::xml,?::xml,?) WHERE id=?");
+ final PreparedStatement clearCatSt
+ = conn.prepareStatement("DELETE FROM incat WHERE id=?");
+ final PreparedStatement setCatSt
+ = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)");
+
+ for (String fname : args) {
+ Document doc = db.parse(fname);
+ XPathFactory xpf = XPathFactory.newInstance();
+ XPath xp = xpf.newXPath();
+ xp.setNamespaceContext(new DamlEngine.DamlNSMapping());
+ XPathExpression expr = xp.compile("//d:weblog/d:entry");
+ XPathExpression texpr = xp.compile("d:title");
+ NodeList entries = (NodeList)(expr.evaluate(doc, XPathConstants.NODESET));
+ for ( int i=0 ; i<entries.getLength() ; i++ ) {
+ Element ent = (Element)(entries.item(i));
+ String idStr = ent.getAttributeNS(null, "number");
+ if ( ! Pattern.matches("^\\d{4}$", idStr) )
+ throw new IllegalArgumentException("entry number attribute must be of the form NNNN");
+ int id = Integer.parseInt(idStr);
+ String date = ent.getAttributeNS(null, "date");
+ if ( ! Pattern.matches("^\\d{4}-\\d{2}-\\d{2}$", date) )
+ throw new IllegalArgumentException("entry date attribute must be of the form YYYY-MM-DD");
+ String catStr = ent.getAttributeNS(null, "cat");
+ String[] catList = catStr.split("\\s+");
+ String lang = LangHelper.getLangRec(ent);
+ String content = ser.writeToString(ent);
+ sha1.reset();
+ String digest = toHex(sha1.digest(content.getBytes("UTF-8")));
+
+ checkSt.setInt(1, id);
+ ResultSet checkRes = checkSt.executeQuery();
+ boolean exists = checkRes.next();
+ if ( exists && checkRes.getString(1).equals(digest) )
+ continue;
+ Node titleNode = (Node)(texpr.evaluate(ent, XPathConstants.NODE));
+ String titleTxt = (titleNode != null) ? titleNode.getTextContent() : null;
+ String titleXml = ser.writeToString(titleNode);
+ conn.setAutoCommit(false);
+ if ( exists ) {
+ System.err.println("Updating entry "+id);
+ updateSt.setString(1, date);
+ updateSt.setString(2, lang);
+ updateSt.setString(3, titleTxt);
+ updateSt.setString(4, titleXml);
+ updateSt.setString(5, content);
+ updateSt.setString(6, digest);
+ updateSt.executeUpdate();
+ } else {
+ System.err.println("Registering entry "+id);
+ insertSt.setInt(1, id);
+ insertSt.setString(2, date);
+ insertSt.setString(3, lang);
+ insertSt.setString(4, titleTxt);
+ insertSt.setString(5, titleXml);
+ insertSt.setString(6, content);
+ insertSt.setString(7, digest);
+ insertSt.executeUpdate();
+ }
+ clearCatSt.setInt(1, id);
+ clearCatSt.executeUpdate();
+ for ( String cat : catList ) {
+ if ( ! cat.equals("") ) {
+ setCatSt.setInt(1, id);
+ setCatSt.setString(2, cat);
+ setCatSt.executeUpdate();
+ }
+ }
+ conn.commit();
+ }
+ }
+
+ checkSt.close();
+ insertSt.close();
+ updateSt.close();
+ clearCatSt.close();
+ setCatSt.close();
+ conn.close();
+
+ }
+
+}
diff --git a/org/madore/damlengine/cmdlines b/org/madore/damlengine/cmdlines
index 4db1121..e5be081 100644
--- a/org/madore/damlengine/cmdlines
+++ b/org/madore/damlengine/cmdlines
@@ -1,2 +1,2 @@
-export CLASSPATH=$HOME/java/damlengine:/usr/share/java/xercesImpl.jar:/usr/share/java/xml-resolver-1.2.jar:/usr/share/java/xml-commons-resolver-1.1.jar
+export CLASSPATH=$HOME/java/damlengine:/usr/share/java/xercesImpl.jar:/usr/share/java/xml-resolver-1.2.jar:/usr/share/java/xml-commons-resolver-1.1.jar:/usr/share/java/postgresql-jdbc3.jar
java org.madore.damlengine.DamlEngine some/file.daml
diff --git a/org/madore/damlengine/weblog-database.sql b/org/madore/damlengine/weblog-database.sql
new file mode 100644
index 0000000..ab4146d
--- /dev/null
+++ b/org/madore/damlengine/weblog-database.sql
@@ -0,0 +1,20 @@
+-- Schema for the weblog database that WeblogPopulate writes to.
+-- Session time zone: offset 0 from UTC.
+SET TIME ZONE 0;
+-- One row per weblog entry.
+CREATE TABLE entries (
+ -- Entry number (WeblogPopulate takes it from the entry's "number" attribute).
+ id integer PRIMARY KEY ,
+ -- Entry date, kept as text; WeblogPopulate only accepts YYYY-MM-DD.
+ edate text NOT NULL ,
+ -- Set by default when the row is inserted; WeblogPopulate never writes it.
+ cdate timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP ,
+ -- Reset to its default by WeblogPopulate's UPDATE statement.
+ mdate timestamp with time zone NOT NULL DEFAULT CURRENT_TIMESTAMP ,
+ lang text ,
+ -- Text content of the entry's title element; nullable.
+ title text ,
+ -- Serialized title element; nullable.
+ title_xml xml ,
+ -- Serialized entry element.
+ content xml NOT NULL,
+ -- Hex SHA-1 of the serialized content; WeblogPopulate compares it to
+ -- decide whether an entry needs rewriting.
+ sha1 text NOT NULL
+) ;
+CREATE INDEX entries_edate_key ON entries ( edate ) ;
+-- Entry-to-category links: one row per (entry, category code) pair.
+-- Rows disappear automatically when the referenced entry is deleted.
+CREATE TABLE incat (
+ id integer NOT NULL ,
+ code text NOT NULL ,
+ FOREIGN KEY ( id ) REFERENCES entries ( id ) ON DELETE CASCADE
+) ;
+CREATE INDEX incat_id_key ON incat ( id ) ;
+CREATE INDEX incat_code_key ON incat ( code ) ;