summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David A. Madore <david+git@madore.org> 2011-09-04 20:19:48 +0200
committer: David A. Madore <david+git@madore.org> 2011-09-04 20:19:48 +0200
commit: 3365a175e1721e0fda7a56ad53e24a7e337c13cf (patch)
tree: 14206f14ba0168f741fe12cffdb476c669824b4d
parent: c17d2165ea20325fc78f95791875ffa4c785e548 (diff)
download: damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.tar.gz
damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.tar.bz2
damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.zip
Eliminate use of DocumentBuilder (use DOM LS everywhere) + use validation to catch undefined entities.
There seems to be no way to catch the undefined entity error (when it is considered a validation error, e.g., when the document has an external subset) other than by turning on validation. So we register an error handler to throw away useless validation errors.
-rw-r--r-- org/madore/damlengine/DamlEngine.java 70
-rw-r--r-- org/madore/damlengine/WeblogPopulate.java 17
-rw-r--r-- org/madore/damlengine/WeblogRSS.java 21
3 files changed, 67 insertions, 41 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java
index c0010b6..7bc81bd 100644
--- a/org/madore/damlengine/DamlEngine.java
+++ b/org/madore/damlengine/DamlEngine.java
@@ -1,5 +1,6 @@
package org.madore.damlengine;
+import java.util.MissingResourceException;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.InputStream;
@@ -12,12 +13,13 @@ import java.io.BufferedReader;
import java.io.PrintStream;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.ParserConfigurationException;
-import org.w3c.dom.Document;
-// import org.xml.sax.EntityResolver;
-import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl;
+import org.w3c.dom.*;
+import org.w3c.dom.ls.DOMImplementationLS;
+import org.w3c.dom.ls.LSParser;
+import org.w3c.dom.ls.LSInput;
+import org.apache.xerces.dom.DOMImplementationSourceImpl;
+import org.apache.xerces.xni.parser.XMLErrorHandler;
+import org.apache.xerces.xni.parser.XMLParseException;
public final class DamlEngine {
@@ -54,19 +56,37 @@ public final class DamlEngine {
}
}
- public static final class GetDocumentBuilder {
- static final DocumentBuilder db;
- static {
- final Resolver resolver = new Resolver();
- final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl();
- dbf.setNamespaceAware(true);
- dbf.setValidating(false);
- try {
- db = dbf.newDocumentBuilder();
- } catch (ParserConfigurationException e) {
- throw new RuntimeException(e);
+ public static class SelectiveErrorHandler implements XMLErrorHandler {
+ public void warning(String domain, String key, XMLParseException exc) {
+ System.err.println("warning: line "+exc.getLineNumber()
+ +": "+exc.getMessage());
+ }
+ public void error(String domain, String key, XMLParseException exc) {
+ if ( domain.equals("http://www.w3.org/TR/1998/REC-xml-19980210")
+ && key.equals("MSG_ELEMENT_NOT_DECLARED") )
+ return;
+ System.err.println("error: line "+exc.getLineNumber()
+ +": "+exc.getMessage());
+ }
+ public void fatalError(String domain, String key, XMLParseException exc) {
+ System.err.println("fatal error: line "+exc.getLineNumber()
+ +": "+exc.getMessage());
+ throw exc;
+ }
+ }
+
+ public static final class IncantDOM {
+ static DOMImplementation domi;
+ public static DOMImplementation getDOMI() {
+ if ( domi == null ) {
+ DOMImplementationSource source
+ = new DOMImplementationSourceImpl();
+ domi = source.getDOMImplementation("XML 3.0 Core 3.0 LS 3.0");
+ if ( domi == null )
+ throw new MissingResourceException("failed to obtain DOM implementation",
+ "org.w3c.dom.ls.DOMImplementationLS", "");
}
- db.setEntityResolver(resolver);
+ return domi;
}
}
@@ -103,9 +123,17 @@ public final class DamlEngine {
Context.WeblogSelectionContext wsc)
throws Exception {
- final DocumentBuilder db = GetDocumentBuilder.db;
-
- Document doc = db.parse(in);
+ final DOMImplementationLS domils
+ = (DOMImplementationLS)(IncantDOM.getDOMI());
+ LSParser par
+ = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
+ par.getDomConfig().setParameter("resource-resolver", new Resolver());
+ par.getDomConfig().setParameter("http://xml.org/sax/features/validation", true);
+ par.getDomConfig().setParameter("http://xml.org/sax/features/namespaces", true);
+ par.getDomConfig().setParameter("http://apache.org/xml/properties/internal/error-handler", new SelectiveErrorHandler());
+ LSInput input = domils.createLSInput();
+ input.setByteStream(in);
+ Document doc = par.parse(input);
processDocument(doc, wsc);
doc.normalizeDocument();
Unparser unparser
diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java
index 56e51ac..16c7ea0 100644
--- a/org/madore/damlengine/WeblogPopulate.java
+++ b/org/madore/damlengine/WeblogPopulate.java
@@ -6,12 +6,12 @@ import java.security.MessageDigest;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
-import javax.xml.parsers.DocumentBuilder;
import javax.xml.xpath.*;
import org.w3c.dom.*;
import org.w3c.dom.ls.DOMImplementationLS;
+import org.w3c.dom.ls.LSParser;
import org.w3c.dom.ls.LSSerializer;
-// import org.xml.sax.EntityResolver;
+import org.w3c.dom.ls.LSInput;
public final class WeblogPopulate {
@@ -29,12 +29,11 @@ public final class WeblogPopulate {
public static void populate(InputStream in)
throws Exception {
- final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db;
-
- final DOMImplementationLS domi
- = (DOMImplementationLS)(db.getDOMImplementation());
- LSSerializer ser = domi.createLSSerializer();
+ final DOMImplementationLS domils
+ = (DOMImplementationLS)(DamlEngine.IncantDOM.getDOMI());
+ LSSerializer ser = domils.createLSSerializer();
ser.getDomConfig().setParameter("xml-declaration", false);
+ LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
@@ -53,7 +52,9 @@ public final class WeblogPopulate {
final PreparedStatement setCatSt
= conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)");
- Document doc = db.parse(in);
+ LSInput input = domils.createLSInput();
+ input.setByteStream(in);
+ Document doc = par.parse(input);
XPathFactory xpf = XPathFactory.newInstance();
XPath xp = xpf.newXPath();
xp.setNamespaceContext(new DamlEngine.DamlNSMapping());
diff --git a/org/madore/damlengine/WeblogRSS.java b/org/madore/damlengine/WeblogRSS.java
index 5d918b8..3bf6489 100644
--- a/org/madore/damlengine/WeblogRSS.java
+++ b/org/madore/damlengine/WeblogRSS.java
@@ -6,7 +6,6 @@ import java.io.OutputStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
-import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.*;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSParser;
@@ -32,21 +31,19 @@ public final class WeblogRSS {
final ResultSet selRes = selSt.executeQuery();
- final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db;
-
- final DOMImplementationLS domi
- = (DOMImplementationLS)(db.getDOMImplementation());
- LSSerializer ser = domi.createLSSerializer();
+ final DOMImplementation domi
+ = (DamlEngine.IncantDOM.getDOMI());
+ final DOMImplementationLS domils = (DOMImplementationLS)(domi);
+ LSSerializer ser = domils.createLSSerializer();
ser.getDomConfig().setParameter("xml-declaration", true);
- LSParser par = domi.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
+ LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
- final LSOutput lsout = domi.createLSOutput();
+ final LSOutput lsout = domils.createLSOutput();
lsout.setByteStream(out);
lsout.setEncoding("UTF-8");
- Document rssDoc = db.newDocument();
- Element rssRoot = rssDoc.createElementNS(DamlEngine.RDF_NS, "rdf:RDF");
- rssDoc.appendChild(rssRoot);
+ Document rssDoc = domi.createDocument(DamlEngine.RDF_NS, "rdf:RDF", null);
+ Element rssRoot = rssDoc.getDocumentElement();
// rssDoc.appendChild(rssDoc.createTextNode("\n"));
rssRoot.setAttributeNS(DamlEngine.XMLNS_NS, "xmlns:rdf", DamlEngine.RDF_NS);
@@ -138,7 +135,7 @@ public final class WeblogRSS {
item.appendChild(itemDate);
item.appendChild(rssDoc.createTextNode("\n"));
itemDate.appendChild(rssDoc.createTextNode(cdate));
- LSInput input = domi.createLSInput();
+ LSInput input = domils.createLSInput();
input.setStringData(contentXml);
Document temp = par.parse(input);
String contentStr = temp.getDocumentElement().getTextContent();