diff options
author | David A. Madore <david+git@madore.org> | 2011-09-04 20:19:48 +0200 |
---|---|---|
committer | David A. Madore <david+git@madore.org> | 2011-09-04 20:19:48 +0200 |
commit | 3365a175e1721e0fda7a56ad53e24a7e337c13cf (patch) | |
tree | 14206f14ba0168f741fe12cffdb476c669824b4d | |
parent | c17d2165ea20325fc78f95791875ffa4c785e548 (diff) | |
download | damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.tar.gz damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.tar.bz2 damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.zip |
Eliminate use of DocumentBuilder (use DOM LS everywhere) + use validation to catch undefined entities.
There seems to be no way to catch the undefined entity error (when it
is considered a validation error, e.g., when the document has an
external subset) other than by turning on validation. So we register
an error handler that discards the validation errors we do not care about (undeclared elements) while still reporting all other errors.
-rw-r--r-- | org/madore/damlengine/DamlEngine.java | 70 | ||||
-rw-r--r-- | org/madore/damlengine/WeblogPopulate.java | 17 | ||||
-rw-r--r-- | org/madore/damlengine/WeblogRSS.java | 21 |
3 files changed, 67 insertions, 41 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java index c0010b6..7bc81bd 100644 --- a/org/madore/damlengine/DamlEngine.java +++ b/org/madore/damlengine/DamlEngine.java @@ -1,5 +1,6 @@ package org.madore.damlengine; +import java.util.MissingResourceException; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.InputStream; @@ -12,12 +13,13 @@ import java.io.BufferedReader; import java.io.PrintStream; import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.ParserConfigurationException; -import org.w3c.dom.Document; -// import org.xml.sax.EntityResolver; -import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; +import org.w3c.dom.*; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSParser; +import org.w3c.dom.ls.LSInput; +import org.apache.xerces.dom.DOMImplementationSourceImpl; +import org.apache.xerces.xni.parser.XMLErrorHandler; +import org.apache.xerces.xni.parser.XMLParseException; public final class DamlEngine { @@ -54,19 +56,37 @@ public final class DamlEngine { } } - public static final class GetDocumentBuilder { - static final DocumentBuilder db; - static { - final Resolver resolver = new Resolver(); - final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl(); - dbf.setNamespaceAware(true); - dbf.setValidating(false); - try { - db = dbf.newDocumentBuilder(); - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); + public static class SelectiveErrorHandler implements XMLErrorHandler { + public void warning(String domain, String key, XMLParseException exc) { + System.err.println("warning: line "+exc.getLineNumber() + +": "+exc.getMessage()); + } + public void error(String domain, String key, XMLParseException exc) { + if ( domain.equals("http://www.w3.org/TR/1998/REC-xml-19980210") + && 
key.equals("MSG_ELEMENT_NOT_DECLARED") ) + return; + System.err.println("error: line "+exc.getLineNumber() + +": "+exc.getMessage()); + } + public void fatalError(String domain, String key, XMLParseException exc) { + System.err.println("fatal error: line "+exc.getLineNumber() + +": "+exc.getMessage()); + throw exc; + } + } + + public static final class IncantDOM { + static DOMImplementation domi; + public static DOMImplementation getDOMI() { + if ( domi == null ) { + DOMImplementationSource source + = new DOMImplementationSourceImpl(); + domi = source.getDOMImplementation("XML 3.0 Core 3.0 LS 3.0"); + if ( domi == null ) + throw new MissingResourceException("failed to obtain DOM implementation", + "org.w3c.dom.ls.DOMImplementationLS", ""); } - db.setEntityResolver(resolver); + return domi; } } @@ -103,9 +123,17 @@ public final class DamlEngine { Context.WeblogSelectionContext wsc) throws Exception { - final DocumentBuilder db = GetDocumentBuilder.db; - - Document doc = db.parse(in); + final DOMImplementationLS domils + = (DOMImplementationLS)(IncantDOM.getDOMI()); + LSParser par + = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); + par.getDomConfig().setParameter("resource-resolver", new Resolver()); + par.getDomConfig().setParameter("http://xml.org/sax/features/validation", true); + par.getDomConfig().setParameter("http://xml.org/sax/features/namespaces", true); + par.getDomConfig().setParameter("http://apache.org/xml/properties/internal/error-handler", new SelectiveErrorHandler()); + LSInput input = domils.createLSInput(); + input.setByteStream(in); + Document doc = par.parse(input); processDocument(doc, wsc); doc.normalizeDocument(); Unparser unparser diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java index 56e51ac..16c7ea0 100644 --- a/org/madore/damlengine/WeblogPopulate.java +++ b/org/madore/damlengine/WeblogPopulate.java @@ -6,12 +6,12 @@ import java.security.MessageDigest; import 
java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; -import javax.xml.parsers.DocumentBuilder; import javax.xml.xpath.*; import org.w3c.dom.*; import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSParser; import org.w3c.dom.ls.LSSerializer; -// import org.xml.sax.EntityResolver; +import org.w3c.dom.ls.LSInput; public final class WeblogPopulate { @@ -29,12 +29,11 @@ public final class WeblogPopulate { public static void populate(InputStream in) throws Exception { - final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db; - - final DOMImplementationLS domi - = (DOMImplementationLS)(db.getDOMImplementation()); - LSSerializer ser = domi.createLSSerializer(); + final DOMImplementationLS domils + = (DOMImplementationLS)(DamlEngine.IncantDOM.getDOMI()); + LSSerializer ser = domils.createLSSerializer(); ser.getDomConfig().setParameter("xml-declaration", false); + LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); MessageDigest sha1 = MessageDigest.getInstance("SHA-1"); @@ -53,7 +52,9 @@ public final class WeblogPopulate { final PreparedStatement setCatSt = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)"); - Document doc = db.parse(in); + LSInput input = domils.createLSInput(); + input.setByteStream(in); + Document doc = par.parse(input); XPathFactory xpf = XPathFactory.newInstance(); XPath xp = xpf.newXPath(); xp.setNamespaceContext(new DamlEngine.DamlNSMapping()); diff --git a/org/madore/damlengine/WeblogRSS.java b/org/madore/damlengine/WeblogRSS.java index 5d918b8..3bf6489 100644 --- a/org/madore/damlengine/WeblogRSS.java +++ b/org/madore/damlengine/WeblogRSS.java @@ -6,7 +6,6 @@ import java.io.OutputStream; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; -import javax.xml.parsers.DocumentBuilder; import org.w3c.dom.*; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSParser; @@ -32,21 +31,19 @@ public final class 
WeblogRSS { final ResultSet selRes = selSt.executeQuery(); - final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db; - - final DOMImplementationLS domi - = (DOMImplementationLS)(db.getDOMImplementation()); - LSSerializer ser = domi.createLSSerializer(); + final DOMImplementation domi + = (DamlEngine.IncantDOM.getDOMI()); + final DOMImplementationLS domils = (DOMImplementationLS)(domi); + LSSerializer ser = domils.createLSSerializer(); ser.getDomConfig().setParameter("xml-declaration", true); - LSParser par = domi.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); + LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); - final LSOutput lsout = domi.createLSOutput(); + final LSOutput lsout = domils.createLSOutput(); lsout.setByteStream(out); lsout.setEncoding("UTF-8"); - Document rssDoc = db.newDocument(); - Element rssRoot = rssDoc.createElementNS(DamlEngine.RDF_NS, "rdf:RDF"); - rssDoc.appendChild(rssRoot); + Document rssDoc = domi.createDocument(DamlEngine.RDF_NS, "rdf:RDF", null); + Element rssRoot = rssDoc.getDocumentElement(); // rssDoc.appendChild(rssDoc.createTextNode("\n")); rssRoot.setAttributeNS(DamlEngine.XMLNS_NS, "xmlns:rdf", DamlEngine.RDF_NS); @@ -138,7 +135,7 @@ public final class WeblogRSS { item.appendChild(itemDate); item.appendChild(rssDoc.createTextNode("\n")); itemDate.appendChild(rssDoc.createTextNode(cdate)); - LSInput input = domi.createLSInput(); + LSInput input = domils.createLSInput(); input.setStringData(contentXml); Document temp = par.parse(input); String contentStr = temp.getDocumentElement().getTextContent(); |