From 3365a175e1721e0fda7a56ad53e24a7e337c13cf Mon Sep 17 00:00:00 2001 From: "David A. Madore" Date: Sun, 4 Sep 2011 20:19:48 +0200 Subject: Eliminate use of DocumentBuilder (use DOM LS everywhere) + use validation to catch undefined entities. There seems to be no way to catch the undefined entity error (when it is considered a validation error, e.g., when the document has an external subset) other than by turning on validation. So we register an error handler to throw away useless validation errors. --- org/madore/damlengine/DamlEngine.java | 70 +++++++++++++++++++++---------- org/madore/damlengine/WeblogPopulate.java | 17 ++++---- org/madore/damlengine/WeblogRSS.java | 21 ++++------ 3 files changed, 67 insertions(+), 41 deletions(-) diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java index c0010b6..7bc81bd 100644 --- a/org/madore/damlengine/DamlEngine.java +++ b/org/madore/damlengine/DamlEngine.java @@ -1,5 +1,6 @@ package org.madore.damlengine; +import java.util.MissingResourceException; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.InputStream; @@ -12,12 +13,13 @@ import java.io.BufferedReader; import java.io.PrintStream; import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.ParserConfigurationException; -import org.w3c.dom.Document; -// import org.xml.sax.EntityResolver; -import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; +import org.w3c.dom.*; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSParser; +import org.w3c.dom.ls.LSInput; +import org.apache.xerces.dom.DOMImplementationSourceImpl; +import org.apache.xerces.xni.parser.XMLErrorHandler; +import org.apache.xerces.xni.parser.XMLParseException; public final class DamlEngine { @@ -54,19 +56,37 @@ public final class DamlEngine { } } - public static final class GetDocumentBuilder { - static final DocumentBuilder db; - static { - final Resolver resolver = new Resolver(); - final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl(); - dbf.setNamespaceAware(true); - dbf.setValidating(false); - try { - db = dbf.newDocumentBuilder(); - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); + public static class SelectiveErrorHandler implements XMLErrorHandler { + public void warning(String domain, String key, XMLParseException exc) { + System.err.println("warning: line "+exc.getLineNumber() + +": "+exc.getMessage()); + } + public void error(String domain, String key, XMLParseException exc) { + if ( domain.equals("http://www.w3.org/TR/1998/REC-xml-19980210") + && key.equals("MSG_ELEMENT_NOT_DECLARED") ) + return; + System.err.println("error: line "+exc.getLineNumber() + +": "+exc.getMessage()); + } + public void fatalError(String domain, String key, XMLParseException exc) { + System.err.println("fatal error: line "+exc.getLineNumber() + +": "+exc.getMessage()); + throw exc; + } + } + + public static final class IncantDOM { + static DOMImplementation domi; + public static DOMImplementation getDOMI() { + if ( domi == null ) { + DOMImplementationSource source + = new DOMImplementationSourceImpl(); + domi = source.getDOMImplementation("XML 3.0 Core 3.0 LS 3.0"); + if ( domi == null ) + throw new MissingResourceException("failed to obtain DOM implementation", + "org.w3c.dom.ls.DOMImplementationLS", ""); } - db.setEntityResolver(resolver); + return domi; } } @@ -103,9 +123,17 @@ public final class DamlEngine { Context.WeblogSelectionContext wsc) throws Exception { - final DocumentBuilder db = GetDocumentBuilder.db; - - Document doc = db.parse(in); + final DOMImplementationLS domils + = (DOMImplementationLS)(IncantDOM.getDOMI()); + LSParser par + = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); + par.getDomConfig().setParameter("resource-resolver", new Resolver()); + par.getDomConfig().setParameter("http://xml.org/sax/features/validation", true); + par.getDomConfig().setParameter("http://xml.org/sax/features/namespaces", true); + par.getDomConfig().setParameter("http://apache.org/xml/properties/internal/error-handler", new SelectiveErrorHandler()); + LSInput input = domils.createLSInput(); + input.setByteStream(in); + Document doc = par.parse(input); processDocument(doc, wsc); doc.normalizeDocument(); Unparser unparser diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java index 56e51ac..16c7ea0 100644 --- a/org/madore/damlengine/WeblogPopulate.java +++ b/org/madore/damlengine/WeblogPopulate.java @@ -6,12 +6,12 @@ import java.security.MessageDigest; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; -import javax.xml.parsers.DocumentBuilder; import javax.xml.xpath.*; import org.w3c.dom.*; import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSParser; import org.w3c.dom.ls.LSSerializer; -// import org.xml.sax.EntityResolver; +import org.w3c.dom.ls.LSInput; public final class WeblogPopulate { @@ -29,12 +29,11 @@ public final class WeblogPopulate { public static void populate(InputStream in) throws Exception { - final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db; - - final DOMImplementationLS domi - = (DOMImplementationLS)(db.getDOMImplementation()); - LSSerializer ser = domi.createLSSerializer(); + final DOMImplementationLS domils + = (DOMImplementationLS)(DamlEngine.IncantDOM.getDOMI()); + LSSerializer ser = domils.createLSSerializer(); ser.getDomConfig().setParameter("xml-declaration", false); + LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); MessageDigest sha1 = MessageDigest.getInstance("SHA-1"); @@ -53,7 +52,9 @@ public final class WeblogPopulate { final PreparedStatement setCatSt = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)"); - Document doc = db.parse(in); + LSInput input = domils.createLSInput(); + input.setByteStream(in); + Document doc = par.parse(input); XPathFactory xpf = XPathFactory.newInstance(); XPath xp = xpf.newXPath(); xp.setNamespaceContext(new DamlEngine.DamlNSMapping()); diff --git a/org/madore/damlengine/WeblogRSS.java b/org/madore/damlengine/WeblogRSS.java index 5d918b8..3bf6489 100644 --- a/org/madore/damlengine/WeblogRSS.java +++ b/org/madore/damlengine/WeblogRSS.java @@ -6,7 +6,6 @@ import java.io.OutputStream; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; -import javax.xml.parsers.DocumentBuilder; import org.w3c.dom.*; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSParser; @@ -32,21 +31,19 @@ public final class WeblogRSS { final ResultSet selRes = selSt.executeQuery(); - final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db; - - final DOMImplementationLS domi - = (DOMImplementationLS)(db.getDOMImplementation()); - LSSerializer ser = domi.createLSSerializer(); + final DOMImplementation domi + = (DamlEngine.IncantDOM.getDOMI()); + final DOMImplementationLS domils = (DOMImplementationLS)(domi); + LSSerializer ser = domils.createLSSerializer(); ser.getDomConfig().setParameter("xml-declaration", true); - LSParser par = domi.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); + LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); - final LSOutput lsout = domi.createLSOutput(); + final LSOutput lsout = domils.createLSOutput(); lsout.setByteStream(out); lsout.setEncoding("UTF-8"); - Document rssDoc = db.newDocument(); - Element rssRoot = rssDoc.createElementNS(DamlEngine.RDF_NS, "rdf:RDF"); - rssDoc.appendChild(rssRoot); + Document rssDoc = domi.createDocument(DamlEngine.RDF_NS, "rdf:RDF", null); + Element rssRoot = rssDoc.getDocumentElement(); // rssDoc.appendChild(rssDoc.createTextNode("\n")); rssRoot.setAttributeNS(DamlEngine.XMLNS_NS, "xmlns:rdf", DamlEngine.RDF_NS); @@ -138,7 +135,7 @@ public final class WeblogRSS { item.appendChild(itemDate); item.appendChild(rssDoc.createTextNode("\n")); itemDate.appendChild(rssDoc.createTextNode(cdate)); - LSInput input = domi.createLSInput(); + LSInput input = domils.createLSInput(); input.setStringData(contentXml); Document temp = par.parse(input); String contentStr = temp.getDocumentElement().getTextContent(); -- cgit v1.2.3