diff options
author | David A. Madore <david+git@madore.org> | 2011-09-04 20:19:48 +0200 |
---|---|---|
committer | David A. Madore <david+git@madore.org> | 2011-09-04 20:19:48 +0200 |
commit | 3365a175e1721e0fda7a56ad53e24a7e337c13cf (patch) | |
tree | 14206f14ba0168f741fe12cffdb476c669824b4d /org/madore/damlengine | |
parent | c17d2165ea20325fc78f95791875ffa4c785e548 (diff) | |
download | damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.tar.gz damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.tar.bz2 damlengine-3365a175e1721e0fda7a56ad53e24a7e337c13cf.zip |
Eliminate use of DocumentBuilder (use DOM LS everywhere) + use validation to catch undefined entities.
There seems to be no way to catch the undefined entity error (when it
is considered a validation error, e.g., when the document has an
external subset) other than by turning on validation. So we register
an error handler that discards the validation errors we do not care about (namely "element not declared" errors) while still reporting all other errors.
Diffstat (limited to 'org/madore/damlengine')
-rw-r--r-- | org/madore/damlengine/DamlEngine.java | 70 | ||||
-rw-r--r-- | org/madore/damlengine/WeblogPopulate.java | 17 | ||||
-rw-r--r-- | org/madore/damlengine/WeblogRSS.java | 21 |
3 files changed, 67 insertions, 41 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java index c0010b6..7bc81bd 100644 --- a/org/madore/damlengine/DamlEngine.java +++ b/org/madore/damlengine/DamlEngine.java @@ -1,5 +1,6 @@ package org.madore.damlengine; +import java.util.MissingResourceException; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.InputStream; @@ -12,12 +13,13 @@ import java.io.BufferedReader; import java.io.PrintStream; import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.ParserConfigurationException; -import org.w3c.dom.Document; -// import org.xml.sax.EntityResolver; -import org.apache.xerces.jaxp.DocumentBuilderFactoryImpl; +import org.w3c.dom.*; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSParser; +import org.w3c.dom.ls.LSInput; +import org.apache.xerces.dom.DOMImplementationSourceImpl; +import org.apache.xerces.xni.parser.XMLErrorHandler; +import org.apache.xerces.xni.parser.XMLParseException; public final class DamlEngine { @@ -54,19 +56,37 @@ public final class DamlEngine { } } - public static final class GetDocumentBuilder { - static final DocumentBuilder db; - static { - final Resolver resolver = new Resolver(); - final DocumentBuilderFactory dbf = new DocumentBuilderFactoryImpl(); - dbf.setNamespaceAware(true); - dbf.setValidating(false); - try { - db = dbf.newDocumentBuilder(); - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); + public static class SelectiveErrorHandler implements XMLErrorHandler { + public void warning(String domain, String key, XMLParseException exc) { + System.err.println("warning: line "+exc.getLineNumber() + +": "+exc.getMessage()); + } + public void error(String domain, String key, XMLParseException exc) { + if ( domain.equals("http://www.w3.org/TR/1998/REC-xml-19980210") + && 
key.equals("MSG_ELEMENT_NOT_DECLARED") ) + return; + System.err.println("error: line "+exc.getLineNumber() + +": "+exc.getMessage()); + } + public void fatalError(String domain, String key, XMLParseException exc) { + System.err.println("fatal error: line "+exc.getLineNumber() + +": "+exc.getMessage()); + throw exc; + } + } + + public static final class IncantDOM { + static DOMImplementation domi; + public static DOMImplementation getDOMI() { + if ( domi == null ) { + DOMImplementationSource source + = new DOMImplementationSourceImpl(); + domi = source.getDOMImplementation("XML 3.0 Core 3.0 LS 3.0"); + if ( domi == null ) + throw new MissingResourceException("failed to obtain DOM implementation", + "org.w3c.dom.ls.DOMImplementationLS", ""); } - db.setEntityResolver(resolver); + return domi; } } @@ -103,9 +123,17 @@ public final class DamlEngine { Context.WeblogSelectionContext wsc) throws Exception { - final DocumentBuilder db = GetDocumentBuilder.db; - - Document doc = db.parse(in); + final DOMImplementationLS domils + = (DOMImplementationLS)(IncantDOM.getDOMI()); + LSParser par + = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); + par.getDomConfig().setParameter("resource-resolver", new Resolver()); + par.getDomConfig().setParameter("http://xml.org/sax/features/validation", true); + par.getDomConfig().setParameter("http://xml.org/sax/features/namespaces", true); + par.getDomConfig().setParameter("http://apache.org/xml/properties/internal/error-handler", new SelectiveErrorHandler()); + LSInput input = domils.createLSInput(); + input.setByteStream(in); + Document doc = par.parse(input); processDocument(doc, wsc); doc.normalizeDocument(); Unparser unparser diff --git a/org/madore/damlengine/WeblogPopulate.java b/org/madore/damlengine/WeblogPopulate.java index 56e51ac..16c7ea0 100644 --- a/org/madore/damlengine/WeblogPopulate.java +++ b/org/madore/damlengine/WeblogPopulate.java @@ -6,12 +6,12 @@ import java.security.MessageDigest; import 
java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; -import javax.xml.parsers.DocumentBuilder; import javax.xml.xpath.*; import org.w3c.dom.*; import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSParser; import org.w3c.dom.ls.LSSerializer; -// import org.xml.sax.EntityResolver; +import org.w3c.dom.ls.LSInput; public final class WeblogPopulate { @@ -29,12 +29,11 @@ public final class WeblogPopulate { public static void populate(InputStream in) throws Exception { - final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db; - - final DOMImplementationLS domi - = (DOMImplementationLS)(db.getDOMImplementation()); - LSSerializer ser = domi.createLSSerializer(); + final DOMImplementationLS domils + = (DOMImplementationLS)(DamlEngine.IncantDOM.getDOMI()); + LSSerializer ser = domils.createLSSerializer(); ser.getDomConfig().setParameter("xml-declaration", false); + LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); MessageDigest sha1 = MessageDigest.getInstance("SHA-1"); @@ -53,7 +52,9 @@ public final class WeblogPopulate { final PreparedStatement setCatSt = conn.prepareStatement("INSERT INTO incat(id,code) VALUES (?,?)"); - Document doc = db.parse(in); + LSInput input = domils.createLSInput(); + input.setByteStream(in); + Document doc = par.parse(input); XPathFactory xpf = XPathFactory.newInstance(); XPath xp = xpf.newXPath(); xp.setNamespaceContext(new DamlEngine.DamlNSMapping()); diff --git a/org/madore/damlengine/WeblogRSS.java b/org/madore/damlengine/WeblogRSS.java index 5d918b8..3bf6489 100644 --- a/org/madore/damlengine/WeblogRSS.java +++ b/org/madore/damlengine/WeblogRSS.java @@ -6,7 +6,6 @@ import java.io.OutputStream; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; -import javax.xml.parsers.DocumentBuilder; import org.w3c.dom.*; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSParser; @@ -32,21 +31,19 @@ public final class 
WeblogRSS { final ResultSet selRes = selSt.executeQuery(); - final DocumentBuilder db = DamlEngine.GetDocumentBuilder.db; - - final DOMImplementationLS domi - = (DOMImplementationLS)(db.getDOMImplementation()); - LSSerializer ser = domi.createLSSerializer(); + final DOMImplementation domi + = (DamlEngine.IncantDOM.getDOMI()); + final DOMImplementationLS domils = (DOMImplementationLS)(domi); + LSSerializer ser = domils.createLSSerializer(); ser.getDomConfig().setParameter("xml-declaration", true); - LSParser par = domi.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); + LSParser par = domils.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null); - final LSOutput lsout = domi.createLSOutput(); + final LSOutput lsout = domils.createLSOutput(); lsout.setByteStream(out); lsout.setEncoding("UTF-8"); - Document rssDoc = db.newDocument(); - Element rssRoot = rssDoc.createElementNS(DamlEngine.RDF_NS, "rdf:RDF"); - rssDoc.appendChild(rssRoot); + Document rssDoc = domi.createDocument(DamlEngine.RDF_NS, "rdf:RDF", null); + Element rssRoot = rssDoc.getDocumentElement(); // rssDoc.appendChild(rssDoc.createTextNode("\n")); rssRoot.setAttributeNS(DamlEngine.XMLNS_NS, "xmlns:rdf", DamlEngine.RDF_NS); @@ -138,7 +135,7 @@ public final class WeblogRSS { item.appendChild(itemDate); item.appendChild(rssDoc.createTextNode("\n")); itemDate.appendChild(rssDoc.createTextNode(cdate)); - LSInput input = domi.createLSInput(); + LSInput input = domils.createLSInput(); input.setStringData(contentXml); Document temp = par.parse(input); String contentStr = temp.getDocumentElement().getTextContent(); |