diff options
author | David A. Madore <david@procyon.(none)> | 2010-04-12 18:48:34 +0200 |
---|---|---|
committer | David A. Madore <david@procyon.(none)> | 2010-04-12 18:48:34 +0200 |
commit | 2312810ac995f9cf2d93c63df29cefd6aa02156f (patch) | |
tree | 711af4d97065d0fa7d87792a3f5291919cb3557d | |
parent | 00dc8d9bbb54cf9a0cc47e6326a8444df92a14f1 (diff) | |
download | damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.gz damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.bz2 damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.zip |
Write a semi-decent unparser(=serializer).
Still a bit rough at the edges, but this basically does what I want.
-rw-r--r-- | org/madore/damlengine/DamlEngine.java | 1 | ||||
-rw-r--r-- | org/madore/damlengine/Unparser.java | 86 |
2 files changed, 74 insertions, 13 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java index e5454da..93cfea5 100644 --- a/org/madore/damlengine/DamlEngine.java +++ b/org/madore/damlengine/DamlEngine.java @@ -34,6 +34,7 @@ public final class DamlEngine { for (String fname : args) { doc = db.parse(fname); processDocument(); + doc.normalizeDocument(); Unparser unparser = new Unparser(doc, new OutputStreamWriter(System.out)); unparser.unparse(); diff --git a/org/madore/damlengine/Unparser.java b/org/madore/damlengine/Unparser.java index a903523..b6ea4fc 100644 --- a/org/madore/damlengine/Unparser.java +++ b/org/madore/damlengine/Unparser.java @@ -6,17 +6,27 @@ import org.w3c.dom.*; public final class Unparser { + private Document doc; private Node cursor; private enum Dir { PUSHING, POPPING }; private Dir dir; private Writer out; public Unparser(Document doc, Writer out) { - cursor = doc; - dir = Dir.PUSHING; + this.doc = doc; this.out = out; } + protected void enter() { + Node chld = cursor.getFirstChild(); + if ( chld == null ) { + dir = Dir.POPPING; + } else { + cursor = chld; + dir = Dir.PUSHING; + } + } + protected void skip() { Node nsib = cursor.getNextSibling(); if ( nsib == null ) { @@ -28,43 +38,77 @@ public final class Unparser { } } + public static String quote(String s) { + StringBuilder buf = new StringBuilder(s.length()+64); + int ptr0 = 0; int ptr; + for ( ptr=0 ; ptr<s.length() ; ptr++ ) { + char ch = s.charAt(ptr); + String qch = null; + switch ( ch ) { + case '&': qch = "&amp;"; break; + case '<': qch = "&lt;"; break; + case '>': qch = "&gt;"; break; + case '"': qch = "&quot;"; break; + } + if ( qch != null ) { + buf.append(s, ptr0, ptr); + buf.append(qch); + } + } + buf.append(s, ptr0, ptr); + return buf.toString(); + } + protected void unparseOne() throws IOException { + // System.err.println("unparseOne(): cursor.nodeName="+cursor.getNodeName()); switch ( dir ) { case PUSHING: if ( cursor.getNodeType() == Node.ELEMENT_NODE ) { Element elt = (Element)cursor; 
- out.write("<"+elt.getTagName()); + String qname = elt.getTagName(); + out.write("<"+qname); if ( elt.hasAttributes() ) { NamedNodeMap attrs = elt.getAttributes(); Node n2; for ( int i=0 ; (n2=attrs.item(i)) != null ; i++ ) { Attr attr = (Attr)n2; - out.write(" "+attr.getName() - +"=\"(value)\""); + out.write(" "+attr.getName()+"=\""+quote(attr.getValue())+"\""); + } } - if ( ! elt.hasChildNodes() ) { + if ( ! elt.hasChildNodes() && ( qname.equals("br") || qname.equals("hr") + || qname.equals("img") + || qname.equals("area") + || qname.equals("base") + || qname.equals("basefont") + || qname.equals("col") + || qname.equals("frame") + || qname.equals("input") + || qname.equals("isindex") + || qname.equals("link") + || qname.equals("meta") + || qname.equals("param") ) ) { out.write(" />"); skip(); } else { out.write(">"); - cursor = elt.getFirstChild(); + enter(); } } else if ( cursor.getNodeType() == Node.DOCUMENT_NODE ) { - cursor = cursor.getFirstChild(); - skip(); + enter(); } else if ( cursor.getNodeType() == Node.TEXT_NODE ) { - out.write("(text)"); + out.write(quote(((Text)cursor).getData())); skip(); } else if ( cursor.getNodeType() == Node.COMMENT_NODE ) { - out.write("<!--(comment)-->"); + // FIXME: comments could conceivably contain the "--" string + out.write("<!--"+((Comment)cursor).getData()+"-->"); skip(); } else if ( cursor.getNodeType() == Node.CDATA_SECTION_NODE ) { - out.write("<![CDATA[(cdata)]]>"); + // FIXME: cdata section could conceivably contain the "]]>" string + out.write("<![CDATA["+((CDATASection)cursor).getData()+"]]>"); skip(); } else { - out.write("<!--(some other kind of node)-->"); skip(); } break; @@ -80,9 +124,25 @@ public final class Unparser { public void unparse() throws IOException { + out.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>"); + DocumentType doctype = doc.getDoctype(); + if ( doctype != null ) { + String externalId = ""; + if ( doctype.getPublicId() != null ) { + externalId = " PUBLIC 
\""+doctype.getPublicId()+"\" \"" + +doctype.getSystemId()+"\""; + } else if ( doctype.getSystemId() != null ) { + externalId = " SYSTEM \""+doctype.getSystemId()+"\""; + } + out.write("<!DOCTYPE "+doctype.getName()+externalId+">\n"); + } + cursor = doc.getDocumentElement(); + dir = Dir.PUSHING; while ( cursor != null ) { unparseOne(); } + out.write("\n"); + out.flush(); } } |