summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author David A. Madore <david@procyon.(none)> 2010-04-12 18:48:34 +0200
committer David A. Madore <david@procyon.(none)> 2010-04-12 18:48:34 +0200
commit 2312810ac995f9cf2d93c63df29cefd6aa02156f (patch)
tree 711af4d97065d0fa7d87792a3f5291919cb3557d
parent 00dc8d9bbb54cf9a0cc47e6326a8444df92a14f1 (diff)
download damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.gz
damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.bz2
damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.zip
Write a semi-decent unparser(=serializer).
Still a bit rough at the edges, but this basically does what I want.
-rw-r--r-- org/madore/damlengine/DamlEngine.java 1
-rw-r--r-- org/madore/damlengine/Unparser.java 86
2 files changed, 74 insertions, 13 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java
index e5454da..93cfea5 100644
--- a/org/madore/damlengine/DamlEngine.java
+++ b/org/madore/damlengine/DamlEngine.java
@@ -34,6 +34,7 @@ public final class DamlEngine {
for (String fname : args) {
doc = db.parse(fname);
processDocument();
+ doc.normalizeDocument();
Unparser unparser
= new Unparser(doc, new OutputStreamWriter(System.out));
unparser.unparse();
diff --git a/org/madore/damlengine/Unparser.java b/org/madore/damlengine/Unparser.java
index a903523..b6ea4fc 100644
--- a/org/madore/damlengine/Unparser.java
+++ b/org/madore/damlengine/Unparser.java
@@ -6,17 +6,27 @@ import org.w3c.dom.*;
public final class Unparser {
+ private Document doc;
private Node cursor;
private enum Dir { PUSHING, POPPING };
private Dir dir;
private Writer out;
public Unparser(Document doc, Writer out) {
- cursor = doc;
- dir = Dir.PUSHING;
+ this.doc = doc;
this.out = out;
}
+ /**
+  * Move the cursor into the current node.  When the node has a first
+  * child, the cursor steps onto that child and the direction becomes
+  * PUSHING; when it has none, the cursor stays where it is and the
+  * direction becomes POPPING.
+  */
+ protected void enter() {
+     final Node firstChild = cursor.getFirstChild();
+     if ( firstChild != null ) {
+         cursor = firstChild;
+         dir = Dir.PUSHING;
+         return;
+     }
+     // Nothing to descend into: stay on this node and start popping.
+     dir = Dir.POPPING;
+ }
+
protected void skip() {
Node nsib = cursor.getNextSibling();
if ( nsib == null ) {
@@ -28,43 +38,77 @@ public final class Unparser {
}
}
+ /**
+  * Escape a string for inclusion in XML character data or in a
+  * double-quoted attribute value.
+  *
+  * The characters {@code &}, {@code <}, {@code >} and {@code "} are
+  * replaced by the corresponding predefined entity references; every
+  * other character is copied unchanged.
+  *
+  * @param s the raw text; must not be null
+  * @return the escaped text
+  */
+ public static String quote(String s) {
+     StringBuilder buf = new StringBuilder(s.length()+64);
+     // ptr0 marks the start of the run of ordinary characters that has
+     // not been copied to buf yet.
+     int ptr0 = 0;
+     for ( int ptr=0 ; ptr<s.length() ; ptr++ ) {
+         String qch;
+         switch ( s.charAt(ptr) ) {
+         case '&': qch = "&amp;"; break;
+         case '<': qch = "&lt;"; break;
+         case '>': qch = "&gt;"; break;
+         case '"': qch = "&quot;"; break;
+         default: qch = null; break;
+         }
+         if ( qch != null ) {
+             // Flush the pending run, emit the entity, and restart the
+             // run just past the escaped character; without this the
+             // already-copied prefix would be copied again.
+             buf.append(s, ptr0, ptr);
+             buf.append(qch);
+             ptr0 = ptr+1;
+         }
+     }
+     // Copy whatever ordinary characters follow the last escape.
+     buf.append(s, ptr0, s.length());
+     return buf.toString();
+ }
+
protected void unparseOne()
throws IOException {
+ // System.err.println("unparseOne(): cursor.nodeName="+cursor.getNodeName());
switch ( dir ) {
case PUSHING:
if ( cursor.getNodeType() == Node.ELEMENT_NODE ) {
Element elt = (Element)cursor;
- out.write("<"+elt.getTagName());
+ String qname = elt.getTagName();
+ out.write("<"+qname);
if ( elt.hasAttributes() ) {
NamedNodeMap attrs = elt.getAttributes();
Node n2;
for ( int i=0 ; (n2=attrs.item(i)) != null ; i++ ) {
Attr attr = (Attr)n2;
- out.write(" "+attr.getName()
- +"=\"(value)\"");
+ out.write(" "+attr.getName()+"=\""+quote(attr.getValue())+"\"");
+
}
}
- if ( ! elt.hasChildNodes() ) {
+ if ( ! elt.hasChildNodes() && ( qname.equals("br") || qname.equals("hr")
+ || qname.equals("img")
+ || qname.equals("area")
+ || qname.equals("base")
+ || qname.equals("basefont")
+ || qname.equals("col")
+ || qname.equals("frame")
+ || qname.equals("input")
+ || qname.equals("isindex")
+ || qname.equals("link")
+ || qname.equals("meta")
+ || qname.equals("param") ) ) {
out.write(" />");
skip();
} else {
out.write(">");
- cursor = elt.getFirstChild();
+ enter();
}
} else if ( cursor.getNodeType() == Node.DOCUMENT_NODE ) {
- cursor = cursor.getFirstChild();
- skip();
+ enter();
} else if ( cursor.getNodeType() == Node.TEXT_NODE ) {
- out.write("(text)");
+ out.write(quote(((Text)cursor).getData()));
skip();
} else if ( cursor.getNodeType() == Node.COMMENT_NODE ) {
- out.write("<!--(comment)-->");
+ // FIXME: comments could conceivably contain the "--" string
+ out.write("<!--"+((Comment)cursor).getData()+"-->");
skip();
} else if ( cursor.getNodeType() == Node.CDATA_SECTION_NODE ) {
- out.write("<![CDATA[(cdata)]]>");
+ // FIXME: cdata section could conceivably contain the "]]>" string
+ out.write("<![CDATA["+((CDATASection)cursor).getData()+"]]>");
skip();
} else {
- out.write("<!--(some other kind of node)-->");
skip();
}
break;
@@ -80,9 +124,25 @@ public final class Unparser {
+ /**
+  * Serialize the document to the writer.
+  *
+  * Writes the XML declaration, then a DOCTYPE declaration if the
+  * document has a doctype, then starts at the document element and
+  * calls unparseOne() repeatedly until the cursor becomes null.  A
+  * final newline is written and the writer is flushed.
+  *
+  * @throws IOException if writing to the underlying writer fails
+  */
  public void unparse()
      throws IOException {
+     out.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
+     final DocumentType dtd = doc.getDoctype();
+     if ( dtd != null ) {
+         final String publicId = dtd.getPublicId();
+         final String systemId = dtd.getSystemId();
+         final StringBuilder decl = new StringBuilder("<!DOCTYPE ");
+         decl.append(dtd.getName());
+         if ( publicId != null ) {
+             // The system id is appended as-is, even when it is null.
+             decl.append(" PUBLIC \"").append(publicId)
+                 .append("\" \"").append(systemId).append("\"");
+         } else if ( systemId != null ) {
+             decl.append(" SYSTEM \"").append(systemId).append("\"");
+         }
+         decl.append(">\n");
+         out.write(decl.toString());
+     }
+     // Begin the traversal on the root element, going downwards.
+     cursor = doc.getDocumentElement();
+     dir = Dir.PUSHING;
      while ( cursor != null ) {
          unparseOne();
      }
+     out.write("\n");
+     out.flush();
  }
}