Write a semi-decent unparser (= serializer).

Still a bit rough around the edges, but this basically does what I want.
author: David A. Madore <david@procyon.(none)> 2010-04-12 18:48:34 +0200
committer: David A. Madore <david@procyon.(none)> 2010-04-12 18:48:34 +0200
commit: 2312810ac995f9cf2d93c63df29cefd6aa02156f (patch)
tree: 711af4d97065d0fa7d87792a3f5291919cb3557d
parent: 00dc8d9bbb54cf9a0cc47e6326a8444df92a14f1 (diff)
download: damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.gz
damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.bz2
damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.zip
2 files changed, 74 insertions, 13 deletions
diff --git a/org/madore/damlengine/DamlEngine.java b/org/madore/damlengine/DamlEngine.java
index e5454da..93cfea5 100644
--- a/org/madore/damlengine/DamlEngine.java
+++ b/org/madore/damlengine/DamlEngine.java
@@ -34,6 +34,7 @@ public final class DamlEngine {
 	for (String fname : args) {
 	    doc = db.parse(fname);
 	    processDocument();
+	    doc.normalizeDocument();
 	    Unparser unparser
 		= new Unparser(doc, new OutputStreamWriter(System.out));
 	    unparser.unparse();
diff --git a/org/madore/damlengine/Unparser.java b/org/madore/damlengine/Unparser.java
index a903523..b6ea4fc 100644
--- a/org/madore/damlengine/Unparser.java
+++ b/org/madore/damlengine/Unparser.java
@@ -6,17 +6,27 @@ import org.w3c.dom.*;
 
 public final class Unparser {
 
+    private Document doc;
     private Node cursor;
     private enum Dir { PUSHING, POPPING };
     private Dir dir;
     private Writer out;
 
     public Unparser(Document doc, Writer out) {
-	cursor = doc;
-	dir = Dir.PUSHING;
+	this.doc = doc;  // kept so unparse() can read the doctype and the root element; cursor/dir are set there
 	this.out = out;
     }
 
+    protected void enter() {
+	Node chld = cursor.getFirstChild();
+	if ( chld == null ) {
+	    dir = Dir.POPPING;
+	} else {
+	    cursor = chld;
+	    dir = Dir.PUSHING;
+	}
+    }
+
     protected void skip() {
 	Node nsib = cursor.getNextSibling();
 	if ( nsib == null ) {
@@ -28,43 +38,77 @@ public final class Unparser {
 	}
     }
 
+    /** Returns s with &, <, > and " replaced by XML entities (for text and "..." attribute values). */
+    public static String quote(String s) {
+	StringBuilder buf = new StringBuilder(s.length()+64);
+	int ptr0 = 0;  // start of the run of plain characters not yet copied to buf
+	for ( int ptr=0 ; ptr<s.length() ; ptr++ ) {
+	    String qch = null;
+	    switch ( s.charAt(ptr) ) {
+	    case '&': qch = "&amp;"; break;
+	    case '<': qch = "&lt;"; break;
+	    case '>': qch = "&gt;"; break;
+	    case '"': qch = "&quot;"; break;
+	    }
+	    if ( qch != null ) {
+		buf.append(s, ptr0, ptr).append(qch);
+		ptr0 = ptr+1;  // fix: resume after the escaped char, else the prefix is copied again
+	    }
+	}
+	buf.append(s, ptr0, s.length());  // trailing run after the last escaped character
+	return buf.toString();
+    }
+
     protected void unparseOne()
 	throws IOException {
+	// System.err.println("unparseOne(): cursor.nodeName="+cursor.getNodeName());
 	switch ( dir ) {
 	case PUSHING:
 	    if ( cursor.getNodeType() == Node.ELEMENT_NODE ) {
 		Element elt = (Element)cursor;
-		out.write("<"+elt.getTagName());
+		String qname = elt.getTagName();
+		out.write("<"+qname);
 		if ( elt.hasAttributes() ) {
 		    NamedNodeMap attrs = elt.getAttributes();
 		    Node n2;
 		    for ( int i=0 ; (n2=attrs.item(i)) != null ; i++ ) {
 			Attr attr = (Attr)n2;
-			out.write(" "+attr.getName()
-				  +"=\"(value)\"");
+			out.write(" "+attr.getName()+"=\""+quote(attr.getValue())+"\"");
+
 		    }
 		}
-		if ( ! elt.hasChildNodes() ) {
+		if ( ! elt.hasChildNodes() && ( qname.equals("br") || qname.equals("hr")
+						|| qname.equals("img")
+						|| qname.equals("area")
+						|| qname.equals("base")
+						|| qname.equals("basefont")
+						|| qname.equals("col")
+						|| qname.equals("frame")
+						|| qname.equals("input")
+						|| qname.equals("isindex")
+						|| qname.equals("link")
+						|| qname.equals("meta")
+						|| qname.equals("param") ) ) {
 		    out.write(" />");
 		    skip();
 		} else {
 		    out.write(">");
-		    cursor = elt.getFirstChild();
+		    enter();
 		}
 	    } else if ( cursor.getNodeType() == Node.DOCUMENT_NODE ) {
-		cursor = cursor.getFirstChild();
-		skip();
+		enter();
 	    } else if ( cursor.getNodeType() == Node.TEXT_NODE ) {
-		out.write("(text)");
+		out.write(quote(((Text)cursor).getData()));
 		skip();
 	    } else if ( cursor.getNodeType() == Node.COMMENT_NODE ) {
-		out.write("<!--(comment)-->");
+		// FIXME: comments could conceivably contain the "--" string
+		out.write("<!--"+((Comment)cursor).getData()+"-->");
 		skip();
 	    } else if ( cursor.getNodeType() == Node.CDATA_SECTION_NODE ) {
-		out.write("<![CDATA[(cdata)]]>");
+		// FIXME: cdata section could conceivably contain the "]]>" string
+		out.write("<![CDATA["+((CDATASection)cursor).getData()+"]]>");
 		skip();
 	    } else {
-		out.write("<!--(some other kind of node)-->");
 		skip();
 	    }
 	    break;
@@ -80,9 +124,25 @@ public final class Unparser {
 
     public void unparse()
 	throws IOException {
+	out.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>");  // NOTE(review): declares utf-8, but the real encoding is that of the supplied Writer
+	DocumentType doctype = doc.getDoctype();
+	if ( doctype != null ) {  // a <!DOCTYPE ...> line is written only when the document has a doctype
+	    String externalId = "";
+	    if ( doctype.getPublicId() != null ) {
+		externalId = " PUBLIC \""+doctype.getPublicId()+"\" \""
+		    +doctype.getSystemId()+"\"";  // NOTE(review): a null system id would be written as the text "null"
+	    } else if ( doctype.getSystemId() != null ) {
+		externalId = " SYSTEM \""+doctype.getSystemId()+"\"";
+	    }
+	    out.write("<!DOCTYPE "+doctype.getName()+externalId+">\n");  // only name and external id; no internal subset
+	}
+	cursor = doc.getDocumentElement();  // traversal starts at the root element, not at the Document node
+	dir = Dir.PUSHING;
 	while ( cursor != null ) {
 	    unparseOne();
 	}
+	out.write("\n");
+	out.flush();  // the writer is flushed but not closed here
     }
 
 }
author	David A. Madore <david@procyon.(none)>	2010-04-12 18:48:34 +0200
committer	David A. Madore <david@procyon.(none)>	2010-04-12 18:48:34 +0200
commit	2312810ac995f9cf2d93c63df29cefd6aa02156f (patch)
tree	711af4d97065d0fa7d87792a3f5291919cb3557d
parent	00dc8d9bbb54cf9a0cc47e6326a8444df92a14f1 (diff)
download	damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.gz damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.tar.bz2 damlengine-2312810ac995f9cf2d93c63df29cefd6aa02156f.zip