summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David A. Madore <david+git@madore.org> 2010-04-07 01:35:50 (GMT)
committer David A. Madore <david+git@madore.org> 2010-04-07 01:35:50 (GMT)
commit 24233a957fe518834b1c4299244706f4345ff993 (patch)
tree 3357257b9760690d0841c3b895efd2594a6c3835
parent f26764b3d1f63255a56a4289559ba213362389b4 (diff)
download blogengine-24233a957fe518834b1c4299244706f4345ff993.zip
blogengine-24233a957fe518834b1c4299244706f4345ff993.tar.gz
blogengine-24233a957fe518834b1c4299244706f4345ff993.tar.bz2
Yet another attempt to work around the grotesque limitations of libxml2.
This one is completely clean, but, alas, unimaginably costly in execution time.
-rwxr-xr-x daml2html.pl 95
1 files changed, 64 insertions, 31 deletions
diff --git a/daml2html.pl b/daml2html.pl
index 51e7808..c1ca768 100755
--- a/daml2html.pl
+++ b/daml2html.pl
@@ -141,31 +141,6 @@ if ( defined($ARGV[0]) ) {
$doc = $parser->parse_fh(\*STDIN);
}
-$doc->setEncoding("utf-8");
-$doc->removeExternalSubset();
-$doc->removeInternalSubset();
-$doc->createInternalSubset("html", XHTML_PUBID, XHTML_URI);
-
-# The following is cruft to work around XML::LibXML limitations:
-# there seems to be no way to eliminate namespace declarations which
-# have become unnecessary; so, instead, we prevent them from appearing
-# by placing the entire tree ($true_root) as the single child of a
-# root1 element which contains the relevant prefix declarations, and
-# that is itself the single child of a root0 element which has _no_
-# prefix declarations. We work entirely within root1 except at the
-# very end when we do the following trick: move $true_root outside of
-# $root1 (by $root0->appendChild($true_root)), which forces namespace
-# declarations to be made only as necessary, and then make $true_root
-# the document element. Yuck!
-my $true_root = $doc->documentElement;
-my $root0 = $doc->createElement("root0");
-$doc->setDocumentElement($root0);
-my $root1 = $doc->createElement("root1");
-$root1->setAttributeNS(XML_XMLNS_NS, "xmlns", XHTML_NS);
-$root1->setAttributeNS(XML_XMLNS_NS, "xmlns:d", DAML_NS);
-$root0->appendChild($root1);
-$root1->appendChild($true_root);
-
my $xpc = XML::LibXML::XPathContext->new($doc);
$xpc->registerNs('h', XHTML_NS);
$xpc->registerNs('d', DAML_NS);
@@ -194,7 +169,7 @@ $xpc->registerNs('d', DAML_NS);
# node and shifts them at the beginning of @todo_stack.
my @todo_stack;
-unshift @todo_stack, [undef, $true_root, {}, {is_root=>1}];
+unshift @todo_stack, [undef, $doc->documentElement, {}, {is_root=>1}];
# Global variables for processing:
my $uri_to_top;
@@ -488,8 +463,7 @@ sub daml_handler {
if ( defined($lang = get_node_lang_norec $node) ) {
set_node_lang_norec $html_node, $lang;
}
- $true_root->replaceNode($html_node);
- $true_root = $html_node;
+ $doc->setDocumentElement($html_node);
$html_node->appendChild($doc->createTextNode("\n"));
$html_node->appendChild($doc->createComment(" This file is automatically generated. Do not edit! "));
$html_node->appendChild($doc->createTextNode("\n"));
@@ -1055,6 +1029,65 @@ while ( my $process = shift @todo_stack ) {
}
}
-$root0->appendChild($true_root);
-$doc->setDocumentElement($true_root);
-print $doc->toString;
+# And now we create an output document whose content is identical to
+# the processed document: we need to do this to get rid of the useless
+# namespace declarations which XML::LibXML has inserted, and there
+# seems to be no better way. :-((((((((((((
+my $out_doc = XML::LibXML::Document->new("1.0", "utf-8");
+$out_doc->createInternalSubset("html", XHTML_PUBID, XHTML_URI);
+my %out_namespaces = ("xml"=>XML_XML_NS);
+sub out_replicate_node;  # forward declaration: permits the paren-less recursive call inside the body
+sub out_replicate_node {  # Recursively replicate $node (and its subtree) into $out_doc.
+ my $node = shift;  # source node to replicate
+ my $out_parent = shift;  # node of $out_doc to append to; undef when replicating the root
+ my $out_root = shift;  # root element of $out_doc, which receives the namespace declarations; undef on the first call
+ my $out_node;  # NOTE(review): never used -- shadowed by the "my $out_node" in the element branch
+ my $type = $node->nodeType;
+ if ( $type == XML_ELEMENT_NODE ) {
+ my $ns_uri = $node->namespaceURI // XHTML_NS;  # elements without a namespace are put in the XHTML namespace
+ my $ns_pfx = $node->prefix;
+ my $ns_lname = $node->localName;
+ my $out_node = $out_doc->createElementNS(
+ $ns_uri, ($ns_pfx?"$ns_pfx:$ns_lname":$ns_lname));  # qualified name: "prefix:local", or just "local" when there is no prefix
+ $out_root = $out_node unless defined($out_root);  # the first element created becomes the output root
+ if ( ! defined($out_namespaces{$ns_pfx//""}) ) {  # %out_namespaces: prefix ("" = default) => URI already recorded
+ $out_root->setNamespace($ns_uri, $ns_pfx//"", $out_node==$out_root);  # declare on the root; third argument (XML::LibXML's "activate" flag) is true only for the root itself
+ $out_namespaces{$ns_pfx//""} = $ns_uri;  # only the first URI seen for a given prefix is recorded
+ # printf STDERR "added prefix %s => %s (for node %s)\n",
+ # $ns_pfx//"(default)", $ns_uri, $ns_lname;
+ }
+ foreach my $attr ( $node->attributes ) {
+ if ( $attr->nodeType == XML_ATTRIBUTE_NODE ) {  # keep true attributes only (presumably skips namespace declaration nodes -- confirm)
+ my $at_uri = $attr->namespaceURI;
+ my $at_pfx = $attr->prefix;
+ my $at_lname = $attr->localName;
+ if ( defined($at_uri)
+ && ! defined($out_namespaces{$at_pfx//""}) ) {  # namespaced attribute whose prefix has not been recorded yet
+ $out_root->setNamespace($at_uri, $at_pfx, 0);  # declare the prefix on the root without activating it there
+ $out_namespaces{$at_pfx//""} = $at_uri;
+ # printf STDERR "added prefix %s => %s (for attr %s)\n",
+ # $at_pfx//"(default)", $at_uri, $at_lname;
+ }
+ $out_node->setAttributeNS(
+ $at_uri, ($at_pfx?"$at_pfx:$at_lname":$at_lname),
+ $attr->value);  # copy the attribute value; $at_uri is undef for attributes without a namespace
+ }
+ }
+ if ( defined($out_parent) ) {
+ $out_parent->appendChild($out_node);
+ } else {
+ $out_doc->setDocumentElement($out_node);  # no parent given: this element is the output document element
+ }
+ foreach my $child ( $node->childNodes ) {
+ out_replicate_node $child, $out_node, $out_root;  # recurse, passing the new element as parent
+ }
+ } elsif ( $type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE
+ || $type == XML_COMMENT_NODE ) {
+ $out_parent->appendChild($out_doc->adoptNode($node));  # leaf nodes are adopted (not rebuilt) into $out_doc; assumes $out_parent is defined here
+ } else {
+ die "unhandled node type";  # any other node type aborts the program
+ }
+};
+out_replicate_node ($doc->documentElement, undef, undef);  # start at the source document element: no output parent or root exists yet
+
+print $out_doc->toString;