summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xdaml2html.pl172
1 files changed, 172 insertions, 0 deletions
diff --git a/daml2html.pl b/daml2html.pl
new file mode 100755
index 0000000..2bce970
--- /dev/null
+++ b/daml2html.pl
@@ -0,0 +1,172 @@
+#! /usr/local/bin/perl -w
+
+require 5.10.0;
+
+use strict;
+use warnings;
+
+use Encode;
+
+use XML::LibXML qw(:libxml :ns);
+
+use DBI;
+
+use Digest::SHA1 qw(sha1_hex);
+
+use Getopt::Std;
+
+# Public identifier and system URI of the XHTML 1.0 Strict DTD, and the
+# XHTML namespace URI; all three are used when building the output document.
+use constant XHTML_PUBID => '-//W3C//DTD XHTML 1.0 Strict//EN';
+use constant XHTML_URI   => 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd';
+use constant XHTML_NS    => 'http://www.w3.org/1999/xhtml';
+
+# Command-line options:
+#   -d NAME  PostgreSQL database to use (default: "weblog")
+#   -c       boolean flag, stored in $obtain_cdates
+#            (presumably "obtain creation dates" -- TODO confirm)
+my %opts;
+
+getopts('cd:', \%opts);
+
+# The option must be the left operand of //: a string literal is always
+# defined, so writing the default first would ignore -d entirely.
+my $dbname = $opts{d} // "weblog"; # The PostgreSQL database to use
+my $obtain_cdates = $opts{c};
+
+# Parser used for the input document; the x-daml-magic input callbacks and
+# the XML catalog are attached to it further down.
+my $parser = XML::LibXML->new();
+
+
+# Functions to take care of the x-daml-magic URI scheme (this should go away)
+
+# Match callback: tells the parser whether a URI belongs to the
+# x-daml-magic scheme.
+#   $uri     - the URI being resolved
+#   returns  - result of the pattern match (true when the URI starts with
+#              "x-daml-magic:")
+sub match_daml_magic_uri {
+    my ($candidate) = @_;
+    return $candidate =~ m{^x-daml-magic\:};
+}
+
+# Fixed content returned for each known x-daml-magic URI.  URIs that are not
+# listed here cannot be opened (see open_daml_magic_uri).
+my %daml_magic_values = (
+    'x-daml-magic://daml/weblog/domaxentries.xml' => 'INCLUDE',
+    'x-daml-magic://daml/weblog/maxentries.xml'   => '20',
+    'x-daml-magic://daml/weblog/domonth.xml'      => 'IGNORE',
+    'x-daml-magic://daml/weblog/month.xml'        => '',
+    'x-daml-magic://daml/weblog/docategory.xml'   => 'IGNORE',
+    'x-daml-magic://daml/weblog/category-en.xml'  => '',
+    'x-daml-magic://daml/weblog/category-fr.xml'  => '',
+);
+
+# Open callback: looks the URI up in %daml_magic_values.
+#   $uri     - the URI to open
+#   returns  - a reference to a private copy of the stored string (the read
+#              callback consumes it in place), or undef for an unknown URI
+sub open_daml_magic_uri {
+    my ($uri) = @_;
+    my $content = $daml_magic_values{$uri};
+    return undef unless defined $content;
+    return \$content;
+}
+
+# Read callback: hands out the next chunk of the buffered string.
+#   $handler - scalar reference, as produced by open_daml_magic_uri
+#   $length  - maximum number of characters to return
+#   returns  - the leading characters, which are also removed from the
+#              referenced string so the next call continues after them
+sub read_daml_magic_uri {
+    my ($handler, $length) = @_;
+    # Four-argument substr extracts the prefix and deletes it in one step.
+    return substr(${$handler}, 0, $length, "");
+}
+
+# Close callback: nothing needs releasing for an in-memory string, so the
+# handle is ignored.
+#   returns  - the string "0 but true"
+sub close_daml_magic_uri {
+    my ($handler) = @_;    # accepted for the callback signature, unused
+    return "0 but true";
+}
+
+# Register the four x-daml-magic handlers (match, open, read, close) as one
+# callback group and attach it to the parser.
+my $input_callbacks = XML::LibXML::InputCallback->new();
+my @daml_magic_handlers = (
+    \&match_daml_magic_uri,
+    \&open_daml_magic_uri,
+    \&read_daml_magic_uri,
+    \&close_daml_magic_uri,
+);
+$input_callbacks->register_callbacks([ @daml_magic_handlers ]);
+$parser->input_callbacks($input_callbacks);
+
+# Avoid accessing the W3C web site all the time.
+$parser->load_catalog("/etc/xml/catalog");
+
+# Parse the file named by the first command-line argument, or standard
+# input when no argument is given.
+my $in_doc = defined($ARGV[0])
+    ? $parser->parse_file($ARGV[0])
+    : $parser->parse_fh(\*STDIN);
+
+# Build the skeleton of the output document: a doctype declaration, an
+# <html> root holding a "generated file" comment, a <head> with an (as yet
+# empty) <title>, and an empty <body>.  Newline text nodes are inserted
+# between the pieces (presumably to keep the serialized output readable).
+# Statement order matters here: nodes appear in the output in the order in
+# which they are appended.
+my $out_doc = XML::LibXML::Document->new("1.0","utf-8");
+# $out_dtd is not referenced again in this file; the call is made for its
+# effect on $out_doc.
+my $out_dtd = $out_doc->createInternalSubset("html", XHTML_PUBID, XHTML_URI);
+my $out_html = $out_doc->createElementNS(XHTML_NS, "html");
+$out_doc->setDocumentElement($out_html);
+$out_html->appendChild($out_doc->createTextNode("\n"));
+$out_html->appendChild($out_doc->createComment(" This file is automatically generated. Do not edit! "));
+$out_html->appendChild($out_doc->createTextNode("\n"));
+# <head> containing only <title>; the title text is filled in later.
+my $out_head = $out_doc->createElementNS(XHTML_NS, "head");
+$out_html->appendChild($out_head);
+$out_head->appendChild($out_doc->createTextNode("\n"));
+my $out_title = $out_doc->createElementNS(XHTML_NS, "title");
+$out_head->appendChild($out_title);
+$out_head->appendChild($out_doc->createTextNode("\n"));
+$out_html->appendChild($out_doc->createTextNode("\n"));
+# <body>, initially holding just a newline; content is appended later.
+my $out_body = $out_doc->createElementNS(XHTML_NS, "body");
+$out_html->appendChild($out_body);
+$out_body->appendChild($out_doc->createTextNode("\n"));
+$out_html->appendChild($out_doc->createTextNode("\n"));
+
+# Copy the title and subtitle of the input document (/daml/title and
+# /daml/subtitle) into the output document.
+my $xpc = XML::LibXML::XPathContext->new($in_doc);
+#$xpc->registerNs('h', XHTML_NS);
+
+# Title: its text content becomes the text of <title>, and the same text is
+# repeated in an <h1 class="title"> at the top of <body>.
+my $in_title = ($xpc->findnodes("/daml/title"))[0];
+if ( defined($in_title) ) {
+    $out_title->appendChild($out_doc->createTextNode($in_title->textContent));
+    my $out_h1 = $out_doc->createElementNS(XHTML_NS, "h1");
+    $out_h1->setAttributeNS("", "class", "title");
+    $out_body->appendChild($out_h1);
+    $out_body->appendChild($out_doc->createTextNode("\n"));
+    # importNode makes deep copies, so <title> keeps its own text node.
+    foreach my $child ( $out_title->childNodes ) {
+        $out_h1->appendChild($out_doc->importNode($child,1));
+    }
+}
+
+# Subtitle: emitted as <p class="subtitle"> only when the input actually has
+# a subtitle.  The paragraph is filled with deep copies of the subtitle's own
+# child nodes, imported into the output document.  (The previous version
+# tested $in_title and copied the title's nodes, so the subtitle paragraph
+# duplicated the title and the real subtitle was never used.)
+my $in_subtitle = ($xpc->findnodes("/daml/subtitle"))[0];
+if ( defined($in_subtitle) ) {
+    my $out_p = $out_doc->createElementNS(XHTML_NS, "p");
+    $out_p->setAttributeNS("", "class", "subtitle");
+    $out_body->appendChild($out_p);
+    $out_body->appendChild($out_doc->createTextNode("\n"));
+    foreach my $child ( $in_subtitle->childNodes ) {
+        $out_p->appendChild($out_doc->importNode($child,1));
+    }
+}
+
+# sub map_children {
+# my $fun = shift;
+# my $node = shift;
+# my $cref = shift;
+# for ( my $child=$node->firstChild ; $child ; $child=$child->nextSibling ) {
+# &$fun($child, $cref) if $child->nodeType == XML_ELEMENT_NODE;
+# }
+# }
+
+# sub handle_body {
+# my $node = shift;
+# my $cref = shift;
+# # Extract children list first, because they will be removed!
+# my @child_nodes = $node->childNodes();
+# foreach my $child ( @child_nodes ) {
+# $out_doc->adoptNode($child);
+# $out_body->appendChild($child);
+# }
+# $cref = {%$cref, in_body=>1};
+# foreach my $child ( @child_nodes ) {
+# apply_templates($child, $cref) if $child->nodeType == XML_ELEMENT_NODE;
+# }
+# }
+
+# sub apply_templates {
+# my $node = shift;
+# my $cref = shift;
+# my $node_name = $node->localname;
+# if ( $node_name eq "body" ) {
+# handle_body $node, $cref;
+# return;
+# }
+# map_children \&apply_templates, $node, $cref;
+# }
+
+# my %context = ();
+
+# apply_templates $in_doc->documentElement, \%context;
+
+# Serialize the generated document and write it to the default output handle.
+my $serialized = $out_doc->toString();
+print $serialized;