summaryrefslogtreecommitdiffstats
path: root/daml2html.pl
diff options
context:
space:
mode:
Diffstat (limited to 'daml2html.pl')
-rwxr-xr-xdaml2html.pl471
1 files changed, 137 insertions, 334 deletions
diff --git a/daml2html.pl b/daml2html.pl
index d1d0c51..cf98ed5 100755
--- a/daml2html.pl
+++ b/daml2html.pl
@@ -90,369 +90,172 @@ if ( defined($ARGV[0]) ) {
$doc = $parser->parse_fh(\*STDIN);
}
-my $doc_is_daml = ( $doc->documentElement->namespaceURI eq DAML_NS
- && $doc->documentElement->localname eq "daml" );
-my $doc_is_html = ( $doc->documentElement->namespaceURI eq XHTML_NS
- && $doc->documentElement->localname eq "html" );
+$doc->setEncoding("utf-8");
+$doc->removeExternalSubset();
+$doc->removeInternalSubset();
+$doc->createInternalSubset("html", XHTML_PUBID, XHTML_URI);
-die "Root element should be d:daml or html"
- unless ( $doc_is_daml || $doc_is_html );
-
-my $xpc = XML::LibXML::XPathContext->new($in_doc);
+my $xpc = XML::LibXML::XPathContext->new($doc);
$xpc->registerNs('h', XHTML_NS);
$xpc->registerNs('d', DAML_NS);
-sub rec_lang {
- my $node = shift;
- my $lang;
- while ( defined($node) && $node->nodeType == XML_ELEMENT_NODE ) {
- $lang = $node->getAttributeNS(XML_XML_NS, "lang");
- return $lang if defined($lang);
- $node = $node->parentNode;
- }
- return undef;
-}
+my @node_stack;
-sub set_rec_lang {
- my $node = shift;
- my $lang = shift;
- return unless defined($lang);
- my $parent = $node->parentNode;
- my $parent_lang = defined($parent)?rec_lang($parent):undef;
- if ( defined($parent_lang) && $lang eq $parent_lang ) {
- $node->removeAttributeNS(XML_XML_NS, "lang");
- } else {
- $node->setAttributeNS(XML_XML_NS, "lang", $lang);
- }
-}
-
-my $top_lang = rec_lang($in_doc->documentElement);
-my $body_lang = $top_lang;
-my $uri_to_top;
-my $file_name;
-
-my $out_doc = XML::LibXML::Document->new("1.0","utf-8");
-my $out_dtd = $out_doc->createInternalSubset("html", XHTML_PUBID, XHTML_URI);
-my $out_html = $out_doc->createElementNS(XHTML_NS, "html");;
-my $out_head;
-my $out_body;
-
-if ( $in_doc_is_daml ) {
- $uri_to_top = $in_doc->documentElement->getAttributeNS("", "uri-to-top")
- // "";
- $file_name = $in_doc->documentElement->getAttributeNS("", "file.name");
- set_rec_lang($out_html, $top_lang) if defined($top_lang);
- my $in_body = ($xpc->findnodes("/d:daml/d:body"))[0];
- $body_lang = rec_lang($in_body) // $top_lang;
- $out_html->appendChild($out_doc->createTextNode("\n"));
- $out_html->appendChild($out_doc->createComment(" This file is automatically generated. Do not edit! "));
- $out_html->appendChild($out_doc->createTextNode("\n"));
- $out_head = $out_doc->createElementNS(XHTML_NS, "head");
- $out_html->appendChild($out_head);
- $out_head->appendChild($out_doc->createTextNode("\n"));
- my $out_title = $out_doc->createElementNS(XHTML_NS, "title");
- $out_head->appendChild($out_title);
- $out_head->appendChild($out_doc->createTextNode("\n"));
- $out_html->appendChild($out_doc->createTextNode("\n"));
- my $out_body = $out_doc->createElementNS(XHTML_NS, "body");
- $out_html->appendChild($out_body);
- $out_body->setAttributeNS("","onload","onLoad()");
- set_rec_lang($out_body, $body_lang);
- $out_body->appendChild($out_doc->createTextNode("\n"));
- $out_html->appendChild($out_doc->createTextNode("\n"));
-} elsif ( $in_doc_is_html ) {
- my $in_body = ($xpc->findnodes("/h:html/h:body"))[0];
- $body_lang = rec_lang($in_body) // $top_lang;
-}
+unshift @node_stack, [$doc->documentElement, {is_root=>1}];
-my $out_doc
-my
-$out_doc->setDocumentElement($out_html);
-if ( defined($top_lang) ) {
-}
+my $html_node;
+my $head_node;
+my $title_node;
-my $in_title = ($xpc->findnodes("/d:daml/d:title"))[0];
-if ( defined($in_title) ) {
- $out_title->appendChild($out_doc->createTextNode($in_title->textContent));
- my $out_meta_title = $out_doc->createElementNS(XHTML_NS, "meta");
- $out_meta_title->setAttributeNS("", "name", "Title");
- $out_meta_title->setAttributeNS("", "content", $in_title->textContent);
- $out_head->appendChild($out_meta_title);
- set_rec_lang($out_meta_title, rec_lang $in_title);
- $out_head->appendChild($out_doc->createTextNode("\n"));
- my $out_h1 = $out_doc->createElementNS(XHTML_NS, "h1");
- $out_h1->setAttributeNS("", "class", "title");
- $out_body->appendChild($out_h1);
- set_rec_lang($out_h1, rec_lang $in_title);
- $out_body->appendChild($out_doc->createTextNode("\n"));
- foreach my $child ( $in_title->childNodes ) {
- $out_h1->appendChild($out_doc->importNode($child,1));
- }
-}
-my $in_subtitle = ($xpc->findnodes("/d:daml/d:subtitle"))[0];
-if ( defined($in_subtitle) ) {
- my $out_p = $out_doc->createElementNS(XHTML_NS, "p");
- $out_p->setAttributeNS("", "class", "subtitle");
- $out_body->appendChild($out_p);
- set_rec_lang($out_p, rec_lang $in_subtitle);
- $out_body->appendChild($out_doc->createTextNode("\n"));
- foreach my $child ( $in_subtitle->childNodes ) {
- $out_p->appendChild($out_doc->importNode($child,1));
+sub default_handler_nodeonly {
+ my $node = shift;
+ my $optref = shift;
+ print STDERR "warning: default handler doesn't handle arguments\n" if @_;
+# return unless $node->nodeType == XML_ELEMENT_NODE;
+ my @child_nodes = $node->childNodes;
+ my @to_process;
+ foreach my $child ( @child_nodes ) {
+ push @to_process, [$child, {}] if $child->nodeType == XML_ELEMENT_NODE;
}
-}
-my $in_meta_description = ($xpc->findnodes("/d:daml/d:meta-description"))[0];
-if ( defined($in_meta_description) ) {
- my $out_meta_description = $out_doc->createElementNS(XHTML_NS, "meta");
- $out_meta_description->setAttributeNS("", "name", "Description");
- $out_meta_description->setAttributeNS("", "content",
- $in_meta_description->textContent);
- $out_head->appendChild($out_meta_description);
- set_rec_lang($out_meta_description, rec_lang $in_meta_description);
- $out_head->appendChild($out_doc->createTextNode("\n"));
-}
-my $in_meta_keywords = ($xpc->findnodes("/daml/meta-keywords"))[0];
-if ( defined($in_meta_keywords) ) {
- my $out_meta_keywords = $out_doc->createElementNS(XHTML_NS, "meta");
- $out_meta_keywords->setAttributeNS("", "name", "Keywords");
- $out_meta_keywords->setAttributeNS("", "content",
- $in_meta_keywords->textContent);
- $out_head->appendChild($out_meta_keywords);
- set_rec_lang($out_meta_keywords, rec_lang $in_meta_keywords);
- $out_head->appendChild($out_doc->createTextNode("\n"));
+ unshift @node_stack, @to_process;
}
-{
- my $out_meta = $out_doc->createElementNS(XHTML_NS, "meta");
- $out_meta->setAttributeNS("", "http-equiv", "Content-Type");
- $out_meta->setAttributeNS("", "content", "text/html; charset=utf-8");
- $out_head->appendChild($out_meta);
- $out_head->appendChild($out_doc->createTextNode("\n"));
-}
-if ( defined($body_lang) && $body_lang ne "und" ) {
- my $out_meta = $out_doc->createElementNS(XHTML_NS, "meta");
- $out_meta->setAttributeNS("", "http-equiv", "Content-Language");
- $out_meta->setAttributeNS("", "content", $body_lang);
- $out_head->appendChild($out_meta);
- $out_head->appendChild($out_doc->createTextNode("\n"));
-}
-
-{
- my $out_link = $out_doc->createElementNS(XHTML_NS, "link");
- $out_link->setAttributeNS("", "href", $uri_to_top."favicon.ico");
- $out_link->setAttributeNS("", "rel", "Shortcut Icon");
- $out_head->appendChild($out_link);
- $out_head->appendChild($out_doc->createTextNode("\n"));
+sub default_handler {
+ my $node = shift;
+ my $optref = shift;
+ default_handler_nodeonly $node, $optref;
}
-my @in_translations = $xpc->findnodes("/d:daml/d:translation");
-foreach my $in_translation ( @in_translations ) {
- my $lang = $in_translation->getAttributeNS(XML_XML_NS, "lang");
- die "xml:lang attribute missing on <translation>" unless defined($lang);
- my $out_link = $out_doc->createElementNS(XHTML_NS, "link");
- $out_link->setAttributeNS("", "href", $file_name.".".$lang);
- $out_link->setAttributeNS("", "hreflang", $lang);
- if ( $lang eq "en" ) {
- $out_link->setAttributeNS("", "title", "English version");
- } elsif ( $lang eq "fr" ) {
- $out_link->setAttributeNS("", "title", "Version française");
+sub create_meta_element_helper {
+ my $src_node = shift; # May be undef
+ my $name_or_http_equiv = shift;
+ my $meta_name = shift;
+ my $meta_content = shift;
+ my $meta = $doc->createElementNS(XHTML_NS, "meta");
+ if ( defined($src_node) && ( my $lang = $src_node->getAttributeNS(XML_XML_NS, "lang") ) ) {
+ $meta->setAttributeNS(XML_XML_NS, "lang", $lang);
}
- $out_link->setAttributeNS("", "rel", "Alternate");
- $out_link->setAttributeNS(XML_XML_NS, "lang", $lang);
- $out_head->appendChild($out_link);
- $out_head->appendChild($out_doc->createTextNode("\n"));
-}
-
-my @in_metastuff = $xpc->findnodes("/d:daml/*[self::h:meta or self::h:link]");
-foreach my $in_mnode ( @in_metastuff ) {
- $out_head->appendChild($out_doc->importNode($in_mnode,1));
- $out_head->appendChild($out_doc->createTextNode("\n"));
-}
-
-{
- my $out_navbar = $out_doc->createElementNS(XHTML_NS, "p");
- $out_navbar->setAttributeNS("", "class", "navbar");
- $out_body->appendChild($out_navbar);
- $out_body->appendChild($out_doc->createTextNode("\n"));
- my $lang;
- if ( defined($body_lang) && $body_lang eq "fr" ) {
- $lang = "fr";
+ $meta->setAttributeNS("", $name_or_http_equiv, $meta_name);
+ $meta->setAttributeNS("", "content", $meta_content);
+ if ( defined($src_node) ) {
+ ($doc->documentElement)->appendChild($meta); # Work around libxml2 bug <URL: https://bugzilla.gnome.org/show_bug.cgi?id=614068 >
+ $src_node->replaceNode($meta);
} else {
- $lang = "en";
+ die "\$head_node should have been defined at this point" unless defined($head_node);
+ $head_node->appendChild($meta);
+ $head_node->appendChild($doc->createTextNode("\n"));
}
- my $out_node;
- $out_node = $out_doc->createElementNS(XHTML_NS, "a");
- $out_node->setAttributeNS("", "href", "http://www.madore.org/~david/");
- $out_navbar->appendChild($out_node);
- $out_node->appendChild($out_doc->createTextNode("David Madore"));
- $out_navbar->appendChild($out_doc->createElementNS(XHTML_NS, "br"));
- $out_node = $out_doc->createElementNS(XHTML_NS, "a");
- $out_node->setAttributeNS("", "href", "http://perso.enst.fr/~madore/");
- $out_navbar->appendChild($out_node);
- $out_node->appendChild($out_doc->createTextNode($lang eq "fr"?"Site prof.":"Prof. site"));
- $out_navbar->appendChild($out_doc->createElementNS(XHTML_NS, "br"));
- $out_node = $out_doc->createElementNS(XHTML_NS, "a");
- $out_node->setAttributeNS("", "href", $uri_to_top);
- $out_navbar->appendChild($out_node);
- $out_node->appendChild($out_doc->createTextNode($lang eq "fr"?"Racine du site":"Site Root"));
- $out_navbar->appendChild($out_doc->createElementNS(XHTML_NS, "br"));
- $out_node = $out_doc->createElementNS(XHTML_NS, "a");
- $out_node->setAttributeNS("", "href", $uri_to_top."sitemap.html");
- $out_navbar->appendChild($out_node);
- $out_node->appendChild($out_doc->createTextNode($lang eq "fr"?"Plan du site":"Site Map"));
- $out_navbar->appendChild($out_doc->createElementNS(XHTML_NS, "br"));
- $out_node = $out_doc->createElementNS(XHTML_NS, "a");
- $out_node->setAttributeNS("", "href", $uri_to_top."weblog/");
- $out_navbar->appendChild($out_node);
- $out_node->appendChild($out_doc->createTextNode("WebLog"));
}
-foreach my $in_translation ( @in_translations ) {
- my $lang = $in_translation->getAttributeNS(XML_XML_NS, "lang");
- die "xml:lang attribute missing on <translation>" unless defined($lang);
- my $out_p = $out_doc->createElementNS(XHTML_NS, "p");
- $out_p->setAttributeNS("", "class", "translation-offer");
- $out_p->setAttributeNS(XML_XML_NS, "lang", $lang);
- if ( $lang eq "fr" ) {
- $out_p->appendChild($out_doc->createTextNode("[Une "));
- } elsif ( $lang eq "en" ) {
- $out_p->appendChild($out_doc->createTextNode("[An "));
- } else {
- $out_p->appendChild($out_doc->createTextNode("["));
- }
- my $out_a = $out_doc->createElementNS(XHTML_NS, "a");
- $out_a->setAttributeNS("", "href", $file_name.".".$lang);
- $out_a->setAttributeNS("", "hreflang", $lang);
- if ( $lang eq "fr" ) {
- $out_a->appendChild($out_doc->createTextNode("version française"));
- } elsif ( $lang eq "en" ) {
- $out_a->appendChild($out_doc->createTextNode("English version"));
- } else {
- $out_a->appendChild($out_doc->createTextNode($lang));
+sub daml_handler {
+ my $node = shift;
+ my $optref = shift;
+ print STDERR "warning: title handler doesn't handle arguments\n" if @_;
+ die "daml element can only be root element" unless $$optref{is_root};
+ die "\$html_node already defined: what magic is this?" if defined($html_node);
+ $html_node = $doc->createElementNS(XHTML_NS, "html");
+ my $lang;
+ if ( $lang = $node->getAttributeNS(XML_XML_NS, "lang") ) {
+ $html_node->setAttributeNS(XML_XML_NS, "lang", $lang);
}
- $out_p->appendChild($out_a);
- if ( $lang eq "fr" ) {
- $out_p->appendChild($out_doc->createTextNode(" de cette page est également disponible.]"));
- } elsif ( $lang eq "en" ) {
- $out_p->appendChild($out_doc->createTextNode(" of this page is also available.]"));
- } else {
- $out_p->appendChild($out_doc->createTextNode("]"));
+ $doc->setDocumentElement($html_node);
+ $html_node->appendChild($doc->createTextNode("\n"));
+ $html_node->appendChild($doc->createComment(" This file is automatically generated. Do not edit! "));
+ $html_node->appendChild($doc->createTextNode("\n"));
+ die "\$head_node already defined: what magic is this?" if defined($head_node);
+ $head_node = $doc->createElementNS(XHTML_NS, "head");
+ $html_node->appendChild($head_node);
+ $html_node->appendChild($doc->createTextNode("\n"));
+ $head_node->appendChild($doc->createTextNode("\n"));
+ create_meta_element_helper(undef, "http-equiv", "Content-Type", "text/html; charset=utf-8");
+ create_meta_element_helper(undef, "http-equiv", "Content-Language", $lang) if $lang;
+ my @child_nodes = $node->childNodes;
+ my @to_process;
+ foreach my $child ( @child_nodes ) {
+ if ( $child->nodeType == XML_ELEMENT_NODE ) {
+ if ( $child->localName eq "body" ) {
+ $html_node->appendChild($doc->createTextNode("\n"));
+ $html_node->appendChild($child);
+ $html_node->appendChild($doc->createTextNode("\n"));
+ } else {
+ $head_node->appendChild($child);
+ $head_node->appendChild($doc->createTextNode("\n"));
+ }
+ push @to_process, [$child, {is_daml_child=>1}];
+ } elsif ( $child->nodeType == XML_TEXT_NODE
+ || $child->nodeType == XML_CDATA_SECTION_NODE ) {
+ die "daml element cannot contain text"
+ unless $child->data =~ m/^\s*$/s;
+ }
}
- $out_body->appendChild($out_p);
- $out_body->appendChild($out_doc->createTextNode("\n"));
+ unshift @node_stack, @to_process;
}
-if ( defined($in_body) ) {
- foreach my $child ( $in_body->childNodes ) {
- $out_doc->adoptNode($child);
- $out_body->appendChild($child);
+sub body_handler {
+ my $node = shift;
+ my $optref = shift;
+ print STDERR "warning: body handler doesn't handle arguments\n" if @_;
+ my $body_node = $doc->createElementNS(XHTML_NS, "body");
+ if ( my $lang = $node->getAttributeNS(XML_XML_NS, "lang") ) {
+ $body_node->setAttributeNS(XML_XML_NS, "lang", $lang);
}
+ ($doc->documentElement)->appendChild($body_node); # Work around libxml2 bug <URL: https://bugzilla.gnome.org/show_bug.cgi?id=614068 >
+ $node->replaceNode($body_node);
+ my @child_nodes = $node->childNodes;
+ my @to_process;
+ foreach my $child ( @child_nodes ) {
+ if ( $child->nodeType == XML_TEXT_NODE
+ || $child->nodeType == XML_CDATA_SECTION_NODE ) {
+ die "body element cannot contain text"
+ unless $child->data =~ m/^\s*$/s;
+ }
+ $body_node->appendChild($child);
+ push @to_process, [$child, {}] if $child->nodeType == XML_ELEMENT_NODE;
+ }
+ unshift @node_stack, @to_process;
}
-{
- my $out_hr = $out_doc->createElementNS(XHTML_NS, "hr");
- $out_hr->setAttributeNS("", "class", "cleared");
- $out_body->appendChild($out_hr);
- $out_body->appendChild($out_doc->createTextNode("\n"));
- my $out_address = $out_doc->createElementNS(XHTML_NS, "address");
- my $out_a = $out_doc->createElementNS(XHTML_NS, "a");
- $out_a->setAttributeNS("", "href", $uri_to_top);
- $out_a->appendChild($out_doc->createTextNode("David Madore"));
- $out_address->appendChild($out_a);
- $out_address->appendChild($out_doc->createTextNode(" ("));
- # XXX - email address here
- $out_address->appendChild($out_doc->createTextNode(")"));
- $out_body->appendChild($out_address);
- $out_body->appendChild($out_doc->createTextNode("\n"));
+sub title_handler {
+ my $node = shift;
+ my $optref = shift;
+ print STDERR "warning: title handler doesn't handle arguments\n" if @_;
+ if ( $$optref{is_daml_child} ) {
+ $title_node = $node;
+ create_meta_element_helper $node, "name", "Title", $title_node->textContent;
+ }
}
-my $in_last_modified = ($xpc->findnodes("/d:daml/d:last-modified"))[0];
-if ( defined($in_last_modified) ) {
- my $out_last_modified = $out_doc->createElementNS(XHTML_NS, "p");
- $out_last_modified->setAttributeNS("", "class", "last-modified");
- $out_body->appendChild($out_last_modified);
- my $lang = rec_lang $in_last_modified;
- my $str = $in_last_modified->textContent;
- if ( defined($lang) && $lang eq "fr" ) {
- $str = "Dernière modification : " . $str;
- } elsif ( defined($lang) && $lang eq "en" ) {
- $str = "Last modified: " . $str;
+sub meta_handler {
+ my $node = shift;
+ my $optref = shift;
+ print STDERR "warning: meta handler doesn't handle arguments\n" if @_;
+ if ( $node->localName eq "meta-description" ) {
+ create_meta_element_helper $node, "name", "Description", $node->textContent;
+ } elsif ( $node->localName eq "meta-keywords" ) {
+ create_meta_element_helper $node, "name", "Keywords", $node->textContent;
}
- $out_last_modified->appendChild($out_doc->createTextNode($str));
- set_rec_lang($out_last_modified, $lang);
- $out_body->appendChild($out_doc->createTextNode("\n"));
}
-my $pure_style_content = "";
-if ( open my $common_style_file, "<", "included.css" ) {
- local $/;
- $pure_style_content = <$common_style_file>;
- close $common_style_file;
-}
-my $pure_script_content = "";
-if ( open my $common_script_file, "<", "included.js" ) {
- local $/;
- $pure_script_content = <$common_script_file>;
- close $common_script_file;
-}
+my %daml_handler = (
+ "daml" => \&daml_handler,
+ "body" => \&body_handler,
+ "title" => \&title_handler,
+ "meta-description" => \&meta_handler,
+ "meta-keywords" => \&meta_handler,
+ );
-my @in_extra_style = $xpc->findnodes("/d:daml/d:extra-style");
-foreach my $in_extra_style ( @in_extra_style ) {
- $pure_style_content .= $in_extra_style->textContent;
+NODELOOP:
+while ( my $process = shift @node_stack ) {
+ my $node = $$process[0];
+ unless ( defined($node->namespaceURI) ) {
+ printf STDERR "warning: skipping %s node with missing namespace\n", $node->nodeName;
+ next NODELOOP;
+ }
+ if ( $node->namespaceURI eq DAML_NS && defined($daml_handler{$node->localName}) ) {
+ &{$daml_handler{$node->localName}}(@$process);
+ } else {
+ default_handler(@$process);
+ }
}
-my $out_style_section = $out_doc->createElementNS(XHTML_NS, "style");
-$out_head->appendChild($out_style_section);
-$out_head->appendChild($out_doc->createTextNode("\n"));
-$out_style_section->setAttributeNS("", "type", "text/css");
-$out_style_section->appendChild($out_doc->createTextNode("\n/* "));
-$out_style_section->appendChild($out_doc->createCDATASection(" */\n".$pure_style_content."/* "));
-$out_style_section->appendChild($out_doc->createTextNode(" */\n"));
-my $out_script_section = $out_doc->createElementNS(XHTML_NS, "script");
-$out_head->appendChild($out_script_section);
-$out_head->appendChild($out_doc->createTextNode("\n"));
-$out_script_section->setAttributeNS("", "type", "text/javascript");
-$out_script_section->setAttributeNS("", "defer", "defer");
-$out_script_section->appendChild($out_doc->createTextNode("\n// "));
-$out_script_section->appendChild($out_doc->createCDATASection("\n".$pure_script_content."// "));
-$out_script_section->appendChild($out_doc->createTextNode("\n"));
-
-# sub map_children {
-# my $fun = shift;
-# my $node = shift;
-# my $cref = shift;
-# for ( my $child=$node->firstChild ; $child ; $child=$child->nextSibling ) {
-# &$fun($child, $cref) if $child->nodeType == XML_ELEMENT_NODE;
-# }
-# }
-
-# sub handle_body {
-# my $node = shift;
-# my $cref = shift;
-# # Extract children list first, because they will be removed!
-# my @child_nodes = $node->childNodes();
-# foreach my $child ( @child_nodes ) {
-# $out_doc->adoptNode($child);
-# $out_body->appendChild($child);
-# }
-# $cref = {%$cref, in_body=>1};
-# foreach my $child ( @child_nodes ) {
-# apply_templates($child, $cref) if $child->nodeType == XML_ELEMENT_NODE;
-# }
-# }
-
-# sub apply_templates {
-# my $node = shift;
-# my $cref = shift;
-# my $node_name = $node->localname;
-# if ( $node_name eq "body" ) {
-# handle_body $node, $cref;
-# return;
-# }
-# map_children \&apply_templates, $node, $cref;
-# }
-
-# my %context = ();
-
-# apply_templates $in_doc->documentElement, \%context;
-
-print $out_doc->toString;
+print $doc->toString;