#! /usr/local/bin/perl -w # -*- coding: utf-8 -*- require 5.10.0; use strict; use warnings; use utf8; use Encode; use XML::LibXML qw(:libxml :ns); use DBI; use Digest::SHA1 qw(sha1_hex); use Getopt::Std; use constant { XHTML_PUBID => "-//W3C//DTD XHTML 1.0 Strict//EN", XHTML_URI => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", XHTML_NS => "http://www.w3.org/1999/xhtml", DAML_NS => "http://www.madore.org/~david/NS/daml/", }; my %opts; getopts('cd:', \%opts); my $dbname = "weblog" // $opts{d}; # The PostgreSQL database to use my $obtain_cdates = $opts{c}; my $parser = XML::LibXML->new(); sub get_node_lang_norec { # Return the xml:lang attribute for $node, or undef if there is none. my $node = shift; return $node->getAttributeNS(XML_XML_NS, "lang"); } sub get_node_lang_rec { # Return the xml:lang attribute for $node, or that if its parent # recursively (can return undef if no parent ever has an xml:lang # attribute). $node may be undef here, in which case return undef. my $node = shift; my $lang; while ( defined($node) && $node->nodeType == XML_ELEMENT_NODE ) { $lang = get_node_lang_norec $node; return $lang if defined($lang); $node = $node->parentNode; } return undef; } sub set_node_lang_norec { # Explicitly set the xml:lang attribute to a given value (even if # parent already has this). my $node = shift; my $lang = shift; die "set_node_lang_norec called with no lang" unless $lang; $node->setAttributeNS(XML_XML_NS, "lang", $lang); } sub unset_node_lang_norec { # Explicitly unset the xml:lang attribute (this is purposefully a # different function from the previous one to catch programming # errors). my $node = shift; $node->removeAttributeNS(XML_XML_NS, "lang"); } sub set_node_lang_rec { # Set the xml:lang attribute if necessary to the given value, or # unset if same lang as parent is requested. my $node = shift; my $lang = shift; my $parent_lang = get_node_lang_rec($node->parentNode); # Works if undef! if ( ! defined($parent_lang) || $lang ne $parent_lang ) { set_node_lang_norec $node, $lang; } else { unset_node_lang_norec $node; } } # Functions to take care of the x-daml-magic URI scheme (this should go away) sub match_daml_magic_uri { my $uri = shift; return $uri =~ /^x-daml-magic\:/; } my %daml_magic_values = ( "x-daml-magic://daml/weblog/domaxentries.xml" => "INCLUDE", "x-daml-magic://daml/weblog/maxentries.xml" => "20", "x-daml-magic://daml/weblog/domonth.xml" => "IGNORE", "x-daml-magic://daml/weblog/month.xml" => "", "x-daml-magic://daml/weblog/docategory.xml" => "IGNORE", "x-daml-magic://daml/weblog/category-en.xml" => "", "x-daml-magic://daml/weblog/category-fr.xml" => "", ); sub open_daml_magic_uri { my $uri = shift; my $s = $daml_magic_values{$uri}; if ( defined($s) ) { return \$s; } else { return undef; } } sub read_daml_magic_uri { my $handler = shift; my $length = shift; my $buf = substr($$handler, 0, $length, ""); return $buf; } sub close_daml_magic_uri { my $handler = shift; return "0 but true"; } my $input_callbacks = XML::LibXML::InputCallback->new(); $input_callbacks->register_callbacks([ \&match_daml_magic_uri, \&open_daml_magic_uri, \&read_daml_magic_uri, \&close_daml_magic_uri ]); $parser->input_callbacks($input_callbacks); # Avoid accessing the W3C web site all the time. $parser->load_catalog("/etc/xml/catalog"); my $doc; if ( defined($ARGV[0]) ) { $doc = $parser->parse_file($ARGV[0]); } else { $doc = $parser->parse_fh(\*STDIN); } my $xpc = XML::LibXML::XPathContext->new($doc); $xpc->registerNs('h', XHTML_NS); $xpc->registerNs('d', DAML_NS); # @todo_stack is a list of array references of "things to do"; note # that while it functions mainly as a stack, the stack pointer is at # the start (use Perl shift/unshift to pop/push things to do, and Perl # push to add something to do at the very end). The program's main # loop repeatedly shifts the first element off @todo_stack and # processes it until the list is empty. A "thing to do" is a # reference to an array: the first element ($call_this) is generally # undef, meaning handle a node as usual, otherwise it is a reference # to a function that will be called with the rest of the array as # arguments; the second argument ($node) is the node to examine; the # third ($ctxref) is a reference to a hash giving some extra # information on the hereditary processing context, whereas the fourth # ($optref) is a reference to a hash giving some extra information on # the immediate processing context (such as why this particular node # is being examined now, or what parent it had); the difference # between these two contexts is that the default handler transmits # $ctxref unchanged whereas it passes a clean $optref; the rest are # optional arguments to the handler function. The default behavior # (when $call_this is null) is to examine $node and, if a special # handler is registered for it, invoke that, otherwise invoke the # default handler: the latter simply lists all element children of the # node and shifts them at the beginning of @todo_stack. my @todo_stack; unshift @todo_stack, [undef, $doc->documentElement, {}, {is_root=>1}]; # Global variables for processing: my $uri_to_top; my $file_name; my $html_node; my $head_node; my ($src_title, $src_title_lang); my ($src_subtitle, $src_subtitle_lang); my @translations; my $style_content = ""; my $script_content = ""; if ( open my $common_style_file, "<", "included.css" ) { local $/; $style_content = <$common_style_file>; close $common_style_file; } if ( open my $common_script_file, "<", "included.js" ) { local $/; $script_content = <$common_script_file>; close $common_script_file; } sub default_handler_attributes { my $node = shift; my $ctxref = shift; my $optref = shift; foreach my $attr ( $node->attributes ) { if ( $attr->nodeType == XML_ATTRIBUTE_NODE && defined($attr->namespaceURI) ) { push @todo_stack, ["attr", $attr, $ctxref, {}, $node]; } } } sub default_handler_nodeonly { my $node = shift; my $ctxref = shift; my $optref = shift; # return unless $node->nodeType == XML_ELEMENT_NODE; my @child_nodes = $node->childNodes; my @to_process; foreach my $child ( @child_nodes ) { push @to_process, [undef, $child, $ctxref, {}] if $child->nodeType == XML_ELEMENT_NODE; } unshift @todo_stack, @to_process; } sub default_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: default handler doesn't handle arguments\n" if @_; default_handler_attributes $node, $ctxref, $optref; default_handler_nodeonly $node, $ctxref, $optref; } sub do_title_or_subtitle_handler { my $node = shift; my $ctxref = shift; my $optref = shift; my $type = shift; die "do_title_or_subtitle_handler called with type other than \"title\" or \"subtitle\"" unless defined($type) && ($type eq "title" || $type eq "subtitle"); my $src_t = shift or die "missing src_t argument"; my $src_t_lang = shift; if ( defined($src_t_lang) ) { printf STDERR ("warning: ignoring xml:lang attribute on %s node\n", $src_t->nodeName) if get_node_lang_norec $node; set_node_lang_rec $node, $src_t_lang; } my $new_t_node = $doc->createElementNS(XHTML_NS, ($type eq "title" ? "h1" : "p")); $new_t_node->setAttributeNS("", "class", $type); if ( defined(my $lang = get_node_lang_norec $node) ) { set_node_lang_norec $new_t_node, $lang; } $node->replaceNode($new_t_node); my @child_nodes = $src_t->childNodes; foreach my $child ( @child_nodes ) { $new_t_node->appendChild($child->cloneNode(1)); } unshift @todo_stack, [undef, $new_t_node, $ctxref, {}]; } sub do_title_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: title handler doesn't handle arguments\n" if @_; do_title_or_subtitle_handler $node, $ctxref, $optref, "title", $src_title, $src_title_lang; } sub do_subtitle_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: subtitle handler doesn't handle arguments\n" if @_; do_title_or_subtitle_handler $node, $ctxref, $optref, "subtitle", $src_subtitle, $src_subtitle_lang; } sub do_translations_handler { my $node = shift; my $ctxref = shift; my $optref = shift; my $parent = $node->parentNode; foreach my $lang ( @translations ) { my $p = $doc->createElementNS(XHTML_NS, "p"); set_node_lang_norec $p, $lang; $p->setAttributeNS("", "class", "translation-offer"); $parent->insertBefore($p, $node); $parent->insertBefore($doc->createTextNode("\n"), $node); my $str = "["; if ( $lang eq "en" ) { $str = "[An "; } elsif ( $lang eq "fr" ) { $str = "[Une "; } $p->appendChild($doc->createTextNode($str)); my $a = $doc->createElementNS(XHTML_NS, "a"); $p->appendChild($a); $a->setAttributeNS("", "href", ($file_name//"").".".$lang); $a->setAttributeNS("", "hreflang", $lang); $str = $lang; if ( $lang eq "en" ) { $str = "English version"; } elsif ( $lang eq "fr" ) { $str = "version française"; } $a->appendChild($doc->createTextNode($str)); $str = "]"; if ( $lang eq "en" ) { $str = " of this page is also available.]"; } elsif ( $lang eq "fr" ) { $str = " de cette page est également disponible.]"; } $p->appendChild($doc->createTextNode($str)); } if ( ($node->nextSibling)->nodeType == XML_TEXT_NODE && ($node->nextSibling)->data =~ m/^\s*$/s ) { $node->nextSibling->unbindNode; } $node->unbindNode; } sub do_navbar_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: do-navbar handler doesn't handle arguments\n" if @_; my $p = $doc->createElementNS(XHTML_NS, "p"); my $lang = get_node_lang_rec $node; my $explicit_lang = get_node_lang_norec $node; unless ( defined($lang) && ( $lang eq "en" || $lang eq "fr" ) ) { print STDERR "warning: will use English navbar\n"; $lang = "en"; $explicit_lang = "en"; } if ( defined($explicit_lang) ) { set_node_lang_norec $p, $explicit_lang; } $p->setAttributeNS("", "class", "navbar"); $node->replaceNode($p); my $a; $a = $doc->createElementNS(XHTML_NS, "a"); $a->setAttributeNS("", "href", "http://www.madore.org/~david/"); $p->appendChild($a); $a->appendChild($doc->createTextNode("David Madore")); $p->appendChild($doc->createElementNS(XHTML_NS, "br")); $a = $doc->createElementNS(XHTML_NS, "a"); $a->setAttributeNS("", "href", "http://perso.enst.fr/~madore/"); $p->appendChild($a); $a->appendChild($doc->createTextNode($lang eq "fr" ? "Site prof." : "Prof. site")); $p->appendChild($doc->createElementNS(XHTML_NS, "br")); $a = $doc->createElementNS(XHTML_NS, "a"); $a->setAttributeNS("", "href", $uri_to_top//"./"); $p->appendChild($a); $a->appendChild($doc->createTextNode($lang eq "fr" ? "Racine du site" : "Site Root")); $p->appendChild($doc->createElementNS(XHTML_NS, "br")); $a = $doc->createElementNS(XHTML_NS, "a"); $a->setAttributeNS("", "href", ($uri_to_top//"")."sitemap.html"); $p->appendChild($a); $a->appendChild($doc->createTextNode($lang eq "fr" ? "Plan du site" : "Site Map")); $p->appendChild($doc->createElementNS(XHTML_NS, "br")); $a = $doc->createElementNS(XHTML_NS, "a"); $a->setAttributeNS("", "href", ($uri_to_top//"")."weblog/"); $p->appendChild($a); $a->appendChild($doc->createTextNode("WebLog")); unshift @todo_stack, [undef, $p, $ctxref, {}]; } sub do_footer_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: do-footer handler doesn't handle arguments\n" if @_; my $parent = $node->parentNode; my $hr = $doc->createElementNS(XHTML_NS, "hr"); $hr->setAttributeNS("", "class", "cleared"); $parent->insertBefore($hr, $node); $parent->insertBefore($doc->createTextNode("\n"), $node); my $address = $doc->createElementNS(XHTML_NS, "address"); $parent->insertBefore($address, $node); $parent->insertBefore($doc->createTextNode("\n"), $node); my $a = $doc->createElementNS(XHTML_NS, "a"); $a->setAttributeNS("", "href", $uri_to_top//"http://www.madore.org/~david/"); $address->appendChild($a); $a->appendChild($doc->createTextNode("David Madore")); $address->appendChild($doc->createTextNode(" (")); my $email = $doc->createElementNS(DAML_NS, "d:email-despammed"); $address->appendChild($email); $address->appendChild($doc->createTextNode(")")); $email->appendChild($doc->createTextNode("david+www")); $email->appendChild($doc->createElementNS(DAML_NS, "d:email-at")); $email->appendChild($doc->createTextNode("madore")); $email->appendChild($doc->createElementNS(DAML_NS, "d:email-dot")); $email->appendChild($doc->createTextNode("org")); if ( ($node->nextSibling)->nodeType == XML_TEXT_NODE && ($node->nextSibling)->data =~ m/^\s*$/s ) { $node->nextSibling->unbindNode; } $node->unbindNode; unshift @todo_stack, [undef, $hr, $ctxref, {}]; # Pretty useless... unshift @todo_stack, [undef, $address, $ctxref, {}]; } sub create_meta_element_helper { my $src_node = shift; # May be undef my $name_or_http_equiv = shift; my $meta_name = shift; my $meta_content = shift; my $lang = shift; my $meta = $doc->createElementNS(XHTML_NS, "meta"); if ( defined($src_node) && !defined($lang) ) { $lang = get_node_lang_norec $src_node; } if ( defined($lang) ) { set_node_lang_norec $meta, $lang; } $meta->setAttributeNS("", $name_or_http_equiv, $meta_name); $meta->setAttributeNS("", "content", $meta_content); return $meta; } sub create_style_or_script_node { my $node = shift; my $ctxref = shift; my $optref = shift; my $type = shift; die "create_style_or_script_node called with type other than \"style\" or \"script\"" unless defined($type) && ($type eq "style" || $type eq "script"); my $valref = shift; die "\$head_node should have been defined at this point" unless defined($head_node); die "fix broken assumption: \$node == \$head_node" unless $node == $head_node; my $snode = $doc->createElementNS(XHTML_NS, $type); $head_node->appendChild($snode); $head_node->appendChild($doc->createTextNode("\n")); $snode->setAttributeNS("", "type", ($type eq "style"?"text/css":"text/javascript")); $snode->setAttributeNS("", "defer", "defer") if $type eq "script"; if ( $type eq "style" ) { $snode->appendChild($doc->createTextNode("\n/* ")); $snode->appendChild($doc->createCDATASection(" */\n".$$valref."/* ")); $snode->appendChild($doc->createTextNode(" */\n")); } else { $snode->appendChild($doc->createTextNode("\n// ")); $snode->appendChild($doc->createCDATASection("\n".$$valref."// ")); $snode->appendChild($doc->createTextNode("\n")); } } sub daml_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: daml handler doesn't handle arguments\n" if @_; die "daml element can only be root element" unless $$optref{is_root}; if ( defined(my $attr = $node->getAttributeNS("", "uri-to-top")) ) { $uri_to_top = $attr; } if ( defined(my $attr = $node->getAttributeNS("", "file.name")) ) { $file_name = $attr; } die "\$html_node already defined: what magic is this?" if defined($html_node); $html_node = $doc->createElementNS(XHTML_NS, "html"); my $lang; if ( defined($lang = get_node_lang_norec $node) ) { set_node_lang_norec $html_node, $lang; } $doc->setDocumentElement($html_node); $html_node->appendChild($doc->createTextNode("\n")); $html_node->appendChild($doc->createComment(" This file is automatically generated. Do not edit! ")); $html_node->appendChild($doc->createTextNode("\n")); die "\$head_node already defined: what magic is this?" if defined($head_node); $head_node = $doc->createElementNS(XHTML_NS, "head"); $html_node->appendChild($head_node); $html_node->appendChild($doc->createTextNode("\n")); $head_node->appendChild($doc->createTextNode("\n")); $head_node->appendChild( create_meta_element_helper(undef, "http-equiv", "Content-Type", "text/html; charset=utf-8")); $head_node->appendChild($doc->createTextNode("\n")); $head_node->appendChild( create_meta_element_helper(undef, "http-equiv", "Content-Language", $lang)) if $lang; $head_node->appendChild($doc->createTextNode("\n")); my $link = $doc->createElementNS(XHTML_NS, "link"); $link->setAttributeNS("", "rel", "Shortcut Icon"); $link->setAttributeNS("", "href", ($uri_to_top//"")."favicon.ico"); $head_node->appendChild($link); $head_node->appendChild($doc->createTextNode("\n")); my @child_nodes = $node->childNodes; my @to_process; foreach my $child ( @child_nodes ) { if ( $child->nodeType == XML_ELEMENT_NODE ) { if ( $child->localName eq "body" ) { $html_node->appendChild($doc->createTextNode("\n")); $html_node->appendChild($child); $html_node->appendChild($doc->createTextNode("\n")); } else { $head_node->appendChild($child); $head_node->appendChild($doc->createTextNode("\n")); } push @to_process, [undef, $child, $ctxref, {is_daml_child=>1}]; } elsif ( $child->nodeType == XML_TEXT_NODE || $child->nodeType == XML_CDATA_SECTION_NODE ) { die "daml element cannot contain text" unless $child->data =~ m/^\s*$/s; } } unshift @todo_stack, @to_process; push @todo_stack, [\&create_style_or_script_node, $head_node, {%$ctxref, special=>1}, {special=>1}, "style", \$style_content]; push @todo_stack, [\&create_style_or_script_node, $head_node, {%$ctxref, special=>1}, {special=>1}, "script", \$script_content]; } sub body_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: body handler doesn't handle arguments\n" if @_; my $body_node = $doc->createElementNS(XHTML_NS, "body"); if ( defined(my $lang = get_node_lang_norec $node) ) { set_node_lang_norec $body_node, $lang; } $body_node->setAttributeNS("", "onload", "onLoad()"); # ($doc->documentElement)->appendChild($body_node); # Work around libxml2 bug $node->replaceNode($body_node); my @child_nodes = $node->childNodes; my @to_process; unless ( $node->getAttributeNS("", "notitle") ) { if ( defined($src_title) ) { my $token = $doc->createElementNS(DAML_NS, "d:implicit-do-title"); $body_node->appendChild($doc->createTextNode("\n")); $body_node->appendChild($token); unshift @to_process, [\&do_title_handler, $token, $ctxref, {implicit=>1}]; } } unless ( $node->getAttributeNS("", "nosubtitle") ) { if ( defined($src_subtitle) ) { my $token = $doc->createElementNS(DAML_NS, "d:implicit-do-subtitle"); $body_node->appendChild($doc->createTextNode("\n")); $body_node->appendChild($token); unshift @to_process, [\&do_subtitle_handler, $token, $ctxref, {implicit=>1}]; } } unless ( $node->getAttributeNS("", "nonavbar") ) { my $token = $doc->createElementNS(DAML_NS, "d:implicit-do-navbar"); $body_node->appendChild($doc->createTextNode("\n")); $body_node->appendChild($token); unshift @to_process, [\&do_navbar_handler, $token, $ctxref, {implicit=>1}]; } unless ( $node->getAttributeNS("", "notranslations") ) { if ( @translations ) { my $token = $doc->createElementNS(DAML_NS, "d:implicit-do-translations"); $body_node->appendChild($doc->createTextNode("\n")); $body_node->appendChild($token); unshift @to_process, [\&do_translations_handler, $token, $ctxref, {implicit=>1}]; } } foreach my $child ( @child_nodes ) { if ( $child->nodeType == XML_TEXT_NODE || $child->nodeType == XML_CDATA_SECTION_NODE ) { die "body element cannot contain text" unless $child->data =~ m/^\s*$/s; } $body_node->appendChild($child); push @to_process, [undef, $child, $ctxref, {}] if $child->nodeType == XML_ELEMENT_NODE; } unless ( $node->getAttributeNS("", "nofooter") ) { my $token = $doc->createElementNS(DAML_NS, "d:implicit-do-footer"); $body_node->appendChild($token); $body_node->appendChild($doc->createTextNode("\n")); unshift @to_process, [\&do_footer_handler, $token, $ctxref, {implicit=>1}]; } unshift @todo_stack, @to_process; } sub document_title_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: title handler doesn't handle arguments\n" if @_; my $title_text = $node->textContent; $src_title = $doc->createDocumentFragment; $src_title_lang = get_node_lang_rec $node; my $src_title_explicit_lang = get_node_lang_norec $node; foreach my $child ( $node->childNodes ) { $src_title->appendChild($child); } my $title_node = $doc->createElementNS(XHTML_NS, "title"); if ( defined($src_title_explicit_lang) ) { # Note here this is with norec, earlier with rec... set_node_lang_norec $title_node, $src_title_explicit_lang; } # ($doc->documentElement)->appendChild($title_node); # Work around libxml2 bug $node->replaceNode($title_node); $title_node->appendChild($doc->createTextNode($title_text)); my $title_parent = $title_node->parentNode; $title_parent->insertAfter( create_meta_element_helper(undef, "name", "Title", $title_text, $src_title_explicit_lang), $title_node); $title_parent->insertAfter($doc->createTextNode("\n"), $title_node); } sub subtitle_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: subtitle handler doesn't handle arguments\n" if @_; $src_subtitle = $doc->createDocumentFragment; $src_subtitle_lang = get_node_lang_norec $node; foreach my $child ( $node->childNodes ) { $src_subtitle->appendChild($child); } if ( ($node->nextSibling)->nodeType == XML_TEXT_NODE && ($node->nextSibling)->data =~ m/^\s*$/s ) { $node->nextSibling->unbindNode; } $node->unbindNode; } sub meta_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: meta handler doesn't handle arguments\n" if @_; if ( $node->localName eq "meta-description" ) { $node->replaceNode( create_meta_element_helper($node, "name", "Description", $node->textContent)); } elsif ( $node->localName eq "meta-keywords" ) { $node->replaceNode( create_meta_element_helper($node, "name", "Keywords", $node->textContent)); } } sub translation_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: translation handler doesn't handle arguments\n" if @_; my $lang = get_node_lang_norec $node; die "missing xml:lang attribute on translation" unless $lang; push @translations, $lang; my $link = $doc->createElementNS(XHTML_NS, "link"); set_node_lang_norec $link, $lang; $link->setAttributeNS("", "rel", "Alternate"); if ( $lang eq "en" ) { $link->setAttributeNS("", "title", "English version"); } elsif ( $lang eq "fr" ) { $link->setAttributeNS("", "title", "Version française"); } $link->setAttributeNS("", "hreflang", $lang); $link->setAttributeNS("", "href", ($file_name//"").".".$lang); $node->replaceNode($link); } sub extra_style_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: extra-style handler doesn't handle arguments\n" if @_; $style_content .= $node->textContent; if ( ($node->nextSibling)->nodeType == XML_TEXT_NODE && ($node->nextSibling)->data =~ m/^\s*$/s ) { $node->nextSibling->unbindNode; } $node->unbindNode; } sub email_at_or_dot_handler { my $node = shift; my $ctxref = shift; my $optref = shift; my $type = shift; die "email_at_or_dot_handler called with type other than \"at\" or \"dot\"" unless defined($type) && ($type eq "at" || $type eq "dot"); die "email-$type can only be a child of email-despammed" unless $$optref{is_email_despammed_child}; my ($char_name, $img_name, $en_name, $fr_name); if ( $type eq "at" ) { $char_name = "commercial-at"; $img_name = "commercial_at.png"; $en_name = "at sign"; $fr_name = "arobase"; } else { $char_name = "full-stop"; $img_name = "full_stop.png"; $en_name = "dot"; $fr_name = "point"; } my $span0 = $doc->createElementNS(XHTML_NS, "span"); my $lang = get_node_lang_rec $node; my $explicit_lang = get_node_lang_norec $node; unless ( defined($lang) && ( $lang eq "en" || $lang eq "fr" ) ) { $lang = "en"; $explicit_lang = "en"; } if ( defined($explicit_lang) ) { set_node_lang_norec $span0, $explicit_lang; } $span0->setAttributeNS("", "class", "replace-$char_name"); $node->replaceNode($span0); my $span1 = $doc->createElementNS(XHTML_NS, "span"); $span1->setAttributeNS("", "class", "subreplace-$char_name"); $span0->appendChild($span1); my $img = $doc->createElementNS(XHTML_NS, "img"); $img->setAttributeNS("", "src", ($uri_to_top//"")."images/".$img_name); $img->setAttributeNS("", "alt", "[".($lang eq "fr" ? $fr_name : $en_name)."]"); $img->setAttributeNS("", "height", "15"); $img->setAttributeNS("", "width", "16"); $span1->appendChild($img); } sub email_at_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: email-at handler doesn't handle arguments\n" if @_; email_at_or_dot_handler $node, $ctxref, $optref, "at"; } sub email_dot_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: email-dot handler doesn't handle arguments\n" if @_; email_at_or_dot_handler $node, $ctxref, $optref, "dot"; } sub email_despammed_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: email-despammed handler doesn't handle arguments\n" if @_; my $out0 = $doc->createElementNS(XHTML_NS, "code"); if ( defined(my $lang = get_node_lang_norec $node) ) { set_node_lang_norec $out0, $lang; } $node->replaceNode($out0); my $out1 = $doc->createElementNS(XHTML_NS, "a"); $out1->setAttributeNS("", "class", "despammed-address"); $out0->appendChild($out1); my @child_nodes = $node->childNodes; my @to_process; foreach my $child ( @child_nodes ) { $out1->appendChild($child); push @to_process, [undef, $child, $ctxref, {is_email_despammed_child=>1}] if $child->nodeType == XML_ELEMENT_NODE; } unshift @todo_stack, @to_process; } sub smiley_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: smiley handler doesn't handle arguments\n" if @_; my ($emotion) = $node->localName =~ /^smiley-([a-z0-9\-]*)$/s or die "this is impossible"; my %alt_text = ( "smile" => ":-)", "wink" => ";-)", "surprised" => ":-o", "sad" => ":-(", "cool" => "8-)", "biggrin" => ":-D", "confused" => ":-S", "crazy" => "%-)", "neutral" => ":-|", "twisted" => "8->", "cry" => "¦-(" ); my %en_name = ( "smile" => "Smile", "wink" => "Wink", "surprised" => "Surprised", "sad" => "Sad", "cool" => "Cool", "biggrin" => "Big grin", "confused" => "Confused", "crazy" => "Crazy", "neutral" => "Non-grin", "twisted" => "Twisted", "cry" => "Crying" ); my %fr_name = ( "smile" => "Sourire", "wink" => "Clin d'œil", "surprised" => "Surpris", "sad" => "Triste", "cool" => "Cool", "biggrin" => "Grand sourire", "confused" => "Troublé", "crazy" => "Fou", "neutral" => "Sans sourire", "twisted" => "Tordu", "cry" => "Pleure" ); die "this is impossible" unless ( defined($alt_text{$emotion}) && defined($en_name{$emotion}) && defined($fr_name{$emotion}) ); my $lang = get_node_lang_rec $node; my $img = $doc->createElementNS(XHTML_NS, "img"); if ( defined(my $explicit_lang = get_node_lang_norec $node) ) { set_node_lang_norec $img, $explicit_lang; } $img->setAttributeNS("", "src", ($uri_to_top//"") ."images/smileys/${emotion}.png"); $img->setAttributeNS("", "class", "smiley"); $img->setAttributeNS("", "width", "15"); $img->setAttributeNS("", "height", "15"); $img->setAttributeNS("", "alt", $alt_text{$emotion}); if ( defined($lang) && $lang eq "en" ) { $img->setAttributeNS("", "title", $en_name{$emotion}); } elsif ( defined($lang) && $lang eq "fr" ) { $img->setAttributeNS("", "title", $fr_name{$emotion}); } $node->replaceNode($img); } sub entry_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: entry handler doesn't handle arguments\n" if @_; my $entry_number = $node->getAttributeNS("", "number"); die "entry has missing or bad number attribute" unless ( defined($entry_number) && $entry_number =~ /^(\d{4})$/s ); my $entry_date = $node->getAttributeNS("", "date"); my ($entry_year,$entry_month,$entry_day); die "entry has missing or bad date attribute" unless ( defined($entry_date) && ( ($entry_year,$entry_month,$entry_day) = ( $entry_date =~ /^(\d{4})\-(\d{2})\-(\d{2})$/s ) ) ); my $entry_dow = $node->getAttributeNS("", "day_of_week"); my $entry_yandm = "${entry_year}-${entry_month}"; my %newctx = (%{$ctxref}, in_entry=>1, entry_number=>$entry_number, entry_date=>$entry_date, entry_yandm=>$entry_yandm, entry_dow=>$entry_dow); my $lang = get_node_lang_rec $node; my $div = $doc->createElementNS(XHTML_NS, "div"); if ( defined(my $explicit_lang = get_node_lang_norec $node) ) { set_node_lang_norec $div, $explicit_lang; } $div->setAttributeNS("", "id", "d.${entry_date}.${entry_number}"); $div->setAttributeNS("", "class", "weblog-entry"); if ( defined(my $style = $node->getAttributeNS("", "style")) ) { $div->setAttributeNS("", "style", $style); } $node->replaceNode($div); $div->appendChild($doc->createTextNode("\n")); my $header = $doc->createElementNS(XHTML_NS, "p"); $header->setAttributeNS("", "class", "weblog-entry-header"); $div->appendChild($header); $div->appendChild($doc->createTextNode("\n")); my $permalink = $doc->createElementNS(XHTML_NS, "a"); $permalink->setAttributeNS("", "href", "${entry_yandm}.html" ."\#d.${entry_date}.${entry_number}"); $permalink->appendChild($doc->createTextNode($entry_date)); $header->appendChild($permalink); $header->appendChild($doc->createTextNode(" (".$entry_dow.")")) if defined($entry_dow); $newctx{entry_header} = $header; my @child_nodes = $node->childNodes; my @to_process; foreach my $child ( @child_nodes ) { if ( $child->nodeType == XML_ELEMENT_NODE ) { $div->appendChild($child); push @to_process, [undef, $child, \%newctx, {is_entry_child=>1}]; } elsif ( $child->nodeType == XML_TEXT_NODE || $child->nodeType == XML_CDATA_SECTION_NODE ) { die "entry element cannot contain text" unless $child->data =~ m/^\s*$/s; $div->appendChild($doc->createTextNode("\n")); # Don't copy whitespace, like previous engine, to faciliate comparison. } } unless ( $node->getAttributeNS("", "nocomments") ) { my $token = $doc->createElementNS(DAML_NS, "d:implicit-do-comments"); $div->appendChild($token); $div->appendChild($doc->createTextNode("\n")); unshift @to_process, [\&do_comments_handler, $token, \%newctx, {implicit=>1}]; } unshift @todo_stack, @to_process; } sub entry_title_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: title handler doesn't handle arguments\n" if @_; my $h2 = $doc->createElementNS(XHTML_NS, "h2"); if ( my $lang = get_node_lang_norec $node ) { set_node_lang_norec $h2, $lang; } $h2->setAttributeNS("", "class", "weblog-entry-title"); $node->replaceNode($h2); my @child_nodes = $node->childNodes; my @to_process; foreach my $child ( @child_nodes ) { $h2->appendChild($child); push @to_process, [undef, $child, $ctxref, {}] if $child->nodeType == XML_ELEMENT_NODE; } unshift @todo_stack, @to_process; } sub title_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: title handler doesn't handle arguments\n" if @_; if ( $$optref{is_daml_child} ) { document_title_handler $node, $ctxref, $optref; } elsif ( $$optref{is_entry_child} ) { entry_title_handler $node, $ctxref, $optref; } else { die "misplaced title element"; } } sub date_extra_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: date-extra handler doesn't handle arguments\n" if @_; die "misplaced date-extra element: makes sense only as a child of entry" unless $$optref{is_entry_child}; die "this is impossible" unless defined($$ctxref{entry_header}); $$ctxref{entry_header}->appendChild( $doc->createTextNode(" · ".$node->textContent)); } sub do_comments_handler { my $node = shift; my $ctxref = shift; my $optref = shift; print STDERR "warning: do-comments handler doesn't handle arguments\n" if @_; my $p = $doc->createElementNS(XHTML_NS, "p"); my $lang = get_node_lang_rec $node; my $explicit_lang = get_node_lang_norec $node; my %link_name = ( "en" => "Comments", "fr" => "Commentaires", "de" => "Kommentare", "ia" => "Commentos" ); unless ( defined($lang) && defined($link_name{$lang}) ) { print STDERR "warning: will use English comments link\n"; $lang = "en"; $explicit_lang = "en"; } if ( defined($explicit_lang) ) { set_node_lang_norec $p, $explicit_lang; } $p->setAttributeNS("", "class", "talkback-link"); $node->replaceNode($p); my $a = $doc->createElementNS(XHTML_NS, "a"); my $cmt_url = "http://www.madore.org/cgi-bin/comment.pl/" ."showcomments?href=http%3a%2f%2fwww.madore.org%2f" ."%7edavid%2fweblog%2f".$$ctxref{entry_yandm} .".html%23d.".$$ctxref{entry_date}.".".$$ctxref{entry_number}; $a->setAttributeNS("", "href", $cmt_url); $p->appendChild($a); $a->appendChild($doc->createTextNode($link_name{$lang})); my $span = $doc->createElementNS(XHTML_NS, "span"); $span->setAttributeNS("", "id", "d.".$$ctxref{entry_date}.".".$$ctxref{entry_number} .".numcomments"); $p->appendChild($span); $span->appendChild($doc->createComment(" EMPTY ")); } sub wref_attr_handler { my $attr = shift; my $ctxref = shift; my $optref = shift; my $pnode = shift; die "wref attribute makes sense only on an a element" unless $pnode->namespaceURI eq XHTML_NS && $pnode->localName eq "a"; my $tgt = $attr->nodeValue; $attr->unbindNode; $pnode->setAttributeNS("", "href", $tgt); } my %daml_handler = ( "daml" => \&daml_handler, "body" => \&body_handler, "title" => \&title_handler, "subtitle" => \&subtitle_handler, "meta-description" => \&meta_handler, "meta-keywords" => \&meta_handler, "translation" => \&translation_handler, "extra-style" => \&extra_style_handler, "email-despammed" => \&email_despammed_handler, "email-at" => \&email_at_handler, "email-dot" => \&email_dot_handler, "smiley-smile" => \&smiley_handler, "smiley-wink" => \&smiley_handler, "smiley-surprised" => \&smiley_handler, "smiley-sad" => \&smiley_handler, "smiley-cool" => \&smiley_handler, "smiley-biggrin" => \&smiley_handler, "smiley-confused" => \&smiley_handler, "smiley-crazy" => \&smiley_handler, "smiley-neutral" => \&smiley_handler, "smiley-twisted" => \&smiley_handler, "smiley-cry" => \&smiley_handler, "entry" => \&entry_handler, "date-extra" => \&date_extra_handler, ); my %daml_attr_handler = ( "wref" => \&wref_attr_handler, ); TODO_LOOP: while ( my $process = shift @todo_stack ) { my $call_this = shift @$process; if ( ref($call_this) eq "CODE" ) { &$call_this(@$process); next TODO_LOOP; } my $node = $$process[0]; unless ( defined($node->namespaceURI) ) { printf STDERR "warning: skipping %s node with missing namespace\n", $node->nodeName; next TODO_LOOP; } if ( defined($call_this) && $call_this eq "attr" ) { if ( $node->namespaceURI eq DAML_NS && defined($daml_attr_handler{$node->localName}) ) { &{$daml_attr_handler{$node->localName}}(@$process); } else { # Just leave it alone... } } else { if ( $node->namespaceURI eq DAML_NS && defined($daml_handler{$node->localName}) ) { &{$daml_handler{$node->localName}}(@$process); } else { default_handler(@$process); } } } # And now we create an output document that is identical to the # document... but we need to do this to get rid of useless namespace # declarations which XML::LibXML has inserted and there seems to be no # better way. :-(((((((((((( my $out_doc = XML::LibXML::Document->new("1.0", "utf-8"); $out_doc->createInternalSubset("html", XHTML_PUBID, XHTML_URI); my %out_namespaces = ("xml"=>XML_XML_NS); sub out_replicate_node; sub out_replicate_node { my $node = shift; my $out_parent = shift; my $out_root = shift; my $out_node; my $type = $node->nodeType; if ( $type == XML_ELEMENT_NODE ) { my $ns_uri = $node->namespaceURI // XHTML_NS; my $ns_pfx = $node->prefix // ""; my $ns_lname = $node->localName; my $out_node = $out_doc->createElementNS( $ns_uri, ($ns_pfx?"$ns_pfx:$ns_lname":$ns_lname)); $out_root = $out_node unless defined($out_root); if ( ! defined($out_namespaces{$ns_pfx}) ) { $out_root->setNamespace($ns_uri, $ns_pfx, $out_node==$out_root); $out_namespaces{$ns_pfx} = $ns_uri; # printf STDERR "added prefix %s => %s (for node %s)\n", # $ns_pfx||"(default)", $ns_uri, $ns_lname; } foreach my $attr ( $node->attributes ) { if ( $attr->nodeType == XML_ATTRIBUTE_NODE ) { my $at_uri = $attr->namespaceURI; my $at_pfx = $attr->prefix // ""; my $at_lname = $attr->localName; if ( defined($at_uri) && ! defined($out_namespaces{$at_pfx}) ) { $out_root->setNamespace($at_uri, $at_pfx, 0); $out_namespaces{$at_pfx} = $at_uri; # printf STDERR "added prefix %s => %s (for attr %s)\n", # $at_pfx//"(default)", $at_uri, $at_lname; } $out_node->setAttributeNS( $at_uri, ($at_pfx?"$at_pfx:$at_lname":$at_lname), $attr->value); } } if ( defined($out_parent) ) { $out_parent->appendChild($out_node); } else { $out_doc->setDocumentElement($out_node); } foreach my $child ( $node->childNodes ) { out_replicate_node $child, $out_node, $out_root; } } elsif ( $type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE || $type == XML_COMMENT_NODE ) { $out_parent->appendChild($out_doc->adoptNode($node)); } else { die "unhandled node type"; } }; out_replicate_node ($doc->documentElement, undef, undef); print $out_doc->toString;