From: Simon McVittie
Date: Sun, 16 Nov 2008 18:11:39 +0000 (+0000)
Subject: htmlbalance: new plugin that balances tags by parsing and re-serializing
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=e7a840ed9a817cf4db59c90e680afd89e146b581;p=ikiwiki.git
htmlbalance: new plugin that balances tags by parsing and re-serializing
---
diff --git a/IkiWiki/Plugin/htmlbalance.pm b/IkiWiki/Plugin/htmlbalance.pm
new file mode 100644
index 000000000..667d73b6c
--- /dev/null
+++ b/IkiWiki/Plugin/htmlbalance.pm
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+package IkiWiki::Plugin::htmlbalance;
+
+# htmlbalance: Parse and re-serialize HTML to ensure balanced tags
+#
+# Copyright 2008 Simon McVittie
+# Licensed under the GNU GPL, version 2, or any later version published by the
+# Free Software Foundation
+
+use warnings;
+use strict;
+use IkiWiki 2.00;
+
+sub import { #{{{
+	hook(type => "getsetup", id => "htmlbalance", call => \&getsetup); # handler for "getsetup" hooks
+	return hook(type => "sanitize", id => "htmlbalance", call => \&sanitize); # handler for "sanitize" hooks
+} # }}}
+
+sub getsetup () { #{{{
+	my %info = ( # metadata handed back under the "plugin" key
+		safe => 1,
+		rebuild => undef,
+	);
+	return (plugin => \%info);
+} #}}}
+
+sub sanitize (@) { #{{{
+	# Sanitize hook: parse $params{content} as HTML, return it re-serialized.
+	my %params=@_;
+	my $ret = '';
+	# "use" is resolved at compile time, which a block eval cannot trap,
+	# so the modules are loaded with run-time "require" instead.
+	eval {
+		require HTML::TreeBuilder;
+		require XML::Atom::Util;
+	};
+	if ($@) {
+		error($@);
+		return $params{content}; # only reached if error() returns
+	}
+	my $tree = HTML::TreeBuilder->new_from_content($params{content});
+	my @nodes = $tree->disembowel();
+	foreach my $node (@nodes) {
+		if (ref $node) { # a reference: node object, serialized via as_XML
+			$ret .= $node->as_XML();
+			chomp $ret; # strip a trailing newline, if any
+			$node->delete();
+		}
+		else { # not a reference: plain text, escaped via encode_xml
+			$ret .= XML::Atom::Util::encode_xml($node);
+		}
+	}
+	$tree->delete();
+	return $ret;
+} # }}}
+
+1
diff --git a/doc/plugins/aggregate.mdwn b/doc/plugins/aggregate.mdwn
index c40a6dc22..6fc87853b 100644
--- a/doc/plugins/aggregate.mdwn
+++ b/doc/plugins/aggregate.mdwn
@@ -9,9 +9,9 @@ New users of aggregate should enable the `aggregateinternal => 1` option in the
.setup file. If you don't do so, you will need to enable the [[html]] plugin
as well as aggregate itself, since feed entries will be stored as HTML.
-The [[meta]] and [[tag]] plugins are also recommended. The
-[[htmltidy]] plugin is suggested, since feeds can easily contain html
-problems, some of which tidy can fix.
+The [[meta]] and [[tag]] plugins are also recommended. Either the
+[[htmltidy]] or [[htmlbalance]] plugin is suggested, since feeds can easily
+contain html problems, some of which these plugins can fix.
You will need to run ikiwiki periodically from a cron job, passing it the
--aggregate parameter, to make it check for new posts. Here's an example
diff --git a/doc/plugins/htmlbalance.mdwn b/doc/plugins/htmlbalance.mdwn
new file mode 100644
index 000000000..7cdb1f950
--- /dev/null
+++ b/doc/plugins/htmlbalance.mdwn
@@ -0,0 +1,9 @@
+[[!template id=plugin name=htmlbalance author="Simon McVittie"]]
+[[!tag type/html]]
+
+This plugin ensures that the HTML emitted by ikiwiki contains well-balanced
+HTML tags, by parsing it with HTML::TreeBuilder and re-serializing it. This
+acts as a lighter-weight alternative to [[plugins/htmltidy]]; it doesn't
+ensure validity, but it does at least ensure that formatting from a
+blog post pulled in by \[[![[ikiwiki/directive/inline]]]] doesn't
+leak into the rest of the page.
diff --git a/doc/plugins/htmltidy.mdwn b/doc/plugins/htmltidy.mdwn
index f675a01ae..580e56f59 100644
--- a/doc/plugins/htmltidy.mdwn
+++ b/doc/plugins/htmltidy.mdwn
@@ -7,4 +7,5 @@ emitted by ikiwiki. Besides being nicely formatted, this helps ensure that
even if users enter suboptimal html, your wiki generates valid html.
Note that since tidy is an external program, that is run each time a page
-is built, this plugin will slow ikiwiki down somewhat.
+is built, this plugin will slow ikiwiki down somewhat. [[plugins/htmlbalance]]
+might provide a faster alternative.
diff --git a/t/htmlbalance.t b/t/htmlbalance.t
new file mode 100755
index 000000000..cd124e473
--- /dev/null
+++ b/t/htmlbalance.t
@@ -0,0 +1,13 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use Test::More tests => 7; # one use_ok plus the six is() checks below
+
+BEGIN { use_ok("IkiWiki::Plugin::htmlbalance"); } # load the plugin; each check below calls its sanitize() directly
+# NOTE(review): the literals below look as if markup was stripped in transit (some span several lines) - confirm against the original test file.
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "
"), "
");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "
"), "");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => ""), ""); # as written: empty input, empty output
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "foo "), "foo ");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => " foo "), " foo ");
+is(IkiWiki::Plugin::htmlbalance::sanitize(content => "a>"), "a>"); # as written: input containing a bare ">"