From: joey Date: Thu, 3 Aug 2006 21:50:47 +0000 (+0000) Subject: * Try to handle relative links in aggregated feeds. However, X-Git-Tag: 1.16~17 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=e8b39b094116e8b50cf12fe56b9c6a04f05683e5;p=ikiwiki.git * Try to handle relative links in aggregated feeds. However, the current support is a crude hack due to limitations of XML::Feed: xml:base is not supported, neither is Content-Location. And of course, relative links in RSS feeds are ill-undefined.. --- diff --git a/IkiWiki/Plugin/aggregate.pm b/IkiWiki/Plugin/aggregate.pm index 16db42a8f..d6592fef3 100644 --- a/IkiWiki/Plugin/aggregate.pm +++ b/IkiWiki/Plugin/aggregate.pm @@ -6,6 +6,9 @@ use warnings; use strict; use IkiWiki; use HTML::Entities; +use HTML::Parser; +use HTML::Tagset; +use URI; my %feeds; my %guids; @@ -283,15 +286,13 @@ sub add_page (@) { #{{{ # Create the page. my $template=IkiWiki::template("aggregatepost.tmpl", blind_cache => 1); - my $content=$params{content}; - $params{content}=~s/(?<!\\)\[\[/\\\[\[/g; # escape accidental wikilinks - # and preprocessor stuff $template->param(title => $params{title}) if defined $params{title} && length($params{title}); - $template->param(content => $params{content}); + $template->param(content => htmlescape(htmlabs($params{content}, $feed->{feedurl}))); $template->param(url => $feed->{url}); $template->param(name => $feed->{name}); - $template->param(link => $params{link}) if defined $params{link}; + $template->param(link => urlabs($params{link}, $feed->{feedurl})) + if defined $params{link}; if (ref $feed->{tags}) { $template->param(tags => [map { tag => $_ }, @{$feed->{tags}}]); } @@ -303,6 +304,58 @@ sub add_page (@) { #{{{ utime $mtime, $mtime, pagefile($guid->{page}) if defined $mtime; } #}}} +sub htmlescape ($) { #{{{ + # escape accidental wikilinks and preprocessor stuff + my $html=shift; + $html=~s/(?<!\\)\[\[/\\\[\[/g; + return $html; +} #}}} + +sub urlabs ($$) { #{{{ + my $url=shift; + my $urlbase=shift; + + URI->new_abs($url, $urlbase)->as_string; +} #}}} + +sub htmlabs ($$) { #{{{ + # Convert links in html from relative to absolute. 
+ # Note that this is a heuristic, which is not specified by the rss + # spec and may not be right for all feeds. Also, see Debian + # bug #XXXX TODO: get bug. + my $html=shift; + my $urlbase=shift; + + my $ret=""; + my $p = HTML::Parser->new(api_version => 3); + $p->handler(default => sub { $ret.=join("", @_) }, "text"); + $p->handler(start => sub { + my ($tagname, $pos, $text) = @_; + if (ref $HTML::Tagset::linkElements{$tagname}) { + while (4 <= @$pos) { + # use attribute sets from right to left + # to avoid invalidating the offsets + # when replacing the values + my($k_offset, $k_len, $v_offset, $v_len) = + splice(@$pos, -4); + my $attrname = lc(substr($text, $k_offset, $k_len)); + next unless grep { $_ eq $attrname } @{$HTML::Tagset::linkElements{$tagname}}; + next unless $v_offset; # 0 v_offset means no value + my $v = substr($text, $v_offset, $v_len); + $v =~ s/^([\'\"])(.*)\1$/$2/; + my $new_v=urlabs($v, $urlbase); + $new_v =~ s/\"/&quot;/g; # since we quote with "" + substr($text, $v_offset, $v_len) = qq("$new_v"); + } + } + $ret.=$text; + }, "tagname, tokenpos, text"); + $p->parse($html); + $p->eof; + + return $ret; +} #}}} + sub remove_feeds () { #{{{ my $page=shift; diff --git a/debian/changelog b/debian/changelog index aacdbe52f..ab053e496 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +ikiwiki (1.16) UNRELEASED; urgency=low + + * Try to handle relative links in aggregated feeds. However, + the current support is a crude hack due to limitations of XML::Feed: + xml:base is not supported, neither is Content-Location. And of course, + relative links in RSS feeds are ill-undefined.. + + -- Joey Hess Thu, 3 Aug 2006 17:29:51 -0400 + ikiwiki (1.15) unstable; urgency=low * Remove CDPATH and other env vars perl taint checking doesn't like. 
diff --git a/doc/todo/aggregation.mdwn b/doc/todo/aggregation.mdwn index 5abb6a53e..dec242ea6 100644 --- a/doc/todo/aggregation.mdwn +++ b/doc/todo/aggregation.mdwn @@ -2,6 +2,3 @@ * Need to store page author metadata and include it in the rss feed. Permalink to? Also, that stuff could be presented better in the html blog view, also using the metadata. -* Some rss feeds contain relative links or relative urls to inline images, - which break when aggregated. Do I need to parse the html and make them - all absolute?