my @links;
foreach my $p (keys %links) {
next if bestlink($page, $p) eq $page;
+
if (grep { length $_ && bestlink($p, $_) eq $page } @{$links{$p}}) {
my $href=abs2rel(htmlpage($p), dirname($page));
return (stat($file))[9];
} #}}}
-sub findlinks ($$) { #{{{
- my $page=shift;
- my $content=shift;
+sub scan ($) { #{{{
+ my $file=shift;
- my @links;
- while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
- push @links, titlepage($2);
- }
- if ($config{discussion}) {
- # Discussion links are a special case since they're not in the
- # text of the page, but on its template.
- return @links, "$page/discussion";
- }
- else {
- return @links;
+ my $type=pagetype($file);
+ if (defined $type) {
+ my $srcfile=srcfile($file);
+ my $content=readfile($srcfile);
+ my $page=pagename($file);
+
+ my @links;
+ while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
+ push @links, titlepage($2);
+ }
+ if ($config{discussion}) {
+ # Discussion links are a special case since they're not in the
+ # text of the page, but on its template.
+ push @links, "$page/discussion";
+ }
+ $links{$page}=\@links;
}
} #}}}
will_render($page, htmlpage($page), 1);
$content=filter($page, $content);
-
- $links{$page}=[findlinks($page, $content)];
-
$content=preprocess($page, $page, $content);
$content=linkify($page, $page, $content);
$content=htmlize($page, $type, $content);
}
else {
my $content=readfile($srcfile, 1);
- $links{$file}=[];
delete $depends{$file};
will_render($file, $file, 1);
writefile($file, $config{destdir}, $content, 1);
foreach my $file (@files) {
my $page=pagename($file);
if (! $oldpagemtime{$page}) {
- debug("new page $page") unless exists $pagectime{$page};
push @add, $file;
- $links{$page}=[];
+ scan($file);
$pagecase{lc $page}=$page;
$pagesources{$page}=$file;
if ($config{getctime} && -e "$config{srcdir}/$file") {
if (! $exists{$page}) {
debug("removing old page $page");
push @del, $pagesources{$page};
+ $links{$page}=[];
$renderedfiles{$page}=[];
$oldpagemtime{$page}=0;
prune($config{destdir}."/".$_)
}
}
+ # scan updated files to update info about them
+ foreach my $file (@files) {
+ my $page=pagename($file);
+
+ if (! exists $oldpagemtime{$page} ||
+ mtime(srcfile($file)) > $oldpagemtime{$page} ||
+ $forcerebuild{$page}) {
+ debug("scanning $file");
+ scan($file);
+ }
+ }
+
# render any updated files
foreach my $file (@files) {
my $page=pagename($file);
}
# if any files were added or removed, check to see if each page
- # needs an update due to linking to them or inlining them.
- # TODO: inefficient; pages may get rendered above and again here;
- # problem is the bestlink may have changed and we won't know until
- # now
+ # needs an update due to linking to them or inlining them
if (@add || @del) {
FILE: foreach my $file (@files) {
+ next if $rendered{$file};
my $page=pagename($file);
foreach my $f (@add, @del) {
my $p=pagename($f);
# Handle backlinks; if a page has added/removed links, update the
	# pages it links to. Also handles rebuilding dependent pages.
- # TODO: inefficient; pages may get rendered above and again here;
- # problem is the backlinks could be wrong in the first pass render
- # above
if (%rendered || @del) {
foreach my $f (@files) {
+ next if $rendered{$f};
my $p=pagename($f);
if (exists $depends{$p}) {
foreach my $file (keys %rendered, @del) {
foreach my $link (keys %linkchanged) {
my $linkfile=$pagesources{$link};
if (defined $linkfile) {
+ next if $rendered{$linkfile};
debug("rendering $linkfile, to update its backlinks");
render($linkfile);
$rendered{$linkfile}=1;
-* Render each changed page only once. Currently pages are rendered up to 4
- times in worst case (8 times if there's an rss feed).
-
- The issue is that rendering a page is used to gather info like the links
- on the page (and other stuff) that can effect rendering other pages. So it
- needs a multi-pass system. But rendering the whole page in each pass is
- rather obscene.
-
- It would be better to have the first pass be a data gathering pass. Such
- a pass would still need to load and parse the page contents etc, but
- wouldn't need to generate html or write anything to disk.
-
- One problem with this idea is that it could turn into 2x the work in
- cases where ikiwiki currently efficiently renders a page just once. And
- caching between the passes to avoid that wouldn't do good things to the
- memory footprint.
-
- Might be best to just do a partial first pass, getting eg, the page links
- up-to-date, and then multiple, but generally fewer, rendering passes.
-
* Don't render blog archive pages unless a page is added/removed. Just
changing a page doesn't affect the archives as they show only the title.
* Look at splitting up CGI.pm. But note that too much splitting can slow
perl down.
+
+* The backlinks code turns out to scale badly to wikis with thousands of
+ pages. The code is O(N^2)! It's called for each page, and it loops
+ through all the pages to find backlinks.
+
+  Need to find a way to calculate and cache all the backlinks in one
+  pass (a rough sketch is below),
+ which could be done in at worst O(N), and possibly less (if they're
+ stored in the index, it could be constant time). But to do this, there
+ would need to be a way to invalidate or update the cache in these
+ situations:
+
+ - A page is added. Note that this can change a backlink to point to
+ the new page instead of the page it pointed to before.
+ - A page is deleted. This can also change backlinks that pointed to that
+ page.
+ - A page is modified. Links added/removed.
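+
+  A rough sketch of such a one-pass calculation, using the existing
+  %links hash and bestlink(). The %backlinks hash and the
+  calculate_backlinks() name are made up here just to illustrate the
+  idea; nothing like this exists in the code yet.
+
+      my %backlinks;
+      sub calculate_backlinks () { #{{{
+          %backlinks=();
+          # One walk over all the links in the wiki: resolve each link
+          # with bestlink() and record the linking page under the page
+          # it points to.
+          foreach my $page (keys %links) {
+              foreach my $link (@{$links{$page}}) {
+                  my $bestlink=bestlink($page, $link);
+                  if (length $bestlink && $bestlink ne $page) {
+                      $backlinks{$bestlink}{$page}=1;
+                  }
+              }
+          }
+      } #}}}
+
+  The loop in backlinks() could then iterate over keys
+  %{$backlinks{$page}} instead of scanning every page, and the cache
+  would be recalculated (or updated) whenever %links changes in the
+  situations listed above.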