From: Joey Hess Date: Thu, 8 May 2008 19:51:09 +0000 (-0400) Subject: amazon s3 index file improvements X-Git-Tag: 2.46~16 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=b8d81b7b7f53b8fac5632747eae73c7158e32fde;p=ikiwiki.git amazon s3 index file improvements Turns out duplicate index files do not need to be stored when usedirs is in use, just when it's not. Ikiwiki is quite consistent about using page/ when usedirs is in use. (The only exception is the search plugin, which needs fixing.) This also includes significant code cleanup, removal of an incorrect special case for empty files, and addition of a workaround for a bug in the amazon perl module. --- diff --git a/IkiWiki/Plugin/amazon_s3.pm b/IkiWiki/Plugin/amazon_s3.pm index cc15fbdb1..6652b9fba 100644 --- a/IkiWiki/Plugin/amazon_s3.pm +++ b/IkiWiki/Plugin/amazon_s3.pm @@ -71,6 +71,29 @@ sub getbucket { #{{{ } #}}} } +# Given a file, return any S3 keys associated with it. +sub file2keys ($) { #{{{ + my $file=shift; + + my @keys; + if ($file =~ /^\Q$config{destdir}\/\E(.*)/) { + push @keys, $config{amazon_s3_prefix}.$1; + + # Munge foo/index.html to foo/ + if ($keys[0]=~/(^|.*\/)index.$config{htmlext}$/) { + # A duplicate might need to be stored under the + # unmunged name too. + if (!$config{usedirs} || $config{amazon_s3_dupindex}) { + push @keys, $1; + } + else { + @keys=($1); + } + } + } + return @keys; +} #}}} + package IkiWiki; use File::MimeInfo; use Encode; @@ -85,21 +108,11 @@ sub writefile ($$$;$$) { #{{{ # First, write the file to disk. my $ret=$IkiWiki::Plugin::amazon_s3::subs{'IkiWiki::writefile'}->($file, $destdir, $content, $binary, $writer); - - # Now, determine if the file was written to the destdir. - # writefile might be used for writing files elsewhere. - # Also, $destdir might be set to a subdirectory of the destdir. 
- my $key; - if ($destdir eq $config{destdir}) { - $key=$file; - } - elsif ("$destdir/$file" =~ /^\Q$config{destdir}\/\E(.*)/) { - $key=$1; - } + + my @keys=IkiWiki::Plugin::amazon_s3::file2keys("$destdir/$file"); # Store the data in S3. - if (defined $key) { - $key=$config{amazon_s3_prefix}.$key; + if (@keys) { my $bucket=IkiWiki::Plugin::amazon_s3::getbucket(); # The http layer tries to downgrade utf-8 @@ -108,42 +121,37 @@ sub writefile ($$$;$$) { #{{{ # so force convert it to bytes. $content=encode_utf8($content) if defined $content; - if (defined $content && ! length $content) { - # S3 doesn't allow storing empty files! - $content=" "; - } - my %opts=( acl_short => 'public-read', content_type => mimetype("$destdir/$file"), ); - my $res; - if (! $writer) { - $res=$bucket->add_key($key, $content, \%opts); - } - else { - # read back in the file that the writer emitted - $res=$bucket->add_key_filename($key, "$destdir/$file", \%opts); - } - if ($res && $key=~/(^|.*\/)index.$config{htmlext}$/) { - # index.html files are a special case. Since S3 is - # not a normal web server, it won't serve up - # foo/index.html when foo/ is requested. So the - # file has to be stored twice. (This is bad news - # when usedirs is enabled!) - # TODO: invesitgate using the new copy operation. - # (It may not be robust enough.) - my $base=$1; + + # If there are multiple keys to write, data is sent + # multiple times. + # TODO: investigate using the new copy operation. + # (It may not be robust enough.) + foreach my $key (@keys) { + debug("storing $key"); + my $res; if (! 
$writer) { - $res=$bucket->add_key($base, $content, \%opts); + $res=$bucket->add_key($key, $content, \%opts); } else { - $res=$bucket->add_key_filename($base, "$destdir/$file", \%opts); + # This test for empty files is a workaround + # for this bug: + # http://rt.cpan.org//Ticket/Display.html?id=35731 + if (-z "$destdir/$file") { + $res=$bucket->add_key($key, "", \%opts); + } + else { + # read back in the file that the writer emitted + $res=$bucket->add_key_filename($key, "$destdir/$file", \%opts); + } + } + if (! $res) { + error(gettext("Failed to save file to S3: "). + $bucket->err.": ".$bucket->errstr."\n"); } - } - if (! $res) { - error(gettext("Failed to save file to S3: "). - $bucket->err.": ".$bucket->errstr."\n"); } } @@ -154,19 +162,19 @@ sub writefile ($$$;$$) { #{{{ sub prune ($) { #{{{ my $file=shift; - # If a file in the destdir is being pruned, need to delete it out - # of S3 as well. - if ($file =~ /^\Q$config{destdir}\/\E(.*)/) { - my $key=$config{amazon_s3_prefix}.$1; + my @keys=IkiWiki::Plugin::amazon_s3::file2keys($file); + + # Prune files out of S3 too. + if (@keys) { my $bucket=IkiWiki::Plugin::amazon_s3::getbucket(); - my $res=$bucket->delete_key($key); - if ($res && $key=~/(^|.*\/)index.$config{htmlext}$/) { - # index.html special case: Delete other file too - $res=$bucket->delete_key($1); - } - if (! $res) { - error(gettext("Failed to delete file from S3: "). - $bucket->err.": ".$bucket->errstr."\n"); + + foreach my $key (@keys) { + debug("deleting $key"); + my $res=$bucket->delete_key($key); + if (! $res) { + error(gettext("Failed to delete file from S3: "). 
+ $bucket->err.": ".$bucket->errstr."\n"); + } } } diff --git a/doc/bugs/Search_results_should_point_to_dir__44___not_index.html__44___when_use__95__dirs_is_enabled.mdwn b/doc/bugs/Search_results_should_point_to_dir__44___not_index.html__44___when_use__95__dirs_is_enabled.mdwn index 91dca9548..45a8f0abd 100644 --- a/doc/bugs/Search_results_should_point_to_dir__44___not_index.html__44___when_use__95__dirs_is_enabled.mdwn +++ b/doc/bugs/Search_results_should_point_to_dir__44___not_index.html__44___when_use__95__dirs_is_enabled.mdwn @@ -7,3 +7,7 @@ point to `foo/bar/` instead. > --[Jason Blevins](http://jblevins.org/) >> Even with `usedirs`, there is no reason why the `index.html` should be called directly, and it might break content negotiation. Please just direct to the directory. --[[madduck]] + +> This bug affects the [[plugins/amazon_s3]] plugin -- when using that +> plugin plus the search plugin, you need to enable `amazon_s3_dupindex`. +> So this definitely should be fixed. --[[Joey]] diff --git a/doc/ikiwiki.setup b/doc/ikiwiki.setup index 997dcd18e..e85518849 100644 --- a/doc/ikiwiki.setup +++ b/doc/ikiwiki.setup @@ -183,4 +183,6 @@ use IkiWiki::Setup::Standard { #amazon_s3_prefix => "wiki/", # Uncomment to use the S3 European datacenter. #amazon_s3_location => "EU", + # Uncomment if you need to store each index file twice. + #amazon_s3_dupindex => 1, } diff --git a/doc/plugins/amazon_s3.mdwn b/doc/plugins/amazon_s3.mdwn index f1887d970..66042bfbe 100644 --- a/doc/plugins/amazon_s3.mdwn +++ b/doc/plugins/amazon_s3.mdwn @@ -11,11 +11,6 @@ modules and an Amazon S3 account to use this plugin. ## configuration -Important note: You should seriously consider turning off `usedirs` before -enabling this plugin. If `usedirs` is enabled, every page has to be stored -in S3 *twice*, as "page/index.html" and as "page/". That will cost you -money and bandwidth. 
- This plugin uses the following settings in the setup file: * `amazon_s3_key_id` - Set to your public access key id. @@ -32,6 +27,13 @@ This plugin uses the following settings in the setup file: empty string. * `amazon_s3_location` - Optionally, this can be set to control which datacenter to use. For example, set it to "EU" to for Europe. +* `amazon_s3_dupindex` - Normally, when `usedirs` is enabled, + "foo/index.html" is stored in S3 as a key named "foo/", and all links + between pages use that name. If you also need links that include + "index.html" in their names to work, you can enable this option. Then + each index.html file will be stored in S3 *twice*, under both names. This + will use more disk and bandwidth, and is not recommended unless you really + need it for some reason. Note that you should still set `destdir` in the setup file. The files that are uploaded to Amazon S3 will still be written to the destdir, too.