From f50bd57bcebe08d26653299b189fe82beaea4a0f Mon Sep 17 00:00:00 2001 From: joey Date: Tue, 4 Apr 2006 19:34:50 +0000 Subject: [PATCH] proper binmode settings so that with -CSD, ikiwiki will support unicode however, due to robustness, that's not enabled by default yet --- IkiWiki/Render.pm | 5 +++-- doc/todo/utf8.mdwn | 27 +++++++++++++++++++++++++++ ikiwiki | 10 +++++++--- 3 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 doc/todo/utf8.mdwn diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index 23f6b1c72..de35d24e1 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -341,8 +341,8 @@ sub render ($) { #{{{ my $type=pagetype($file); my $srcfile=srcfile($file); - my $content=readfile($srcfile); if ($type ne 'unknown') { + my $content=readfile($srcfile); my $page=pagename($file); $links{$page}=[findlinks($content, $page)]; @@ -366,9 +366,10 @@ sub render ($) { #{{{ } } else { + my $content=readfile($srcfile, 1); $links{$file}=[]; check_overwrite("$config{destdir}/$file", $file); - writefile($file, $config{destdir}, $content); + writefile($file, $config{destdir}, $content, 1); $oldpagemtime{$file}=time; $renderedfiles{$file}=$file; } diff --git a/doc/todo/utf8.mdwn b/doc/todo/utf8.mdwn new file mode 100644 index 000000000..536ec75b2 --- /dev/null +++ b/doc/todo/utf8.mdwn @@ -0,0 +1,27 @@ +ikiwiki should support utf-8 pages, both input and output + +Currently ikiwiki is believed to be utf-8 clean itself; it tells perl to use +binmode when reading possibly binary files (such as images) and it uses +utf-8 compatible regexps etc. + +utf-8 IO is not enabled by default though. While you can probably embed +utf-8 in pages anyway, ikiwiki will not treat it right in the cases where +it deals with things on a per-character basis (mostly when escaping and +de-escaping special characters in filenames). + +To enable utf-8, edit ikiwiki and add -CSD to the perl hashbang line. 
+(This should probably be configurable via a --utf8 or better --encoding= +switch.) + +The following problems have been observed when running ikiwiki this way: + +* If invalid utf-8 creeps into a file, ikiwiki will crash rendering it as + follows: + + Malformed UTF-8 character (unexpected continuation byte 0x97, with no preceding start byte) in substitution iterator at /usr/bin/markdown line 1317. + Malformed UTF-8 character (fatal) at /usr/bin/markdown line 1317. + + In this example, a literal 0x97 character had gotten into a markdown + file. + + Here, let's put one in this file: "—" diff --git a/ikiwiki b/ikiwiki index 97afa853f..6bf58017d 100755 --- a/ikiwiki +++ b/ikiwiki @@ -193,24 +193,27 @@ sub srcfile ($) { #{{{ error("internal error: $file cannot be found"); } #}}} -sub readfile ($) { #{{{ +sub readfile ($;$) { #{{{ my $file=shift; + my $binary=shift; if (-l $file) { error("cannot read a symlink ($file)"); } local $/=undef; - open (IN, "$file") || error("failed to read $file: $!"); + open (IN, $file) || error("failed to read $file: $!"); + binmode(IN) if $binary; my $ret=<IN>; close IN; return $ret; } #}}} -sub writefile ($$$) { #{{{ +sub writefile ($$$;$) { #{{{ my $file=shift; # can include subdirs my $destdir=shift; # directory to put file in my $content=shift; + my $binary=shift; my $test=$file; while (length $test) { @@ -232,6 +235,7 @@ sub writefile ($$$) { #{{{ } open (OUT, ">$destdir/$file") || error("failed to write $destdir/$file: $!"); + binmode(OUT) if $binary; print OUT $content; close OUT; } #}}} -- 2.26.2