From 29507e94a46d0f2006671d31ac6812ebd0715cee Mon Sep 17 00:00:00 2001 From: joey Date: Fri, 26 May 2006 15:33:14 +0000 Subject: [PATCH] utf-8 support seems to be working now --- IkiWiki/Render.pm | 5 ++++- debian/changelog | 12 +++++++++--- debian/postinst | 2 +- doc/todo/utf8.mdwn | 15 ++++++++++++++- ikiwiki | 3 +-- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm index f33d6e66e..08f5e7e95 100644 --- a/IkiWiki/Render.pm +++ b/IkiWiki/Render.pm @@ -28,10 +28,13 @@ sub htmlize ($$) { #{{{ $blosxom::version="is a proper perl module too much to ask?"; use warnings 'all'; do "/usr/bin/markdown"; + require Encode; } if ($type eq '.mdwn') { - $content=Markdown::Markdown($content); + # Markdown does character based stuff that does not work + # well with utf-8 strings. + $content=Encode::decode_utf8(Markdown::Markdown(Encode::encode_utf8($content))); } else { error("htmlization of $type not supported"); diff --git a/debian/changelog b/debian/changelog index 12dc06178..5be298136 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,11 +14,17 @@ ikiwiki (1.4) UNRELEASED; urgency=low * Rebuilding on upgrade to this version is recommended. * Add a html validity check to the test suite, using the wdg-html-validator, if available. - * Make the html valid when there is nothing in the actions list by adding an - empty
<li> to the end of it. + * Make the html valid when there is nothing in the actions list. * Reordered some function call parameters for consistency. + * Enable full utf-8 support for page input and output. + * Add a workaround for markdown, which does not work well with utf-8 + strings. + * --getctime had bitrotted (well I only ever used it the once so far..), + fix and make it a bit more flexible + * rcs_getctime is changed, now rather than needing to loop over all pages, + it should just use the rcs to get the ctime of the passed file. - -- Joey Hess Fri, 26 May 2006 04:11:57 -0400 + -- Joey Hess Fri, 26 May 2006 04:49:49 -0400 ikiwiki (1.3) unstable; urgency=low diff --git a/debian/postinst b/debian/postinst index 76d826357..9135af754 100755 --- a/debian/postinst +++ b/debian/postinst @@ -4,7 +4,7 @@ set -e # Change this when some incompatible change is made that requires # rebuilding all wikis. -firstcompat=1.1 +firstcompat=1.4 if [ "$1" = configure ] && \ dpkg --compare-versions "$2" lt "$firstcompat"; then diff --git a/doc/todo/utf8.mdwn b/doc/todo/utf8.mdwn index 68195b729..b905e4633 100644 --- a/doc/todo/utf8.mdwn +++ b/doc/todo/utf8.mdwn @@ -25,4 +25,17 @@ The following problems have been observed when running ikiwiki this way: Malformed UTF-8 character (fatal) at /usr/bin/markdown line 1317. In this example, a literal 0x97 character had gotten into a markdown - file. + file. + + Running this before markdown can avoid it: + + $content = Encode::encode_utf8($content); + + I'm not sure how, or what should be done after markdown to get the string + back into a form that perl can treat as utf-8. + +* Apache "AddDefaultCharset on" settings will not play well with utf-8 + pages. + +* CGI::FormBuilder needs to be told to set `charset => "utf-8"` so that + utf-8 is used in the edit form. 
(done) diff --git a/ikiwiki b/ikiwiki index be7f86a45..3ea6b7aa0 100755 --- a/ikiwiki +++ b/ikiwiki @@ -1,4 +1,4 @@ -#!/usr/bin/perl -T +#!/usr/bin/perl -T -CSD $ENV{PATH}="/usr/local/bin:/usr/bin:/bin"; package IkiWiki; @@ -101,7 +101,6 @@ sub main () { #{{{ loadindex(); require IkiWiki::Render; rcs_update(); - rcs_getctime() if $config{getctime}; refresh(); rcs_notify() if $config{notify}; saveindex(); -- 2.26.2