# encoding.pl -- sketch of an escaping scheme for text that has to travel in
# the local part of an e-mail address.
#
# Provenance (from the original patch header):
#   From:      David Bremner
#   Date:      Sat, 28 Jun 2008 22:04:03 +0000 (+0200)
#   Subject:   encoding sketch
#   X-Git-Tag: 0.1~48
#   X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=7496b8adf000e2b71cedcbd5ad7004a9363e1fd2;p=ikiwiki.git
#   New file encoding.pl (index 000000000..9e8a896a9)
#
# Usage: perl encoding.pl [FILE...]
# Reads lines from the named files (or standard input) and prints the escaped
# form of each line, one per output line.

use strict;
use warnings;

# The original used the `encoding` pragma, whose default mode is deprecated
# and no longer supported by perl 5.26 and later.  `use utf8` covers the
# source-encoding half of what it did; `use open` covers the I/O half by
# making UTF-8 the default layer for the standard handles and for files
# opened within this lexical scope.
use utf8;
use open qw(:std :encoding(UTF-8));

# Alphabet used to write numbers in base 64: the list index is the digit
# value.  Note that this is NOT the MIME base64 alphabet: '-' replaces '/'.
our @digits = split //,
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-';

# encode_num($num)
#
# Returns $num written in base 64 using @digits, most significant digit
# first, without padding.  $num is expected to be a non-negative integer
# (callers in this file pass the result of ord()).  Zero yields the single
# digit 'A' rather than an empty string, so that every number has a
# non-empty representation.
#
# The ($) prototype is retained so existing call sites parse as before.
sub encode_num($) {
    my $num = shift;

    # Without this guard the loop below never runs for 0 and the caller
    # would get "", turning an escaped NUL into the payload-less "==".
    return $digits[0] if $num == 0;

    my $str = '';
    while ($num > 0) {
        # Peel off the low six bits and prepend the matching digit.
        $str = $digits[ $num % 64 ] . $str;
        $num >>= 6;
    }

    return $str;
}

# strict_rfc2822_escape($str)
#
# Returns a copy of $str in which
#   * ASCII letters, ASCII digits and the characters '+', '-', '~', '.' are
#     kept as they are,
#   * every space becomes '_',
#   * every other character (including '=' and '_' themselves) becomes
#     "=" . encode_num(code point) . "=".
#
# Rationale for the set of unescaped characters:
# according to rfc 2822, the following non-alphanumerics are OK for
# the local part of an address: "!#$%&'*+-/=?^_`{|}~". On the other
# hand, a fairly common exim configuration, for example, blocks
# addresses having "@%!/|`#&?" in the local part. '+' and '-' are
# pretty widely used to attach suffixes (although usually only one
# works on a given mail host). It seems ok to use '+-', since the first
# marks the beginning of a suffix, and then is a regular character.
# '.' also seems mostly permissible.
#
# The ($) prototype is retained so existing call sites parse as before.
sub strict_rfc2822_escape($) {
    my $str = shift;

    # "=" we use as an escape, and '_' for space.  The space is left alone
    # by the first pass and translated afterwards, so that a literal '_'
    # in the input (escaped here) stays distinguishable from a space.
    my $escaped = $str =~ s/([^a-zA-Z0-9+\-~. ])/"=".encode_num(ord($1))."="/ger;
    $escaped =~ s/ /_/g;

    return $escaped;
}

# main()
#
# Filter driver: escapes each input line and writes the result on a line of
# its own, so that the outputs for successive inputs do not run together.
sub main {
    while (my $line = <>) {
        chomp $line;
        print strict_rfc2822_escape($line), "\n";
    }
    return;
}

# Run the filter only when executed as a script, so the subs above can be
# loaded (e.g. by a test) without reading standard input.
main() unless caller;