git-svn: refuse to dcommit non-UTF-8 messages
author Eric Wong <normalperson@yhbt.net>
Thu, 28 May 2009 07:56:23 +0000 (00:56 -0700)
committer Eric Wong <normalperson@yhbt.net>
Thu, 28 May 2009 07:57:07 +0000 (00:57 -0700)
...without i18n.commitencoding set in the config.

SVN tries to store all commit messages in UTF-8; however, it is
up to the clients to enforce this rule.  SVN servers themselves
do not always enforce it, which allows clients to commit
malformed UTF-8 messages and break repositories.

So git-svn will enforce this and tell the user to set
i18n.commitencoding when a git commit is not in UTF-8.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
git-svn.perl
t/t9139-git-svn-non-utf8-commitencoding.sh [new file with mode: 0755]

index a70c7d7b2cc1e47d5293e1bd45e11d398e48c3f1..33017974d0a098bdaff707ccf0e8660aa5009079 100755 (executable)
@@ -1178,16 +1178,27 @@ sub get_commit_entry {
        }
        rename $commit_editmsg, $commit_msg or croak $!;
        {
+               require Encode;
                # SVN requires messages to be UTF-8 when entering the repo
                local $/;
                open $log_fh, '<', $commit_msg or croak $!;
                binmode $log_fh;
                chomp($log_entry{log} = <$log_fh>);
 
-               if (my $enc = Git::config('i18n.commitencoding')) {
-                       require Encode;
-                       Encode::from_to($log_entry{log}, $enc, 'UTF-8');
+               my $enc = Git::config('i18n.commitencoding') || 'UTF-8';
+               my $msg = $log_entry{log};
+
+               eval { $msg = Encode::decode($enc, $msg, 1) };
+               if ($@) {
+                       die "Could not decode as $enc:\n", $msg,
+                           "\nPerhaps you need to set i18n.commitencoding\n";
                }
+
+               eval { $msg = Encode::encode('UTF-8', $msg, 1) };
+               die "Could not encode as UTF-8:\n$msg\n" if $@;
+
+               $log_entry{log} = $msg;
+
                close $log_fh or croak $!;
        }
        unlink $commit_msg;
diff --git a/t/t9139-git-svn-non-utf8-commitencoding.sh b/t/t9139-git-svn-non-utf8-commitencoding.sh
new file mode 100755 (executable)
index 0000000..2b1db97
--- /dev/null
@@ -0,0 +1,53 @@
+#!/bin/sh
+#
+# Copyright (c) 2009 Eric Wong
+
+test_description='git svn refuses to dcommit non-UTF8 messages'
+
+. ./lib-git-svn.sh
+
+# ISO-2022-JP is not tested here: a message in that encoding can pass for
+# valid UTF-8, so dcommit would not reject it.
+
+# Import an empty directory per encoding into SVN and clone it with git svn.
+for H in ISO-8859-1 EUCJP
+do
+       test_expect_success "$H setup" '
+               mkdir $H &&
+               svn_cmd import -m "$H test" $H "$svnrepo"/$H &&
+               git svn clone "$svnrepo"/$H $H
+       '
+done
+
+# Commit with i18n.commitencoding=$H using the t3900 message fixture, then
+# check that the commit object carries a matching "encoding" header.
+for H in ISO-8859-1 EUCJP
+do
+       test_expect_success "$H commit on git side" '
+       (
+               cd $H &&
+               git config i18n.commitencoding $H &&
+               git checkout -b t refs/remotes/git-svn &&
+               echo $H >F &&
+               git add F &&
+               git commit -a -F "$TEST_DIRECTORY"/t3900/$H.txt &&
+               E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") &&
+               test "z$E" = "z$H"
+       )
+       '
+done
+
+# With i18n.commitencoding unset, dcommit must refuse the non-UTF-8 message.
+# test_must_fail, unlike a bare "!", does not accept a crash as success.
+for H in ISO-8859-1 EUCJP
+do
+       test_expect_success "$H dcommit to svn" '
+       (
+               cd $H &&
+               git config --unset i18n.commitencoding &&
+               test_must_fail git svn dcommit
+       )
+       '
+done
+
+test_done