Teach log family --encoding
author Junio C Hamano <junkio@cox.net>
Mon, 25 Dec 2006 19:48:35 +0000 (11:48 -0800)
committer Junio C Hamano <junkio@cox.net>
Tue, 26 Dec 2006 08:52:13 +0000 (00:52 -0800)
Updated commit objects record the encoding used in their
encoding header.  This updates the log family to reencode it
into the encoding specified in i18n.commitencoding (or the
default, which is "utf-8") upon output.

To force a specific encoding that is different, log family takes
command line flag --encoding=<encoding>; giving --encoding=none
entirely disables the reencoding and lets you view log messages
in their original encoding.

Signed-off-by: Junio C Hamano <junkio@cox.net>
builtin-log.c
commit.c
revision.h

index 8df3c1394a0b70548708137bfe20126fbea01709..b7e47cb5fe980374fa0d328830a524c6f6d65b66 100644 (file)
@@ -20,6 +20,8 @@ void add_head(struct rev_info *revs);
 static void cmd_log_init(int argc, const char **argv, const char *prefix,
                      struct rev_info *rev)
 {
+       int i;
+
        rev->abbrev = DEFAULT_ABBREV;
        rev->commit_format = CMIT_FMT_DEFAULT;
        rev->verbose_header = 1;
@@ -27,8 +29,21 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
        argc = setup_revisions(argc, argv, rev, "HEAD");
        if (rev->diffopt.pickaxe || rev->diffopt.filter)
                rev->always_show_header = 0;
-       if (argc > 1)
-               die("unrecognized argument: %s", argv[1]);
+       for (i = 1; i < argc; i++) {
+               const char *arg = argv[i];
+               if (!strncmp(arg, "--encoding=", 11)) {
+                       arg += 11;
+                       if (MAX_ENCODING_LENGTH <= strlen(arg))
+                               die(" Value of output encoding '%s' too long",
+                                   arg);
+                       if (strcmp(arg, "none"))
+                               strcpy(git_commit_encoding, arg);
+                       else
+                               git_commit_encoding[0] = 0;
+               }
+               else
+                       die("unrecognized argument: %s", arg);
+       }
 }
 
 static int cmd_log_walk(struct rev_info *rev)
index 289ef65eb1162ff8f386bf31fa6ee27008bb3096..df4bc0775a5042a353cb37228ff66210f35ed97b 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "tag.h"
 #include "commit.h"
+#include "utf8.h"
 
 int save_commit_buffer = 1;
 
@@ -563,10 +564,53 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com
        return offset;
 }
 
-unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
-                                 unsigned long len, char *buf, unsigned long space,
+/*
+ * Return a heap-allocated copy of the value of header "key" found in
+ * commit->buffer (the caller frees it), or NULL when no line
+ * "<key> <value>" appears before the first blank line or end of buffer.
+ */
+static char *get_header(const struct commit *commit, const char *key)
+{
+       int key_len = strlen(key);
+       const char *line = commit->buffer;
+
+       while (line && *line != '\n') {
+               const char *eol = strchr(line, '\n');
+               if (!eol)
+                       eol = line + strlen(line);
+               if (!strncmp(line, key, key_len) && line[key_len] == ' ') {
+                       int len = eol - line - key_len;
+                       char *ret = xmalloc(len);
+                       memcpy(ret, line + key_len + 1, len - 1);
+                       ret[len - 1] = '\0';
+                       return ret;
+               }
+               /* unterminated last line: stop rather than scan from NULL */
+               line = *eol ? eol + 1 : NULL;
+       }
+       return NULL;
+}
+
+/* Return commit->buffer reencoded to git_commit_encoding, else NULL. */
+static char *logmsg_reencode(const struct commit *commit)
+{
+       char *encoding = get_header(commit, "encoding");
+       char *out = NULL;
+
+       /* encoding comes from get_header(); release it on every path */
+       if (encoding && strcmp(encoding, git_commit_encoding))
+               out = reencode_string(commit->buffer,
+                                     git_commit_encoding, encoding);
+       free(encoding);
+       return out;
+}
+
+unsigned long pretty_print_commit(enum cmit_fmt fmt,
+                                 const struct commit *commit,
+                                 unsigned long len,
+                                 char *buf, unsigned long space,
                                  int abbrev, const char *subject,
-                                 const char *after_subject, int relative_date)
+                                 const char *after_subject,
+                                 int relative_date)
 {
        int hdr = 1, body = 0;
        unsigned long offset = 0;
@@ -574,6 +618,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
        int parents_shown = 0;
        const char *msg = commit->buffer;
        int plain_non_ascii = 0;
+       char *reencoded = NULL;
+
+       if (*git_commit_encoding) {
+               reencoded = logmsg_reencode(commit);
+               if (reencoded) {
+                       msg = reencoded;
+                       len = strlen(msg);
+               }
+       }
 
        if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
                indent = 0;
@@ -721,6 +774,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
        if (fmt == CMIT_FMT_EMAIL && !body)
                buf[offset++] = '\n';
        buf[offset] = '\0';
+
+       free(reencoded);
        return offset;
 }
 
index ec991e5c57039a57af7c63db483e5b108a25ad16..8f7907d7abdb764317612d868f58ab88f7175b59 100644 (file)
@@ -72,6 +72,7 @@ struct rev_info {
        const char      *ref_message_id;
        const char      *add_signoff;
        const char      *extra_headers;
+       const char      *log_reencode;
 
        /* Filter by commit log message */
        struct grep_opt *grep_filter;