logmsg_reencode: lazily load missing commit buffers

author Jeff King <peff@peff.net>

Sat, 26 Jan 2013 09:44:28 +0000 (04:44 -0500)

committer Junio C Hamano <gitster@pobox.com>

Sat, 26 Jan 2013 21:28:22 +0000 (13:28 -0800)
author Jeff King <peff@peff.net>
Sat, 26 Jan 2013 09:44:28 +0000 (04:44 -0500)
committer Junio C Hamano <gitster@pobox.com>
Sat, 26 Jan 2013 21:28:22 +0000 (13:28 -0800)
diff --git a/builtin/blame.c b/builtin/blame.c

index 962e4e3cd1ce56cc85fc1765304beb9851313c12..86100e96627e4f839fb5b627bc09ed4264c36176 100644 (file)
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1424,19 +1424,6 @@ static void get_commit_info(struct commit *commit,
  
         commit_info_init(ret);
  
-       /*
-        * We've operated without save_commit_buffer, so
-        * we now need to populate them for output.
-        */
-       if (!commit->buffer) {
-               enum object_type type;
-               unsigned long size;
-               commit->buffer =
-                       read_sha1_file(commit->object.sha1, &type, &size);
-               if (!commit->buffer)
-                       die("Cannot read commit %s",
-                           sha1_to_hex(commit->object.sha1));
-       }
         encoding = get_log_output_encoding();
         message = logmsg_reencode(commit, encoding);
         get_ac_line(message, "\nauthor ",
diff --git a/pretty.c b/pretty.c

index c6753494386d4d1043e87babcc97f8499177cb5a..eae57ad9d7f3b06a5a76f9d934825f9824def477 100644 (file)
--- a/pretty.c
+++ b/pretty.c
@@ -592,18 +592,59 @@ char *logmsg_reencode(const struct commit *commit,
         char *msg = commit->buffer;
         char *out;
  
+       if (!msg) {
+               enum object_type type;
+               unsigned long size;
+
+               msg = read_sha1_file(commit->object.sha1, &type, &size);
+               if (!msg)
+                       die("Cannot read commit object %s",
+                           sha1_to_hex(commit->object.sha1));
+               if (type != OBJ_COMMIT)
+                       die("Expected commit for '%s', got %s",
+                           sha1_to_hex(commit->object.sha1), typename(type));
+       }
+
         if (!output_encoding || !*output_encoding)
                 return msg;
         encoding = get_header(commit, msg, "encoding");
         use_encoding = encoding ? encoding : utf8;
-       if (same_encoding(use_encoding, output_encoding))
-               if (encoding) /* we'll strip encoding header later */
-                       out = xstrdup(commit->buffer);
-               else
-                       return msg; /* nothing to do */
-       else
-               out = reencode_string(commit->buffer,
-                                     output_encoding, use_encoding);
+       if (same_encoding(use_encoding, output_encoding)) {
+               /*
+                * No encoding work to be done. If we have no encoding header
+                * at all, then there's nothing to do, and we can return the
+                * message verbatim (whether newly allocated or not).
+                */
+               if (!encoding)
+                       return msg;
+
+               /*
+                * Otherwise, we still want to munge the encoding header in the
+                * result, which will be done by modifying the buffer. If we
+                * are using a fresh copy, we can reuse it. But if we are using
+                * the cached copy from commit->buffer, we need to duplicate it
+                * to avoid munging commit->buffer.
+                */
+               out = msg;
+               if (out == commit->buffer)
+                       out = xstrdup(out);
+       }
+       else {
+               /*
+                * There's actual encoding work to do. Do the reencoding, which
+                * still leaves the header to be replaced in the next step. At
+                * this point, we are done with msg. If we allocated a fresh
+                * copy, we can free it.
+                */
+               out = reencode_string(msg, output_encoding, use_encoding);
+               if (out && msg != commit->buffer)
+                       free(msg);
+       }
+
+       /*
+        * This replacement actually consumes the buffer we hand it, so we do
+        * not have to worry about freeing the old "out" here.
+        */
         if (out)
                 out = replace_encoding_header(out, output_encoding);
  
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh

index 91f8198f058df5e2ee93cda8086793ce6a9186d5..04a44d5c61495c4c87db6ada952f5652b8e17642 100755 (executable)
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,12 @@ test_expect_success 'switching diff driver produces correct results' '
         test_cmp expect actual
  '
  
+# The point here is to test that we can log the notes cache and still use it to
+# produce a diff later (older versions of git would segfault on this). It's
+# much more likely to come up in the real world with "log --all -p", but using
+# --no-walk lets us reliably reproduce the order of traversal.
+test_expect_success 'log notes cache and still use cache for -p' '
+       git log --no-walk -p refs/notes/textconv/magic HEAD
+'
+
  test_done
author	Jeff King <peff@peff.net>
	Sat, 26 Jan 2013 09:44:28 +0000 (04:44 -0500)
committer	Junio C Hamano <gitster@pobox.com>
	Sat, 26 Jan 2013 21:28:22 +0000 (13:28 -0800)
builtin/blame.c		patch | blob | history
pretty.c		patch | blob | history
t/t4042-diff-textconv-caching.sh		patch | blob | history