do not stream large files to pack when filters are in use

author Jeff King <peff@peff.net>

Fri, 24 Feb 2012 22:10:17 +0000 (17:10 -0500)

committer Junio C Hamano <gitster@pobox.com>

Fri, 24 Feb 2012 22:18:20 +0000 (14:18 -0800)
author Jeff King <peff@peff.net>
Fri, 24 Feb 2012 22:10:17 +0000 (17:10 -0500)
committer Junio C Hamano <gitster@pobox.com>
Fri, 24 Feb 2012 22:18:20 +0000 (14:18 -0800)
diff --git a/sha1_file.c b/sha1_file.c

index 956422ba4a5df5f46caaf85f10e4c85321439272..c9ae7ec2ae1495df209268358a73ce18f97eee96 100644 (file)
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2688,10 +2688,13 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
   * This also bypasses the usual "convert-to-git" dance, and that is on
   * purpose. We could write a streaming version of the converting
   * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above), but the primary
- * motivation for trying to stream from the working tree file and to
- * avoid mmaping it in core is to deal with large binary blobs, and
- * by definition they do _not_ want to get any conversion.
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
   */
  static int index_stream(unsigned char *sha1, int fd, size_t size,
                         enum object_type type, const char *path,
@@ -2766,7 +2769,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
  
         if (!S_ISREG(st->st_mode))
                 ret = index_pipe(sha1, fd, type, path, flags);
-       else if (size <= big_file_threshold || type != OBJ_BLOB)
+       else if (size <= big_file_threshold || type != OBJ_BLOB ||
+                (path && would_convert_to_git(path, NULL, 0, 0)))
                 ret = index_core(sha1, fd, size, type, path, flags);
         else
                 ret = index_stream(sha1, fd, size, type, path, flags);
diff --git a/t/t1051-large-conversion.sh b/t/t1051-large-conversion.sh

new file mode 100755 (executable)

index 0000000..8b7640b
--- /dev/null
+++ b/t/t1051-large-conversion.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+test_description='test conversion filters on large files'
+. ./test-lib.sh
+
+set_attr() {
+       test_when_finished 'rm -f .gitattributes' &&
+       echo "* $*" >.gitattributes
+}
+
+check_input() {
+       git read-tree --empty &&
+       git add small large &&
+       git cat-file blob :small >small.index &&
+       git cat-file blob :large | head -n 1 >large.index &&
+       test_cmp small.index large.index
+}
+
+check_output() {
+       rm -f small large &&
+       git checkout small large &&
+       head -n 1 large >large.head &&
+       test_cmp small large.head
+}
+
+test_expect_success 'setup input tests' '
+       printf "\$Id: foo\$\\r\\n" >small &&
+       cat small small >large &&
+       git config core.bigfilethreshold 20 &&
+       git config filter.test.clean "sed s/.*/CLEAN/"
+'
+
+test_expect_success 'autocrlf=true converts on input' '
+       test_config core.autocrlf true &&
+       check_input
+'
+
+test_expect_success 'eol=crlf converts on input' '
+       set_attr eol=crlf &&
+       check_input
+'
+
+test_expect_success 'ident converts on input' '
+       set_attr ident &&
+       check_input
+'
+
+test_expect_success 'user-defined filters convert on input' '
+       set_attr filter=test &&
+       check_input
+'
+
+test_expect_success 'setup output tests' '
+       echo "\$Id\$" >small &&
+       cat small small >large &&
+       git add small large &&
+       git config core.bigfilethreshold 7 &&
+       git config filter.test.smudge "sed s/.*/SMUDGE/"
+'
+
+test_expect_success 'autocrlf=true converts on output' '
+       test_config core.autocrlf true &&
+       check_output
+'
+
+test_expect_success 'eol=crlf converts on output' '
+       set_attr eol=crlf &&
+       check_output
+'
+
+test_expect_success 'user-defined filters convert on output' '
+       set_attr filter=test &&
+       check_output
+'
+
+test_expect_success 'ident converts on output' '
+       set_attr ident &&
+       rm -f small large &&
+       git checkout small large &&
+       sed -n "s/Id: .*/Id: SHA/p" <small >small.clean &&
+       head -n 1 large >large.head &&
+       sed -n "s/Id: .*/Id: SHA/p" <large.head >large.clean &&
+       test_cmp small.clean large.clean
+'
+
+test_done
author	Jeff King <peff@peff.net>
	Fri, 24 Feb 2012 22:10:17 +0000 (17:10 -0500)
committer	Junio C Hamano <gitster@pobox.com>
	Fri, 24 Feb 2012 22:18:20 +0000 (14:18 -0800)
sha1_file.c		patch | blob | history
t/t1051-large-conversion.sh	[new file with mode: 0755]	patch | blob