archive-zip: streaming for deflated files
author René Scharfe <rene.scharfe@lsrfire.ath.cx>
Thu, 3 May 2012 01:51:08 +0000 (08:51 +0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 3 May 2012 17:22:57 +0000 (10:22 -0700)
After an entry has been streamed out, its CRC and sizes are written as
part of a data descriptor.

For simplicity, we make the buffer for the compressed chunks twice as
big as the one for the uncompressed chunks, to be sure the results fit
even if deflate makes them bigger.

t5000 verifies the output. t1050 makes sure the command always respects
core.bigfilethreshold.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
archive-zip.c
t/t1050-large.sh
t/t5000-tar-tree.sh

index 1c6c39d42c6c1175268d77ba25cd14f062e22c04..f5af81f904df081002dad46a71be2eca8e3bebab 100644 (file)
@@ -211,7 +211,7 @@ static int write_zip_entry(struct archiver_args *args,
                compressed_size = size;
 
                if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
-                   size > big_file_threshold && method == 0) {
+                   size > big_file_threshold) {
                        stream = open_istream(sha1, &type, &size, NULL);
                        if (!stream)
                                return error("cannot stream blob %s",
@@ -307,6 +307,68 @@ static int write_zip_entry(struct archiver_args *args,
                write_zip_data_desc(size, compressed_size, crc);
                zip_offset += ZIP_DATA_DESC_SIZE;
 
+               set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
+       } else if (stream && method == 8) {
+               unsigned char buf[STREAM_BUFFER_SIZE];
+               ssize_t readlen;
+               git_zstream zstream;
+               int result;
+               size_t out_len;
+               unsigned char compressed[STREAM_BUFFER_SIZE * 2];
+
+               memset(&zstream, 0, sizeof(zstream));
+               git_deflate_init(&zstream, args->compression_level);
+
+               compressed_size = 0;
+               zstream.next_out = compressed;
+               zstream.avail_out = sizeof(compressed);
+
+               for (;;) {
+                       readlen = read_istream(stream, buf, sizeof(buf));
+                       if (readlen <= 0)
+                               break;
+                       crc = crc32(crc, buf, readlen);
+
+                       zstream.next_in = buf;
+                       zstream.avail_in = readlen;
+                       result = git_deflate(&zstream, 0);
+                       if (result != Z_OK)
+                               die("deflate error (%d)", result);
+                       out = compressed;
+                       if (!compressed_size)
+                               out += 2;
+                       out_len = zstream.next_out - out;
+
+                       if (out_len > 0) {
+                               write_or_die(1, out, out_len);
+                               compressed_size += out_len;
+                               zstream.next_out = compressed;
+                               zstream.avail_out = sizeof(compressed);
+                       }
+
+               }
+               close_istream(stream);
+               if (readlen)
+                       return readlen;
+
+               zstream.next_in = buf;
+               zstream.avail_in = 0;
+               result = git_deflate(&zstream, Z_FINISH);
+               if (result != Z_STREAM_END)
+                       die("deflate error (%d)", result);
+
+               git_deflate_end(&zstream);
+               out = compressed;
+               if (!compressed_size)
+                       out += 2;
+               out_len = zstream.next_out - out - 4;
+               write_or_die(1, out, out_len);
+               compressed_size += out_len;
+               zip_offset += compressed_size;
+
+               write_zip_data_desc(size, compressed_size, crc);
+               zip_offset += ZIP_DATA_DESC_SIZE;
+
                set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
        } else if (compressed_size > 0) {
                write_or_die(1, out, compressed_size);
index 9db54b56bc08601defe3389e647a5b40c931532c..55ed955ceffee9184b5822054697f58e7d0ef6a4 100755 (executable)
@@ -142,4 +142,8 @@ test_expect_success 'zip achiving, store only' '
        git archive --format=zip -0 HEAD >/dev/null
 '
 
+test_expect_success 'zip achiving, deflate' '
+       git archive --format=zip HEAD >/dev/null
+'
+
 test_done
index 3b54c38621125901d06a8d25b950bdd0d455641f..94f2ebac5f28e3fb306857892473f152307a7223 100755 (executable)
@@ -250,6 +250,13 @@ test_expect_success UNZIP 'git archive -0 --format=zip on large files' '
     (mkdir large && cd large && $UNZIP ../large.zip)
 '
 
+test_expect_success UNZIP 'git archive --format=zip on large files' '
+    test_config core.bigfilethreshold 1 &&
+    git archive --format=zip HEAD >large-compressed.zip &&
+    (mkdir large-compressed && cd large-compressed && $UNZIP ../large-compressed.zip) &&
+    test_cmp large-compressed/a/bin/sh large/a/bin/sh
+'
+
 test_expect_success \
     'git archive --list outside of a git repo' \
     'GIT_DIR=some/non-existing/directory git archive --list'