From 5544049def9a80bc5ea09a5649e13c1b56160518 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Thu, 3 May 2012 08:51:04 +0700 Subject: [PATCH] archive-tar: stream large blobs to tar file MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit t5000 verifies output while t1050 makes sure the command always respects core.bigfilethreshold Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-tar.c | 56 +++++++++++++++++++++++++++++++++++++++++---- t/t1050-large.sh | 4 ++++ t/t5000-tar-tree.sh | 6 +++++ 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 3be0cdf35..93387ea33 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -4,6 +4,7 @@ #include "cache.h" #include "tar.h" #include "archive.h" +#include "streaming.h" #include "run-command.h" #define RECORDSIZE (512) @@ -30,10 +31,9 @@ static void write_if_needed(void) * queues up writes, so that all our write(2) calls write exactly one * full block; pads writes to RECORDSIZE */ -static void write_blocked(const void *data, unsigned long size) +static void do_write_blocked(const void *data, unsigned long size) { const char *buf = data; - unsigned long tail; if (offset) { unsigned long chunk = BLOCKSIZE - offset; @@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size) memcpy(block + offset, buf, size); offset += size; } +} + +static void finish_record(void) +{ + unsigned long tail; tail = offset % RECORDSIZE; if (tail) { memset(block + offset, 0, RECORDSIZE - tail); @@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size) write_if_needed(); } +static void write_blocked(const void *data, unsigned long size) +{ + do_write_blocked(data, size); + finish_record(); +} + /* * The end of tar archives is marked by 2*512 nul bytes and after that * follows the rest of the block (if any). 
@@ -77,6 +88,33 @@ static void write_trailer(void) } } +/* + * streams blob sha1 through do_write_blocked() in reads of up to BLOCKSIZE + * bytes; returns 0 at end of stream, else the error()/failed read result + */ +static int stream_blocked(const unsigned char *sha1) +{ + struct git_istream *st; + enum object_type type; + unsigned long sz; + char buf[BLOCKSIZE]; + ssize_t readlen; + + st = open_istream(sha1, &type, &sz, NULL); + if (!st) + return error("cannot stream blob %s", sha1_to_hex(sha1)); + for (;;) { + readlen = read_istream(st, buf, sizeof(buf)); + if (readlen <= 0) + break; + do_write_blocked(buf, readlen); + } + close_istream(st); + if (!readlen) /* pad the record only after a clean end of stream */ + finish_record(); + return readlen; +} + /* * pax extended header records have the format "%u %s=%s\n". %u contains * the size of the whole string (including the %u), the first %s is the @@ -203,7 +241,11 @@ static int write_tar_entry(struct archiver_args *args, } else memcpy(header.name, path, pathlen); - if (S_ISLNK(mode) || S_ISREG(mode)) { + if (S_ISREG(mode) && !args->convert && + sha1_object_info(sha1, &size) == OBJ_BLOB && + size > big_file_threshold) + buffer = NULL; + else if (S_ISLNK(mode) || S_ISREG(mode)) { enum object_type type; buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size); if (!buffer) @@ -235,8 +277,12 @@ static int write_tar_entry(struct archiver_args *args, } strbuf_release(&ext_header); write_blocked(&header, sizeof(header)); - if (S_ISREG(mode) && buffer && size > 0) - write_blocked(buffer, size); + if (S_ISREG(mode) && size > 0) { + if (buffer) + write_blocked(buffer, size); + else + err = stream_blocked(sha1); + } free(buffer); return err; } diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 4d127f19b..fe475542f 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -134,4 +134,8 @@ test_expect_success 'repack' ' git repack -ad ' +test_expect_success 'tar achiving' ' + git archive --format=tar HEAD >/dev/null +' + test_done diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 
527c9e754..d9b997f5d 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -84,6 +84,12 @@ test_expect_success \ 'git archive vs. git tar-tree' \ 'test_cmp b.tar b2.tar' +test_expect_success 'git archive on large files' ' + test_config core.bigfilethreshold 1 && + git archive HEAD >b3.tar && + test_cmp b.tar b3.tar +' + test_expect_success \ 'git archive in a bare repo' \ '(cd bare.git && git archive HEAD) >b3.tar' -- 2.26.2