From 65c2e0c349aa5c7f605defb52dc67f1b3658a1b9 Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Thu, 2 Jun 2005 15:20:54 -0700
Subject: [PATCH] [PATCH] Find size of SHA1 object without inflating everything.

This adds sha1_file_size() helper function and uses it in the
rename/copy similarity estimator. The helper function handles
deltified object as well.

Signed-off-by: Junio C Hamano
Signed-off-by: Linus Torvalds
---
 cache.h     |  1 +
 diff.c      | 11 +++++-----
 sha1_file.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/cache.h b/cache.h
index e54176b4b..481f7c787 100644
--- a/cache.h
+++ b/cache.h
@@ -154,6 +154,7 @@ extern void * map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern int unpack_sha1_header(z_stream *stream, void *map, unsigned long mapsize, void *buffer, unsigned long size);
 extern int parse_sha1_header(char *hdr, char *type, unsigned long *sizep);
 extern int sha1_delta_base(const unsigned char *, unsigned char *);
+extern int sha1_file_size(const unsigned char *, unsigned long *);
 extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
 extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
 extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
diff --git a/diff.c b/diff.c
index 7cf40daee..5513632b9 100644
--- a/diff.c
+++ b/diff.c
@@ -333,7 +333,6 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		close(fd);
 	}
 	else {
-		/* We cannot do size only for SHA1 blobs */
 		char type[20];
 		struct sha1_size_cache *e;
 
@@ -343,11 +342,13 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 				s->size = e->size;
 				return 0;
 			}
+			if (!sha1_file_size(s->sha1, &s->size))
+				locate_size_cache(s->sha1, s->size);
+		}
+		else {
+			s->data = read_sha1_file(s->sha1, type, &s->size);
+			s->should_free = 1;
 		}
-		s->data = read_sha1_file(s->sha1, type, &s->size);
-		s->should_free = 1;
-		if (s->data && size_only)
-			locate_size_cache(s->sha1, s->size);
 	}
 	return 0;
 }
diff --git a/sha1_file.c b/sha1_file.c
index ccfcca07c..a2ba4c81d 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -432,6 +432,66 @@ int sha1_delta_base(const unsigned char *sha1, unsigned char *base_sha1)
 	return ret;
 }
 
+int sha1_file_size(const unsigned char *sha1, unsigned long *sizep)
+{
+	int ret, status;
+	unsigned long mapsize, size;
+	void *map;
+	z_stream stream;
+	char hdr[64], type[20];
+	const unsigned char *data;
+	unsigned char cmd;
+	int i;
+
+	map = map_sha1_file(sha1, &mapsize);
+	if (!map)
+		return -1;
+	ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
+	status = -1;
+	if (ret < Z_OK || parse_sha1_header(hdr, type, &size) < 0)
+		goto out;
+	if (strcmp(type, "delta")) {
+		*sizep = size;
+		status = 0;
+		goto out;
+	}
+
+	/* We are dealing with a delta object. Inflated, the first
+	 * 20 bytes hold the base object SHA1, and delta data follows
+	 * immediately after it.
+	 *
+	 * The initial part of the delta starts at delta_data_head +
+	 * 20. Borrow code from patch-delta to read the result size.
+	 */
+	data = hdr + strlen(hdr) + 1 + 20;
+
+	/* Skip over the source size; we are not interested in
+	 * it and we cannot verify it because we do not want
+	 * to read the base object.
+	 */
+	cmd = *data++;
+	while (cmd) {
+		if (cmd & 1)
+			data++;
+		cmd >>= 1;
+	}
+	/* Read the result size */
+	size = i = 0;
+	cmd = *data++;
+	while (cmd) {
+		if (cmd & 1)
+			size |= *data++ << i;
+		i += 8;
+		cmd >>= 1;
+	}
+	*sizep = size;
+	status = 0;
+ out:
+	inflateEnd(&stream);
+	munmap(map, mapsize);
+	return status;
+}
+
 void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)
 {
 	unsigned long mapsize;
-- 
2.26.2