more lightweight revalidation while reusing deflated stream in packing
author Junio C Hamano <junkio@cox.net>
Mon, 4 Sep 2006 04:09:18 +0000 (21:09 -0700)
committer Junio C Hamano <junkio@cox.net>
Mon, 4 Sep 2006 04:09:18 +0000 (21:09 -0700)
When copying from an existing pack and when copying from a loose
object with a new-style header, the code makes sure that the piece
we are going to copy out inflates well and that inflate() consumes
the data in full while doing so.

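The check to see if the xdelta really applies is quite expensive,
as you described, because you would need to have the image of
the base object, which can itself be represented as a delta against
something else.  So this revalidation does not attempt that check;
it only verifies that the deflated stream inflates cleanly to the
expected size.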

Signed-off-by: Junio C Hamano <junkio@cox.net>
builtin-pack-objects.c
cache.h
object.h
sha1_file.c

index 5e42387a450cd2385aa4c1a09539f225e6b5caf3..149fa283971712650f124e96aa5bf9e2f99b0ebb 100644 (file)
@@ -243,41 +243,61 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
        return n;
 }
 
-static int revalidate_one(struct object_entry *entry,
-                         void *data, char *type, unsigned long size)
+static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
 {
-       int err;
-       if ((!data) ||
-           ((entry->type != OBJ_DELTA) &&
-            ( (size != entry->size) ||
-              strcmp(type_names[entry->type], type))))
-               err = -1;
-       else
-               err = check_sha1_signature(entry->sha1, data, size, type);
-       free(data);
-       return err;
+       z_stream stream;
+       unsigned char fakebuf[4096];
+       int st;
+
+       memset(&stream, 0, sizeof(stream));
+       stream.next_in = data;
+       stream.avail_in = len;
+       stream.next_out = fakebuf;
+       stream.avail_out = sizeof(fakebuf);
+       inflateInit(&stream);
+
+       while (1) {
+               st = inflate(&stream, Z_FINISH);
+               if (st == Z_STREAM_END || st == Z_OK) {
+                       st = (stream.total_out == expect &&
+                             stream.total_in == len) ? 0 : -1;
+                       break;
+               }
+               if (st != Z_BUF_ERROR) {
+                       st = -1;
+                       break;
+               }
+               stream.next_out = fakebuf;
+               stream.avail_out = sizeof(fakebuf);
+       }
+       inflateEnd(&stream);
+       return st;
 }
 
 /*
  * we are going to reuse the existing pack entry data.  make
  * sure it is not corrupt.
  */
-static int revalidate_pack_entry(struct object_entry *entry)
+static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len)
 {
-       void *data;
-       char type[20];
-       unsigned long size;
-       struct pack_entry e;
+       enum object_type type;
+       unsigned long size, used;
 
        if (pack_to_stdout)
                return 0;
 
-       e.p = entry->in_pack;
-       e.offset = entry->in_pack_offset;
-
-       /* the caller has already called use_packed_git() for us */
-       data = unpack_entry_gently(&e, type, &size);
-       return revalidate_one(entry, data, type, size);
+       /* the caller has already called use_packed_git() for us,
+        * so it is safe to access the pack data from mmapped location.
+        * make sure the entry inflates correctly.
+        */
+       used = unpack_object_header_gently(data, len, &type, &size);
+       if (!used)
+               return -1;
+       if (type == OBJ_DELTA)
+               used += 20; /* skip base object name */
+       data += used;
+       len -= used;
+       return check_inflate(data, len, entry->size);
 }
 
 static int revalidate_loose_object(struct object_entry *entry,
@@ -285,15 +305,18 @@ static int revalidate_loose_object(struct object_entry *entry,
                                   unsigned long mapsize)
 {
        /* we already know this is a loose object with new type header. */
-       void *data;
-       char type[20];
-       unsigned long size;
+       enum object_type type;
+       unsigned long size, used;
 
        if (pack_to_stdout)
                return 0;
 
-       data = unpack_sha1_file(map, mapsize, type, &size);
-       return revalidate_one(entry, data, type, size);
+       used = unpack_object_header_gently(map, mapsize, &type, &size);
+       if (!used)
+               return -1;
+       map += used;
+       mapsize -= used;
+       return check_inflate(map, mapsize, size);
 }
 
 static unsigned long write_object(struct sha1file *f,
@@ -377,7 +400,7 @@ static unsigned long write_object(struct sha1file *f,
                datalen = find_packed_object_size(p, entry->in_pack_offset);
                buf = (char *) p->pack_base + entry->in_pack_offset;
 
-               if (revalidate_pack_entry(entry))
+               if (revalidate_pack_entry(entry, buf, datalen))
                        die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
                sha1write(f, buf, datalen);
                unuse_packed_git(p);
diff --git a/cache.h b/cache.h
index 195908fc34445d6c0368e1973955a6e3a058eb65..a53204f6d695cfe7fef3afdde296564dbcf37423 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -267,6 +267,17 @@ extern int legacy_loose_object(unsigned char *);
 extern int has_pack_file(const unsigned char *sha1);
 extern int has_pack_index(const unsigned char *sha1);
 
+enum object_type {
+       OBJ_NONE = 0,
+       OBJ_COMMIT = 1,
+       OBJ_TREE = 2,
+       OBJ_BLOB = 3,
+       OBJ_TAG = 4,
+       /* 5/6 for future expansion */
+       OBJ_DELTA = 7,
+       OBJ_BAD,
+};
+
 /* Convert to/from hex/sha1 representation */
 #define MINIMUM_ABBREV 4
 #define DEFAULT_ABBREV 7
@@ -374,6 +385,7 @@ extern int num_packed_objects(const struct packed_git *p);
 extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*);
 extern int find_pack_entry_one(const unsigned char *, struct pack_entry *, struct packed_git *);
 extern void *unpack_entry_gently(struct pack_entry *, char *, unsigned long *);
+extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern void packed_object_info_detail(struct pack_entry *, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
 
 /* Dumb servers support */
index 733faac4ccd1c9a8bb4ed1fc67986b369d9442b3..3d4ff4611f55d766cf2f82baa9771a80b50c85f9 100644 (file)
--- a/object.h
+++ b/object.h
@@ -27,17 +27,6 @@ struct object_array {
 /*
  * The object type is stored in 3 bits.
  */
-enum object_type {
-       OBJ_NONE = 0,
-       OBJ_COMMIT = 1,
-       OBJ_TREE = 2,
-       OBJ_BLOB = 3,
-       OBJ_TAG = 4,
-       /* 5/6 for future expansion */
-       OBJ_DELTA = 7,
-       OBJ_BAD,
-};
-
 struct object {
        unsigned parsed : 1;
        unsigned used : 1;
index 4ef98053f8b5ba9988fd28afc60a1ae2fced93c9..428d791ba8e2f62e3c30627a8f9463236c77cc60 100644 (file)
@@ -711,7 +711,7 @@ int legacy_loose_object(unsigned char *map)
                return 0;
 }
 
-static unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep)
+unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep)
 {
        unsigned shift;
        unsigned char c;