From a733cb606fed08130317d803956e946f73b0b88e Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Tue, 28 Jun 2005 14:21:02 -0700
Subject: [PATCH] Change pack file format. Hopefully for the last time.

This also adds a header with a signature, version info, and the number
of objects to the pack file. It also encodes the file length and type
more efficiently.
---
Review notes (text in this section is not part of the commit message):

 - unpack-objects.c: the OBJ_DELTA case now passes "pack" instead of
   "pack+5" to unpack_delta_entry().  By that point "pack" has already
   been advanced past the variable-length object header and "left" no
   longer subtracts 5, so the extra +5 would skip into the 20-byte base
   SHA-1.  This matches the pack+5 -> pack change made for every other
   caller in this patch.
 - sha1_file.c: the first context line of the include hunk reads just
   "#include"; its header name is missing in this copy and has to be
   restored before the patch can apply.

 pack-objects.c   | 84 +++++++++++++++++++++++++++++++++++-------------
 pack.h           | 23 +++++++++++++
 sha1_file.c      | 75 ++++++++++++++++++++++++++----------------
 unpack-objects.c | 67 +++++++++++++++++++++++--------------
 4 files changed, 175 insertions(+), 74 deletions(-)
 create mode 100644 pack.h

diff --git a/pack-objects.c b/pack-objects.c
index e9764604f..d1e62dc01 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -2,25 +2,18 @@
 #include "cache.h"
 #include "object.h"
 #include "delta.h"
+#include "pack.h"
 #include "csum-file.h"

 static const char pack_usage[] = "git-pack-objects [--window=N] [--depth=N] {--stdout | base-name} < object-list";

-/*
- * The object type is a single-character shorthand:
- *  - 'C' for "Commit"
- *  - 'T' for "Tree"
- *  - 'B' for "Blob"
- *  - 'G' for "taG"
- *  - 'D' for "Delta"
- */
 struct object_entry {
 	unsigned char sha1[20];
 	unsigned long size;
 	unsigned long offset;
 	unsigned int depth;
 	unsigned int hash;
-	unsigned char type;
+	enum object_type type;
 	unsigned long delta_size;
 	struct object_entry *delta;
 };
@@ -49,13 +42,54 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e
 	return delta_buf;
 }

+/*
+ * The per-object header is a pretty dense thing, which is
+ *  - first byte: low four bits are "size", then three bits of "type",
+ *    and the high bit is "size continues".
+ *  - each byte afterwards: low seven bits are size continuation,
+ *    with the high bit being "size continues"
+ */
+static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
+{
+	int n = 1, i;
+	unsigned char c;
+
+	if (type < OBJ_COMMIT || type > OBJ_DELTA)
+		die("bad type %d", type);
+
+	/*
+	 * Shift the size up by 7 bits at a time,
+	 * until you get bits in the "high four".
+	 * That will be our beginning. We'll have
+	 * four size bits in 28..31, then groups
+	 * of seven in 21..27, 14..20, 7..13 and
+	 * finally 0..6.
+	 */
+	if (size) {
+		n = 5;
+		while (!(size & 0xfe000000)) {
+			size <<= 7;
+			n--;
+		}
+	}
+	c = (type << 4) | (size >> 28);
+	for (i = 1; i < n; i++) {
+		*hdr++ = c | 0x80;
+		c = (size >> 21) & 0x7f;
+		size <<= 7;
+	}
+	*hdr = c;
+	return n;
+}
+
 static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
 {
 	unsigned long size;
 	char type[10];
 	void *buf = read_sha1_file(entry->sha1, type, &size);
-	char header[25];
+	unsigned char header[10];
 	unsigned hdrlen, datalen;
+	enum object_type obj_type;

 	if (!buf)
 		die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -67,18 +101,18 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry
 	 * length, except for deltas that has the 20 bytes of delta sha
 	 * instead.
 	 */
-	header[0] = entry->type;
-	hdrlen = 5;
+	obj_type = entry->type;
 	if (entry->delta) {
-		header[0] = 'D';
-		memcpy(header+5, entry->delta, 20);
 		buf = delta_against(buf, size, entry);
 		size = entry->delta_size;
-		hdrlen = 25;
+		obj_type = OBJ_DELTA;
 	}
-	datalen = htonl(size);
-	memcpy(header+1, &datalen, 4);
+	hdrlen = encode_header(obj_type, size, header);
 	sha1write(f, header, hdrlen);
+	if (entry->delta) {
+		sha1write(f, entry->delta, 20);
+		hdrlen += 20;
+	}
 	datalen = sha1write_compressed(f, buf, size);
 	free(buf);
 	return hdrlen + datalen;
@@ -88,13 +122,19 @@ static void write_pack_file(void)
 {
 	int i;
 	struct sha1file *f;
-	unsigned long offset = 0;
+	unsigned long offset;
 	unsigned long mb;
+	struct pack_header hdr;

 	if (!base_name)
 		f = sha1fd(1, "");
 	else
 		f = sha1create("%s.%s", base_name, "pack");
+	hdr.hdr_signature = htonl(PACK_SIGNATURE);
+	hdr.hdr_version = htonl(1);
+	hdr.hdr_entries = htonl(nr_objects);
+	sha1write(f, &hdr, sizeof(hdr));
+	offset = sizeof(hdr);
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *entry = objects + i;
 		entry->offset = offset;
@@ -168,13 +208,13 @@ static void check_object(struct object_entry *entry)

 	if (!sha1_object_info(entry->sha1, type, &entry->size)) {
 		if (!strcmp(type, "commit")) {
-			entry->type = 'C';
+			entry->type = OBJ_COMMIT;
 		} else if (!strcmp(type, "tree")) {
-			entry->type = 'T';
+			entry->type = OBJ_TREE;
 		} else if (!strcmp(type, "blob")) {
-			entry->type = 'B';
+			entry->type = OBJ_BLOB;
 		} else if (!strcmp(type, "tag")) {
-			entry->type = 'G';
+			entry->type = OBJ_TAG;
 		} else
 			die("unable to pack object %s of type %s",
 			    sha1_to_hex(entry->sha1), type);
diff --git a/pack.h b/pack.h
new file mode 100644
index 000000000..08e120dca
--- /dev/null
+++ b/pack.h
@@ -0,0 +1,23 @@
+#ifndef PACK_H
+#define PACK_H
+
+enum object_type {
+	OBJ_NONE,
+	OBJ_COMMIT,
+	OBJ_TREE,
+	OBJ_BLOB,
+	OBJ_TAG,
+	OBJ_DELTA,
+};
+
+/*
+ * Packed object header
+ */
+#define PACK_SIGNATURE 0x5041434b	/* "PACK" */
+struct pack_header {
+	unsigned int hdr_signature;
+	unsigned int hdr_version;
+	unsigned int hdr_entries;
+};
+
+#endif
diff --git a/sha1_file.c b/sha1_file.c
index e27affb28..6e3fd180f 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -10,6 +10,7 @@
 #include
 #include "cache.h"
 #include "delta.h"
+#include "pack.h"

 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -665,37 +666,60 @@ static int packed_delta_info(unsigned char *base_sha1,
 	return 0;
 }

+static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset,
+	enum object_type *type, unsigned long *sizep)
+{
+	unsigned char *pack, c;
+	unsigned long size;
+
+	if (offset >= p->pack_size)
+		die("object offset outside of pack file");
+
+	pack = p->pack_base + offset;
+	c = *pack++;
+	offset++;
+	*type = (c >> 4) & 7;
+	size = c & 15;
+	while (c & 0x80) {
+		if (offset >= p->pack_size)
+			die("object offset outside of pack file");
+		c = *pack++;
+		offset++;
+		size = (size << 7) | (c & 0x7f);
+	}
+	*sizep = size;
+	return offset;
+}
+
 static int packed_object_info(struct pack_entry *entry,
 			      char *type, unsigned long *sizep)
 {
 	struct packed_git *p = entry->p;
 	unsigned long offset, size, left;
 	unsigned char *pack;
-
-	offset = entry->offset;
-	if (p->pack_size - 5 < offset)
-		die("object offset outside of pack file");
+	enum object_type kind;

 	if (use_packed_git(p))
 		die("cannot map packed file");

+	offset = unpack_object_header(p, entry->offset, &kind, &size);
 	pack = p->pack_base + offset;
-	size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
-	left = p->pack_size - offset - 5;
-	switch (*pack) {
-	case 'D':
-		return packed_delta_info(pack+5, size, left, type, sizep);
+	left = p->pack_size - offset;
+
+	switch (kind) {
+	case OBJ_DELTA:
+		return packed_delta_info(pack, size, left, type, sizep);
 		break;
-	case 'C':
+	case OBJ_COMMIT:
 		strcpy(type, "commit");
 		break;
-	case 'T':
+	case OBJ_TREE:
 		strcpy(type, "tree");
 		break;
-	case 'B':
+	case OBJ_BLOB:
 		strcpy(type, "blob");
 		break;
-	case 'G':
+	case OBJ_TAG:
 		strcpy(type, "tag");
 		break;
 	default:
@@ -787,37 +811,34 @@ static void *unpack_entry(struct pack_entry *entry,
 	struct packed_git *p = entry->p;
 	unsigned long offset, size, left;
 	unsigned char *pack;
-
-	offset = entry->offset;
-	if (p->pack_size - 5 < offset)
-		die("object offset outside of pack file");
+	enum object_type kind;

 	if (use_packed_git(p))
 		die("cannot map packed file");

+	offset = unpack_object_header(p, entry->offset, &kind, &size);
 	pack = p->pack_base + offset;
-	size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
-	left = p->pack_size - offset - 5;
-	switch (*pack) {
-	case 'D':
-		return unpack_delta_entry(pack+5, size, left, type, sizep);
-	case 'C':
+	left = p->pack_size - offset;
+	switch (kind) {
+	case OBJ_DELTA:
+		return unpack_delta_entry(pack, size, left, type, sizep);
+	case OBJ_COMMIT:
 		strcpy(type, "commit");
 		break;
-	case 'T':
+	case OBJ_TREE:
 		strcpy(type, "tree");
 		break;
-	case 'B':
+	case OBJ_BLOB:
 		strcpy(type, "blob");
 		break;
-	case 'G':
+	case OBJ_TAG:
 		strcpy(type, "tag");
 		break;
 	default:
 		die("corrupted pack file");
 	}
 	*sizep = size;
-	return unpack_non_delta_entry(pack+5, size, left);
+	return unpack_non_delta_entry(pack, size, left);
 }

 static int find_pack_entry_1(const unsigned char *sha1,
diff --git a/unpack-objects.c b/unpack-objects.c
index 57f3c9b6b..98b696cf2 100644
--- a/unpack-objects.c
+++ b/unpack-objects.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "object.h"
 #include "delta.h"
+#include "pack.h"

 static int dry_run;
 static int nr_entries;
@@ -92,7 +93,7 @@ static int check_index(void)
 }

 static int unpack_non_delta_entry(struct pack_entry *entry,
-				  int kind,
+				  enum object_type kind,
 				  unsigned char *data,
 				  unsigned long size,
 				  unsigned long left)
@@ -101,9 +102,9 @@ static int unpack_non_delta_entry(struct pack_entry *entry,
 	z_stream stream;
 	char *buffer;
 	unsigned char sha1[20];
-	char *type_s;
+	char *type;

-	printf("%s %c %lu\n", sha1_to_hex(entry->sha1), kind, size);
+	printf("%s %c %lu\n", sha1_to_hex(entry->sha1), ".CTBGD"[kind], size);
 	if (dry_run)
 		return 0;

@@ -121,18 +122,18 @@ static int unpack_non_delta_entry(struct pack_entry *entry,
 	if ((st != Z_STREAM_END) || stream.total_out != size)
 		goto err_finish;
 	switch (kind) {
-	case 'C': type_s = "commit"; break;
-	case 'T': type_s = "tree"; break;
-	case 'B': type_s = "blob"; break;
-	case 'G': type_s = "tag"; break;
+	case OBJ_COMMIT: type = "commit"; break;
+	case OBJ_TREE: type = "tree"; break;
+	case OBJ_BLOB: type = "blob"; break;
+	case OBJ_TAG: type = "tag"; break;
 	default: goto err_finish;
 	}
-	if (write_sha1_file(buffer, size, type_s, sha1) < 0)
+	if (write_sha1_file(buffer, size, type, sha1) < 0)
 		die("failed to write %s (%s)",
-		    sha1_to_hex(entry->sha1), type_s);
-	printf("%s %s\n", sha1_to_hex(sha1), type_s);
+		    sha1_to_hex(entry->sha1), type);
+	printf("%s %s\n", sha1_to_hex(sha1), type);
 	if (memcmp(sha1, entry->sha1, 20))
-		die("resulting %s have wrong SHA1", type_s);
+		die("resulting %s have wrong SHA1", type);

 finish:
 	st = 0;
@@ -237,28 +238,44 @@ static int unpack_delta_entry(struct pack_entry *entry,
 static void unpack_entry(struct pack_entry *entry)
 {
 	unsigned long offset, size, left;
-	unsigned char *pack;
+	unsigned char *pack, c;
+	int type;

 	/* Have we done this one already due to deltas based on it? */
 	if (lookup_object(entry->sha1))
 		return;

 	offset = ntohl(entry->offset);
-	if (offset > pack_size - 5)
-		die("object offset outside of pack file");
+	if (offset >= pack_size)
+		goto bad;
+
 	pack = pack_base + offset;
-	size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
-	left = pack_size - offset - 5;
-	switch (*pack) {
-	case 'C': case 'T': case 'B': case 'G':
-		unpack_non_delta_entry(entry, *pack, pack+5, size, left);
-		break;
-	case 'D':
+	c = *pack++;
+	offset++;
+	type = (c >> 4) & 7;
+	size = (c & 15);
+	while (c & 0x80) {
+		if (offset >= pack_size)
+			goto bad;
+		offset++;
+		c = *pack++;
+		size = (size << 7) + (c & 0x7f);
+
+	}
+	left = pack_size - offset;
+	switch (type) {
+	case OBJ_COMMIT:
+	case OBJ_TREE:
+	case OBJ_BLOB:
+	case OBJ_TAG:
+		unpack_non_delta_entry(entry, type, pack, size, left);
+		return;
+	case OBJ_DELTA:
-		unpack_delta_entry(entry, pack+5, size, left);
+		unpack_delta_entry(entry, pack, size, left);
-		break;
-	default:
-		die("corrupted pack file");
+		return;
 	}
+bad:
+	die("corrupted pack file");
 }

 /*
-- 
2.26.2