[PATCH] HTTP partial transfer support for object, pack, and index transfers
authorNick Hengeveld <nickh@reactrix.com>
Wed, 28 Sep 2005 17:14:04 +0000 (10:14 -0700)
committerJunio C Hamano <junkio@cox.net>
Sun, 2 Oct 2005 06:17:14 +0000 (23:17 -0700)
HTTP partial transfer support for object, pack, and index transfers

[jc: this should not be placed in "master" -- it does not have any
 fixes requested on the list.]

Signed-off-by: Nick Hengeveld <nickh@reactrix.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
http-fetch.c

index 0566a9125cd698a79f99a44ab9a64e9bd7c30c7d..778d508243e78bc5c3dce2d71ab6c225c6a3159f 100644 (file)
 #define curl_global_init(a) do { /* nothing */ } while(0)
 #endif
 
+#define PREV_BUF_SIZE 4096
+#define RANGE_HEADER_SIZE 30
+
 static CURL *curl;
 static struct curl_slist *no_pragma_header;
+static struct curl_slist *no_range_header;
 static char curl_errorstr[CURL_ERROR_SIZE];
 
 static char *initial_base;
@@ -87,12 +91,37 @@ void prefetch(unsigned char *sha1)
 {
 }
 
+int relink_or_rename(char *old, char *new) {
+       int ret;
+
+       ret = link(old, new);
+       if (ret < 0) {
+               /* Same Coda hack as in write_sha1_file(sha1_file.c) */
+               ret = errno;
+               if (ret == EXDEV && !rename(old, new))
+                       return 0;
+       }
+       unlink(old);
+       if (ret) {
+               if (ret != EEXIST)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int got_alternates = 0;
 
 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
 {
        char *filename;
        char *url;
+       char tmpfile[PATH_MAX];
+       int ret;
+       long prev_posn = 0;
+       char range[RANGE_HEADER_SIZE];
+       struct curl_slist *range_header = NULL;
+       CURLcode curl_result;
 
        FILE *indexfile;
 
@@ -108,7 +137,8 @@ static int fetch_index(struct alt_base *repo, unsigned char *sha1)
                repo->base, sha1_to_hex(sha1));
        
        filename = sha1_pack_index_name(sha1);
-       indexfile = fopen(filename, "w");
+       snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+       indexfile = fopen(tmpfile, "a");
        if (!indexfile)
                return error("Unable to open local file %s for pack index",
                             filename);
@@ -119,13 +149,36 @@ static int fetch_index(struct alt_base *repo, unsigned char *sha1)
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
        curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
        
-       if (curl_easy_perform(curl)) {
+       /* If there is data present from a previous transfer attempt,
+          resume where it left off */
+       prev_posn = ftell(indexfile);
+       if (prev_posn>0) {
+               if (get_verbosely)
+                       fprintf(stderr,
+                               "Resuming fetch of index for pack %s at byte %ld\n",
+                               sha1_to_hex(sha1), prev_posn);
+               sprintf(range, "Range: bytes=%ld-", prev_posn);
+               range_header = curl_slist_append(range_header, range);
+               curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+       }
+
+       /* Clear out the Range: header after performing the request, so
+          other curl requests don't inherit inappropriate header data */
+       curl_result = curl_easy_perform(curl);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+       if (curl_result != 0) {
                fclose(indexfile);
                return error("Unable to get pack index %s\n%s", url,
                             curl_errorstr);
        }
 
        fclose(indexfile);
+
+       ret = relink_or_rename(tmpfile, filename);
+       if (ret)
+               return error("unable to write index filename %s: %s",
+                            filename, strerror(ret));
+
        return 0;
 }
 
@@ -306,6 +359,12 @@ static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
        struct packed_git **lst;
        FILE *packfile;
        char *filename;
+       char tmpfile[PATH_MAX];
+       int ret;
+       long prev_posn = 0;
+       char range[RANGE_HEADER_SIZE];
+       struct curl_slist *range_header = NULL;
+       CURLcode curl_result;
 
        if (fetch_indices(repo))
                return -1;
@@ -325,7 +384,8 @@ static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
                repo->base, sha1_to_hex(target->sha1));
 
        filename = sha1_pack_name(target->sha1);
-       packfile = fopen(filename, "w");
+       snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+       packfile = fopen(tmpfile, "a");
        if (!packfile)
                return error("Unable to open local file %s for pack",
                             filename);
@@ -336,7 +396,24 @@ static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_pragma_header);
        curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 
-       if (curl_easy_perform(curl)) {
+       /* If there is data present from a previous transfer attempt,
+          resume where it left off */
+       prev_posn = ftell(packfile);
+       if (prev_posn>0) {
+               if (get_verbosely)
+                       fprintf(stderr,
+                               "Resuming fetch of pack %s at byte %ld\n",
+                               sha1_to_hex(target->sha1), prev_posn);
+               sprintf(range, "Range: bytes=%ld-", prev_posn);
+               range_header = curl_slist_append(range_header, range);
+               curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+       }
+
+       /* Clear out the Range: header after performing the request, so
+          other curl requests don't inherit inappropriate header data */
+       curl_result = curl_easy_perform(curl);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+       if (curl_result != 0) {
                fclose(packfile);
                return error("Unable to get pack file %s\n%s", url,
                             curl_errorstr);
@@ -344,6 +421,11 @@ static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
 
        fclose(packfile);
 
+       ret = relink_or_rename(tmpfile, filename);
+       if (ret)
+               return error("unable to write pack filename %s: %s",
+                            filename, strerror(ret));
+
        lst = &repo->packs;
        while (*lst != target)
                lst = &((*lst)->next);
@@ -360,14 +442,29 @@ static int fetch_object(struct alt_base *repo, unsigned char *sha1)
        char *filename = sha1_file_name(sha1);
        unsigned char real_sha1[20];
        char tmpfile[PATH_MAX];
+       char prevfile[PATH_MAX];
        int ret;
        char *url;
        char *posn;
+       int prevlocal;
+       unsigned char prev_buf[PREV_BUF_SIZE];
+       ssize_t prev_read = 0;
+       long prev_posn = 0;
+       char range[RANGE_HEADER_SIZE];
+       struct curl_slist *range_header = NULL;
+       CURLcode curl_result;
+
+       snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
+       snprintf(prevfile, sizeof(prevfile), "%s.prev", filename);
+       unlink(prevfile);
+       rename(tmpfile, prevfile);
+       unlink(tmpfile);
+
+       local = open(tmpfile, O_WRONLY | O_CREAT | O_EXCL, 0666);
 
-       snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX",
-                get_object_directory());
+       /* Note: if another instance starts now, it will turn our new
+          tmpfile into its prevfile. */
 
-       local = mkstemp(tmpfile);
        if (local < 0)
                return error("Couldn't create temporary file %s for %s: %s\n",
                             tmpfile, filename, strerror(errno));
@@ -396,8 +493,57 @@ static int fetch_object(struct alt_base *repo, unsigned char *sha1)
 
        curl_easy_setopt(curl, CURLOPT_URL, url);
 
-       if (curl_easy_perform(curl)) {
-               unlink(filename);
+       /* If a previous temp file is present, process what was already
+          fetched. */
+       prevlocal = open(prevfile, O_RDONLY);
+       if (prevlocal != -1) {
+               do {
+                       prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
+                       if (prev_read>0) {
+                               if (fwrite_sha1_file(prev_buf,
+                                                    1,
+                                                    prev_read,
+                                                    NULL) == prev_read) {
+                                       prev_posn += prev_read;
+                               } else {
+                                       prev_read = -1;
+                               }
+                       }
+               } while (prev_read > 0);
+               close(prevlocal);
+       }
+       unlink(prevfile);
+
+       /* Reset inflate/SHA1 if there was an error reading the previous temp
+          file; also rewind to the beginning of the local file. */
+       if (prev_read == -1) {
+               memset(&stream, 0, sizeof(stream));
+               inflateInit(&stream);
+               SHA1_Init(&c);
+               if (prev_posn>0) {
+                       prev_posn = 0;
+                       lseek(local, SEEK_SET, 0);
+               }
+       }
+
+       /* If we have successfully processed data from a previous fetch
+          attempt, only fetch the data we don't already have. */
+       if (prev_posn>0) {
+               if (get_verbosely)
+                       fprintf(stderr,
+                               "Resuming fetch of object %s at byte %ld\n",
+                               hex, prev_posn);
+               sprintf(range, "Range: bytes=%ld-", prev_posn);
+               range_header = curl_slist_append(range_header, range);
+               curl_easy_setopt(curl, CURLOPT_HTTPHEADER, range_header);
+       }
+
+       /* Clear out the Range: header after performing the request, so
+          other curl requests don't inherit inappropriate header data */
+       curl_result = curl_easy_perform(curl);
+       curl_easy_setopt(curl, CURLOPT_HTTPHEADER, no_range_header);
+       if (curl_result != 0) {
+               unlink(tmpfile);
                return error("%s", curl_errorstr);
        }
 
@@ -413,20 +559,11 @@ static int fetch_object(struct alt_base *repo, unsigned char *sha1)
                unlink(tmpfile);
                return error("File %s has bad hash\n", hex);
        }
-       ret = link(tmpfile, filename);
-       if (ret < 0) {
-               /* Same Coda hack as in write_sha1_file(sha1_file.c) */
-               ret = errno;
-               if (ret == EXDEV && !rename(tmpfile, filename))
-                       goto out;
-       }
-       unlink(tmpfile);
-       if (ret) {
-               if (ret != EEXIST)
-                       return error("unable to write sha1 filename %s: %s",
-                                    filename, strerror(ret));
-       }
- out:
+       ret = relink_or_rename(tmpfile, filename);
+       if (ret)
+               return error("unable to write sha1 filename %s: %s",
+                            filename, strerror(ret));
+
        pull_say("got %s\n", hex);
        return 0;
 }
@@ -519,6 +656,7 @@ int main(int argc, char **argv)
 
        curl = curl_easy_init();
        no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
+       no_range_header = curl_slist_append(no_range_header, "Range:");
 
        curl_ssl_verify = getenv("GIT_SSL_NO_VERIFY") ? 0 : 1;
        curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, curl_ssl_verify);