Merge branch 'db/vcs-svn-incremental' into svn-fe
author Jonathan Nieder <jrnieder@gmail.com>
Thu, 26 May 2011 06:51:38 +0000 (01:51 -0500)
committer Jonathan Nieder <jrnieder@gmail.com>
Thu, 26 May 2011 07:02:44 +0000 (02:02 -0500)
This teaches svn-fe to incrementally import into an existing
repository (at last!) at the expense of a less convenient UI.  Think of
it as growing pains.  This opens the door to many excellent things,
and it would be a bad idea to discourage people from building on it
for much longer.

* db/vcs-svn-incremental:
  vcs-svn: avoid using ls command twice
  vcs-svn: use mark from previous import for parent commit
  vcs-svn: handle filenames with dq correctly
  vcs-svn: quote paths correctly for ls command
  vcs-svn: eliminate repo_tree structure
  vcs-svn: add a comment before each commit
  vcs-svn: save marks for imported commits
  vcs-svn: use higher mark numbers for blobs
  vcs-svn: set up channel to read fast-import cat-blob response

Conflicts:
t/t9010-svn-fe.sh
vcs-svn/fast_export.c
vcs-svn/fast_export.h
vcs-svn/repo_tree.c
vcs-svn/svndump.c

1  2 
t/t9010-svn-fe.sh
vcs-svn/fast_export.c
vcs-svn/fast_export.h
vcs-svn/repo_tree.h
vcs-svn/string_pool.c
vcs-svn/svndump.c

index 6f6175a8f7d9b0c6e6334f89ff21b74f067d6532,720fd6b5a32e88f0b58614e0bea4cc3e7a1e0e07..003395c5f6f05bf7afe2aef35146518a3323af02
@@@ -370,115 -488,7 +488,113 @@@ test_expect_failure PIPE 'change file m
        test_cmp hello actual.target
  '
  
- test_expect_success 'NUL in property value' '
++test_expect_success PIPE 'NUL in property value' '
 +      reinit_git &&
 +      echo "commit message" >expect.message &&
 +      {
 +              properties \
 +                      unimportant "something with a NUL (Q)" \
 +                      svn:log "commit message"&&
 +              echo PROPS-END
 +      } |
 +      q_to_nul >props &&
 +      {
 +              cat <<-\EOF &&
 +              SVN-fs-dump-format-version: 3
 +
 +              Revision-number: 1
 +              EOF
 +              echo Prop-content-length: $(wc -c <props) &&
 +              echo Content-length: $(wc -c <props) &&
 +              echo &&
 +              cat props
 +      } >nulprop.dump &&
-       test-svn-fe nulprop.dump >stream &&
-       git fast-import <stream &&
++      try_dump nulprop.dump &&
 +      git diff-tree --always -s --format=%s HEAD >actual.message &&
 +      test_cmp expect.message actual.message
 +'
 +
- test_expect_success 'NUL in log message, file content, and property name' '
++test_expect_success PIPE 'NUL in log message, file content, and property name' '
 +      # Caveat: svnadmin 1.6.16 (r1073529) truncates at \0 in the
 +      # svn:specialQnotreally example.
 +      reinit_git &&
 +      cat >expect <<-\EOF &&
 +      OBJID
 +      :100644 100644 OBJID OBJID M    greeting
 +      OBJID
 +      :000000 100644 OBJID OBJID A    greeting
 +      EOF
 +      printf "\n%s\n" "something with an ASCII NUL (Q)" >expect.message &&
 +      printf "%s\n" "helQo" >expect.hello1 &&
 +      printf "%s\n" "link hello" >expect.hello2 &&
 +      {
 +              properties svn:log "something with an ASCII NUL (Q)" &&
 +              echo PROPS-END
 +      } |
 +      q_to_nul >props &&
 +      {
 +              q_to_nul <<-\EOF &&
 +              SVN-fs-dump-format-version: 3
 +
 +              Revision-number: 1
 +              Prop-content-length: 10
 +              Content-length: 10
 +
 +              PROPS-END
 +
 +              Node-path: greeting
 +              Node-kind: file
 +              Node-action: add
 +              Prop-content-length: 10
 +              Text-content-length: 6
 +              Content-length: 16
 +
 +              PROPS-END
 +              helQo
 +
 +              Revision-number: 2
 +              EOF
 +              echo Prop-content-length: $(wc -c <props) &&
 +              echo Content-length: $(wc -c <props) &&
 +              echo &&
 +              cat props &&
 +              q_to_nul <<-\EOF
 +
 +              Node-path: greeting
 +              Node-kind: file
 +              Node-action: change
 +              Prop-content-length: 43
 +              Text-content-length: 11
 +              Content-length: 54
 +
 +              K 21
 +              svn:specialQnotreally
 +              V 1
 +              *
 +              PROPS-END
 +              link hello
 +              EOF
 +      } >8bitclean.dump &&
-       test-svn-fe 8bitclean.dump >stream &&
-       git fast-import <stream &&
++      try_dump 8bitclean.dump &&
 +      {
 +              git rev-list HEAD |
 +              git diff-tree --root --stdin |
 +              sed "s/$_x40/OBJID/g"
 +      } >actual &&
 +      {
 +              git cat-file commit HEAD | nul_to_q &&
 +              echo
 +      } |
 +      sed -ne "/^\$/,\$ p" >actual.message &&
 +      git cat-file blob HEAD^:greeting | nul_to_q >actual.hello1 &&
 +      git cat-file blob HEAD:greeting | nul_to_q >actual.hello2 &&
 +      test_cmp expect actual &&
 +      test_cmp expect.message actual.message &&
 +      test_cmp expect.hello1 actual.hello1 &&
 +      test_cmp expect.hello2 actual.hello2
 +'
 +
- test_expect_success 'change file mode and reiterate content' '
+ test_expect_success PIPE 'change file mode and reiterate content' '
        reinit_git &&
        cat >expect <<-\EOF &&
        OBJID
index 99ed70b88a5aaacbb48a9463f19ec198d7bedf84,f19db9ae8274e530250189d138616e18fbb071ba..ff980b3a2aad56c13d129bdd8b24091de8981865
@@@ -31,58 -59,131 +59,143 @@@ void fast_export_modify(uint32_t depth
  }
  
  static char gitsvnline[MAX_GITSVN_LINE_LEN];
- void fast_export_commit(uint32_t revision, const char *author,
 -void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
 -                      uint32_t uuid, uint32_t url,
++void fast_export_begin_commit(uint32_t revision, const char *author,
 +                      const struct strbuf *log,
 +                      const char *uuid, const char *url,
                        unsigned long timestamp)
  {
 +      static const struct strbuf empty = STRBUF_INIT;
        if (!log)
 -              log = "";
 -      if (~uuid && ~url) {
 +              log = &empty;
 +      if (*uuid && *url) {
                snprintf(gitsvnline, MAX_GITSVN_LINE_LEN,
                                "\n\ngit-svn-id: %s@%"PRIu32" %s\n",
 -                               pool_fetch(url), revision, pool_fetch(uuid));
 +                               url, revision, uuid);
        } else {
                *gitsvnline = '\0';
        }
        printf("commit refs/heads/master\n");
+       printf("mark :%"PRIu32"\n", revision);
        printf("committer %s <%s@%s> %ld +0000\n",
 -                 ~author ? pool_fetch(author) : "nobody",
 -                 ~author ? pool_fetch(author) : "nobody",
 -                 ~uuid ? pool_fetch(uuid) : "local", timestamp);
 -      printf("data %"PRIu32"\n%s%s\n",
 -                 (uint32_t) (strlen(log) + strlen(gitsvnline)),
 -                 log, gitsvnline);
 +                 *author ? author : "nobody",
 +                 *author ? author : "nobody",
 +                 *uuid ? uuid : "local", timestamp);
 +      printf("data %"PRIuMAX"\n",
 +              (uintmax_t) (log->len + strlen(gitsvnline)));
 +      fwrite(log->buf, log->len, 1, stdout);
 +      printf("%s\n", gitsvnline);
        if (!first_commit_done) {
                if (revision > 1)
-                       printf("from refs/heads/master^0\n");
+                       printf("from :%"PRIu32"\n", revision - 1);
                first_commit_done = 1;
        }
-       repo_diff(revision - 1, revision);
-       fputc('\n', stdout);
+ }
  
+ void fast_export_end_commit(uint32_t revision)
+ {
        printf("progress Imported commit %"PRIu32".\n\n", revision);
  }
  
- void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input)
+ static void ls_from_rev(uint32_t rev, uint32_t depth, const uint32_t *path)
+ {
+       /* ls :5 path/to/old/file */
+       printf("ls :%"PRIu32" \"", rev);
+       pool_print_seq_q(depth, path, '/', stdout);
+       printf("\"\n");
+       fflush(stdout);
+ }
+ static void ls_from_active_commit(uint32_t depth, const uint32_t *path)
+ {
+       /* ls "path/to/file" */
+       printf("ls \"");
+       pool_print_seq_q(depth, path, '/', stdout);
+       printf("\"\n");
+       fflush(stdout);
+ }
+ static const char *get_response_line(void)
+ {
+       const char *line = buffer_read_line(&report_buffer);
+       if (line)
+               return line;
+       if (buffer_ferror(&report_buffer))
+               die_errno("error reading from fast-import");
+       die("unexpected end of fast-import feedback");
+ }
 +static void die_short_read(struct line_buffer *input)
 +{
 +      if (buffer_ferror(input))
 +              die_errno("error reading dump file");
 +      die("invalid dump: unexpected end of file");
 +}
 +
+ void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input)
  {
        if (mode == REPO_MODE_LNK) {
                /* svn symlink blobs start with "link " */
 -              buffer_skip_bytes(input, 5);
                len -= 5;
 +              if (buffer_skip_bytes(input, 5) != 5)
 +                      die_short_read(input);
        }
-       printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len);
+       printf("data %"PRIu32"\n", len);
 -      buffer_copy_bytes(input, len);
 +      if (buffer_copy_bytes(input, len) != len)
 +              die_short_read(input);
        fputc('\n', stdout);
  }
+ static int parse_ls_response(const char *response, uint32_t *mode,
+                                       struct strbuf *dataref)
+ {
+       const char *tab;
+       const char *response_end;
+       assert(response);
+       response_end = response + strlen(response);
+       if (*response == 'm') { /* Missing. */
+               errno = ENOENT;
+               return -1;
+       }
+       /* Mode. */
+       if (response_end - response < strlen("100644") ||
+           response[strlen("100644")] != ' ')
+               die("invalid ls response: missing mode: %s", response);
+       *mode = 0;
+       for (; *response != ' '; response++) {
+               char ch = *response;
+               if (ch < '0' || ch > '7')
+                       die("invalid ls response: mode is not octal: %s", response);
+               *mode *= 8;
+               *mode += ch - '0';
+       }
+       /* ' blob ' or ' tree ' */
+       if (response_end - response < strlen(" blob ") ||
+           (response[1] != 'b' && response[1] != 't'))
+               die("unexpected ls response: not a tree or blob: %s", response);
+       response += strlen(" blob ");
+       /* Dataref. */
+       tab = memchr(response, '\t', response_end - response);
+       if (!tab)
+               die("invalid ls response: missing tab: %s", response);
+       strbuf_add(dataref, response, tab - response);
+       return 0;
+ }
+ int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path,
+                               uint32_t *mode, struct strbuf *dataref)
+ {
+       ls_from_rev(rev, depth, path);
+       return parse_ls_response(get_response_line(), mode, dataref);
+ }
+ int fast_export_ls(uint32_t depth, const uint32_t *path,
+                               uint32_t *mode, struct strbuf *dataref)
+ {
+       ls_from_active_commit(depth, path);
+       return parse_ls_response(get_response_line(), mode, dataref);
+ }
index 33a8fe996f5fc025f587c1d09ae3f81e1786dbbb,633d21944e83b795a170021be9290114e6e28f10..9c522d177d4959266ae6756546a0b30789dbc681
@@@ -1,16 -1,25 +1,26 @@@
  #ifndef FAST_EXPORT_H_
  #define FAST_EXPORT_H_
  
- #include "line_buffer.h"
  struct strbuf;
+ struct line_buffer;
  
- void fast_export_delete(uint32_t depth, uint32_t *path);
- void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
-                       uint32_t mark);
- void fast_export_commit(uint32_t revision, const char *author,
+ void fast_export_init(int fd);
+ void fast_export_deinit(void);
+ void fast_export_reset(void);
+ void fast_export_delete(uint32_t depth, const uint32_t *path);
+ void fast_export_modify(uint32_t depth, const uint32_t *path,
+                       uint32_t mode, const char *dataref);
 -void fast_export_begin_commit(uint32_t revision, uint32_t author, char *log,
 -                      uint32_t uuid, uint32_t url, unsigned long timestamp);
++void fast_export_begin_commit(uint32_t revision, const char *author,
 +                      const struct strbuf *log, const char *uuid,
 +                      const char *url, unsigned long timestamp);
- void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len,
-                     struct line_buffer *input);
+ void fast_export_end_commit(uint32_t revision);
+ void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
+ /* If there is no such file at that rev, returns -1, errno == ENOENT. */
+ int fast_export_ls_rev(uint32_t rev, uint32_t depth, const uint32_t *path,
+                       uint32_t *mode_out, struct strbuf *dataref_out);
+ int fast_export_ls(uint32_t depth, const uint32_t *path,
+                       uint32_t *mode_out, struct strbuf *dataref_out);
  
  #endif
index 37bde2e37484071998edbe790d38d7b19eb24fb5,f506352dc2d560442f7c45c7df35516fe3fef69e..ce69fa7e58536d28fd7e262562bb7de587602cdc
@@@ -14,12 -14,10 +14,11 @@@ struct strbuf
  uint32_t next_blob_mark(void);
  void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst);
  void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark);
- uint32_t repo_read_path(const uint32_t *path);
- uint32_t repo_read_mode(const uint32_t *path);
+ const char *repo_read_path(const uint32_t *path, uint32_t *mode_out);
  void repo_delete(uint32_t *path);
 -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
 -               uint32_t url, long unsigned timestamp);
 +void repo_commit(uint32_t revision, const char *author,
 +              const struct strbuf *log, const char *uuid, const char *url,
 +              long unsigned timestamp);
  void repo_diff(uint32_t r1, uint32_t r2);
  void repo_init(void);
  void repo_reset(void);
Simple merge
index 572a99596657b85cbda372a9107a43c6c2fb44b2,99a5ba0d1008083a2a0c868d1984f44c03ba4232..35a8af3c9f7faee7692bbebbd39308a37335ad5e
  #include "repo_tree.h"
  #include "fast_export.h"
  #include "line_buffer.h"
 -#include "obj_pool.h"
  #include "string_pool.h"
 +#include "strbuf.h"
 +
 +/*
 + * Compare start of string to literal of equal length;
 + * must be guarded by length test.
 + */
 +#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1)
  
+ #define REPORT_FILENO 3
  #define NODEACT_REPLACE 4
  #define NODEACT_DELETE 3
  #define NODEACT_ADD 2
@@@ -265,18 -276,34 +278,35 @@@ static void handle_node(void
        /*
         * Save the result.
         */
-       repo_add(node_ctx.dst, node_ctx.type, mark);
-       if (have_text)
-               fast_export_blob(node_ctx.type, mark,
-                                node_ctx.textLength, &input);
+       if (type == REPO_MODE_DIR)      /* directories are not tracked. */
+               return;
+       assert(old_data);
+       if (old_data == empty_blob)
+               /* For the fast_export_* functions, NULL means empty. */
+               old_data = NULL;
+       if (!have_text) {
+               fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
+                                       node_ctx.type, old_data);
+               return;
+       }
+       fast_export_modify(REPO_MAX_PATH_DEPTH, node_ctx.dst,
+                               node_ctx.type, "inline");
+       fast_export_data(node_ctx.type, node_ctx.textLength, &input);
+ }
+ static void begin_revision(void)
+ {
+       if (!rev_ctx.revision)  /* revision 0 gets no git commit. */
+               return;
 -      fast_export_begin_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
 -              dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
++      fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
++              &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
++              rev_ctx.timestamp);
  }
  
- static void handle_revision(void)
+ static void end_revision(void)
  {
        if (rev_ctx.revision)
-               repo_commit(rev_ctx.revision, rev_ctx.author.buf,
-                       &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
-                       rev_ctx.timestamp);
+               fast_export_end_commit(rev_ctx.revision);
  }
  
  void svndump_read(const char *url)
                        if (dump_ctx.version > 3)
                                die("expected svn dump format version <= 3, found %"PRIu32,
                                    dump_ctx.version);
 -              } else if (key == keys.uuid) {
 -                      dump_ctx.uuid = pool_intern(val);
 -              } else if (key == keys.revision_number) {
 +                      break;
 +              case sizeof("UUID"):
 +                      if (constcmp(t, "UUID"))
 +                              continue;
 +                      strbuf_reset(&dump_ctx.uuid);
 +                      strbuf_addstr(&dump_ctx.uuid, val);
 +                      break;
 +              case sizeof("Revision-number"):
 +                      if (constcmp(t, "Revision-number"))
 +                              continue;
                        if (active_ctx == NODE_CTX)
                                handle_node();
+                       if (active_ctx == REV_CTX)
+                               begin_revision();
                        if (active_ctx != DUMP_CTX)
-                               handle_revision();
+                               end_revision();
                        active_ctx = REV_CTX;
                        reset_rev_ctx(atoi(val));
 -              } else if (key == keys.node_path) {
 -                      if (active_ctx == NODE_CTX)
 -                              handle_node();
 -                      if (active_ctx == REV_CTX)
 -                              begin_revision();
 -                      active_ctx = NODE_CTX;
 -                      reset_node_ctx(val);
 -              } else if (key == keys.node_kind) {
 +                      break;
 +              case sizeof("Node-path"):
 +                      if (prefixcmp(t, "Node-"))
 +                              continue;
 +                      if (!constcmp(t + strlen("Node-"), "path")) {
 +                              if (active_ctx == NODE_CTX)
 +                                      handle_node();
++                              if (active_ctx == REV_CTX)
++                                      begin_revision();
 +                              active_ctx = NODE_CTX;
 +                              reset_node_ctx(val);
 +                              break;
 +                      }
 +                      if (constcmp(t + strlen("Node-"), "kind"))
 +                              continue;
                        if (!strcmp(val, "dir"))
                                node_ctx.type = REPO_MODE_DIR;
                        else if (!strcmp(val, "file"))
                                read_props();
                        } else if (active_ctx == NODE_CTX) {
                                handle_node();
-                               active_ctx = REV_CTX;
+                               active_ctx = INTERNODE_CTX;
                        } else {
                                fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
 -                              buffer_skip_bytes(&input, len);
 +                              if (buffer_skip_bytes(&input, len) != len)
 +                                      die_short_read();
                        }
                }
        }
 +      if (buffer_ferror(&input))
 +              die_short_read();
        if (active_ctx == NODE_CTX)
                handle_node();
+       if (active_ctx == REV_CTX)
+               begin_revision();
        if (active_ctx != DUMP_CTX)
-               handle_revision();
+               end_revision();
  }
  
  int svndump_init(const char *filename)
  {
        if (buffer_init(&input, filename))
                return error("cannot open %s: %s", filename, strerror(errno));
-       repo_init();
+       fast_export_init(REPORT_FILENO);
 -      reset_dump_ctx(~0);
 +      strbuf_init(&dump_ctx.uuid, 4096);
 +      strbuf_init(&dump_ctx.url, 4096);
 +      strbuf_init(&rev_ctx.log, 4096);
 +      strbuf_init(&rev_ctx.author, 4096);
 +      reset_dump_ctx(NULL);
        reset_rev_ctx(0);
        reset_node_ctx(NULL);
 -      init_keys();
        return 0;
  }
  
  void svndump_deinit(void)
  {
-       repo_reset();
 -      log_reset();
+       fast_export_deinit();
 -      reset_dump_ctx(~0);
 +      reset_dump_ctx(NULL);
        reset_rev_ctx(0);
        reset_node_ctx(NULL);
 +      strbuf_release(&rev_ctx.log);
        if (buffer_deinit(&input))
                fprintf(stderr, "Input error\n");
        if (ferror(stdout))
  
  void svndump_reset(void)
  {
 -      log_reset();
+       fast_export_reset();
        buffer_reset(&input);
-       repo_reset();
 -      reset_dump_ctx(~0);
 -      reset_rev_ctx(0);
 -      reset_node_ctx(NULL);
 +      strbuf_release(&dump_ctx.uuid);
 +      strbuf_release(&dump_ctx.url);
 +      strbuf_release(&rev_ctx.log);
 +      strbuf_release(&rev_ctx.author);
  }