Rework unquote_c_style to work on a strbuf.
authorPierre Habouzit <madcoder@debian.org>
Wed, 19 Sep 2007 22:42:14 +0000 (00:42 +0200)
committerJunio C Hamano <gitster@pobox.com>
Fri, 21 Sep 2007 06:32:18 +0000 (23:32 -0700)
If the gain is not obvious in the diffstat, the resulting code is more
readable, _and_ in checkout-index/update-index we now reuse the same buffer
to unquote strings instead of always freeing/mallocing.

This also is more coherent with the next patch that reworks quoting
functions.

The quoting function is also made more efficient scanning for backslashes
and treating portions of strings without a backslash at once.

Signed-off-by: Pierre Habouzit <madcoder@debian.org>
builtin-apply.c
builtin-checkout-index.c
builtin-update-index.c
fast-import.c
mktree.c
quote.c
quote.h

index db337f1aca48d7f7ec8b5e3c4c9230621d346d81..1cf68ede1263dd56466689f5c94eedfe194c45da 100644 (file)
@@ -231,35 +231,33 @@ static char *find_name(const char *line, char *def, int p_value, int terminate)
 {
        int len;
        const char *start = line;
-       char *name;
 
        if (*line == '"') {
+               struct strbuf name;
+
                /* Proposed "new-style" GNU patch/diff format; see
                 * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
                 */
-               name = unquote_c_style(line, NULL);
-               if (name) {
-                       char *cp = name;
-                       while (p_value) {
+               strbuf_init(&name, 0);
+               if (!unquote_c_style(&name, line, NULL)) {
+                       char *cp;
+
+                       for (cp = name.buf; p_value; p_value--) {
                                cp = strchr(cp, '/');
                                if (!cp)
                                        break;
                                cp++;
-                               p_value--;
                        }
                        if (cp) {
                                /* name can later be freed, so we need
                                 * to memmove, not just return cp
                                 */
-                               memmove(name, cp, strlen(cp) + 1);
+                               strbuf_remove(&name, 0, cp - name.buf);
                                free(def);
-                               return name;
-                       }
-                       else {
-                               free(name);
-                               name = NULL;
+                               return name.buf;
                        }
                }
+               strbuf_release(&name);
        }
 
        for (;;) {
@@ -567,29 +565,30 @@ static const char *stop_at_slash(const char *line, int llen)
  */
 static char *git_header_name(char *line, int llen)
 {
-       int len;
        const char *name;
        const char *second = NULL;
+       size_t len;
 
        line += strlen("diff --git ");
        llen -= strlen("diff --git ");
 
        if (*line == '"') {
                const char *cp;
-               char *first = unquote_c_style(line, &second);
-               if (!first)
-                       return NULL;
+               struct strbuf first;
+               struct strbuf sp;
+
+               strbuf_init(&first, 0);
+               strbuf_init(&sp, 0);
+
+               if (unquote_c_style(&first, line, &second))
+                       goto free_and_fail1;
 
                /* advance to the first slash */
-               cp = stop_at_slash(first, strlen(first));
-               if (!cp || cp == first) {
-                       /* we do not accept absolute paths */
-               free_first_and_fail:
-                       free(first);
-                       return NULL;
-               }
-               len = strlen(cp+1);
-               memmove(first, cp+1, len+1); /* including NUL */
+               cp = stop_at_slash(first.buf, first.len);
+               /* we do not accept absolute paths */
+               if (!cp || cp == first.buf)
+                       goto free_and_fail1;
+               strbuf_remove(&first, 0, cp + 1 - first.buf);
 
                /* second points at one past closing dq of name.
                 * find the second name.
@@ -598,40 +597,40 @@ static char *git_header_name(char *line, int llen)
                        second++;
 
                if (line + llen <= second)
-                       goto free_first_and_fail;
+                       goto free_and_fail1;
                if (*second == '"') {
-                       char *sp = unquote_c_style(second, NULL);
-                       if (!sp)
-                               goto free_first_and_fail;
-                       cp = stop_at_slash(sp, strlen(sp));
-                       if (!cp || cp == sp) {
-                       free_both_and_fail:
-                               free(sp);
-                               goto free_first_and_fail;
-                       }
+                       if (unquote_c_style(&sp, second, NULL))
+                               goto free_and_fail1;
+                       cp = stop_at_slash(sp.buf, sp.len);
+                       if (!cp || cp == sp.buf)
+                               goto free_and_fail1;
                        /* They must match, otherwise ignore */
-                       if (strcmp(cp+1, first))
-                               goto free_both_and_fail;
-                       free(sp);
-                       return first;
+                       if (strcmp(cp + 1, first.buf))
+                               goto free_and_fail1;
+                       strbuf_release(&sp);
+                       return first.buf;
                }
 
                /* unquoted second */
                cp = stop_at_slash(second, line + llen - second);
                if (!cp || cp == second)
-                       goto free_first_and_fail;
+                       goto free_and_fail1;
                cp++;
-               if (line + llen - cp != len + 1 ||
-                   memcmp(first, cp, len))
-                       goto free_first_and_fail;
-               return first;
+               if (line + llen - cp != first.len + 1 ||
+                   memcmp(first.buf, cp, first.len))
+                       goto free_and_fail1;
+               return first.buf;
+
+       free_and_fail1:
+               strbuf_release(&first);
+               strbuf_release(&sp);
+               return NULL;
        }
 
        /* unquoted first name */
        name = stop_at_slash(line, llen);
        if (!name || name == line)
                return NULL;
-
        name++;
 
        /* since the first name is unquoted, a dq if exists must be
@@ -639,28 +638,30 @@ static char *git_header_name(char *line, int llen)
         */
        for (second = name; second < line + llen; second++) {
                if (*second == '"') {
-                       const char *cp = second;
+                       struct strbuf sp;
                        const char *np;
-                       char *sp = unquote_c_style(second, NULL);
-
-                       if (!sp)
-                               return NULL;
-                       np = stop_at_slash(sp, strlen(sp));
-                       if (!np || np == sp) {
-                       free_second_and_fail:
-                               free(sp);
-                               return NULL;
-                       }
+
+                       strbuf_init(&sp, 0);
+                       if (unquote_c_style(&sp, second, NULL))
+                               goto free_and_fail2;
+
+                       np = stop_at_slash(sp.buf, sp.len);
+                       if (!np || np == sp.buf)
+                               goto free_and_fail2;
                        np++;
-                       len = strlen(np);
-                       if (len < cp - name &&
+
+                       len = sp.buf + sp.len - np;
+                       if (len < second - name &&
                            !strncmp(np, name, len) &&
                            isspace(name[len])) {
                                /* Good */
-                               memmove(sp, np, len + 1);
-                               return sp;
+                               strbuf_remove(&sp, 0, np - sp.buf);
+                               return sp.buf;
                        }
-                       goto free_second_and_fail;
+
+               free_and_fail2:
+                       strbuf_release(&sp);
+                       return NULL;
                }
        }
 
index a18ecc4bab7541477144402d4d680277d95c4e9a..e6264c4bf0ef537ea7ea2127133d1fad5a33b7d6 100644 (file)
@@ -270,26 +270,27 @@ int cmd_checkout_index(int argc, const char **argv, const char *prefix)
        }
 
        if (read_from_stdin) {
-               struct strbuf buf;
+               struct strbuf buf, nbuf;
+
                if (all)
                        die("git-checkout-index: don't mix '--all' and '--stdin'");
+
                strbuf_init(&buf, 0);
-               while (1) {
-                       char *path_name;
+               strbuf_init(&nbuf, 0);
+               while (strbuf_getline(&buf, stdin, line_termination) != EOF) {
                        const char *p;
-                       if (strbuf_getline(&buf, stdin, line_termination) == EOF)
-                               break;
-                       if (line_termination && buf.buf[0] == '"')
-                               path_name = unquote_c_style(buf.buf, NULL);
-                       else
-                               path_name = buf.buf;
-                       p = prefix_path(prefix, prefix_length, path_name);
+                       if (line_termination && buf.buf[0] == '"') {
+                               strbuf_reset(&nbuf);
+                               if (unquote_c_style(&nbuf, buf.buf, NULL))
+                                       die("line is badly quoted");
+                               strbuf_swap(&buf, &nbuf);
+                       }
+                       p = prefix_path(prefix, prefix_length, buf.buf);
                        checkout_file(p, prefix_length);
-                       if (p < path_name || p > path_name + strlen(path_name))
+                       if (p < buf.buf || p > buf.buf + buf.len)
                                free((char *)p);
-                       if (path_name != buf.buf)
-                               free(path_name);
                }
+               strbuf_release(&nbuf);
                strbuf_release(&buf);
        }
 
index acd5ab592641064c4dab1f24b3e1cdb15b1326bb..c76879ed7f8fb4fc46bd4193746e6bbfa50fa01a 100644 (file)
@@ -295,8 +295,11 @@ static void update_one(const char *path, const char *prefix, int prefix_length)
 static void read_index_info(int line_termination)
 {
        struct strbuf buf;
+       struct strbuf uq;
+
        strbuf_init(&buf, 0);
-       while (1) {
+       strbuf_init(&uq, 0);
+       while (strbuf_getline(&buf, stdin, line_termination) != EOF) {
                char *ptr, *tab;
                char *path_name;
                unsigned char sha1[20];
@@ -320,9 +323,6 @@ static void read_index_info(int line_termination)
                 * This format is to put higher order stages into the
                 * index file and matches git-ls-files --stage output.
                 */
-               if (strbuf_getline(&buf, stdin, line_termination) == EOF)
-                       break;
-
                errno = 0;
                ul = strtoul(buf.buf, &ptr, 8);
                if (ptr == buf.buf || *ptr != ' '
@@ -347,15 +347,17 @@ static void read_index_info(int line_termination)
                if (get_sha1_hex(tab - 40, sha1) || tab[-41] != ' ')
                        goto bad_line;
 
-               if (line_termination && ptr[0] == '"')
-                       path_name = unquote_c_style(ptr, NULL);
-               else
-                       path_name = ptr;
+               path_name = ptr;
+               if (line_termination && path_name[0] == '"') {
+                       strbuf_reset(&uq);
+                       if (unquote_c_style(&uq, path_name, NULL)) {
+                               die("git-update-index: bad quoting of path name");
+                       }
+                       path_name = uq.buf;
+               }
 
                if (!verify_path(path_name)) {
                        fprintf(stderr, "Ignoring path %s\n", path_name);
-                       if (path_name != ptr)
-                               free(path_name);
                        continue;
                }
 
@@ -383,6 +385,7 @@ static void read_index_info(int line_termination)
                die("malformed index info %s", buf.buf);
        }
        strbuf_release(&buf);
+       strbuf_release(&uq);
 }
 
 static const char update_index_usage[] =
@@ -705,26 +708,26 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
                        free((char*)p);
        }
        if (read_from_stdin) {
-               struct strbuf buf;
+               struct strbuf buf, nbuf;
+
                strbuf_init(&buf, 0);
-               while (1) {
-                       char *path_name;
+               strbuf_init(&nbuf, 0);
+               while (strbuf_getline(&buf, stdin, line_termination) != EOF) {
                        const char *p;
-                       if (strbuf_getline(&buf, stdin, line_termination) == EOF)
-                               break;
-                       if (line_termination && buf.buf[0] == '"')
-                               path_name = unquote_c_style(buf.buf, NULL);
-                       else
-                               path_name = buf.buf;
-                       p = prefix_path(prefix, prefix_length, path_name);
+                       if (line_termination && buf.buf[0] == '"') {
+                               strbuf_reset(&nbuf);
+                               if (unquote_c_style(&nbuf, buf.buf, NULL))
+                                       die("line is badly quoted");
+                               strbuf_swap(&buf, &nbuf);
+                       }
+                       p = prefix_path(prefix, prefix_length, buf.buf);
                        update_one(p, NULL, 0);
                        if (set_executable_bit)
                                chmod_path(set_executable_bit, p);
-                       if (p < path_name || p > path_name + strlen(path_name))
-                               free((char*) p);
-                       if (path_name != buf.buf)
-                               free(path_name);
+                       if (p < buf.buf || p > buf.buf + buf.len)
+                               free((char *)p);
                }
+               strbuf_release(&nbuf);
                strbuf_release(&buf);
        }
 
index eddae22ea11c6045f2e7ed12b98163364715575a..a870a44e3d1a266f856738ac1242158482dc59dc 100644 (file)
@@ -1759,7 +1759,7 @@ static void load_branch(struct branch *b)
 static void file_change_m(struct branch *b)
 {
        const char *p = command_buf.buf + 2;
-       char *p_uq;
+       static struct strbuf uq = STRBUF_INIT;
        const char *endp;
        struct object_entry *oe = oe;
        unsigned char sha1[20];
@@ -1797,18 +1797,20 @@ static void file_change_m(struct branch *b)
        if (*p++ != ' ')
                die("Missing space after SHA1: %s", command_buf.buf);
 
-       p_uq = unquote_c_style(p, &endp);
-       if (p_uq) {
+       strbuf_reset(&uq);
+       if (!unquote_c_style(&uq, p, &endp)) {
                if (*endp)
                        die("Garbage after path in: %s", command_buf.buf);
-               p = p_uq;
+               p = uq.buf;
        }
 
        if (inline_data) {
                static struct strbuf buf = STRBUF_INIT;
 
-               if (!p_uq)
-                       p = p_uq = xstrdup(p);
+               if (p != uq.buf) {
+                       strbuf_addstr(&uq, p);
+                       p = uq.buf;
+               }
                read_next_command();
                cmd_data(&buf);
                store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0);
@@ -1826,56 +1828,54 @@ static void file_change_m(struct branch *b)
        }
 
        tree_content_set(&b->branch_tree, p, sha1, S_IFREG | mode, NULL);
-       free(p_uq);
 }
 
 static void file_change_d(struct branch *b)
 {
        const char *p = command_buf.buf + 2;
-       char *p_uq;
+       static struct strbuf uq = STRBUF_INIT;
        const char *endp;
 
-       p_uq = unquote_c_style(p, &endp);
-       if (p_uq) {
+       strbuf_reset(&uq);
+       if (!unquote_c_style(&uq, p, &endp)) {
                if (*endp)
                        die("Garbage after path in: %s", command_buf.buf);
-               p = p_uq;
+               p = uq.buf;
        }
        tree_content_remove(&b->branch_tree, p, NULL);
-       free(p_uq);
 }
 
 static void file_change_cr(struct branch *b, int rename)
 {
        const char *s, *d;
-       char *s_uq, *d_uq;
+       static struct strbuf s_uq = STRBUF_INIT;
+       static struct strbuf d_uq = STRBUF_INIT;
        const char *endp;
        struct tree_entry leaf;
 
        s = command_buf.buf + 2;
-       s_uq = unquote_c_style(s, &endp);
-       if (s_uq) {
+       strbuf_reset(&s_uq);
+       if (!unquote_c_style(&s_uq, s, &endp)) {
                if (*endp != ' ')
                        die("Missing space after source: %s", command_buf.buf);
-       }
-       else {
+       } else {
                endp = strchr(s, ' ');
                if (!endp)
                        die("Missing space after source: %s", command_buf.buf);
-               s_uq = xmemdupz(s, endp - s);
+               strbuf_add(&s_uq, s, endp - s);
        }
-       s = s_uq;
+       s = s_uq.buf;
 
        endp++;
        if (!*endp)
                die("Missing dest: %s", command_buf.buf);
 
        d = endp;
-       d_uq = unquote_c_style(d, &endp);
-       if (d_uq) {
+       strbuf_reset(&d_uq);
+       if (!unquote_c_style(&d_uq, d, &endp)) {
                if (*endp)
                        die("Garbage after dest in: %s", command_buf.buf);
-               d = d_uq;
+               d = d_uq.buf;
        }
 
        memset(&leaf, 0, sizeof(leaf));
@@ -1889,9 +1889,6 @@ static void file_change_cr(struct branch *b, int rename)
                leaf.versions[1].sha1,
                leaf.versions[1].mode,
                leaf.tree);
-
-       free(s_uq);
-       free(d_uq);
 }
 
 static void file_change_deleteall(struct branch *b)
index 9c137dec4599c97d2827abda68692dc714f2d295..e0da110a98d3a7376dc78df71fabc10fc5664296 100644 (file)
--- a/mktree.c
+++ b/mktree.c
@@ -66,6 +66,7 @@ static const char mktree_usage[] = "git-mktree [-z]";
 int main(int ac, char **av)
 {
        struct strbuf sb;
+       struct strbuf p_uq;
        unsigned char sha1[20];
        int line_termination = '\n';
 
@@ -82,14 +83,13 @@ int main(int ac, char **av)
        }
 
        strbuf_init(&sb, 0);
-       while (1) {
+       strbuf_init(&p_uq, 0);
+       while (strbuf_getline(&sb, stdin, line_termination) != EOF) {
                char *ptr, *ntr;
                unsigned mode;
                enum object_type type;
                char *path;
 
-               if (strbuf_getline(&sb, stdin, line_termination) == EOF)
-                       break;
                ptr = sb.buf;
                /* Input is non-recursive ls-tree output format
                 * mode SP type SP sha1 TAB name
@@ -109,18 +109,21 @@ int main(int ac, char **av)
                *ntr++ = 0; /* now at the beginning of SHA1 */
                if (type != type_from_string(ptr))
                        die("object type %s mismatch (%s)", ptr, typename(type));
-               ntr += 41; /* at the beginning of name */
-               if (line_termination && ntr[0] == '"')
-                       path = unquote_c_style(ntr, NULL);
-               else
-                       path = ntr;
 
-               append_to_tree(mode, sha1, path);
+               path = ntr + 41;  /* at the beginning of name */
+               if (line_termination && path[0] == '"') {
+                       strbuf_reset(&p_uq);
+                       if (unquote_c_style(&p_uq, path, NULL)) {
+                               die("invalid quoting");
+                       }
+                       path = p_uq.buf;
+               }
 
-               if (path != ntr)
-                       free(path);
+               append_to_tree(mode, sha1, path);
        }
+       strbuf_release(&p_uq);
        strbuf_release(&sb);
+
        write_tree(sha1);
        puts(sha1_to_hex(sha1));
        exit(0);
diff --git a/quote.c b/quote.c
index d88bf7515932bba96c694478c3b51c85549fa92a..7771c9c678d1b1508ef2d582eb218e57e3caeec1 100644 (file)
--- a/quote.c
+++ b/quote.c
@@ -239,73 +239,71 @@ int quote_c_style(const char *name, char *outbuf, FILE *outfp, int no_dq)
 /*
  * C-style name unquoting.
  *
- * Quoted should point at the opening double quote.  Returns
- * an allocated memory that holds unquoted name, which the caller
- * should free when done.  Updates endp pointer to point at
- * one past the ending double quote if given.
+ * Quoted should point at the opening double quote.
+ * + Returns 0 if it was able to unquote the string properly, and appends the
+ *   result in the strbuf `sb'.
+ * + Returns -1 in case of error, and doesn't touch the strbuf. Though note
+ *   that this function will allocate memory in the strbuf, so calling
+ *   strbuf_release is mandatory whichever result unquote_c_style returns.
+ *
+ * Updates endp pointer to point at one past the ending double quote if given.
  */
-
-char *unquote_c_style(const char *quoted, const char **endp)
+int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp)
 {
-       const char *sp;
-       char *name = NULL, *outp = NULL;
-       int count = 0, ch, ac;
-
-#undef EMIT
-#define EMIT(c) (outp ? (*outp++ = (c)) : (count++))
+       size_t oldlen = sb->len, len;
+       int ch, ac;
 
        if (*quoted++ != '"')
-               return NULL;
+               return -1;
+
+       for (;;) {
+               len = strcspn(quoted, "\"\\");
+               strbuf_add(sb, quoted, len);
+               quoted += len;
 
-       while (1) {
-               /* first pass counts and allocates, second pass fills */
-               for (sp = quoted; (ch = *sp++) != '"'; ) {
-                       if (ch == '\\') {
-                               switch (ch = *sp++) {
-                               case 'a': ch = '\a'; break;
-                               case 'b': ch = '\b'; break;
-                               case 'f': ch = '\f'; break;
-                               case 'n': ch = '\n'; break;
-                               case 'r': ch = '\r'; break;
-                               case 't': ch = '\t'; break;
-                               case 'v': ch = '\v'; break;
-
-                               case '\\': case '"':
-                                       break; /* verbatim */
-
-                               case '0':
-                               case '1':
-                               case '2':
-                               case '3':
-                               case '4':
-                               case '5':
-                               case '6':
-                               case '7':
-                                       /* octal */
+               switch (*quoted++) {
+                 case '"':
+                       if (endp)
+                               *endp = quoted + 1;
+                       return 0;
+                 case '\\':
+                       break;
+                 default:
+                       goto error;
+               }
+
+               switch ((ch = *quoted++)) {
+               case 'a': ch = '\a'; break;
+               case 'b': ch = '\b'; break;
+               case 'f': ch = '\f'; break;
+               case 'n': ch = '\n'; break;
+               case 'r': ch = '\r'; break;
+               case 't': ch = '\t'; break;
+               case 'v': ch = '\v'; break;
+
+               case '\\': case '"':
+                       break; /* verbatim */
+
+               /* octal values with first digit over 4 overflow */
+               case '0': case '1': case '2': case '3':
                                        ac = ((ch - '0') << 6);
-                                       if ((ch = *sp++) < '0' || '7' < ch)
-                                               return NULL;
+                       if ((ch = *quoted++) < '0' || '7' < ch)
+                               goto error;
                                        ac |= ((ch - '0') << 3);
-                                       if ((ch = *sp++) < '0' || '7' < ch)
-                                               return NULL;
+                       if ((ch = *quoted++) < '0' || '7' < ch)
+                               goto error;
                                        ac |= (ch - '0');
                                        ch = ac;
                                        break;
                                default:
-                                       return NULL; /* malformed */
-                               }
+                       goto error;
                        }
-                       EMIT(ch);
+               strbuf_addch(sb, ch);
                }
 
-               if (name) {
-                       *outp = 0;
-                       if (endp)
-                               *endp = sp;
-                       return name;
-               }
-               outp = name = xmalloc(count + 1);
-       }
+  error:
+       strbuf_setlen(sb, oldlen);
+       return -1;
 }
 
 void write_name_quoted(const char *prefix, int prefix_len,
diff --git a/quote.h b/quote.h
index 8a59cc55d1dcfba728614b2d6494272ceafbf3a1..f48a3fc5843c72ab522463f8f025bf00c1e70c0c 100644 (file)
--- a/quote.h
+++ b/quote.h
@@ -43,9 +43,9 @@ extern int add_to_string(char **ptrp, int *sizep, const char *str, int quote);
  */
 extern char *sq_dequote(char *);
 
+extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
 extern int quote_c_style(const char *name, char *outbuf, FILE *outfp,
                         int nodq);
-extern char *unquote_c_style(const char *quoted, const char **endp);
 
 extern void write_name_quoted(const char *prefix, int prefix_len,
                              const char *name, int quote, FILE *out);