mailsplit and mailinfo: gracefully handle NUL characters
authorJohannes Schindelin <Johannes.Schindelin@gmx.de>
Fri, 16 May 2008 13:03:30 +0000 (14:03 +0100)
committerJunio C Hamano <gitster@pobox.com>
Sun, 25 May 2008 20:21:40 +0000 (13:21 -0700)
The function fgets() has a big problem with NUL characters: it reads
them, but nobody will know if the NUL comes from the file stream, or
was appended at the end of the line.

So implement a custom read_line_with_nul() function.

Noticed by Tommy Thorn.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin-mailinfo.c
builtin-mailsplit.c
builtin.h
t/t5100-mailinfo.sh
t/t5100/nul [new file with mode: 0644]

index 11f154b31fcbd5c788299855e7d69f54ae8c7e70..f0c420976f0f41ea0402a2c20f3f443dd0781b83 100644 (file)
@@ -641,7 +641,7 @@ static void decode_transfer_encoding(char *line, unsigned linesize)
        }
 }
 
-static int handle_filter(char *line, unsigned linesize);
+static int handle_filter(char *line, unsigned linesize, int linelen);
 
 static int find_boundary(void)
 {
@@ -669,7 +669,7 @@ again:
                                        "can't recover\n");
                        exit(1);
                }
-               handle_filter(newline, sizeof(newline));
+               handle_filter(newline, sizeof(newline), strlen(newline));
 
                /* skip to the next boundary */
                if (!find_boundary())
@@ -759,14 +759,14 @@ static int handle_commit_msg(char *line, unsigned linesize)
        return 0;
 }
 
-static int handle_patch(char *line)
+static int handle_patch(char *line, int len)
 {
-       fputs(line, patchfile);
+       fwrite(line, 1, len, patchfile);
        patch_lines++;
        return 0;
 }
 
-static int handle_filter(char *line, unsigned linesize)
+static int handle_filter(char *line, unsigned linesize, int linelen)
 {
        static int filter = 0;
 
@@ -779,7 +779,7 @@ static int handle_filter(char *line, unsigned linesize)
                        break;
                filter++;
        case 1:
-               if (!handle_patch(line))
+               if (!handle_patch(line, linelen))
                        break;
                filter++;
        default:
@@ -794,6 +794,7 @@ static void handle_body(void)
        int rc = 0;
        static char newline[2000];
        static char *np = newline;
+       int len = strlen(line);
 
        /* Skip up to the first boundary */
        if (content_top->boundary) {
@@ -807,7 +808,8 @@ static void handle_body(void)
                        /* flush any leftover */
                        if ((transfer_encoding == TE_BASE64)  &&
                            (np != newline)) {
-                               handle_filter(newline, sizeof(newline));
+                               handle_filter(newline, sizeof(newline),
+                                               strlen(newline));
                        }
                        if (!handle_boundary())
                                return;
@@ -824,7 +826,7 @@ static void handle_body(void)
 
                        /* binary data most likely doesn't have newlines */
                        if (message_type != TYPE_TEXT) {
-                               rc = handle_filter(line, sizeof(newline));
+                               rc = handle_filter(line, sizeof(line), len);
                                break;
                        }
 
@@ -841,7 +843,7 @@ static void handle_body(void)
                                        /* should be sitting on a new line */
                                        *(++np) = 0;
                                        op++;
-                                       rc = handle_filter(newline, sizeof(newline));
+                                       rc = handle_filter(newline, sizeof(newline), np - newline);
                                        np = newline;
                                }
                        } while (*op != 0);
@@ -851,12 +853,12 @@ static void handle_body(void)
                        break;
                }
                default:
-                       rc = handle_filter(line, sizeof(newline));
+                       rc = handle_filter(line, sizeof(line), len);
                }
                if (rc)
                        /* nothing left to filter */
                        break;
-       } while (fgets(line, sizeof(line), fin));
+       } while ((len = read_line_with_nul(line, sizeof(line), fin)));
 
        return;
 }
index 46b27cdaea71cba92974480da74ec5922fcf3a7a..e4d977bafbc04fa2bfedfc2912fc87bed9de8e1e 100644 (file)
@@ -45,6 +45,25 @@ static int is_from_line(const char *line, int len)
 /* Could be as small as 64, enough to hold a Unix "From " line. */
 static char buf[4096];
 
+/* We cannot use fgets() because our lines can contain NULs */
+int read_line_with_nul(char *buf, int size, FILE *in)
+{
+       int len = 0, c;
+
+       for (;;) {
+               c = getc(in);
+               buf[len++] = c;
+               if (c == EOF || c == '\n' || len + 1 >= size)
+                       break;
+       }
+
+       if (c == EOF)
+               len--;
+       buf[len] = '\0';
+
+       return len;
+}
+
 /* Called with the first line (potentially partial)
  * already in buf[] -- normally that should begin with
  * the Unix "From " line.  Write it into the specified
@@ -70,19 +89,19 @@ static int split_one(FILE *mbox, const char *name, int allow_bare)
         * "From " and having something that looks like a date format.
         */
        for (;;) {
-               int is_partial = (buf[len-1] != '\n');
+               int is_partial = len && buf[len-1] != '\n';
 
-               if (fputs(buf, output) == EOF)
+               if (fwrite(buf, 1, len, output) != len)
                        die("cannot write output");
 
-               if (fgets(buf, sizeof(buf), mbox) == NULL) {
+               len = read_line_with_nul(buf, sizeof(buf), mbox);
+               if (len == 0) {
                        if (feof(mbox)) {
                                status = 1;
                                break;
                        }
                        die("cannot read mbox");
                }
-               len = strlen(buf);
                if (!is_partial && !is_bare && is_from_line(buf, len))
                        break; /* done with one message */
        }
index 95126fd0c12149034372afd8eb37b944be684957..48f13320014e6e5dee104378e0b9f86eab744151 100644 (file)
--- a/builtin.h
+++ b/builtin.h
@@ -9,6 +9,7 @@ extern const char git_usage_string[];
 extern void list_common_cmds_help(void);
 extern void help_unknown_cmd(const char *cmd);
 extern void prune_packed_objects(int);
+extern int read_line_with_nul(char *buf, int size, FILE *file);
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
index d6c55c115779730fdef8d05fbdb039fe90e3fad7..5a4610b8607cc47bc813291ecb7a16085f7f5393 100755 (executable)
@@ -25,4 +25,13 @@ do
                diff ../t5100/info$mail info$mail"
 done
 
+test_expect_success 'respect NULs' '
+
+       git mailsplit -d3 -o. ../t5100/nul &&
+       cmp ../t5100/nul 001 &&
+       (cat 001 | git mailinfo msg patch) &&
+       test 4 = $(wc -l < patch)
+
+'
+
 test_done
diff --git a/t/t5100/nul b/t/t5100/nul
new file mode 100644 (file)
index 0000000..3d40691
Binary files /dev/null and b/t/t5100/nul differ