From 2744b2344dc42fa2a1ddf17f4818975cd48f6d42 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 11 Apr 2005 23:46:50 -0700
Subject: [PATCH 1/1] Start of early patch applicator tools for git.

I looked a bit at my old BK tools for the same thing, but they were just
so horrid in many ways that I largely rewrote it all and these tools do
things a bit differently.

Instead of aggressively piping data from one process to another (which
was clever but very hard to follow), this first just splits out the mbox
into many smaller email files, and then does some scripts on these
temporary files.
---
 Makefile    |  14 +++
 mailinfo.c  | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mailsplit.c | 144 +++++++++++++++++++++++++
 3 files changed, 458 insertions(+)
 create mode 100644 Makefile
 create mode 100644 mailinfo.c
 create mode 100644 mailsplit.c

diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..3c518baca
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+CC=gcc
+CFLAGS=-Wall -O2
+HOME=$(shell echo $$HOME)
+
+PROGRAMS=mailsplit mailinfo
+SCRIPTS=
+
+all: $(PROGRAMS)
+
+install: $(PROGRAMS) $(SCRIPTS)
+	cp -f $(PROGRAMS) $(SCRIPTS) $(HOME)/bin/
+
+clean:
+	rm -f $(PROGRAMS) *.o
diff --git a/mailinfo.c b/mailinfo.c
new file mode 100644
index 000000000..1ca554e92
--- /dev/null
+++ b/mailinfo.c
@@ -0,0 +1,300 @@
+/*
+ * Another stupid program, this one parsing the headers of an
+ * email to figure out authorship and subject
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+static FILE *cmitmsg, *patchfile;
+
+static char line[1000];
+static char name[1000];
+static char email[1000];
+static char subject[1000];
+
+static char *sanity_check(char *name, char *email)
+{
+	int len = strlen(name);
+	if (len < 3 || len > 60)
+		return email;
+	if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>'))
+		return email;
+	return name;
+}
+
+static int handle_from(char *line)
+{
+	char *at = strchr(line, '@');
+	char *dst;
+
+	if (!at)
+		return 0;
+
+	/*
+	 * If we already have one email, don't take any confusing lines
+	 */
+	if (*email && strchr(at+1, '@'))
+		return 0;
+
+	while (at > line) {
+		unsigned char c = at[-1];	/* <ctype.h> wants unsigned char values */
+		if (isspace(c) || c == '<')
+			break;
+		at--;
+	}
+	dst = email;
+	for (;;) {
+		unsigned char c = *at;
+		if (!c || c == '>' || isspace(c))
+			break;
+		*at++ = ' ';
+		*dst++ = c;
+	}
+	*dst++ = 0;
+
+	at = line + strlen(line);
+	while (at > line) {
+		unsigned char c = *--at;
+		if (isalnum(c))
+			break;
+		*at = 0;
+	}
+
+	at = line;
+	for (;;) {
+		unsigned char c = *at;
+		if (!c)
+			break;
+		if (isalnum(c))
+			break;
+		at++;
+	}
+
+	at = sanity_check(at, email);
+
+	strcpy(name, at);
+	return 1;
+}
+
+static void handle_subject(char *line)
+{
+	strcpy(subject, line);
+}
+
+static void add_subject_line(char *line)
+{
+	while (isspace((unsigned char)*line))
+		line++;
+	*--line = ' ';
+	strncat(subject, line, sizeof(subject) - strlen(subject) - 1);	/* folded lines must not overrun subject[] */
+}
+
+static void check_line(char *line, int len)
+{
+	static int cont = -1;
+	if (!memcmp(line, "From:", 5) && isspace((unsigned char)line[5])) {
+		handle_from(line+6);
+		cont = 0;
+		return;
+	}
+	if (!memcmp(line, "Subject:", 8) && isspace((unsigned char)line[8])) {
+		handle_subject(line+9);
+		cont = 1;
+		return;
+	}
+	if (isspace((unsigned char)*line)) {
+		switch (cont) {
+		case 0:
+			fprintf(stderr, "I don't do 'From:' line continuations\n");
+			break;
+		case 1:
+			add_subject_line(line);
+			return;
+		default:
+			break;
+		}
+	}
+	cont = -1;
+}
+
+static char * cleanup_subject(char *subject)
+{
+	for (;;) {
+		char *p;
+		int len, remove;
+		switch (*subject) {
+		case 'r': case 'R':
+			if (!memcmp("e:", subject+1, 2)) {
+				subject +=3;
+				continue;
+			}
+			break;
+		case ' ': case '\t': case ':':
+			subject++;
+			continue;
+
+		case '[':
+			p = strchr(subject, ']');
+			if (!p) {
+				subject++;
+				continue;
+			}
+			len = strlen(p);
+			remove = p - subject;
+			if (remove <= len *2) {
+				subject = p+1;
+				continue;
+			}
+			break;
+		}
+		return subject;
+	}
+}
+
+static void cleanup_space(char *buf)
+{
+	unsigned char c;
+	while ((c = *buf) != 0) {
+		buf++;
+		if (isspace(c)) {
+			buf[-1] = ' ';
+			c = *buf;
+			while (isspace(c)) {
+				int len = strlen(buf);
+				memmove(buf, buf+1, len);
+				c = *buf;
+			}
+		}
+	}
+}
+
+/*
+ * Hacky hacky. This depends not only on -p1, but on
+ * filenames not having some special characters in them,
+ * like tilde.
+ */
+static void show_filename(char *line)
+{
+	int len;
+	char *name = strchr(line, '/');
+
+	if (!name || !isspace((unsigned char)*line))
+		return;
+	name++;
+	len = 0;
+	for (;;) {
+		unsigned char c = name[len];
+		switch (c) {
+		default:
+			len++;
+			continue;
+
+		case 0: case ' ':
+		case '\t': case '\n':
+			break;
+
+		case '~':
+			break;
+		}
+		break;
+	}
+	/* remove ".orig" from the end - common patch behaviour */
+	if (len > 5 && !memcmp(name+len-5, ".orig", 5))
+		len -=5;
+	if (!len)
+		return;
+	printf("filename: %.*s\n", len, name);
+}
+
+static void handle_rest(void)
+{
+	char *sub = cleanup_subject(subject);
+	cleanup_space(name);
+	cleanup_space(email);
+	cleanup_space(sub);
+	printf("Author: %s\nEmail: %s\nSubject: %s\n\n", name, email, sub);
+	FILE *out = cmitmsg;
+
+	do {
+		/* Track filename information from the patch.. */
+		if (!memcmp("---", line, 3)) {
+			out = patchfile;
+			show_filename(line+3);
+		}
+
+		if (!memcmp("+++", line, 3))
+			show_filename(line+3);
+
+		fputs(line, out);
+	} while (fgets(line, sizeof(line), stdin) != NULL);
+
+	if (out == cmitmsg) {
+		fprintf(stderr, "No patch found\n");
+		exit(1);
+	}
+
+	fclose(cmitmsg);
+	fclose(patchfile);
+}
+
+static int eatspace(char *line)
+{
+	int len = strlen(line);
+	while (len > 0 && isspace((unsigned char)line[len-1]))
+		line[--len] = 0;
+	return len;
+}
+
+static void handle_body(void)
+{
+	int has_from = 0;
+
+	/* First line of body can be a From: */
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		int len = eatspace(line);
+		if (!len)
+			continue;
+		if (!memcmp("From:", line, 5) && isspace((unsigned char)line[5])) {
+			if (!has_from && handle_from(line+6)) {
+				has_from = 1;
+				continue;
+			}
+		}
+		line[len] = '\n';
+		handle_rest();
+		break;
+	}
+}
+
+static void usage(void)
+{
+	fprintf(stderr, "mailinfo msg-file path-file < email\n");
+	exit(1);
+}
+
+int main(int argc, char ** argv)
+{
+	if (argc != 3)
+		usage();
+	cmitmsg = fopen(argv[1], "w");
+	if (!cmitmsg) {
+		perror(argv[1]);
+		exit(1);
+	}
+	patchfile = fopen(argv[2], "w");
+	if (!patchfile) {
+		perror(argv[2]);
+		exit(1);
+	}
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		int len = eatspace(line);
+		if (!len) {
+			handle_body();
+			break;
+		}
+		check_line(line, len);
+	}
+	return 0;
+}
diff --git a/mailsplit.c b/mailsplit.c
new file mode 100644
index 000000000..9379fbc5e
--- /dev/null
+++ b/mailsplit.c
@@ -0,0 +1,144 @@
+/*
+ * Totally braindamaged mbox splitter program.
+ *
+ * It just splits a mbox into a list of files: "0001" "0002" ..
+ * so you can process them further from there.
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+static int usage(void)
+{
+	fprintf(stderr, "mailsplit <mbox> <directory>\n");
+	exit(1);
+}
+
+static int linelen(const char *map, unsigned long size)
+{
+	int len = 0, c;
+
+	do {
+		c = *map;
+		map++;
+		size--;
+		len++;
+	} while (size && c != '\n');
+	return len;
+}
+
+static int is_from_line(const char *line, int len)
+{
+	const char *colon;
+
+	if (len < 20 || memcmp("From ", line, 5))
+		return 0;
+
+	colon = line + len - 2;
+	line += 5;
+	for (;;) {
+		if (colon < line)
+			return 0;
+		if (*--colon == ':')
+			break;
+	}
+
+	if (!isdigit((unsigned char)colon[-4]) ||
+	    !isdigit((unsigned char)colon[-2]) ||
+	    !isdigit((unsigned char)colon[-1]) ||
+	    !isdigit((unsigned char)colon[ 1]) ||
+	    !isdigit((unsigned char)colon[ 2]))
+		return 0;
+
+	/* year */
+	if (strtol(colon+3, NULL, 10) <= 90)
+		return 0;
+
+	/* Ok, close enough */
+	return 1;
+}
+
+static int parse_email(const void *map, unsigned long size)
+{
+	unsigned long offset;
+
+	if (size < 6 || memcmp("From ", map, 5))
+		goto corrupt;
+
+	/* Make sure we don't trigger on this first line */
+	map++; size--; offset=1;
+
+	/*
+	 * Search for a line beginning with "From ", and
+	 * having something that looks like a date format.
+	 */
+	do {
+		int len = linelen(map, size);
+		if (is_from_line(map, len))
+			return offset;
+		map += len;
+		size -= len;
+		offset += len;
+	} while (size);
+	return offset;
+
+corrupt:
+	fprintf(stderr, "corrupt mailbox\n");
+	exit(1);
+}
+
+int main(int argc, char **argv)
+{
+	int fd, nr;
+	struct stat st;
+	unsigned long size;
+	void *map;
+
+	if (argc != 3)
+		usage();
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {
+		perror(argv[1]);
+		exit(1);
+	}
+	if (chdir(argv[2]) < 0)
+		usage();
+	if (fstat(fd, &st) < 0) {
+		perror("stat");
+		exit(1);
+	}
+	size = st.st_size;
+	map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (map == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	close(fd);
+	nr = 0;
+	do {
+		char name[10];
+		unsigned long len = parse_email(map, size);
+		assert(len <= size);
+		sprintf(name, "%04d", ++nr);
+		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
+		if (fd < 0) {
+			perror(name);
+			exit(1);
+		}
+		if (write(fd, map, len) != len) {
+			perror("write");
+			exit(1);
+		}
+		close(fd);
+		map += len;
+		size -= len;
+	} while (size > 0);
+	return 0;
+}
-- 
2.26.2