From 2f94b3090c03bb56b43c4adfb94f7eeb28a6bf18 Mon Sep 17 00:00:00 2001 From: David Bremner Date: Sun, 4 Jun 2017 09:32:26 -0300 Subject: [PATCH] lib: factor out message-id parsing to separate file. This is really pure C string parsing, and doesn't need to be mixed in with the Xapian/C++ layer. Although not strictly necessary, it also makes it a bit more natural to call _parse_message_id from multiple compilation units. --- lib/Makefile.local | 1 + lib/add-message.cc | 106 +----------------------------------------- lib/message-id.c | 96 ++++++++++++++++++++++++++++++++++++++ lib/notmuch-private.h | 14 ++++++ 4 files changed, 112 insertions(+), 105 deletions(-) create mode 100644 lib/message-id.c diff --git a/lib/Makefile.local b/lib/Makefile.local index 9dd68286..0b5c4b08 100644 --- a/lib/Makefile.local +++ b/lib/Makefile.local @@ -38,6 +38,7 @@ libnotmuch_c_srcs = \ $(dir)/filenames.c \ $(dir)/string-list.c \ $(dir)/message-file.c \ + $(dir)/message-id.c \ $(dir)/messages.c \ $(dir)/sha1.c \ $(dir)/built-with.c \ diff --git a/lib/add-message.cc b/lib/add-message.cc index 0f09415e..314016a8 100644 --- a/lib/add-message.cc +++ b/lib/add-message.cc @@ -1,109 +1,5 @@ #include "database-private.h" -/* Advance 'str' past any whitespace or RFC 822 comments. A comment is - * a (potentially nested) parenthesized sequence with '\' used to - * escape any character (including parentheses). - * - * If the sequence to be skipped continues to the end of the string, - * then 'str' will be left pointing at the final terminating '\0' - * character. - */ -static void -skip_space_and_comments (const char **str) -{ - const char *s; - - s = *str; - while (*s && (isspace (*s) || *s == '(')) { - while (*s && isspace (*s)) - s++; - if (*s == '(') { - int nesting = 1; - s++; - while (*s && nesting) { - if (*s == '(') { - nesting++; - } else if (*s == ')') { - nesting--; - } else if (*s == '\\') { - if (*(s+1)) - s++; - } - s++; - } - } - } - - *str = s; -} - -/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 - * comments, and the '<' and '>' delimiters. - * - * If not NULL, then *next will be made to point to the first character - * not parsed, (possibly pointing to the final '\0' terminator. - * - * Returns a newly talloc'ed string belonging to 'ctx'. - * - * Returns NULL if there is any error parsing the message-id. */ -static char * -_parse_message_id (void *ctx, const char *message_id, const char **next) -{ - const char *s, *end; - char *result; - - if (message_id == NULL || *message_id == '\0') - return NULL; - - s = message_id; - - skip_space_and_comments (&s); - - /* Skip any unstructured text as well. */ - while (*s && *s != '<') - s++; - - if (*s == '<') { - s++; - } else { - if (next) - *next = s; - return NULL; - } - - skip_space_and_comments (&s); - - end = s; - while (*end && *end != '>') - end++; - if (next) { - if (*end) - *next = end + 1; - else - *next = end; - } - - if (end > s && *end == '>') - end--; - if (end <= s) - return NULL; - - result = talloc_strndup (ctx, s, end - s + 1); - - /* Finally, collapse any whitespace that is within the message-id - * itself. */ - { - char *r; - int len; - - for (r = result, len = strlen (r); *r; r++, len--) - if (*r == ' ' || *r == '\t') - memmove (r, r+1, len); - } - - return result; -} - /* Parse a References header value, putting a (talloc'ed under 'ctx') * copy of each referenced message-id into 'hash'. * @@ -126,7 +22,7 @@ parse_references (void *ctx, return NULL; while (*refs) { - ref = _parse_message_id (ctx, refs, &refs); + ref = _notmuch_message_id_parse (ctx, refs, &refs); if (ref && strcmp (ref, message_id)) { g_hash_table_add (hash, ref); diff --git a/lib/message-id.c b/lib/message-id.c new file mode 100644 index 00000000..d7541d50 --- /dev/null +++ b/lib/message-id.c @@ -0,0 +1,96 @@ +#include "notmuch-private.h" + +/* Advance 'str' past any whitespace or RFC 822 comments. A comment is + * a (potentially nested) parenthesized sequence with '\' used to + * escape any character (including parentheses). + * + * If the sequence to be skipped continues to the end of the string, + * then 'str' will be left pointing at the final terminating '\0' + * character. + */ +static void +skip_space_and_comments (const char **str) +{ + const char *s; + + s = *str; + while (*s && (isspace (*s) || *s == '(')) { + while (*s && isspace (*s)) + s++; + if (*s == '(') { + int nesting = 1; + s++; + while (*s && nesting) { + if (*s == '(') { + nesting++; + } else if (*s == ')') { + nesting--; + } else if (*s == '\\') { + if (*(s+1)) + s++; + } + s++; + } + } + } + + *str = s; +} + +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next) +{ + const char *s, *end; + char *result; + + if (message_id == NULL || *message_id == '\0') + return NULL; + + s = message_id; + + skip_space_and_comments (&s); + + /* Skip any unstructured text as well. */ + while (*s && *s != '<') + s++; + + if (*s == '<') { + s++; + } else { + if (next) + *next = s; + return NULL; + } + + skip_space_and_comments (&s); + + end = s; + while (*end && *end != '>') + end++; + if (next) { + if (*end) + *next = end + 1; + else + *next = end; + } + + if (end > s && *end == '>') + end--; + if (end <= s) + return NULL; + + result = talloc_strndup (ctx, s, end - s + 1); + + /* Finally, collapse any whitespace that is within the message-id + * itself. */ + { + char *r; + int len; + + for (r = result, len = strlen (r); *r; r++, len--) + if (*r == ' ' || *r == '\t') + memmove (r, r+1, len); + } + + return result; +} diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h index 5dfebf5d..9957164d 100644 --- a/lib/notmuch-private.h +++ b/lib/notmuch-private.h @@ -492,6 +492,20 @@ notmuch_status_t _notmuch_query_count_documents (notmuch_query_t *query, const char *type, unsigned *count_out); +/* message-id.c */ + +/* Parse an RFC 822 message-id, discarding whitespace, any RFC 822 + * comments, and the '<' and '>' delimiters. + * + * If not NULL, then *next will be made to point to the first character + * not parsed, (possibly pointing to the final '\0' terminator. + * + * Returns a newly talloc'ed string belonging to 'ctx'. + * + * Returns NULL if there is any error parsing the message-id. */ +char * +_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next); + /* message.cc */ -- 2.26.2