3e/b30dd5048c1da6bcfb053df54190db595b9eb9

   1 Return-Path: <m.walters@qmul.ac.uk>\r
   2 X-Original-To: notmuch@notmuchmail.org\r
   3 Delivered-To: notmuch@notmuchmail.org\r
   4 Received: from localhost (localhost [127.0.0.1])\r
   5         by olra.theworths.org (Postfix) with ESMTP id 89212429E2E\r
   6         for <notmuch@notmuchmail.org>; Sun,  6 Jan 2013 14:06:03 -0800 (PST)\r
   7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
   8 X-Spam-Flag: NO\r
   9 X-Spam-Score: -1.098\r
  10 X-Spam-Level: \r
  11 X-Spam-Status: No, score=-1.098 tagged_above=-999 required=5\r
  12         tests=[DKIM_ADSP_CUSTOM_MED=0.001, FREEMAIL_FROM=0.001,\r
  13         NML_ADSP_CUSTOM_MED=1.2, RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled\r
  14 Received: from olra.theworths.org ([127.0.0.1])\r
  15         by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
  16         with ESMTP id P1lCaNKrQ+HO for <notmuch@notmuchmail.org>;\r
  17         Sun,  6 Jan 2013 14:06:02 -0800 (PST)\r
  18 Received: from mail2.qmul.ac.uk (mail2.qmul.ac.uk [138.37.6.6])\r
  19         (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))\r
  20         (No client certificate requested)\r
  21         by olra.theworths.org (Postfix) with ESMTPS id 17207431FD4\r
  22         for <notmuch@notmuchmail.org>; Sun,  6 Jan 2013 14:06:02 -0800 (PST)\r
  23 Received: from smtp.qmul.ac.uk ([138.37.6.40])\r
  24         by mail2.qmul.ac.uk with esmtp (Exim 4.71)\r
  25         (envelope-from <m.walters@qmul.ac.uk>)\r
  26         id 1TryLr-0002Im-Ub; Sun, 06 Jan 2013 22:05:56 +0000\r
  27 Received: from 93-97-24-31.zone5.bethere.co.uk ([93.97.24.31] helo=localhost)\r
  28         by smtp.qmul.ac.uk with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.69)\r
  29         (envelope-from <m.walters@qmul.ac.uk>)\r
  30         id 1TryLr-00054w-6p; Sun, 06 Jan 2013 22:05:55 +0000\r
  31 From: Mark Walters <markwalters1009@gmail.com>\r
  32 To: Jani Nikula <jani@nikula.org>, Austin Clements <amdragon@MIT.EDU>,\r
  33         notmuch@notmuchmail.org\r
  34 Subject: Re: [PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore\r
  35 In-Reply-To: <87d2xi7yya.fsf@nikula.org>\r
  36 References: <1357503762-28759-1-git-send-email-amdragon@mit.edu>\r
  37         <87d2xi7yya.fsf@nikula.org>\r
  38 User-Agent: Notmuch/0.14+236~g1d0044f (http://notmuchmail.org) Emacs/23.4.1\r
  39         (x86_64-pc-linux-gnu)\r
  40 Date: Sun, 06 Jan 2013 22:05:58 +0000\r
  41 Message-ID: <878v86ynjt.fsf@qmul.ac.uk>\r
  42 MIME-Version: 1.0\r
  43 Content-Type: text/plain; charset=us-ascii\r
  44 X-Sender-Host-Address: 93.97.24.31\r
  45 X-QM-SPAM-Info: Sender has good ham record.  :)\r
  46 X-QM-Body-MD5: ebea13e7ea52d34a85733088b32b689a (of first 20000 bytes)\r
  47 X-SpamAssassin-Score: -1.8\r
  48 X-SpamAssassin-SpamBar: -\r
  49 X-SpamAssassin-Report: The QM spam filters have analysed this message to\r
  50         determine if it is\r
  51         spam. We require at least 5.0 points to mark a message as spam.\r
  52         This message scored -1.8 points.\r
  53         Summary of the scoring: \r
  54         * -2.3 RCVD_IN_DNSWL_MED RBL: Sender listed at http://www.dnswl.org/,\r
  55         *      medium trust\r
  56         *      [138.37.6.40 listed in list.dnswl.org]\r
  57         * 0.0 FREEMAIL_FROM Sender email is commonly abused enduser mail\r
  58         provider *      (markwalters1009[at]gmail.com)\r
  59         *  0.5 AWL AWL: From: address is in the auto white-list\r
  60 X-QM-Scan-Virus: ClamAV says the message is clean\r
  61 Cc: tomi.ollila@iki.fi\r
  62 X-BeenThere: notmuch@notmuchmail.org\r
  63 X-Mailman-Version: 2.1.13\r
  64 Precedence: list\r
  65 List-Id: "Use and development of the notmuch mail system."\r
  66         <notmuch.notmuchmail.org>\r
  67 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
  68         <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
  69 List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
  70 List-Post: <mailto:notmuch@notmuchmail.org>\r
  71 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
  72 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
  73         <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
  74 X-List-Received-Date: Sun, 06 Jan 2013 22:06:03 -0000\r
  75 \r
  76 \r
  77 LGTM too +1\r
  78 \r
  79 Mark\r
  80 \r
  81 On Sun, 06 Jan 2013, Jani Nikula <jani@nikula.org> wrote:\r
  82 > On Sun, 06 Jan 2013, Austin Clements <amdragon@MIT.EDU> wrote:\r
  83 >> This obsoletes\r
  84 >>\r
  85 >>   id:1356936162-2589-1-git-send-email-amdragon@mit.edu\r
  86 >>\r
  87 >> v5 should address all of the comments on v4 except those I\r
  88 >> specifically replied to (via the ML or IRC).  It also adds a new patch\r
  89 >> at the beginning that makes missing message IDs non-fatal in restore,\r
  90 >> like they were in 0.14.  This patch can be pushed separately; it's in\r
  91 >> this series because later tests rely on it.\r
  92 >\r
  93 > The series LGMT,\r
  94 > Jani.\r
  95 >\r
  96 >>\r
  97 >> The diff from v4 follows.\r
  98 >>\r
  99 >> diff --git a/notmuch-dump.c b/notmuch-dump.c\r
 100 >> index bf01a39..a3244e0 100644\r
 101 >> --- a/notmuch-dump.c\r
 102 >> +++ b/notmuch-dump.c\r
 103 >> @@ -103,6 +103,18 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])\r
 104 >>      message = notmuch_messages_get (messages);\r
 105 >>      message_id = notmuch_message_get_message_id (message);\r
 106 >>  \r
 107 >> +    if (output_format == DUMP_FORMAT_BATCH_TAG &&\r
 108 >> +        strchr (message_id, '\n')) {\r
 109 >> +        /* This will produce a line break in the output, which\r
 110 >> +         * would be difficult to handle in tools.  However, it's\r
 111 >> +         * also impossible to produce an email containing a line\r
 112 >> +         * break in a message ID because of unfolding, so we can\r
 113 >> +         * safely disallow it. */\r
 114 >> +        fprintf (stderr, "Warning: skipping message id containing line break: \"%s\"\n", message_id);\r
 115 >> +        notmuch_message_destroy (message);\r
 116 >> +        continue;\r
 117 >> +    }\r
 118 >> +\r
 119 >>      if (output_format == DUMP_FORMAT_SUP) {\r
 120 >>          fprintf (output, "%s (", message_id);\r
 121 >>      }\r
 122 >> @@ -133,19 +145,10 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])\r
 123 >>      if (output_format == DUMP_FORMAT_SUP) {\r
 124 >>          fputs (")\n", output);\r
 125 >>      } else {\r
 126 >> -        if (strchr (message_id, '\n')) {\r
 127 >> -            /* This will produce a line break in the output, which\r
 128 >> -             * would be difficult to handle in tools.  However,\r
 129 >> -             * it's also impossible to produce an email containing\r
 130 >> -             * a line break in a message ID because of unfolding,\r
 131 >> -             * so we can safely disallow it. */\r
 132 >> -            fprintf (stderr, "Error: cannot dump message id containing line break: %s\n", message_id);\r
 133 >> -            return 1;\r
 134 >> -        }\r
 135 >>          if (make_boolean_term (notmuch, "id", message_id,\r
 136 >>                                 &buffer, &buffer_size)) {\r
 137 >> -                fprintf (stderr, "Error: failed to quote message id %s\n",\r
 138 >> -                         message_id);\r
 139 >> +                fprintf (stderr, "Error quoting message id %s: %s\n",\r
 140 >> +                         message_id, strerror (errno));\r
 141 >>                  return 1;\r
 142 >>          }\r
 143 >>          fprintf (output, " -- %s\n", buffer);\r
 144 >> diff --git a/notmuch-restore.c b/notmuch-restore.c\r
 145 >> index 77a4c27..81d4d98 100644\r
 146 >> --- a/notmuch-restore.c\r
 147 >> +++ b/notmuch-restore.c\r
 148 >> @@ -26,7 +26,8 @@\r
 149 >>  static regex_t regex;\r
 150 >>  \r
 151 >>  /* Non-zero return indicates an error in retrieving the message,\r
 152 >> - * or in applying the tags.\r
 153 >> + * or in applying the tags.  Missing messages are reported, but not\r
 154 >> + * considered errors.\r
 155 >>   */\r
 156 >>  static int\r
 157 >>  tag_message (unused (void *ctx),\r
 158 >> @@ -40,13 +41,17 @@ tag_message (unused (void *ctx),\r
 159 >>      int ret = 0;\r
 160 >>  \r
 161 >>      status = notmuch_database_find_message (notmuch, message_id, &message);\r
 162 >> -    if (status || message == NULL) {\r
 163 >> -    fprintf (stderr, "Warning: cannot apply tags to %smessage: %s\n",\r
 164 >> -             message ? "" : "missing ", message_id);\r
 165 >> -    if (status)\r
 166 >> -        fprintf (stderr, "%s\n", notmuch_status_to_string (status));\r
 167 >> +    if (status) {\r
 168 >> +    fprintf (stderr, "Error applying tags to message %s: %s\n",\r
 169 >> +             message_id, notmuch_status_to_string (status));\r
 170 >>      return 1;\r
 171 >>      }\r
 172 >> +    if (message == NULL) {\r
 173 >> +    fprintf (stderr, "Warning: cannot apply tags to missing message: %s\n",\r
 174 >> +             message_id);\r
 175 >> +    /* We consider this a non-fatal error. */\r
 176 >> +    return 0;\r
 177 >> +    }\r
 178 >>  \r
 179 >>      /* In order to detect missing messages, this check/optimization is\r
 180 >>       * intentionally done *after* first finding the message. */\r
 181 >> @@ -222,12 +227,17 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])\r
 182 >>          if (ret == 0) {\r
 183 >>              ret = parse_boolean_term (line_ctx, query_string,\r
 184 >>                                        &prefix, &term);\r
 185 >> -            if (ret) {\r
 186 >> -                fprintf (stderr, "Warning: cannot parse query: %s\n",\r
 187 >> -                         query_string);\r
 188 >> +            if (ret && errno == EINVAL) {\r
 189 >> +                fprintf (stderr, "Warning: cannot parse query: %s (skipping)\n", query_string);\r
 190 >>                  continue;\r
 191 >> +            } else if (ret) {\r
 192 >> +                /* This is more fatal (e.g., out of memory) */\r
 193 >> +                fprintf (stderr, "Error parsing query: %s\n",\r
 194 >> +                         strerror (errno));\r
 195 >> +                ret = 1;\r
 196 >> +                break;\r
 197 >>              } else if (strcmp ("id", prefix) != 0) {\r
 198 >> -                fprintf (stderr, "Warning: not an id query: %s\n", query_string);\r
 199 >> +                fprintf (stderr, "Warning: not an id query: %s (skipping)\n", query_string);\r
 200 >>                  continue;\r
 201 >>              }\r
 202 >>              query_string = term;\r
 203 >> diff --git a/test/dump-restore b/test/dump-restore\r
 204 >> index f9ae5b3..f076c12 100755\r
 205 >> --- a/test/dump-restore\r
 206 >> +++ b/test/dump-restore\r
 207 >> @@ -202,18 +202,32 @@ a\r
 208 >>  + +e -- id:20091117232137.GA7669@griffis1.net\r
 209 >>  # valid id, but warning about missing message\r
 210 >>  +e id:missing_message_id\r
 211 >> +# exercise parser\r
 212 >> ++e -- id:some)stuff\r
 213 >> ++e -- id:some stuff\r
 214 >> ++e -- id:some"stuff\r
 215 >> ++e -- id:"a_message_id_with""_a_quote"\r
 216 >> ++e -- id:"a message id with spaces"\r
 217 >> ++e --  id:an_id_with_leading_and_trailing_ws \\r
 218 >> +\r
 219 >>  EOF\r
 220 >>  \r
 221 >>  cat <<EOF > EXPECTED\r
 222 >> -Warning: cannot parse query: a\r
 223 >> +Warning: cannot parse query: a (skipping)\r
 224 >>  Warning: no query string [+0]\r
 225 >>  Warning: no query string [+a +b]\r
 226 >>  Warning: missing query string [+a +b ]\r
 227 >>  Warning: no query string after -- [+c +d --]\r
 228 >>  Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]\r
 229 >> -Warning: cannot parse query: id:"\r
 230 >> -Warning: not an id query: tag:abc\r
 231 >> +Warning: cannot parse query: id:" (skipping)\r
 232 >> +Warning: not an id query: tag:abc (skipping)\r
 233 >>  Warning: cannot apply tags to missing message: missing_message_id\r
 234 >> +Warning: cannot parse query: id:some)stuff (skipping)\r
 235 >> +Warning: cannot parse query: id:some stuff (skipping)\r
 236 >> +Warning: cannot apply tags to missing message: some"stuff\r
 237 >> +Warning: cannot apply tags to missing message: a_message_id_with"_a_quote\r
 238 >> +Warning: cannot apply tags to missing message: a message id with spaces\r
 239 >> +Warning: cannot apply tags to missing message: an_id_with_leading_and_trailing_ws\r
 240 >>  EOF\r
 241 >>  \r
 242 >>  test_expect_equal_file EXPECTED OUTPUT\r
 243 >> diff --git a/util/string-util.c b/util/string-util.c\r
 244 >> index 52c7781..aba9aa8 100644\r
 245 >> --- a/util/string-util.c\r
 246 >> +++ b/util/string-util.c\r
 247 >> @@ -23,6 +23,7 @@\r
 248 >>  #include "talloc.h"\r
 249 >>  \r
 250 >>  #include <ctype.h>\r
 251 >> +#include <errno.h>\r
 252 >>  \r
 253 >>  char *\r
 254 >>  strtok_len (char *s, const char *delim, size_t *len)\r
 255 >> @@ -36,6 +37,12 @@ strtok_len (char *s, const char *delim, size_t *len)\r
 256 >>      return *len ? s : NULL;\r
 257 >>  }\r
 258 >>  \r
 259 >> +static int\r
 260 >> +is_unquoted_terminator (unsigned char c)\r
 261 >> +{\r
 262 >> +    return c == 0 || c <= ' ' || c == ')';\r
 263 >> +}\r
 264 >> +\r
 265 >>  int\r
 266 >>  make_boolean_term (void *ctx, const char *prefix, const char *term,\r
 267 >>                 char **buf, size_t *len)\r
 268 >> @@ -49,7 +56,8 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,\r
 269 >>       * containing a quote, even though it only matters at the\r
 270 >>       * beginning, and anything containing non-ASCII text. */\r
 271 >>      for (in = term; *in && !need_quoting; in++)\r
 272 >> -    if (*in <= ' ' || *in == ')' || *in == '"' || (unsigned char)*in > 127)\r
 273 >> +    if (is_unquoted_terminator (*in) || *in == '"'\r
 274 >> +        || (unsigned char)*in > 127)\r
 275 >>          need_quoting = 1;\r
 276 >>  \r
 277 >>      if (need_quoting)\r
 278 >> @@ -67,8 +75,10 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,\r
 279 >>      *buf = talloc_realloc (ctx, *buf, char, *len);\r
 280 >>      }\r
 281 >>  \r
 282 >> -    if (! *buf)\r
 283 >> -    return 1;\r
 284 >> +    if (! *buf) {\r
 285 >> +    errno = ENOMEM;\r
 286 >> +    return -1;\r
 287 >> +    }\r
 288 >>  \r
 289 >>      out = *buf;\r
 290 >>  \r
 291 >> @@ -102,7 +112,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,\r
 292 >>  static const char*\r
 293 >>  skip_space (const char *str)\r
 294 >>  {\r
 295 >> -    while (*str && isspace (*str))\r
 296 >> +    while (*str && isspace ((unsigned char) *str))\r
 297 >>      ++str;\r
 298 >>      return str;\r
 299 >>  }\r
 300 >> @@ -111,6 +121,7 @@ int\r
 301 >>  parse_boolean_term (void *ctx, const char *str,\r
 302 >>                  char **prefix_out, char **term_out)\r
 303 >>  {\r
 304 >> +    int err = EINVAL;\r
 305 >>      *prefix_out = *term_out = NULL;\r
 306 >>  \r
 307 >>      /* Parse prefix */\r
 308 >> @@ -119,12 +130,20 @@ parse_boolean_term (void *ctx, const char *str,\r
 309 >>      if (! pos)\r
 310 >>      goto FAIL;\r
 311 >>      *prefix_out = talloc_strndup (ctx, str, pos - str);\r
 312 >> +    if (! *prefix_out) {\r
 313 >> +    err = ENOMEM;\r
 314 >> +    goto FAIL;\r
 315 >> +    }\r
 316 >>      ++pos;\r
 317 >>  \r
 318 >>      /* Implement de-quoting compatible with make_boolean_term. */\r
 319 >>      if (*pos == '"') {\r
 320 >>      char *out = talloc_array (ctx, char, strlen (pos));\r
 321 >>      int closed = 0;\r
 322 >> +    if (! out) {\r
 323 >> +        err = ENOMEM;\r
 324 >> +        goto FAIL;\r
 325 >> +    }\r
 326 >>      *term_out = out;\r
 327 >>      /* Skip the opening quote, find the closing quote, and\r
 328 >>       * un-double doubled internal quotes. */\r
 329 >> @@ -148,18 +167,25 @@ parse_boolean_term (void *ctx, const char *str,\r
 330 >>      } else {\r
 331 >>      const char *start = pos;\r
 332 >>      /* Check for text after the boolean term. */\r
 333 >> -    while (*pos > ' ' && *pos != ')')\r
 334 >> +    while (! is_unquoted_terminator (*pos))\r
 335 >>          ++pos;\r
 336 >> -    if (*skip_space (pos))\r
 337 >> +    if (*skip_space (pos)) {\r
 338 >> +        err = EINVAL;\r
 339 >>          goto FAIL;\r
 340 >> +    }\r
 341 >>      /* No trailing text; dup the string so the caller can free\r
 342 >>       * it. */\r
 343 >>      *term_out = talloc_strndup (ctx, start, pos - start);\r
 344 >> +    if (! *term_out) {\r
 345 >> +        err = ENOMEM;\r
 346 >> +        goto FAIL;\r
 347 >> +    }\r
 348 >>      }\r
 349 >>      return 0;\r
 350 >>  \r
 351 >>   FAIL:\r
 352 >>      talloc_free (*prefix_out);\r
 353 >>      talloc_free (*term_out);\r
 354 >> -    return 1;\r
 355 >> +    errno = err;\r
 356 >> +    return -1;\r
 357 >>  }\r
 358 >> diff --git a/util/string-util.h b/util/string-util.h\r
 359 >> index 8b9fe50..0194607 100644\r
 360 >> --- a/util/string-util.h\r
 361 >> +++ b/util/string-util.h\r
 362 >> @@ -28,7 +28,8 @@ char *strtok_len (char *s, const char *delim, size_t *len);\r
 363 >>   * can be parsed by parse_boolean_term.\r
 364 >>   *\r
 365 >>   * Output is into buf; it may be talloc_realloced.\r
 366 >> - * Return: 0 on success, non-zero on memory allocation failure.\r
 367 >> + * Return: 0 on success, -1 on error.  errno will be set to ENOMEM if\r
 368 >> + * there is an allocation failure.\r
 369 >>   */\r
 370 >>  int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,\r
 371 >>                     char **buf, size_t *len);\r
 372 >> @@ -42,7 +43,8 @@ int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,\r
 373 >>   * of the quoting styles supported by Xapian (and hence notmuch).\r
 374 >>   * *prefix_out and *term_out will be talloc'd with context ctx.\r
 375 >>   *\r
 376 >> - * Return: 0 on success, non-zero on parse error.\r
 377 >> + * Return: 0 on success, -1 on error.  errno will be set to EINVAL if\r
 378 >> + * there is a parse error or ENOMEM if there is an allocation failure.\r
 379 >>   */\r
 380 >>  int\r
 381 >>  parse_boolean_term (void *ctx, const char *str,\r