ac/b9f34c163752d2ef49b8c4df483d7828ef5eb9

   1 Return-Path: <jani@nikula.org>\r
   2 X-Original-To: notmuch@notmuchmail.org\r
   3 Delivered-To: notmuch@notmuchmail.org\r
   4 Received: from localhost (localhost [127.0.0.1])\r
   5         by olra.theworths.org (Postfix) with ESMTP id 947D4431FAF\r
   6         for <notmuch@notmuchmail.org>; Fri, 30 Nov 2012 13:43:49 -0800 (PST)\r
   7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
   8 X-Spam-Flag: NO\r
   9 X-Spam-Score: -0.7\r
  10 X-Spam-Level: \r
  11 X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5\r
  12         tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled\r
  13 Received: from olra.theworths.org ([127.0.0.1])\r
  14         by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
  15         with ESMTP id iEVbKlHTBOyl for <notmuch@notmuchmail.org>;\r
  16         Fri, 30 Nov 2012 13:43:45 -0800 (PST)\r
  17 Received: from mail-la0-f53.google.com (mail-la0-f53.google.com\r
  18         [209.85.215.53]) (using TLSv1 with cipher RC4-SHA (128/128 bits))\r
  19         (No client certificate requested)\r
  20         by olra.theworths.org (Postfix) with ESMTPS id 67A9E431FAE\r
  21         for <notmuch@notmuchmail.org>; Fri, 30 Nov 2012 13:43:44 -0800 (PST)\r
  22 Received: by mail-la0-f53.google.com with SMTP id w12so789442lag.26\r
  23         for <notmuch@notmuchmail.org>; Fri, 30 Nov 2012 13:43:42 -0800 (PST)\r
  24 X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;\r
  25         d=google.com; s=20120113;\r
  26         h=from:to:cc:subject:in-reply-to:references:user-agent:date\r
  27         :message-id:mime-version:content-type:x-gm-message-state;\r
  28         bh=vaJl32SvhOshPg0lRn8ibEGhmC3r1fvHvNc6sEiaCw8=;\r
  29         b=FNSd95Je10y+cF6gmh3JPWNq1vbxs6OD61GJImkBFJjm+BatNTuaX5HKIk9ZEJyfYf\r
  30         PCILp35n9w1Cn0ojED7Jwet4TyZZP7AkZpH7hZZd5K9vDFnqCMnmajfMPSJmE2QWiVF/\r
  31         bsSY1AbSTIR2HqrGM11eDe7aoHOGgbvGCrFtfmoeZ/cNrhFzbHGwDU+7/rLCsaBOswl2\r
  32         7UtG1V0nevlUQxaxjFQB4q33HeLGQjqtJoHVD6KASHtAFUlAdgAD3sg93anhs3/gKfwK\r
  33         PQMoMSZ3VH0QEE8bXN02IWHTdH8ftXA25TlgqFFkdSVYJ5pGD09jUiWJ9PFrFwn91SFU\r
  34         dhKg==\r
  35 Received: by 10.112.83.133 with SMTP id q5mr1424247lby.40.1354311822497;\r
  36         Fri, 30 Nov 2012 13:43:42 -0800 (PST)\r
  37 Received: from localhost (dsl-hkibrasgw4-fe51df00-27.dhcp.inet.fi.\r
  38         [80.223.81.27])\r
  39         by mx.google.com with ESMTPS id y10sm2519789lbg.4.2012.11.30.13.43.40\r
  40         (version=SSLv3 cipher=OTHER); Fri, 30 Nov 2012 13:43:41 -0800 (PST)\r
  41 From: Jani Nikula <jani@nikula.org>\r
  42 To: david@tethera.net, notmuch@notmuchmail.org\r
  43 Subject: Re: [Patch v2 01/17] hex-escape: (en|de)code strings to/from\r
  44         restricted character set\r
  45 In-Reply-To: <1353792017-31459-2-git-send-email-david@tethera.net>\r
  46 References: <1353792017-31459-1-git-send-email-david@tethera.net>\r
  47         <1353792017-31459-2-git-send-email-david@tethera.net>\r
  48 User-Agent: Notmuch/0.14+124~g3b17402 (http://notmuchmail.org) Emacs/23.4.1\r
  49         (i686-pc-linux-gnu)\r
  50 Date: Fri, 30 Nov 2012 23:43:38 +0200\r
  51 Message-ID: <87wqx2ix6d.fsf@nikula.org>\r
  52 MIME-Version: 1.0\r
  53 Content-Type: text/plain; charset=us-ascii\r
  54 X-Gm-Message-State:\r
  55  ALoCoQkPIg2uqputwiYaIbCItPz1RTVWy81b65SwMZ7wTonNRXDIfJbuu2+rhWn/SnmdUzuYs2gg\r
  56 Cc: David Bremner <bremner@debian.org>\r
  57 X-BeenThere: notmuch@notmuchmail.org\r
  58 X-Mailman-Version: 2.1.13\r
  59 Precedence: list\r
  60 List-Id: "Use and development of the notmuch mail system."\r
  61         <notmuch.notmuchmail.org>\r
  62 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
  63         <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
  64 List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
  65 List-Post: <mailto:notmuch@notmuchmail.org>\r
  66 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
  67 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
  68         <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
  69 X-List-Received-Date: Fri, 30 Nov 2012 21:43:49 -0000\r
  70 \r
  71 On Sat, 24 Nov 2012, david@tethera.net wrote:\r
  72 > From: David Bremner <bremner@debian.org>\r
  73 >\r
  74 > The character set is chosen to be suitable for pathnames, and the same\r
  75 > as that used by contrib/nmbug\r
  76 >\r
  77 > [With additions by Jani Nikula]\r
  78 \r
  79 So it must be good. ;)\r
  80 \r
  81 Just a couple of nitpicks below.\r
  82 \r
  83 BR,\r
  84 Jani.\r
  85 \r
  86 > ---\r
  87 >  util/Makefile.local |    2 +-\r
  88 >  util/hex-escape.c   |  168 +++++++++++++++++++++++++++++++++++++++++++++++++++\r
  89 >  util/hex-escape.h   |   41 +++++++++++++\r
  90 >  3 files changed, 210 insertions(+), 1 deletion(-)\r
  91 >  create mode 100644 util/hex-escape.c\r
  92 >  create mode 100644 util/hex-escape.h\r
  93 >\r
  94 > diff --git a/util/Makefile.local b/util/Makefile.local\r
  95 > index c7cae61..3ca623e 100644\r
  96 > --- a/util/Makefile.local\r
  97 > +++ b/util/Makefile.local\r
  98 > @@ -3,7 +3,7 @@\r
  99 >  dir := util\r
 100 >  extra_cflags += -I$(srcdir)/$(dir)\r
 101 >  \r
 102 > -libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c\r
 103 > +libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c\r
 104 >  \r
 105 >  libutil_modules := $(libutil_c_srcs:.c=.o)\r
 106 >  \r
 107 > diff --git a/util/hex-escape.c b/util/hex-escape.c\r
 108 > new file mode 100644\r
 109 > index 0000000..d8905d0\r
 110 > --- /dev/null\r
 111 > +++ b/util/hex-escape.c\r
 112 > @@ -0,0 +1,168 @@\r
 113 > +/* hex-escape.c -  Manage encoding and decoding of byte strings into path names\r
 114 > + *\r
 115 > + * Copyright (c) 2011 David Bremner\r
 116 > + *\r
 117 > + * This program is free software: you can redistribute it and/or modify\r
 118 > + * it under the terms of the GNU General Public License as published by\r
 119 > + * the Free Software Foundation, either version 3 of the License, or\r
 120 > + * (at your option) any later version.\r
 121 > + *\r
 122 > + * This program is distributed in the hope that it will be useful,\r
 123 > + * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
 124 > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
 125 > + * GNU General Public License for more details.\r
 126 > + *\r
 127 > + * You should have received a copy of the GNU General Public License\r
 128 > + * along with this program.  If not, see http://www.gnu.org/licenses/ .\r
 129 > + *\r
 130 > + * Author: David Bremner <david@tethera.net>\r
 131 > + */\r
 132 > +\r
 133 > +#include <assert.h>\r
 134 > +#include <string.h>\r
 135 > +#include <talloc.h>\r
 136 > +#include <ctype.h>\r
 137 > +#include "error_util.h"\r
 138 > +#include "hex-escape.h"\r
 139 > +\r
 140 > +static const size_t default_buf_size = 1024;\r
 141 > +\r
 142 > +static const char *output_charset =\r
 143 > +    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,";\r
 144 > +\r
 145 > +static const char escape_char = '%';\r
 146 > +\r
 147 > +static int\r
 148 > +is_output (char c)\r
 149 > +{\r
 150 > +    return (strchr (output_charset, c) != NULL);\r
 151 > +}\r
 152 > +\r
 153 > +static int\r
 154 > +maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size)\r
 155 > +{\r
 156 > +    if (*out_size < needed) {\r
 157 > +\r
 158 > +     if (*out == NULL)\r
 159 > +         *out = talloc_size (ctx, needed);\r
 160 > +     else\r
 161 > +         *out = talloc_realloc (ctx, *out, char, needed);\r
 162 > +\r
 163 > +     if (*out == NULL)\r
 164 > +         return 0;\r
 165 > +\r
 166 > +     *out_size = needed;\r
 167 > +    }\r
 168 > +    return 1;\r
 169 > +}\r
 170 > +\r
 171 > +hex_status_t\r
 172 > +hex_encode (void *ctx, const char *in, char **out, size_t *out_size)\r
 173 > +{\r
 174 > +\r
 175 > +    const unsigned char *p;\r
 176 \r
 177 The casts to unsigned char * below bother me. Perhaps this should be\r
 178 just const char *, with the only cast being in the sprintf?\r
 179 \r
 180 > +    char *q;\r
 181 > +\r
 182 > +    size_t escape_count = 0;\r
 183 > +    size_t len = 0;\r
 184 > +    size_t needed;\r
 185 > +\r
 186 > +    assert (ctx); assert (in); assert (out); assert (out_size);\r
 187 > +\r
 188 > +    for (p = (unsigned char *) in; *p; p++) {\r
 189 > +     escape_count += (!is_output (*p));\r
 190 > +     len++;\r
 191 > +    }\r
 192 > +\r
 193 > +    needed = len + escape_count * 2 + 1;\r
 194 \r
 195 I wonder if it would be clearer if escape_count and len were ditched,\r
 196 and the for loop just did:\r
 197 \r
 198         needed += is_output (*p) ? 1 : 3;\r
 199 \r
 200 and another needed++ after the loop for NUL. And maybe s/needed/len/\r
 201 after that.\r
 202 \r
 203 > +\r
 204 > +    if (*out == NULL)\r
 205 > +     *out_size = 0;\r
 206 > +\r
 207 > +    if (!maybe_realloc (ctx, needed, out, out_size))\r
 208 > +     return HEX_OUT_OF_MEMORY;\r
 209 > +\r
 210 > +    q = *out;\r
 211 > +    p = (unsigned char *) in;\r
 212 > +\r
 213 > +    while (*p) {\r
 214 > +     if (is_output (*p)) {\r
 215 > +         *q++ = *p++;\r
 216 > +     } else {\r
 217 > +         sprintf (q, "%%%02x", *p++);\r
 218 > +         q += 3;\r
 219 > +     }\r
 220 > +    }\r
 221 > +\r
 222 > +    *q = '\0';\r
 223 > +    return HEX_SUCCESS;\r
 224 > +}\r
 225 > +\r
 226 > +/* Hex decode 'in' to 'out'.\r
 227 > + *\r
 228 > + * This must succeed for in == out to support hex_decode_inplace().\r
 229 > + */\r
 230 > +static hex_status_t\r
 231 > +hex_decode_internal (const char *in, unsigned char *out)\r
 232 > +{\r
 233 > +    char buf[3];\r
 234 > +\r
 235 > +    while (*in) {\r
 236 > +     if (*in == escape_char) {\r
 237 > +         char *endp;\r
 238 > +\r
 239 > +         /* This also handles unexpected end-of-string. */\r
 240 > +         if (!isxdigit ((unsigned char) in[1]) ||\r
 241 > +             !isxdigit ((unsigned char) in[2]))\r
 242 > +             return HEX_SYNTAX_ERROR;\r
 243 > +\r
 244 > +         buf[0] = in[1];\r
 245 > +         buf[1] = in[2];\r
 246 > +         buf[2] = '\0';\r
 247 > +\r
 248 > +         *out = strtoul (buf, &endp, 16);\r
 249 > +\r
 250 > +         if (endp != buf + 2)\r
 251 > +             return HEX_SYNTAX_ERROR;\r
 252 > +\r
 253 > +         in += 3;\r
 254 > +         out++;\r
 255 > +     } else {\r
 256 > +         *out++ = *in++;\r
 257 > +     }\r
 258 > +    }\r
 259 > +\r
 260 > +    *out = '\0';\r
 261 > +\r
 262 > +    return HEX_SUCCESS;\r
 263 > +}\r
 264 > +\r
 265 > +hex_status_t\r
 266 > +hex_decode_inplace (char *s)\r
 267 > +{\r
 268 > +    /* A decoded string is never longer than the encoded one, so it is\r
 269 > +     * safe to decode a string onto itself. */\r
 270 > +    return hex_decode_internal (s, (unsigned char *) s);\r
 271 > +}\r
 272 > +\r
 273 > +hex_status_t\r
 274 > +hex_decode (void *ctx, const char *in, char **out, size_t * out_size)\r
 275 > +{\r
 276 > +    const char *p;\r
 277 > +    size_t escape_count = 0;\r
 278 > +    size_t needed = 0;\r
 279 > +\r
 280 > +    assert (ctx); assert (in); assert (out); assert (out_size);\r
 281 > +\r
 282 > +    size_t len = strlen (in);\r
 283 > +\r
 284 > +    for (p = in; *p; p++)\r
 285 > +     escape_count += (*p == escape_char);\r
 286 > +\r
 287 > +    needed = len - escape_count * 2 + 1;\r
 288 \r
 289 Same as above for counting the needed size. It would also save scanning\r
 290 the input string twice (strlen and for loop).\r
 291 \r
 292 > +\r
 293 > +    if (!maybe_realloc (ctx, needed, out, out_size))\r
 294 > +     return HEX_OUT_OF_MEMORY;\r
 295 > +\r
 296 > +    return hex_decode_internal (in, (unsigned char *) *out);\r
 297 > +}\r
 298 > diff --git a/util/hex-escape.h b/util/hex-escape.h\r
 299 > new file mode 100644\r
 300 > index 0000000..5182042\r
 301 > --- /dev/null\r
 302 > +++ b/util/hex-escape.h\r
 303 > @@ -0,0 +1,41 @@\r
 304 > +#ifndef _HEX_ESCAPE_H\r
 305 > +#define _HEX_ESCAPE_H\r
 306 > +\r
 307 > +typedef enum hex_status {\r
 308 > +    HEX_SUCCESS = 0,\r
 309 > +    HEX_SYNTAX_ERROR,\r
 310 > +    HEX_OUT_OF_MEMORY\r
 311 > +} hex_status_t;\r
 312 > +\r
 313 > +/*\r
 314 > + * The API for hex_encode() and hex_decode() is modelled on that for\r
 315 > + * getline.\r
 316 > + *\r
 317 > + * If 'out' points to a NULL pointer a char array of the appropriate\r
 318 > + * size is allocated using talloc, and out_size is updated.\r
 319 > + *\r
  320 > + * If 'out' points to a non-NULL pointer, it is assumed to describe an\r
 321 > + * existing char array, with the size given in *out_size.  This array\r
 322 > + * may be resized by talloc_realloc if needed; in this case *out_size\r
 323 > + * will also be updated.\r
 324 > + *\r
 325 > + * Note that it is an error to pass a NULL pointer for any parameter\r
 326 > + * of these routines.\r
 327 > + */\r
 328 > +\r
 329 > +hex_status_t\r
 330 > +hex_encode (void *talloc_ctx, const char *in, char **out,\r
 331 > +            size_t *out_size);\r
 332 > +\r
 333 > +hex_status_t\r
 334 > +hex_decode (void *talloc_ctx, const char *in, char **out,\r
 335 > +            size_t *out_size);\r
 336 > +\r
 337 > +/*\r
 338 > + * Non-allocating hex decode to decode 's' in-place. The length of the\r
 339 > + * result is always equal to or shorter than the length of the\r
 340 > + * original.\r
 341 > + */\r
 342 > +hex_status_t\r
 343 > +hex_decode_inplace (char *s);\r
 344 > +#endif\r
 345 > -- \r
 346 > 1.7.10.4\r
 347 >\r
 348 > _______________________________________________\r
 349 > notmuch mailing list\r
 350 > notmuch@notmuchmail.org\r
 351 > http://notmuchmail.org/mailman/listinfo/notmuch\r