f4/0a4519eabfaad7faaf74c5862e82e73757ab58

   1 Return-Path: <tomi.ollila@iki.fi>\r
   2 X-Original-To: notmuch@notmuchmail.org\r
   3 Delivered-To: notmuch@notmuchmail.org\r
   4 Received: from localhost (localhost [127.0.0.1])\r
   5         by olra.theworths.org (Postfix) with ESMTP id 9E965431FAF\r
   6         for <notmuch@notmuchmail.org>; Sat,  8 Sep 2012 06:38:29 -0700 (PDT)\r
   7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
   8 X-Spam-Flag: NO\r
   9 X-Spam-Score: 0\r
  10 X-Spam-Level: \r
  11 X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none]\r
  12         autolearn=disabled\r
  13 Received: from olra.theworths.org ([127.0.0.1])\r
  14         by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
  15         with ESMTP id Kj6MYN+0UJ1F for <notmuch@notmuchmail.org>;\r
  16         Sat,  8 Sep 2012 06:38:28 -0700 (PDT)\r
  17 Received: from guru.guru-group.fi (guru.guru-group.fi [46.183.73.34])\r
  18         by olra.theworths.org (Postfix) with ESMTP id 4D08B431FAE\r
  19         for <notmuch@notmuchmail.org>; Sat,  8 Sep 2012 06:38:28 -0700 (PDT)\r
  20 Received: from guru.guru-group.fi (localhost [IPv6:::1])\r
  21         by guru.guru-group.fi (Postfix) with ESMTP id 656061000E5;\r
  22         Sat,  8 Sep 2012 16:38:35 +0300 (EEST)\r
  23 From: Tomi Ollila <tomi.ollila@iki.fi>\r
  24 To: david@tethera.net, notmuch@notmuchmail.org\r
  25 Subject: Re: [Patch v3 5/6] test: add generator for random "stub" messages\r
  26 In-Reply-To: <1345382314-5330-6-git-send-email-david@tethera.net>\r
  27 References: <1345382314-5330-1-git-send-email-david@tethera.net>\r
  28         <1345382314-5330-6-git-send-email-david@tethera.net>\r
  29 User-Agent: Notmuch/0.14+11~gd9bf007 (http://notmuchmail.org) Emacs/24.2.1\r
  30         (x86_64-unknown-linux-gnu)\r
  31 X-Face: HhBM'cA~<r"^Xv\KRN0P{vn'Y"Kd;zg_y3S[4)KSN~s?O\"QPoL\r
  32         $[Xv_BD:i/F$WiEWax}R(MPS`^UaptOGD`*/=@\1lKoVa9tnrg0TW?"r7aRtgk[F\r
  33         !)g;OY^,BjTbr)Np:%c_o'jj,Z\r
  34 Date: Sat, 08 Sep 2012 16:38:35 +0300\r
  35 Message-ID: <m2wr04ocro.fsf@guru.guru-group.fi>\r
  36 MIME-Version: 1.0\r
  37 Content-Type: text/plain\r
  38 Cc: David Bremner <bremner@debian.org>\r
  39 X-BeenThere: notmuch@notmuchmail.org\r
  40 X-Mailman-Version: 2.1.13\r
  41 Precedence: list\r
  42 List-Id: "Use and development of the notmuch mail system."\r
  43         <notmuch.notmuchmail.org>\r
  44 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
  45         <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
  46 List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
  47 List-Post: <mailto:notmuch@notmuchmail.org>\r
  48 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
  49 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
  50         <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
  51 X-List-Received-Date: Sat, 08 Sep 2012 13:38:29 -0000\r
  52 \r
  53 On Sun, Aug 19 2012, david@tethera.net wrote:\r
  54 \r
  55 > From: David Bremner <bremner@debian.org>\r
  56 >\r
  57 > Initial use case is testing dump and restore, so we only have\r
  58 > message-ids and tags.\r
  59 >\r
  60 > The message ID's are nothing like RFC compliant, but it doesn't seem\r
  61 > any harder to roundtrip random UTF-8 strings than RFC-compliant ones.\r
  62 >\r
  63 > Tags are UTF-8, even though notmuch is in principle more generous than\r
  64 > that.\r
  65 > ---\r
  66 \r
  67 Mostly LGTM (the whole series). A few comments inline...\r
  68 \r
  69 Finally, 6/6 adds a known broken test -- when will we see this code\r
  70 taken into use and the broken test fixed :)\r
  71 \r
  72 >  test/.gitignore      |    1 +\r
  73 >  test/Makefile.local  |    9 +++\r
  74 >  test/basic           |    2 +-\r
  75 >  test/random-corpus.c |  202 ++++++++++++++++++++++++++++++++++++++++++++++++++\r
  76 >  4 files changed, 213 insertions(+), 1 deletion(-)\r
  77 >  create mode 100644 test/random-corpus.c\r
  78 \r
  79 [ ... ]\r
  80 \r
  81 >  \r
  82 > diff --git a/test/random-corpus.c b/test/random-corpus.c\r
  83 > new file mode 100644\r
  84 > index 0000000..8c5b559\r
  85 > --- /dev/null\r
  86 > +++ b/test/random-corpus.c\r
  87 \r
  88 [ ... ]\r
  89 \r
  90 > +\r
  91 > +/* Current largest UTF-32 value defined. Note that most of these will\r
  92 > + * be printed as boxes in most fonts.\r
  93 > + */\r
  94 \r
  95 Should we be talking about UTF-8 values? UTF-8 (currently) has the same\r
  96 limit.\r
  97 \r
  98 > +\r
  99 > +#define GLYPH_MAX 0x10FFFE\r
 100 > +\r
 101 > +static gunichar\r
 102 > +random_unichar ()\r
 103 > +{\r
 104 > +    int start = 1, stop = GLYPH_MAX;\r
 105 > +    int class = random() % 2;\r
 106 > +\r
 107 > +    /*\r
 108 > +     *  Choose about half ascii as test characters, as ascii\r
 109 > +     *  punctation and whitespace is the main cause of problems for\r
 110 > +     *  the (old) restore parser\r
 111 > +    */\r
 112 > +    switch (class) {\r
 113 > +    case 0:\r
 114 > +     /* ascii */\r
 115 > +     start = 0x01;\r
 116 > +     stop = 0x7f;\r
 117 > +     break;\r
 118 > +    case 1:\r
 119 > +     /* the rest of unicode */\r
 120 > +     start = 0x80;\r
 121 > +     stop = GLYPH_MAX;\r
 122 > +    }\r
 123 > +\r
 124 > +    if (start == stop)\r
 125 > +     return start;\r
 126 > +    else\r
 127 > +     return start + (random() % (stop - start + 1));\r
 128 > +}\r
 129 > +\r
 130 > +static char *\r
 131 > +random_utf8_string (void *ctx, size_t char_count)\r
 132 > +{\r
 133 > +\r
 134 > +    gchar *buf = NULL;\r
 135 > +    size_t buf_size = 0;\r
 136 > +\r
 137 > +    size_t offset = 0;\r
 138 > +\r
 139 > +    size_t i;\r
 140 > +\r
 141 > +    buf = talloc_realloc (ctx, NULL, gchar, char_count);\r
 142 > +    buf_size = char_count;\r
 143 > +\r
 144 > +    for (i = 0; i < char_count; i++) {\r
 145 > +     gunichar randomchar;\r
 146 > +     size_t written;\r
 147 > +\r
 148 > +     /* 6 for one glyph, one for null */\r
 149 > +     if (buf_size - offset < 8) {\r
 150 > +         buf_size += 16;\r
 151 > +         buf = talloc_realloc (ctx, buf, gchar, buf_size);\r
 152 \r
 153 This reallocation will hit many times, as originally there were just\r
 154 char_count bytes allocated -- this limit will probably get hit before\r
 155 halfway through the creation of the random string (half use 1 byte, the\r
 156 other half 2, 3 or 4 bytes, mostly 4, even though only half of the 4-byte\r
 157 range is used...)\r
 157 \r
 158 Maybe originally allocate char_count * 2 + 8 and, if a realloc is required,\r
 159 (char_count - i) * 2 + 8... or maybe better, just do the latter\r
 160 realloc and replace the first allocation with buf = NULL; buf_size = 0;\r
 161 \r
 162 Alternatively you could play with random states; calculate size,\r
 163 reset random state, alloc size + 1 and write chars.\r
 164 \r
 165 > +     }\r
 166 > +\r
 167 > +     randomchar = random_unichar();\r
 168 > +\r
 169 > +     written = g_unichar_to_utf8 (randomchar, buf + offset);\r
 170 > +\r
 171 > +     if (written <= 0) {\r
 172 > +         fprintf (stderr, "error converting to utf8\n");\r
 173 > +         exit (1);\r
 174 > +     }\r
 175 > +\r
 176 > +     offset += written;\r
 177 > +\r
 178 > +    }\r
 179 \r
 180 Above there is an extra newline. There are a few others in other\r
 181 files (at least after opening and before closing brace).\r
 182 Maybe uncrustify your source :)\r
 183 \r
 184 > +    buf[offset] = 0;\r
 185 > +    return buf;\r
 186 > +}\r
 187 > +\r
 188 \r
 189 [ ... ]\r
 190 \r
 191 \r
 192 Tomi\r
 193 \r