Re: [Patch v3 5/6] test: add generator for random "stub" messages

author Tomi Ollila <tomi.ollila@iki.fi>

Sat, 8 Sep 2012 13:38:35 +0000 (16:38 +0300)

committer W. Trevor King <wking@tremily.us>

Fri, 7 Nov 2014 17:49:26 +0000 (09:49 -0800)
author Tomi Ollila <tomi.ollila@iki.fi>
Sat, 8 Sep 2012 13:38:35 +0000 (16:38 +0300)
committer W. Trevor King <wking@tremily.us>
Fri, 7 Nov 2014 17:49:26 +0000 (09:49 -0800)
diff --git a/f4/0a4519eabfaad7faaf74c5862e82e73757ab58 b/f4/0a4519eabfaad7faaf74c5862e82e73757ab58

new file mode 100644 (file)

index 0000000..c831360
--- /dev/null
+++ b/f4/0a4519eabfaad7faaf74c5862e82e73757ab58
@@ -0,0 +1,193 @@
+Return-Path: <tomi.ollila@iki.fi>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+       by olra.theworths.org (Postfix) with ESMTP id 9E965431FAF\r
+       for <notmuch@notmuchmail.org>; Sat,  8 Sep 2012 06:38:29 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: 0\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none]\r
+       autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+       by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+       with ESMTP id Kj6MYN+0UJ1F for <notmuch@notmuchmail.org>;\r
+       Sat,  8 Sep 2012 06:38:28 -0700 (PDT)\r
+Received: from guru.guru-group.fi (guru.guru-group.fi [46.183.73.34])\r
+       by olra.theworths.org (Postfix) with ESMTP id 4D08B431FAE\r
+       for <notmuch@notmuchmail.org>; Sat,  8 Sep 2012 06:38:28 -0700 (PDT)\r
+Received: from guru.guru-group.fi (localhost [IPv6:::1])\r
+       by guru.guru-group.fi (Postfix) with ESMTP id 656061000E5;\r
+       Sat,  8 Sep 2012 16:38:35 +0300 (EEST)\r
+From: Tomi Ollila <tomi.ollila@iki.fi>\r
+To: david@tethera.net, notmuch@notmuchmail.org\r
+Subject: Re: [Patch v3 5/6] test: add generator for random "stub" messages\r
+In-Reply-To: <1345382314-5330-6-git-send-email-david@tethera.net>\r
+References: <1345382314-5330-1-git-send-email-david@tethera.net>\r
+       <1345382314-5330-6-git-send-email-david@tethera.net>\r
+User-Agent: Notmuch/0.14+11~gd9bf007 (http://notmuchmail.org) Emacs/24.2.1\r
+       (x86_64-unknown-linux-gnu)\r
+X-Face: HhBM'cA~<r"^Xv\KRN0P{vn'Y"Kd;zg_y3S[4)KSN~s?O\"QPoL\r
+       $[Xv_BD:i/F$WiEWax}R(MPS`^UaptOGD`*/=@\1lKoVa9tnrg0TW?"r7aRtgk[F\r
+       !)g;OY^,BjTbr)Np:%c_o'jj,Z\r
+Date: Sat, 08 Sep 2012 16:38:35 +0300\r
+Message-ID: <m2wr04ocro.fsf@guru.guru-group.fi>\r
+MIME-Version: 1.0\r
+Content-Type: text/plain\r
+Cc: David Bremner <bremner@debian.org>\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+       <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Sat, 08 Sep 2012 13:38:29 -0000\r
+\r
+On Sun, Aug 19 2012, david@tethera.net wrote:\r
+\r
+> From: David Bremner <bremner@debian.org>\r
+>\r
+> Initial use case is testing dump and restore, so we only have\r
+> message-ids and tags.\r
+>\r
+> The message ID's are nothing like RFC compliant, but it doesn't seem\r
+> any harder to roundtrip random UTF-8 strings than RFC-compliant ones.\r
+>\r
+> Tags are UTF-8, even though notmuch is in principle more generous than\r
+> that.\r
+> ---\r
+\r
+Mostly LGTM (the whole series). A few comments inline...\r
+\r
+Finally, 6/6 adds a known broken test -- when will we see this code\r
+taken into use and the broken test fixed? :)\r
+\r
+>  test/.gitignore      |    1 +\r
+>  test/Makefile.local  |    9 +++\r
+>  test/basic           |    2 +-\r
+>  test/random-corpus.c |  202 ++++++++++++++++++++++++++++++++++++++++++++++++++\r
+>  4 files changed, 213 insertions(+), 1 deletion(-)\r
+>  create mode 100644 test/random-corpus.c\r
+\r
+[ ... ]\r
+\r
+>  \r
+> diff --git a/test/random-corpus.c b/test/random-corpus.c\r
+> new file mode 100644\r
+> index 0000000..8c5b559\r
+> --- /dev/null\r
+> +++ b/test/random-corpus.c\r
+\r
+[ ... ]\r
+\r
+> +\r
+> +/* Current largest UTF-32 value defined. Note that most of these will\r
+> + * be printed as boxes in most fonts.\r
+> + */\r
+\r
+Should we be talking about UTF-8 values? UTF-8 (currently) has the same\r
+limit.\r
+\r
+> +\r
+> +#define GLYPH_MAX 0x10FFFE\r
+> +\r
+> +static gunichar\r
+> +random_unichar ()\r
+> +{\r
+> +    int start = 1, stop = GLYPH_MAX;\r
+> +    int class = random() % 2;\r
+> +\r
+> +    /*\r
+> +     *  Choose about half ascii as test characters, as ascii\r
+> +     *  punctation and whitespace is the main cause of problems for\r
+> +     *  the (old) restore parser\r
+> +    */\r
+> +    switch (class) {\r
+> +    case 0:\r
+> +    /* ascii */\r
+> +    start = 0x01;\r
+> +    stop = 0x7f;\r
+> +    break;\r
+> +    case 1:\r
+> +    /* the rest of unicode */\r
+> +    start = 0x80;\r
+> +    stop = GLYPH_MAX;\r
+> +    }\r
+> +\r
+> +    if (start == stop)\r
+> +    return start;\r
+> +    else\r
+> +    return start + (random() % (stop - start + 1));\r
+> +}\r
+> +\r
+> +static char *\r
+> +random_utf8_string (void *ctx, size_t char_count)\r
+> +{\r
+> +\r
+> +    gchar *buf = NULL;\r
+> +    size_t buf_size = 0;\r
+> +\r
+> +    size_t offset = 0;\r
+> +\r
+> +    size_t i;\r
+> +\r
+> +    buf = talloc_realloc (ctx, NULL, gchar, char_count);\r
+> +    buf_size = char_count;\r
+> +\r
+> +    for (i = 0; i < char_count; i++) {\r
+> +    gunichar randomchar;\r
+> +    size_t written;\r
+> +\r
+> +    /* 6 for one glyph, one for null */\r
+> +    if (buf_size - offset < 8) {\r
+> +        buf_size += 16;\r
+> +        buf = talloc_realloc (ctx, buf, gchar, buf_size);\r
+\r
+This reallocation will hit many times, as originally there were just\r
+char_count bytes allocated -- this limit will probably get hit before\r
+halfway through the creation of the random string (half use 1 byte, the\r
+other half 2, 3 or 4 bytes, mostly 4, even though only half of the 4-byte\r
+range is used...)\r
+\r
+Maybe originally allocate char_count * 2 + 8 and, if a realloc is required,\r
+(char_count - i) * 2 + 8... or maybe better, just do the latter\r
+realloc and replace the first allocation with buf = NULL; buf_size = 0;\r
+\r
+Alternatively you could play with random states; calculate size,\r
+reset random state, alloc size + 1 and write chars.\r
+\r
+> +    }\r
+> +\r
+> +    randomchar = random_unichar();\r
+> +\r
+> +    written = g_unichar_to_utf8 (randomchar, buf + offset);\r
+> +\r
+> +    if (written <= 0) {\r
+> +        fprintf (stderr, "error converting to utf8\n");\r
+> +        exit (1);\r
+> +    }\r
+> +\r
+> +    offset += written;\r
+> +\r
+> +    }\r
+\r
+Above there is an extra newline. There are a few others in other\r
+files (at least after opening and before closing braces).\r
+Maybe uncrustify your source :)\r
+\r
+> +    buf[offset] = 0;\r
+> +    return buf;\r
+> +}\r
+> +\r
+\r
+[ ... ]\r
+\r
+\r
+Tomi\r
+\r
author	Tomi Ollila <tomi.ollila@iki.fi>
	Sat, 8 Sep 2012 13:38:35 +0000 (16:38 +0300)
committer	W. Trevor King <wking@tremily.us>
	Fri, 7 Nov 2014 17:49:26 +0000 (09:49 -0800)