From 5155442fc282b5c3a9218024f7649e95e195de71 Mon Sep 17 00:00:00 2001 From: Mark Walters Date: Sun, 2 Feb 2014 18:24:09 +0000 Subject: [PATCH] Re: [PATCH v2 3/7] util: make sanitize string available in string util for reuse --- c8/0f717649c781b69e883435715f2de657b2c6f2 | 184 ++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 c8/0f717649c781b69e883435715f2de657b2c6f2 diff --git a/c8/0f717649c781b69e883435715f2de657b2c6f2 b/c8/0f717649c781b69e883435715f2de657b2c6f2 new file mode 100644 index 000000000..5a2bc47d5 --- /dev/null +++ b/c8/0f717649c781b69e883435715f2de657b2c6f2 @@ -0,0 +1,184 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 9717C431FC9 + for ; Sun, 2 Feb 2014 10:26:38 -0800 (PST) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -1.098 +X-Spam-Level: +X-Spam-Status: No, score=-1.098 tagged_above=-999 required=5 + tests=[DKIM_ADSP_CUSTOM_MED=0.001, FREEMAIL_FROM=0.001, + NML_ADSP_CUSTOM_MED=1.2, RCVD_IN_DNSWL_MED=-2.3] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id 59ykr-uGb85J for ; + Sun, 2 Feb 2014 10:26:33 -0800 (PST) +Received: from mail2.qmul.ac.uk (mail2.qmul.ac.uk [138.37.6.6]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 2AA2D431FC7 + for ; Sun, 2 Feb 2014 10:26:33 -0800 (PST) +Received: from smtp.qmul.ac.uk ([138.37.6.40]) + by mail2.qmul.ac.uk with esmtp (Exim 4.71) + (envelope-from ) + id 1WA1kV-0002cw-DA; Sun, 02 Feb 2014 18:26:31 +0000 +Received: from 93-97-24-31.zone5.bethere.co.uk ([93.97.24.31] helo=localhost) + by smtp.qmul.ac.uk with esmtpsa (TLSv1:AES128-SHA:128) (Exim 4.71) + (envelope-from ) + id 1WA1jc-0006ET-15; Sun, 02 Feb 2014 18:25:36 +0000 +From: Mark Walters +To: Jani Nikula , notmuch@notmuchmail.org +Subject: Re: [PATCH v2 3/7] util: make sanitize string available in string + util for reuse +In-Reply-To: + <6859d1e9de273e00101c10122cd8ca162cc3a542.1385825425.git.jani@nikula.org> +References: + <6859d1e9de273e00101c10122cd8ca162cc3a542.1385825425.git.jani@nikula.org> +User-Agent: Notmuch/0.15.2+484~gfb59956 (http://notmuchmail.org) Emacs/23.4.1 + (x86_64-pc-linux-gnu) +Date: Sun, 02 Feb 2014 18:24:09 +0000 +Message-ID: <871tzlqsom.fsf@qmul.ac.uk> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +X-Sender-Host-Address: 93.97.24.31 +X-QM-Geographic: According to ripencc, + this message was delivered by a machine in Britain (UK) (GB). +X-QM-SPAM-Info: Sender has good ham record. :) +X-QM-Body-MD5: a9f5328c5f40b5e0c904f105c00820f8 (of first 20000 bytes) +X-SpamAssassin-Score: 0.0 +X-SpamAssassin-SpamBar: / +X-SpamAssassin-Report: The QM spam filters have analysed this message to + determine if it is + spam. We require at least 5.0 points to mark a message as spam. + This message scored 0.0 points. Summary of the scoring: + * 0.0 FREEMAIL_FROM Sender email is commonly abused enduser mail + provider * (markwalters1009[at]gmail.com) + * 0.0 AWL AWL: From: address is in the auto white-list +X-QM-Scan-Virus: ClamAV says the message is clean +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Sun, 02 Feb 2014 18:26:38 -0000 + + +On Sat, 30 Nov 2013, Jani Nikula wrote: +> No functional changes. +> --- +> notmuch-search.c | 19 ------------------- +> util/string-util.c | 22 ++++++++++++++++++++++ +> util/string-util.h | 7 +++++++ +> 3 files changed, 29 insertions(+), 19 deletions(-) +> +> diff --git a/notmuch-search.c b/notmuch-search.c +> index 11cd6ee..8b6940a 100644 +> --- a/notmuch-search.c +> +++ b/notmuch-search.c +> @@ -30,25 +30,6 @@ typedef enum { +> OUTPUT_TAGS +> } output_t; +> +> -static char * +> -sanitize_string (const void *ctx, const char *str) +> -{ +> - char *out, *loop; +> - +> - if (NULL == str) +> - return NULL; +> - +> - loop = out = talloc_strdup (ctx, str); +> - +> - for (; *loop; loop++) { +> - if (*loop == '\t' || *loop == '\n') +> - *loop = ' '; +> - else if ((unsigned char)(*loop) < 32) +> - *loop = '?'; +> - } +> - return out; +> -} +> - +> /* Return two stable query strings that identify exactly the matched +> * and unmatched messages currently in thread. If there are no +> * matched or unmatched messages, the returned buffers will be +> diff --git a/util/string-util.c b/util/string-util.c +> index a5622d7..9e2f728 100644 +> --- a/util/string-util.c +> +++ b/util/string-util.c +> @@ -37,6 +37,28 @@ strtok_len (char *s, const char *delim, size_t *len) +> return *len ? s : NULL; +> } +> +> +char * +> +sanitize_string (const void *ctx, const char *str) +> +{ +> + char *out, *loop; +> + +> + if (! str) +> + return NULL; +> + +> + out = talloc_strdup (ctx, str); +> + if (! out) +> + return NULL; +> + +> + for (loop = out; *loop; loop++) { +> + if (*loop == '\t' || *loop == '\n') +> + *loop = ' '; +> + else if ((unsigned char)(*loop) < 32) +> + *loop = '?'; +> + } +> + +> + return out; +> +} +> + +> static int +> is_unquoted_terminator (unsigned char c) +> { +> diff --git a/util/string-util.h b/util/string-util.h +> index 0194607..228420d 100644 +> --- a/util/string-util.h +> +++ b/util/string-util.h +> @@ -19,6 +19,13 @@ +> +> char *strtok_len (char *s, const char *delim, size_t *len); +> +> +/* Return a talloced string with str sanitized. +> + * +> + * Whitespace (tabs and newlines) is replaced with spaces, +> + * non-printable characters with question marks. +> + */ + +A complete triviality but I would prefer "Whitespace characters (tabs +and newlines) are replaced with spaces..." just to emphasise that e.g. +multiple tabs are replaced by multiple spaces. + +Best wishes + +Mark + + + + + + +> +char *sanitize_string (const void *ctx, const char *str); +> + +> /* Construct a boolean term query with the specified prefix (e.g., +> * "id") and search term, quoting term as necessary. Specifically, if +> * term contains any non-printable ASCII characters, non-ASCII +> -- +> 1.8.4.2 +> +> _______________________________________________ +> notmuch mailing list +> notmuch@notmuchmail.org +> http://notmuchmail.org/mailman/listinfo/notmuch -- 2.26.2