From 9311e182e56d2629e0e5fccb906d00103818dedc Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 3 Apr 2014 16:43:38 +2000 Subject: [PATCH] Re: [Patch v5 3/6] util: add gz_readline --- 7c/bc6d3e4b41d594c5cff12575dbe629553cb3db | 283 ++++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 7c/bc6d3e4b41d594c5cff12575dbe629553cb3db diff --git a/7c/bc6d3e4b41d594c5cff12575dbe629553cb3db b/7c/bc6d3e4b41d594c5cff12575dbe629553cb3db new file mode 100644 index 000000000..d1e5e0ad3 --- /dev/null +++ b/7c/bc6d3e4b41d594c5cff12575dbe629553cb3db @@ -0,0 +1,283 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 1DAFF431FBF + for ; Wed, 2 Apr 2014 13:43:54 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id HNJ+rtZaPRll for ; + Wed, 2 Apr 2014 13:43:46 -0700 (PDT) +Received: from dmz-mailsec-scanner-1.mit.edu (dmz-mailsec-scanner-1.mit.edu + [18.9.25.12]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 4043E431FAE + for ; Wed, 2 Apr 2014 13:43:46 -0700 (PDT) +X-AuditID: 1209190c-f794a6d000000c27-bf-533c767fe4f3 +Received: from mailhub-auth-1.mit.edu ( [18.9.21.35]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-1.mit.edu (Symantec Messaging Gateway) with SMTP + id 76.64.03111.F767C335; Wed, 2 Apr 2014 16:43:43 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-1.mit.edu 
(8.13.8/8.9.2) with ESMTP id s32KhffS031181; + Wed, 2 Apr 2014 16:43:42 -0400 +Received: from awakening.csail.mit.edu (awakening.csail.mit.edu [18.26.4.91]) + (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s32Khc8G004820 + (version=TLSv1/SSLv3 cipher=DHE-RSA-AES128-SHA bits=128 verify=NOT); + Wed, 2 Apr 2014 16:43:40 -0400 +Received: from amthrax by awakening.csail.mit.edu with local (Exim 4.80) + (envelope-from ) + id 1WVS0Y-0001dl-H8; Wed, 02 Apr 2014 16:43:38 -0400 +Date: Wed, 2 Apr 2014 16:43:38 -0400 +From: Austin Clements +To: Tomi Ollila +Subject: Re: [Patch v5 3/6] util: add gz_readline +Message-ID: <20140402204337.GA4678@mit.edu> +References: <1396401381-18128-1-git-send-email-david@tethera.net> + <1396401381-18128-4-git-send-email-david@tethera.net> + <20140402032644.GB25677@mit.edu> + +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline +In-Reply-To: +User-Agent: Mutt/1.5.21 (2010-09-15) +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFprNKsWRmVeSWpSXmKPExsUixCmqrFtfZhNs0LdD0+JGazejxfWbM5kt + 3qycx+rA7HH460IWj2erbjF7bDn0njmAOYrLJiU1J7MstUjfLoEr487U2WwFTcYVL5o2MjYw + PlPrYuTkkBAwkVj+rJ0ZwhaTuHBvPVsXIxeHkMBsJoktT79AORsYJa72nIVyTjFJzLm9kxXC + WcIocej4N3aQfhYBFYlHL5awgdhsAhoS2/YvZwSxRYDiD9rWs4LYzAJ2Eke+d4HFhQUMJf4v + e8YCYvMKaEss7JkIteEQo8TG82dYIRKCEidnPmGBaNaSuPHvJVMXIweQLS2x/B8HSJhTwECi + 4+UbsJmiQLumnNzGNoFRaBaS7llIumchdC9gZF7FKJuSW6Wbm5iZU5yarFucnJiXl1qka6iX + m1mil5pSuokRHO6SPDsY3xxUOsQowMGoxMNrIWUTLMSaWFZcmXuIUZKDSUmUN7cEKMSXlJ9S + mZFYnBFfVJqTWgz0BwezkgivShRQjjclsbIqtSgfJiXNwaIkzvvW2ipYSCA9sSQ1OzW1ILUI + JivDwaEkwdtQCtQoWJSanlqRlplTgpBm4uAEGc4DNHw2yGLe4oLE3OLMdIj8KUZdjk8b1jQy + CbHk5eelSonzXgApEgApyijNg5sDS1OvGMWB3hLmDQNZxwNMcXCTXgEtYQJawr3OCmRJSSJC + SqqB0WGvU9qefdUfr2s6bu0VXRR8bVHym4ozxd/Nn58SdmdZqC/86KzQR8N/c59yXnvNODdc + RPaj3cwtbClqtxvdUmYFlbrWrdrJE8gsuCwqIb2x9UjIullNDh6PPr5+tzvNuufeoa1nyu4w + 
Wv7fE7hU783tLGNek3M/34T/WKvAIzyxrWE6f9gtfiWW4oxEQy3mouJEAHDnz1IuAwAA +Cc: notmuch@notmuchmail.org +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Wed, 02 Apr 2014 20:43:54 -0000 + +Quoth Tomi Ollila on Apr 02 at 7:43 pm: +> On Wed, Apr 02 2014, Austin Clements wrote: +> +> > Quoth David Bremner on Apr 01 at 10:16 pm: +> >> The idea is to provide a more or less drop in replacement for readline +> >> to read from zlib/gzip streams. Take the opportunity to replace +> >> malloc with talloc. +> >> --- +> >> util/Makefile.local | 2 +- +> >> util/util.h | 12 +++++++++ +> >> util/zlib-extra.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++ +> >> util/zlib-extra.h | 11 ++++++++ +> >> 4 files changed, 100 insertions(+), 1 deletion(-) +> >> create mode 100644 util/util.h +> >> create mode 100644 util/zlib-extra.c +> >> create mode 100644 util/zlib-extra.h +> >> +> >> diff --git a/util/Makefile.local b/util/Makefile.local +> >> index 29c0ce6..e2a5b65 100644 +> >> --- a/util/Makefile.local +> >> +++ b/util/Makefile.local +> >> @@ -4,7 +4,7 @@ dir := util +> >> extra_cflags += -I$(srcdir)/$(dir) +> >> +> >> libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \ +> >> - $(dir)/string-util.c $(dir)/talloc-extra.c +> >> + $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c +> >> +> >> libutil_modules := $(libutil_c_srcs:.c=.o) +> >> +> >> diff --git a/util/util.h b/util/util.h +> >> new file mode 100644 +> >> index 0000000..8663cfc +> >> --- /dev/null +> >> +++ b/util/util.h +> >> @@ -0,0 +1,12 @@ +> >> +#ifndef _UTIL_H +> >> +#define _UTIL_H +> >> + +> >> +typedef enum util_status { +> >> + UTIL_SUCCESS = 0, +> >> + UTIL_ERROR = 1, +> >> + UTIL_OUT_OF_MEMORY, +> >> + UTIL_EOF, +> >> + UTIL_FILE, +> >> +} util_status_t; +> >> + +> >> +#endif 
+> >> diff --git a/util/zlib-extra.c b/util/zlib-extra.c +> >> new file mode 100644 +> >> index 0000000..cb1eba0 +> >> --- /dev/null +> >> +++ b/util/zlib-extra.c +> >> @@ -0,0 +1,76 @@ +> >> +/* zlib-extra.c - Extra or enhanced routines for compressed I/O. +> >> + * +> >> + * Copyright (c) 2014 David Bremner +> >> + * +> >> + * This program is free software: you can redistribute it and/or modify +> >> + * it under the terms of the GNU General Public License as published by +> >> + * the Free Software Foundation, either version 3 of the License, or +> >> + * (at your option) any later version. +> >> + * +> >> + * This program is distributed in the hope that it will be useful, +> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of +> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +> >> + * GNU General Public License for more details. +> >> + * +> >> + * You should have received a copy of the GNU General Public License +> >> + * along with this program. If not, see http://www.gnu.org/licenses/ . +> >> + * +> >> + * Author: David Bremner +> >> + */ +> >> + +> >> +#include "zlib-extra.h" +> >> +#include +> >> +#include +> >> +#include +> >> + +> >> +/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */ +> >> +util_status_t +> >> +gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read, +> > +> > Talloc chunks know their size, so rather than taking bufsize, use +> > talloc_get_size (or talloc_array_length if you switch to talloc array +> > functions below). +> +> Now you David have a chance to drop the bufsiz argument altogether, as the +> info is available in *bufptr:s talloc context... 
+> +> > +> >> + gzFile stream) +> >> +{ +> >> + size_t len = *bufsiz; +> >> + char *buf = *bufptr; +> >> + size_t offset = 0; +> >> + +> >> + if (len == 0 || buf == NULL) { +> >> + /* same as getdelim from gnulib */ +> >> + len = 120; +> > +> > This is presumably because glibc's malloc has an 8 byte header. Fun +> > fact: talloc has a 104 byte header (on 64-bit and including the malloc +> > header). +> +> hmm, what should we choose here? 152 ? Some bikeshedding on IRC ? + +How about we bikeshed about not bikeshedding about this? + +> >> + buf = talloc_size (talloc_ctx, len); +> >> + if (buf == NULL) +> >> + return UTIL_OUT_OF_MEMORY; +> >> + } +> >> + +> >> + while (1) { +> >> + if (! gzgets (stream, buf + offset, len - offset)) { +> >> + int zlib_status = 0; +> >> + (void) gzerror (stream, &zlib_status); +> >> + switch (zlib_status) { +> >> + case Z_OK: +> >> + /* follow getline behaviour */ +> >> + *bytes_read = -1; +> > +> > Is this really what getline does when the last line of a file isn't +> > \n-terminated? +> +> Maybe the previous call returned non-\n -terminated string and +> for this call there was 0 bytes left to return ??? + +But my point is that the previous call *won't* return a +non-\n-terminated string. If my file looks like "a\nb\nc", this will +return "a\n", then "b\n", and then fail (unless I'm following the code +wrong). This is *not* what getline does (the manpage is confusing, +but I just tested it). + +> Tomi +> +> >> + return UTIL_EOF; +> >> + break; + +Unnecessary break. + +> >> + case Z_ERRNO: +> >> + return UTIL_FILE; +> >> + break; + +And here. + +> >> + default: +> >> + return UTIL_ERROR; +> >> + } +> >> + } +> >> + +> >> + offset += strlen (buf + offset); +> >> + +> >> + if ( buf[offset - 1] == '\n' ) +> > +> > Too many spaces! +> > +> >> + break; +> >> + +> >> + len *= 2; +> >> + buf = talloc_realloc (talloc_ctx, buf, char, len); +> > +> > Or talloc_realloc_size, to match the initial talloc_size. 
+> > Alternatively, the initial talloc_size could be a talloc_array. +> > +> >> + if (buf == NULL) +> >> + return UTIL_OUT_OF_MEMORY; +> >> + } +> >> + +> >> + *bufptr = buf; +> >> + *bufsiz = len; +> >> + *bytes_read = offset; +> >> + return UTIL_SUCCESS; +> >> +} +> >> diff --git a/util/zlib-extra.h b/util/zlib-extra.h +> >> new file mode 100644 +> >> index 0000000..ed46ac1 +> >> --- /dev/null +> >> +++ b/util/zlib-extra.h +> >> @@ -0,0 +1,11 @@ +> >> +#ifndef _ZLIB_EXTRA_H +> >> +#define _ZLIB_EXTRA_H +> >> + +> >> +#include +> >> +#include "util.h" +> > +> > I'd put "util.h" first so we're more likely to catch missing header +> > dependencies (obviously util.h doesn't have any right now, but in the +> > future). +> > +> > Also, I'd put a blank line after the #includes. +> > +> >> +/* Like getline, but read from a gzFile. Allocation is with talloc */ +> >> +util_status_t +> >> +gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read, +> >> + gzFile stream); +> >> + +> >> +#endif -- 2.26.2