From: Austin Clements Date: Fri, 4 Apr 2014 21:56:25 +0000 (+2000) Subject: Re: [Patch v6 4/6] restore: transparently support gzipped input X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=827b03ab4238220e18499d4a5b8923686f93ef4d;p=notmuch-archives.git Re: [Patch v6 4/6] restore: transparently support gzipped input --- diff --git a/4a/a9cacf37b18b765420f44e34fba0e32b9ebd2b b/4a/a9cacf37b18b765420f44e34fba0e32b9ebd2b new file mode 100644 index 000000000..91a673ca3 --- /dev/null +++ b/4a/a9cacf37b18b765420f44e34fba0e32b9ebd2b @@ -0,0 +1,250 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id AFBB1431FBF + for ; Fri, 4 Apr 2014 14:56:35 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: -0.7 +X-Spam-Level: +X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5 + tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id LapDbwHj2+Ig for ; + Fri, 4 Apr 2014 14:56:30 -0700 (PDT) +Received: from dmz-mailsec-scanner-4.mit.edu (dmz-mailsec-scanner-4.mit.edu + [18.9.25.15]) + (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 29E2B431FAF + for ; Fri, 4 Apr 2014 14:56:30 -0700 (PDT) +X-AuditID: 1209190f-f790b6d000000c3a-62-533f2a8d4372 +Received: from mailhub-auth-2.mit.edu ( [18.7.62.36]) + (using TLS with cipher AES256-SHA (256/256 bits)) + (Client did not present a certificate) + by dmz-mailsec-scanner-4.mit.edu (Symantec Messaging Gateway) with SMTP + id 47.49.03130.D8A2F335; Fri, 4 Apr 2014 17:56:29 -0400 (EDT) +Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11]) + by mailhub-auth-2.mit.edu (8.13.8/8.9.2) with ESMTP id 
s34LuRDG010027; + Fri, 4 Apr 2014 17:56:28 -0400 +Received: from awakening.csail.mit.edu (awakening.csail.mit.edu [18.26.4.91]) + (authenticated bits=0) + (User authenticated as amdragon@ATHENA.MIT.EDU) + by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id s34LuP2r013654 + (version=TLSv1/SSLv3 cipher=DHE-RSA-AES128-SHA bits=128 verify=NOT); + Fri, 4 Apr 2014 17:56:27 -0400 +Received: from amthrax by awakening.csail.mit.edu with local (Exim 4.80) + (envelope-from ) + id 1WWC65-0004DI-N6; Fri, 04 Apr 2014 17:56:25 -0400 +Date: Fri, 4 Apr 2014 17:56:25 -0400 +From: Austin Clements +To: David Bremner +Subject: Re: [Patch v6 4/6] restore: transparently support gzipped input +Message-ID: <20140404215625.GA15472@mit.edu> +References: <1396554083-3892-1-git-send-email-david@tethera.net> + <1396554083-3892-5-git-send-email-david@tethera.net> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Disposition: inline +In-Reply-To: <1396554083-3892-5-git-send-email-david@tethera.net> +User-Agent: Mutt/1.5.21 (2010-09-15) +X-Brightmail-Tracker: + H4sIAAAAAAAAA+NgFmpkleLIzCtJLcpLzFFi42IRYrdT0e3Vsg826DrNY3GjtZvR4vrNmcwO + TB7PVt1i9thy6D1zAFMUl01Kak5mWWqRvl0CV0bP7gtsBVN0Kw5d3MPcwPhNqYuRk0NCwETi + /MTrTBC2mMSFe+vZQGwhgdlMEj96i7oYuYDsDYwST69+ZYdInGKSOLlBDCKxhFHi0NqprCAJ + FgEViUNznzGD2GwCGhLb9i9nBLFFBFQlrm6bDDaVWUBa4tvvZrBtwgIeErvfrAWr5xXQkdgw + aztQDQfQ0HKJp8tzIcKCEidnPmGBaNWSuPHvJRNICciY5f84QMKcAo4Sz658AJsiCnTBlJPb + 2CYwCs1C0j0LSfcshO4FjMyrGGVTcqt0cxMzc4pTk3WLkxPz8lKLdE30cjNL9FJTSjcxgkKa + U5J/B+O3g0qHGAU4GJV4eDt22AULsSaWFVfmHmKU5GBSEuXtUrAPFuJLyk+pzEgszogvKs1J + LT7EKMHBrCTCK6MKlONNSaysSi3Kh0lJc7AoifO+tbYKFhJITyxJzU5NLUgtgsnKcHAoSfBO + 0wRqFCxKTU+tSMvMKUFIM3FwggznARp+DqSGt7ggMbc4Mx0if4pRUUqcNwAkIQCSyCjNg+uF + pZxXjOJArwjzrgKp4gGmK7juV0CDmYAGN4TZgQwuSURISTUwZpysXdPuv0xss49+p9yZ52sy + mrO4/GIOs23pdze49Oj1wanNUSI+XsnZLXd/mDOeFdh+eGsiz2KRvdzCntp895mjV/bpfG6P + fMC9tV87+UGjisihX7pnVnxcMI/7ih73CrHCd/ObzNc0XA2znrN92ezfumdyrrxe1vBqq7yf + 
uNaJZUHlcdETlViKMxINtZiLihMBOQFFqxQDAAA= +Cc: notmuch@notmuchmail.org +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Fri, 04 Apr 2014 21:56:35 -0000 + +Quoth David Bremner on Apr 03 at 4:41 pm: +> We rely completely on zlib to do the right thing in detecting gzipped +> input. Since our dump format is chosen to be 7 bit ascii, this should +> be fine. +> --- +> doc/man1/notmuch-restore.rst | 8 ++++++++ +> notmuch-restore.c | 41 ++++++++++++++++++++++++++--------------- +> test/T240-dump-restore.sh | 14 ++++++++++++++ +> 3 files changed, 48 insertions(+), 15 deletions(-) +> +> diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst +> index d6cf19a..936b138 100644 +> --- a/doc/man1/notmuch-restore.rst +> +++ b/doc/man1/notmuch-restore.rst +> @@ -50,6 +50,14 @@ Supported options for **restore** include +> format, this heuristic, based the fact that batch-tag format +> contains no parentheses, should be accurate. +> +> +GZIPPED INPUT +> +============= +> + +> +\ **notmuch restore** will detect if the input is compressed in +> +**gzip(1)** format and automatically decompress it while reading. This +> +detection does not depend on file naming and in particular works for +> +standard input. 
+> + +> SEE ALSO +> ======== +> +> diff --git a/notmuch-restore.c b/notmuch-restore.c +> index c54d513..eb5b7b2 100644 +> --- a/notmuch-restore.c +> +++ b/notmuch-restore.c +> @@ -22,6 +22,7 @@ +> #include "hex-escape.h" +> #include "tag-util.h" +> #include "string-util.h" +> +#include "zlib-extra.h" +> +> static regex_t regex; +> +> @@ -128,10 +129,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) +> tag_op_list_t *tag_ops; +> +> char *input_file_name = NULL; +> - FILE *input = stdin; +> + gzFile input; +> char *line = NULL; +> void *line_ctx = NULL; +> - size_t line_size; +> ssize_t line_len; +> +> int ret = 0; +> @@ -163,13 +163,23 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) +> if (! accumulate) +> flags |= TAG_FLAG_REMOVE_ALL; +> +> - if (input_file_name) { +> - input = fopen (input_file_name, "r"); +> - if (input == NULL) { +> - fprintf (stderr, "Error opening %s for reading: %s\n", +> - input_file_name, strerror (errno)); +> + if (input_file_name) +> + input = gzopen (input_file_name, "r"); +> + else { +> + int infd = dup (STDIN_FILENO); +> + if (infd < 0) { +> + fprintf (stderr, "Error duping stdin\n"); +> return EXIT_FAILURE; +> } +> + input = gzdopen (infd, "r"); +> + if (! input) +> + close (infd); +> + } +> + +> + if (input == NULL) { +> + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n", +> + input_file_name ? input_file_name : "stdin", strerror (errno)); + +There's a sketchy line about errno in the gz(d)open docs: "On error, +gzopen() *may* set the global variable errno to indicate the error." +(emphasis mine). This suggests we should set errno to 0 before the +calls to gz(d)open above. 
+ +> + return EXIT_FAILURE; +> } +> +> if (opt_index < argc) { +> @@ -184,12 +194,17 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) +> } +> +> do { +> - line_len = getline (&line, &line_size, input); +> + util_status_t status; +> + +> + status = gz_getline (line_ctx, &line, &line_len, input); +> +> /* empty input file not considered an error */ +> - if (line_len < 0) +> + if (status == UTIL_EOF) +> return EXIT_SUCCESS; +> +> + if (status) + +Will this lead to a silent exit failure if there's a problem with +decompression? This suggests we should have a UTIL_GZERROR that tells +the caller to consult gzerror for the error message. (Though this is +still an improvement over the original code, which would silently +succeed when getline failed!) + +> + return EXIT_FAILURE; +> + +> } while ((line_len == 0) || +> (line[0] == '#') || +> /* the cast is safe because we checked about for line_len < 0 */ +> @@ -254,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) +> if (ret) +> break; +> +> - } while ((line_len = getline (&line, &line_size, input)) != -1); +> + } while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS); + +It looks like a gz_getline error here will cause restore to stop and +claim that the restore was successful. (The original code has this +problem, too.) + +> +> if (line_ctx != NULL) +> talloc_free (line_ctx); +> @@ -262,13 +277,9 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) +> if (input_format == DUMP_FORMAT_SUP) +> regfree (&regex); +> +> - if (line) +> - free (line); +> - +> notmuch_database_destroy (notmuch); +> +> - if (input != stdin) +> - fclose (input); +> + gzclose_r (input); +> +> return ret ? 
EXIT_FAILURE : EXIT_SUCCESS; +> } +> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh +> index b6d8602..efe463e 100755 +> --- a/test/T240-dump-restore.sh +> +++ b/test/T240-dump-restore.sh +> @@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz +> gunzip dump-gzip-outfile.gz +> test_expect_equal_file dump.expected dump-gzip-outfile +> +> +test_begin_subtest "restoring gzipped stdin" +> +notmuch dump --gzip --output=backup.gz +> +notmuch tag +new_tag '*' +> +notmuch restore < backup.gz +> +notmuch dump --output=dump.actual +> +test_expect_equal_file dump.expected dump.actual +> + +> +test_begin_subtest "restoring gzipped file" +> +notmuch dump --gzip --output=backup.gz +> +notmuch tag +new_tag '*' +> +notmuch restore --input=backup.gz +> +notmuch dump --output=dump.actual +> +test_expect_equal_file dump.expected dump.actual +> + +> # Note, we assume all messages from cworth have a message-id +> # containing cworth.org +>