From: David Bremner Date: Fri, 4 Apr 2014 02:32:44 +0000 (+2100) Subject: Re: [Patch v6 1/6] dump: support gzipped and atomic output X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=867b8d7382088236693bf65b6ac8e10918d98801;p=notmuch-archives.git Re: [Patch v6 1/6] dump: support gzipped and atomic output --- diff --git a/0d/c85018a3e17a53c97532036b499401b75dc8fa b/0d/c85018a3e17a53c97532036b499401b75dc8fa new file mode 100644 index 000000000..b9fa0436c --- /dev/null +++ b/0d/c85018a3e17a53c97532036b499401b75dc8fa @@ -0,0 +1,373 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 8EF5F431FBD + for ; Thu, 3 Apr 2014 20:13:18 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: 0 +X-Spam-Level: +X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none] + autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id YJRyfhJPe09T for ; + Thu, 3 Apr 2014 20:13:11 -0700 (PDT) +Received: from mx.xen14.node3324.gplhost.com (gitolite.debian.net + [87.98.215.224]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 51C54431FAF + for ; Thu, 3 Apr 2014 20:13:11 -0700 (PDT) +Received: from remotemail by mx.xen14.node3324.gplhost.com with local (Exim + 4.72) (envelope-from ) + id 1WVtwF-0001DY-LD; Fri, 04 Apr 2014 02:33:03 +0000 +Received: (nullmailer pid 24864 invoked by uid 1000); Fri, 04 Apr 2014 + 02:32:44 -0000 +From: David Bremner +To: notmuch@notmuchmail.org +Subject: Re: [Patch v6 1/6] dump: support gzipped and atomic output +In-Reply-To: <1396554083-3892-2-git-send-email-david@tethera.net> +References: <1396554083-3892-1-git-send-email-david@tethera.net> + <1396554083-3892-2-git-send-email-david@tethera.net> +User-Agent: Notmuch/0.17+56~gb9c3d8e (http://notmuchmail.org) Emacs/24.3.1 + (x86_64-pc-linux-gnu) +Date: Thu, 03 Apr 2014 23:32:44 -0300 +Message-ID: <877g75rfhv.fsf@maritornes.cs.unb.ca> +MIME-Version: 1.0 +Content-Type: text/plain +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Fri, 04 Apr 2014 03:13:18 -0000 + +David Bremner writes: + +> The main goal is to support gzipped output for future internal +> calls (e.g. from notmuch-new) to notmuch_database_dump. + +Apologies if this later turns out to be duplicate. I'm not sure what +happened to the cover letter for this series. + +This supercedes + +id:1396401381-18128-1-git-send-email-david@tethera.net + +I tried to take care of Austin and Tomi's comments, but there were +several messages so hopefully I didn't miss anything. + +I ended up merging the "gzipped output" and "atomic output" patches; I +fought it for a bit, but I realized I would have to do some of the +proposed changes twice and throw away the first one. + +diff from v5 follows: + +diff --git a/INSTALL b/INSTALL +index df318fa..b543c50 100644 +--- a/INSTALL ++++ b/INSTALL +@@ -67,6 +67,9 @@ Talloc, and zlib which are each described below: + by Xapian, so if you installed that you will already have + zlib. You may need to install the zlib headers separately. + ++ Notmuch needs the transparent write feature of zlib introduced ++ in version 1.2.5.2 (Dec. 2011). ++ + zlib is available from http://zlib.net + + Building Documentation +diff --git a/configure b/configure +index d685ab3..1d624f7 100755 +--- a/configure ++++ b/configure +@@ -340,9 +340,9 @@ else + errors=$((errors + 1)) + fi + +-printf "Checking for zlib development files... " ++printf "Checking for zlib (>= 1.2.5.2)... " + have_zlib=0 +-if pkg-config --exists zlib; then ++if pkg-config --atleast-version=1.2.5.2 zlib; then + printf "Yes.\n" + have_zlib=1 + zlib_cflags=$(pkg-config --cflags zlib) +diff --git a/notmuch-dump.c b/notmuch-dump.c +index 05ed6b4..2a7252a 100644 +--- a/notmuch-dump.c ++++ b/notmuch-dump.c +@@ -140,7 +140,7 @@ notmuch_database_dump (notmuch_database_t *notmuch, + tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name); + outfd = mkstemp (tempname); + } else { +- outfd = fileno (stdout); ++ outfd = dup (STDOUT_FILENO); + } + + if (outfd < 0) { +@@ -153,6 +153,9 @@ notmuch_database_dump (notmuch_database_t *notmuch, + if (output == NULL) { + fprintf (stderr, "Error opening %s for (gzip) writing: %s\n", + name_for_error, strerror (errno)); ++ if (close (outfd)) ++ fprintf (stderr, "Error closing %s during shutdown: %s\n", ++ name_for_error, strerror (errno)); + goto DONE; + } + +@@ -165,22 +168,25 @@ notmuch_database_dump (notmuch_database_t *notmuch, + goto DONE; + } + +- ret = fdatasync (outfd); +- if (ret) { +- perror ("fdatasync"); +- goto DONE; +- } +- + if (output_file_name) { +- ret = gzclose_w (output); +- if (ret != Z_OK) { +- ret = EXIT_FAILURE; ++ ret = fdatasync (outfd); ++ if (ret) { ++ fprintf (stderr, "Error syncing %s to disk: %s\n", ++ name_for_error, strerror (errno)); + goto DONE; + } ++ } + ++ if (gzclose_w (output) != Z_OK) { ++ ret = EXIT_FAILURE; ++ goto DONE; ++ } ++ ++ if (output_file_name) { + ret = rename (tempname, output_file_name); + if (ret) { +- perror ("rename"); ++ fprintf (stderr, "Error renaming %s to %s: %s\n", ++ tempname, output_file_name, strerror (errno)); + goto DONE; + } + +diff --git a/notmuch-new.c b/notmuch-new.c +index e0431c6..d269c7c 100644 +--- a/notmuch-new.c ++++ b/notmuch-new.c +@@ -997,7 +997,7 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]) + * relatively small. */ + + const char *backup_name = +- talloc_asprintf (notmuch, "%s/backup-%04d-%02d-%02d-%02d:%02d:%02d.gz", ++ talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz", + dot_notmuch_path, + gm_time->tm_year + 1900, + gm_time->tm_mon + 1, +@@ -1009,12 +1009,12 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]) + if (add_files_state.verbosity >= VERBOSITY_NORMAL) { + printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n"); + printf ("This process is safe to interrupt.\n"); +- printf ("Backing up tags to %s\n", backup_name); ++ printf ("Backing up tags to %s...\n", backup_name); + } + + if (notmuch_database_dump (notmuch, backup_name, "", + DUMP_FORMAT_BATCH_TAG, TRUE)) { +- fprintf (stderr, "Backup failed. Aborting upgrade"); ++ fprintf (stderr, "Backup failed. Aborting upgrade."); + return EXIT_FAILURE; + } + +diff --git a/notmuch-restore.c b/notmuch-restore.c +index 86bce20..eb5b7b2 100644 +--- a/notmuch-restore.c ++++ b/notmuch-restore.c +@@ -132,7 +132,6 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) + gzFile input; + char *line = NULL; + void *line_ctx = NULL; +- size_t line_size; + ssize_t line_len; + + int ret = 0; +@@ -166,8 +165,16 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) + + if (input_file_name) + input = gzopen (input_file_name, "r"); +- else +- input = gzdopen (fileno (stdin), "r"); ++ else { ++ int infd = dup (STDIN_FILENO); ++ if (infd < 0) { ++ fprintf (stderr, "Error duping stdin\n"); ++ return EXIT_FAILURE; ++ } ++ input = gzdopen (infd, "r"); ++ if (! input) ++ close (infd); ++ } + + if (input == NULL) { + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n", +@@ -189,7 +196,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) + do { + util_status_t status; + +- status = gz_getline (line_ctx, &line, &line_size, &line_len, input); ++ status = gz_getline (line_ctx, &line, &line_len, input); + + /* empty input file not considered an error */ + if (status == UTIL_EOF) +@@ -262,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) + if (ret) + break; + +- } while (gz_getline (line_ctx, &line, &line_size, &line_len, input) == UTIL_SUCCESS); ++ } while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS); + + if (line_ctx != NULL) + talloc_free (line_ctx); +@@ -272,8 +279,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) + + notmuch_database_destroy (notmuch); + +- if (input_file_name != NULL) +- gzclose_r (input); ++ gzclose_r (input); + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; + } +diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh +index 50d4d48..efe463e 100755 +--- a/test/T240-dump-restore.sh ++++ b/test/T240-dump-restore.sh +@@ -124,6 +124,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \ + sort > OUTPUT.$test_count + test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + ++test_begin_subtest "format=batch-tag, missing newline" ++printf "+a_tag_without_newline -- id:20091117232137.GA7669@griffis1.net" > IN ++notmuch restore --accumulate < IN ++notmuch dump id:20091117232137.GA7669@griffis1.net > OUT ++cat < EXPECTED +++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669@griffis1.net ++EOF ++test_expect_equal_file EXPECTED OUT ++ + test_begin_subtest "format=batch-tag, # round-trip" + notmuch dump --format=sup | sort > EXPECTED.$test_count + notmuch dump --format=batch-tag | notmuch restore --format=batch-tag +diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh +index 7a68ddf..7d5d5aa 100755 +--- a/test/T530-upgrade.sh ++++ b/test/T530-upgrade.sh +@@ -37,7 +37,7 @@ Your notmuch database has now been upgraded to database format version 2. + No new mail." + + test_begin_subtest "tag backup matches pre-upgrade dump" +-gunzip -c ${MAIL_DIR}/.notmuch/backup-*.gz | sort > backup-dump ++gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump + test_expect_equal_file pre-upgrade-dump backup-dump + + test_begin_subtest "folder: no longer matches in the middle of path" +diff --git a/util/zlib-extra.c b/util/zlib-extra.c +index cb1eba0..922ab52 100644 +--- a/util/zlib-extra.c ++++ b/util/zlib-extra.c +@@ -25,34 +25,36 @@ + + /* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */ + util_status_t +-gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read, +- gzFile stream) ++gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) + { +- size_t len = *bufsiz; + char *buf = *bufptr; ++ unsigned int len; + size_t offset = 0; + +- if (len == 0 || buf == NULL) { ++ if (buf) { ++ len = talloc_array_length (buf); ++ } else { + /* same as getdelim from gnulib */ + len = 120; +- buf = talloc_size (talloc_ctx, len); ++ buf = talloc_array (talloc_ctx, char, len); + if (buf == NULL) + return UTIL_OUT_OF_MEMORY; + } + + while (1) { + if (! gzgets (stream, buf + offset, len - offset)) { ++ /* Null indicates EOF or error */ + int zlib_status = 0; + (void) gzerror (stream, &zlib_status); + switch (zlib_status) { + case Z_OK: +- /* follow getline behaviour */ +- *bytes_read = -1; +- return UTIL_EOF; +- break; ++ /* no data read before EOF */ ++ if (offset == 0) ++ return UTIL_EOF; ++ else ++ goto SUCCESS; + case Z_ERRNO: + return UTIL_FILE; +- break; + default: + return UTIL_ERROR; + } +@@ -60,17 +62,16 @@ gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read + + offset += strlen (buf + offset); + +- if ( buf[offset - 1] == '\n' ) +- break; ++ if (buf[offset - 1] == '\n') ++ goto SUCCESS; + + len *= 2; + buf = talloc_realloc (talloc_ctx, buf, char, len); + if (buf == NULL) + return UTIL_OUT_OF_MEMORY; + } +- ++ SUCCESS: + *bufptr = buf; +- *bufsiz = len; + *bytes_read = offset; + return UTIL_SUCCESS; + } +diff --git a/util/zlib-extra.h b/util/zlib-extra.h +index ed46ac1..ed67115 100644 +--- a/util/zlib-extra.h ++++ b/util/zlib-extra.h +@@ -1,11 +1,11 @@ + #ifndef _ZLIB_EXTRA_H + #define _ZLIB_EXTRA_H + +-#include + #include "util.h" ++#include ++ + /* Like getline, but read from a gzFile. Allocation is with talloc */ + util_status_t +-gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read, +- gzFile stream); ++gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream); + + #endif