Return-Path: X-Original-To: notmuch@notmuchmail.org Delivered-To: notmuch@notmuchmail.org Received: from localhost (localhost [127.0.0.1]) by olra.theworths.org (Postfix) with ESMTP id 8EF5F431FBD for ; Thu, 3 Apr 2014 20:13:18 -0700 (PDT) X-Virus-Scanned: Debian amavisd-new at olra.theworths.org X-Spam-Flag: NO X-Spam-Score: 0 X-Spam-Level: X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none] autolearn=disabled Received: from olra.theworths.org ([127.0.0.1]) by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id YJRyfhJPe09T for ; Thu, 3 Apr 2014 20:13:11 -0700 (PDT) Received: from mx.xen14.node3324.gplhost.com (gitolite.debian.net [87.98.215.224]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by olra.theworths.org (Postfix) with ESMTPS id 51C54431FAF for ; Thu, 3 Apr 2014 20:13:11 -0700 (PDT) Received: from remotemail by mx.xen14.node3324.gplhost.com with local (Exim 4.72) (envelope-from ) id 1WVtwF-0001DY-LD; Fri, 04 Apr 2014 02:33:03 +0000 Received: (nullmailer pid 24864 invoked by uid 1000); Fri, 04 Apr 2014 02:32:44 -0000 From: David Bremner To: notmuch@notmuchmail.org Subject: Re: [Patch v6 1/6] dump: support gzipped and atomic output In-Reply-To: <1396554083-3892-2-git-send-email-david@tethera.net> References: <1396554083-3892-1-git-send-email-david@tethera.net> <1396554083-3892-2-git-send-email-david@tethera.net> User-Agent: Notmuch/0.17+56~gb9c3d8e (http://notmuchmail.org) Emacs/24.3.1 (x86_64-pc-linux-gnu) Date: Thu, 03 Apr 2014 23:32:44 -0300 Message-ID: <877g75rfhv.fsf@maritornes.cs.unb.ca> MIME-Version: 1.0 Content-Type: text/plain X-BeenThere: notmuch@notmuchmail.org X-Mailman-Version: 2.1.13 Precedence: list List-Id: "Use and development of the notmuch mail system." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 04 Apr 2014 03:13:18 -0000 David Bremner writes: > The main goal is to support gzipped output for future internal > calls (e.g. from notmuch-new) to notmuch_database_dump. Apologies if this later turns out to be duplicate. I'm not sure what happened to the cover letter for this series. This supercedes id:1396401381-18128-1-git-send-email-david@tethera.net I tried to take care of Austin and Tomi's comments, but there were several messages so hopefully I didn't miss anything. I ended up merging the "gzipped output" and "atomic output" patches; I fought it for a bit, but I realized I would have to do some of the proposed changes twice and throw away the first one. diff from v5 follows: diff --git a/INSTALL b/INSTALL index df318fa..b543c50 100644 --- a/INSTALL +++ b/INSTALL @@ -67,6 +67,9 @@ Talloc, and zlib which are each described below: by Xapian, so if you installed that you will already have zlib. You may need to install the zlib headers separately. + Notmuch needs the transparent write feature of zlib introduced + in version 1.2.5.2 (Dec. 2011). + zlib is available from http://zlib.net Building Documentation diff --git a/configure b/configure index d685ab3..1d624f7 100755 --- a/configure +++ b/configure @@ -340,9 +340,9 @@ else errors=$((errors + 1)) fi -printf "Checking for zlib development files... " +printf "Checking for zlib (>= 1.2.5.2)... " have_zlib=0 -if pkg-config --exists zlib; then +if pkg-config --atleast-version=1.2.5.2 zlib; then printf "Yes.\n" have_zlib=1 zlib_cflags=$(pkg-config --cflags zlib) diff --git a/notmuch-dump.c b/notmuch-dump.c index 05ed6b4..2a7252a 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -140,7 +140,7 @@ notmuch_database_dump (notmuch_database_t *notmuch, tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name); outfd = mkstemp (tempname); } else { - outfd = fileno (stdout); + outfd = dup (STDOUT_FILENO); } if (outfd < 0) { @@ -153,6 +153,9 @@ notmuch_database_dump (notmuch_database_t *notmuch, if (output == NULL) { fprintf (stderr, "Error opening %s for (gzip) writing: %s\n", name_for_error, strerror (errno)); + if (close (outfd)) + fprintf (stderr, "Error closing %s during shutdown: %s\n", + name_for_error, strerror (errno)); goto DONE; } @@ -165,22 +168,25 @@ notmuch_database_dump (notmuch_database_t *notmuch, goto DONE; } - ret = fdatasync (outfd); - if (ret) { - perror ("fdatasync"); - goto DONE; - } - if (output_file_name) { - ret = gzclose_w (output); - if (ret != Z_OK) { - ret = EXIT_FAILURE; + ret = fdatasync (outfd); + if (ret) { + fprintf (stderr, "Error syncing %s to disk: %s\n", + name_for_error, strerror (errno)); goto DONE; } + } + if (gzclose_w (output) != Z_OK) { + ret = EXIT_FAILURE; + goto DONE; + } + + if (output_file_name) { ret = rename (tempname, output_file_name); if (ret) { - perror ("rename"); + fprintf (stderr, "Error renaming %s to %s: %s\n", + tempname, output_file_name, strerror (errno)); goto DONE; } diff --git a/notmuch-new.c b/notmuch-new.c index e0431c6..d269c7c 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -997,7 +997,7 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]) * relatively small. */ const char *backup_name = - talloc_asprintf (notmuch, "%s/backup-%04d-%02d-%02d-%02d:%02d:%02d.gz", + talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz", dot_notmuch_path, gm_time->tm_year + 1900, gm_time->tm_mon + 1, @@ -1009,12 +1009,12 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[]) if (add_files_state.verbosity >= VERBOSITY_NORMAL) { printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n"); printf ("This process is safe to interrupt.\n"); - printf ("Backing up tags to %s\n", backup_name); + printf ("Backing up tags to %s...\n", backup_name); } if (notmuch_database_dump (notmuch, backup_name, "", DUMP_FORMAT_BATCH_TAG, TRUE)) { - fprintf (stderr, "Backup failed. Aborting upgrade"); + fprintf (stderr, "Backup failed. Aborting upgrade."); return EXIT_FAILURE; } diff --git a/notmuch-restore.c b/notmuch-restore.c index 86bce20..eb5b7b2 100644 --- a/notmuch-restore.c +++ b/notmuch-restore.c @@ -132,7 +132,6 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) gzFile input; char *line = NULL; void *line_ctx = NULL; - size_t line_size; ssize_t line_len; int ret = 0; @@ -166,8 +165,16 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) if (input_file_name) input = gzopen (input_file_name, "r"); - else - input = gzdopen (fileno (stdin), "r"); + else { + int infd = dup (STDIN_FILENO); + if (infd < 0) { + fprintf (stderr, "Error duping stdin\n"); + return EXIT_FAILURE; + } + input = gzdopen (infd, "r"); + if (! input) + close (infd); + } if (input == NULL) { fprintf (stderr, "Error opening %s for (gzip) reading: %s\n", @@ -189,7 +196,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) do { util_status_t status; - status = gz_getline (line_ctx, &line, &line_size, &line_len, input); + status = gz_getline (line_ctx, &line, &line_len, input); /* empty input file not considered an error */ if (status == UTIL_EOF) @@ -262,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) if (ret) break; - } while (gz_getline (line_ctx, &line, &line_size, &line_len, input) == UTIL_SUCCESS); + } while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS); if (line_ctx != NULL) talloc_free (line_ctx); @@ -272,8 +279,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[]) notmuch_database_destroy (notmuch); - if (input_file_name != NULL) - gzclose_r (input); + gzclose_r (input); return ret ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh index 50d4d48..efe463e 100755 --- a/test/T240-dump-restore.sh +++ b/test/T240-dump-restore.sh @@ -124,6 +124,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \ sort > OUTPUT.$test_count test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count +test_begin_subtest "format=batch-tag, missing newline" +printf "+a_tag_without_newline -- id:20091117232137.GA7669@griffis1.net" > IN +notmuch restore --accumulate < IN +notmuch dump id:20091117232137.GA7669@griffis1.net > OUT +cat < EXPECTED ++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669@griffis1.net +EOF +test_expect_equal_file EXPECTED OUT + test_begin_subtest "format=batch-tag, # round-trip" notmuch dump --format=sup | sort > EXPECTED.$test_count notmuch dump --format=batch-tag | notmuch restore --format=batch-tag diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh index 7a68ddf..7d5d5aa 100755 --- a/test/T530-upgrade.sh +++ b/test/T530-upgrade.sh @@ -37,7 +37,7 @@ Your notmuch database has now been upgraded to database format version 2. No new mail." test_begin_subtest "tag backup matches pre-upgrade dump" -gunzip -c ${MAIL_DIR}/.notmuch/backup-*.gz | sort > backup-dump +gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump test_expect_equal_file pre-upgrade-dump backup-dump test_begin_subtest "folder: no longer matches in the middle of path" diff --git a/util/zlib-extra.c b/util/zlib-extra.c index cb1eba0..922ab52 100644 --- a/util/zlib-extra.c +++ b/util/zlib-extra.c @@ -25,34 +25,36 @@ /* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */ util_status_t -gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read, - gzFile stream) +gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream) { - size_t len = *bufsiz; char *buf = *bufptr; + unsigned int len; size_t offset = 0; - if (len == 0 || buf == NULL) { + if (buf) { + len = talloc_array_length (buf); + } else { /* same as getdelim from gnulib */ len = 120; - buf = talloc_size (talloc_ctx, len); + buf = talloc_array (talloc_ctx, char, len); if (buf == NULL) return UTIL_OUT_OF_MEMORY; } while (1) { if (! gzgets (stream, buf + offset, len - offset)) { + /* Null indicates EOF or error */ int zlib_status = 0; (void) gzerror (stream, &zlib_status); switch (zlib_status) { case Z_OK: - /* follow getline behaviour */ - *bytes_read = -1; - return UTIL_EOF; - break; + /* no data read before EOF */ + if (offset == 0) + return UTIL_EOF; + else + goto SUCCESS; case Z_ERRNO: return UTIL_FILE; - break; default: return UTIL_ERROR; } @@ -60,17 +62,16 @@ gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read offset += strlen (buf + offset); - if ( buf[offset - 1] == '\n' ) - break; + if (buf[offset - 1] == '\n') + goto SUCCESS; len *= 2; buf = talloc_realloc (talloc_ctx, buf, char, len); if (buf == NULL) return UTIL_OUT_OF_MEMORY; } - + SUCCESS: *bufptr = buf; - *bufsiz = len; *bytes_read = offset; return UTIL_SUCCESS; } diff --git a/util/zlib-extra.h b/util/zlib-extra.h index ed46ac1..ed67115 100644 --- a/util/zlib-extra.h +++ b/util/zlib-extra.h @@ -1,11 +1,11 @@ #ifndef _ZLIB_EXTRA_H #define _ZLIB_EXTRA_H -#include #include "util.h" +#include + /* Like getline, but read from a gzFile. Allocation is with talloc */ util_status_t -gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read, - gzFile stream); +gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream); #endif