From: David Bremner Date: Sat, 5 Apr 2014 15:43:51 +0000 (+2100) Subject: [Patch v7 1/6] dump: support gzipped and atomic output X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=b8b58a642658a2419f8b531c73def2d1578ae717;p=notmuch-archives.git [Patch v7 1/6] dump: support gzipped and atomic output --- diff --git a/1d/2a919499d1dd137831ace6236a7bc6905f1049 b/1d/2a919499d1dd137831ace6236a7bc6905f1049 new file mode 100644 index 000000000..ba9da6a7f --- /dev/null +++ b/1d/2a919499d1dd137831ace6236a7bc6905f1049 @@ -0,0 +1,444 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 2A08B431FCF + for ; Sat, 5 Apr 2014 08:46:41 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: 0 +X-Spam-Level: +X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none] + autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id aNyATK6fzQ4O for ; + Sat, 5 Apr 2014 08:46:32 -0700 (PDT) +Received: from mx.xen14.node3324.gplhost.com (gitolite.debian.net + [87.98.215.224]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) + (No client certificate requested) + by olra.theworths.org (Postfix) with ESMTPS id 50436431FB6 + for ; Sat, 5 Apr 2014 08:46:27 -0700 (PDT) +Received: from remotemail by mx.xen14.node3324.gplhost.com with local (Exim + 4.72) (envelope-from ) + id 1WWSmv-000833-VR; Sat, 05 Apr 2014 15:45:45 +0000 +Received: (nullmailer pid 10912 invoked by uid 1000); Sat, 05 Apr 2014 + 15:43:58 -0000 +From: David Bremner +To: notmuch@notmuchmail.org +Subject: [Patch v7 1/6] dump: support gzipped and atomic output +Date: Sat, 5 Apr 2014 12:43:51 -0300 +Message-Id: <1396712636-10640-2-git-send-email-david@tethera.net> +X-Mailer: git-send-email 1.9.0 +In-Reply-To: <1396712636-10640-1-git-send-email-david@tethera.net> +References: <1396712636-10640-1-git-send-email-david@tethera.net> +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Sat, 05 Apr 2014 15:46:41 -0000 + +The main goal is to support gzipped output for future internal +calls (e.g. from notmuch-new) to notmuch_database_dump. + +The additional dependency is not very heavy since xapian already pulls +in zlib. + +We want the dump to be "atomic", in the sense that after running the +dump file is either present and complete, or not present. This avoids +certain classes of mishaps involving overwriting a good backup with a +bad or partial one. +--- + INSTALL | 20 +++++++-- + Makefile.local | 2 +- + configure | 28 +++++++++++-- + doc/man1/notmuch-dump.rst | 3 ++ + notmuch-client.h | 4 +- + notmuch-dump.c | 101 +++++++++++++++++++++++++++++++++++++--------- + test/T240-dump-restore.sh | 12 ++++++ + 7 files changed, 142 insertions(+), 28 deletions(-) + +diff --git a/INSTALL b/INSTALL +index 690b0ef..b543c50 100644 +--- a/INSTALL ++++ b/INSTALL +@@ -20,8 +20,8 @@ configure stage. + + Dependencies + ------------ +-Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and +-Talloc which are each described below: ++Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6, ++Talloc, and zlib which are each described below: + + Xapian + ------ +@@ -60,6 +60,18 @@ Talloc which are each described below: + + Talloc is available from http://talloc.samba.org/ + ++ zlib ++ ---- ++ ++ zlib is an extremely popular compression library. It is used ++ by Xapian, so if you installed that you will already have ++ zlib. You may need to install the zlib headers separately. ++ ++ Notmuch needs the transparent write feature of zlib introduced ++ in version 1.2.5.2 (Dec. 2011). ++ ++ zlib is available from http://zlib.net ++ + Building Documentation + ---------------------- + +@@ -79,11 +91,11 @@ dependencies with a simple simple command line. For example: + + For Debian and similar: + +- sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx ++ sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx + + For Fedora and similar: + +- sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx ++ sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx + + On other systems, a similar command can be used, but the details of + the package names may be different. +diff --git a/Makefile.local b/Makefile.local +index cb7b106..e5a20a7 100644 +--- a/Makefile.local ++++ b/Makefile.local +@@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py + # Smash together user's values with our extra values + FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS) + FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS) +-FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) ++FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS) + FINAL_NOTMUCH_LINKER = CC + ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1) + FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS) +diff --git a/configure b/configure +index 1d430b9..83b4af7 100755 +--- a/configure ++++ b/configure +@@ -340,6 +340,18 @@ else + errors=$((errors + 1)) + fi + ++printf "Checking for zlib (>= 1.2.5.2)... " ++have_zlib=0 ++if pkg-config --atleast-version=1.2.5.2 zlib; then ++ printf "Yes.\n" ++ have_zlib=1 ++ zlib_cflags=$(pkg-config --cflags zlib) ++ zlib_ldflags=$(pkg-config --libs zlib) ++else ++ printf "No.\n" ++ errors=$((errors + 1)) ++fi ++ + printf "Checking for talloc development files... " + if pkg-config --exists talloc; then + printf "Yes.\n" +@@ -496,6 +508,11 @@ EOF + echo " Xapian library (including development files such as headers)" + echo " http://xapian.org/" + fi ++ if [ $have_zlib -eq 0 ]; then ++ echo " zlib library (>= version 1.2.5.2, including development files such as headers)" ++ echo " http://zlib.net/" ++ echo ++ fi + if [ $have_gmime -eq 0 ]; then + echo " Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR + echo " (including development files such as headers)" +@@ -519,11 +536,11 @@ case a simple command will install everything you need. For example: + + On Debian and similar systems: + +- sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev ++ sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev + + Or on Fedora and similar systems: + +- sudo yum install xapian-core-devel gmime-devel libtalloc-devel ++ sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel + + On other systems, similar commands can be used, but the details of the + package names may be different. +@@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags} + GMIME_CFLAGS = ${gmime_cflags} + GMIME_LDFLAGS = ${gmime_ldflags} + ++# Flags needed to compile and link against zlib ++ZLIB_CFLAGS = ${zlib_cflags} ++ZLIB_LDFLAGS = ${zlib_ldflags} ++ + # Flags needed to compile and link against talloc + TALLOC_CFLAGS = ${talloc_cflags} + TALLOC_LDFLAGS = ${talloc_ldflags} +@@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ + -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER) + + CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ ++ \$(ZLIB_CFLAGS) \\ + \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\ + \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS) \\ + -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR) \\ +@@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ + -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT) \\ + -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER) + +-CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS) ++CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS) + EOF +diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst +index 17d1da5..d94cb4f 100644 +--- a/doc/man1/notmuch-dump.rst ++++ b/doc/man1/notmuch-dump.rst +@@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is + therefore the only critical thing to backup (and much more friendly to + incremental backup than the native database files.) + ++``--gzip`` ++ Compress the output in a format compatible with **gzip(1)**. ++ + ``--format=(sup|batch-tag)`` + Notmuch restore supports two plain text dump formats, both with one + message-id per line, followed by a list of tags. +diff --git a/notmuch-client.h b/notmuch-client.h +index d110648..e1efbe0 100644 +--- a/notmuch-client.h ++++ b/notmuch-client.h +@@ -450,7 +450,9 @@ typedef enum dump_formats { + int + notmuch_database_dump (notmuch_database_t *notmuch, + const char *output_file_name, +- const char *query_str, dump_format_t output_format); ++ const char *query_str, ++ dump_format_t output_format, ++ notmuch_bool_t gzip_output); + + #include "command-line-arguments.h" + #endif +diff --git a/notmuch-dump.c b/notmuch-dump.c +index 21702d7..2849eab 100644 +--- a/notmuch-dump.c ++++ b/notmuch-dump.c +@@ -21,9 +21,11 @@ + #include "notmuch-client.h" + #include "hex-escape.h" + #include "string-util.h" ++#include ++ + + static int +-database_dump_file (notmuch_database_t *notmuch, FILE *output, ++database_dump_file (notmuch_database_t *notmuch, gzFile output, + const char *query_str, int output_format) + { + notmuch_query_t *query; +@@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, + } + + if (output_format == DUMP_FORMAT_SUP) { +- fprintf (output, "%s (", message_id); ++ gzprintf (output, "%s (", message_id); + } + + for (tags = notmuch_message_get_tags (message); +@@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, + const char *tag_str = notmuch_tags_get (tags); + + if (! first) +- fputs (" ", output); ++ gzputs (output, " "); + + first = 0; + + if (output_format == DUMP_FORMAT_SUP) { +- fputs (tag_str, output); ++ gzputs (output, tag_str); + } else { + if (hex_encode (notmuch, tag_str, + &buffer, &buffer_size) != HEX_SUCCESS) { +@@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, + tag_str); + return EXIT_FAILURE; + } +- fprintf (output, "+%s", buffer); ++ gzprintf (output, "+%s", buffer); + } + } + + if (output_format == DUMP_FORMAT_SUP) { +- fputs (")\n", output); ++ gzputs (output, ")\n"); + } else { + if (make_boolean_term (notmuch, "id", message_id, + &buffer, &buffer_size)) { +@@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, + message_id, strerror (errno)); + return EXIT_FAILURE; + } +- fprintf (output, " -- %s\n", buffer); ++ gzprintf (output, " -- %s\n", buffer); + } + + notmuch_message_destroy (message); +@@ -121,24 +123,83 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, + int + notmuch_database_dump (notmuch_database_t *notmuch, + const char *output_file_name, +- const char *query_str, dump_format_t output_format) ++ const char *query_str, ++ dump_format_t output_format, ++ notmuch_bool_t gzip_output) + { +- FILE *output = stdout; +- int ret; ++ gzFile output = NULL; ++ const char *mode = gzip_output ? "w9" : "wT"; ++ const char *name_for_error = output_file_name ? output_file_name : "stdout"; ++ ++ char *tempname = NULL; ++ int outfd = -1; ++ ++ int ret = -1; + + if (output_file_name) { +- output = fopen (output_file_name, "w"); +- if (output == NULL) { +- fprintf (stderr, "Error opening %s for writing: %s\n", +- output_file_name, strerror (errno)); +- return EXIT_FAILURE; +- } ++ tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name); ++ outfd = mkstemp (tempname); ++ } else { ++ outfd = dup (STDOUT_FILENO); ++ } ++ ++ if (outfd < 0) { ++ fprintf (stderr, "Bad output file %s\n", name_for_error); ++ goto DONE; ++ } ++ ++ output = gzdopen (outfd, mode); ++ ++ if (output == NULL) { ++ fprintf (stderr, "Error opening %s for (gzip) writing: %s\n", ++ name_for_error, strerror (errno)); ++ if (close (outfd)) ++ fprintf (stderr, "Error closing %s during shutdown: %s\n", ++ name_for_error, strerror (errno)); ++ goto DONE; + } + + ret = database_dump_file (notmuch, output, query_str, output_format); ++ if (ret) goto DONE; ++ ++ ret = gzflush (output, Z_FINISH); ++ if (ret) { ++ fprintf (stderr, "Error flushing output: %s\n", gzerror (output, NULL)); ++ goto DONE; ++ } ++ ++ if (output_file_name) { ++ ret = fdatasync (outfd); ++ if (ret) { ++ fprintf (stderr, "Error syncing %s to disk: %s\n", ++ name_for_error, strerror (errno)); ++ goto DONE; ++ } ++ } ++ ++ if (gzclose_w (output) != Z_OK) { ++ fprintf (stderr, "Error closing %s: %s\n", name_for_error, ++ gzerror (output, NULL)); ++ ret = EXIT_FAILURE; ++ output = NULL; ++ goto DONE; ++ } ++ ++ if (output_file_name) { ++ ret = rename (tempname, output_file_name); ++ if (ret) { ++ fprintf (stderr, "Error renaming %s to %s: %s\n", ++ tempname, output_file_name, strerror (errno)); ++ goto DONE; ++ } ++ ++ } ++ DONE: ++ if (ret != EXIT_SUCCESS && output) ++ (void) gzclose_w (output); + +- if (output != stdout) +- fclose (output); ++ if (ret != EXIT_SUCCESS && output_file_name) ++ (void) unlink (tempname); + + return ret; + } +@@ -158,6 +219,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) + int opt_index; + + int output_format = DUMP_FORMAT_BATCH_TAG; ++ notmuch_bool_t gzip_output = 0; + + notmuch_opt_desc_t options[] = { + { NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f', +@@ -165,6 +227,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) + { "batch-tag", DUMP_FORMAT_BATCH_TAG }, + { 0, 0 } } }, + { NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0 }, ++ { NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 }, + { 0, 0, 0, 0, 0 } + }; + +@@ -181,7 +244,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) + } + + ret = notmuch_database_dump (notmuch, output_file_name, query_str, +- output_format); ++ output_format, gzip_output); + + notmuch_database_destroy (notmuch); + +diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh +index 0004438..d79aca8 100755 +--- a/test/T240-dump-restore.sh ++++ b/test/T240-dump-restore.sh +@@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --" + notmuch dump --output=dump-1-arg-dash.actual -- + test_expect_equal_file dump.expected dump-1-arg-dash.actual + ++# gzipped output ++ ++test_begin_subtest "dump --gzip" ++notmuch dump --gzip > dump-gzip.gz ++gunzip dump-gzip.gz ++test_expect_equal_file dump.expected dump-gzip ++ ++test_begin_subtest "dump --gzip --output=outfile" ++notmuch dump --gzip --output=dump-gzip-outfile.gz ++gunzip dump-gzip-outfile.gz ++test_expect_equal_file dump.expected dump-gzip-outfile ++ + # Note, we assume all messages from cworth have a message-id + # containing cworth.org + +-- +1.9.0 +