From 4bc570ec27acc67c1c935ba3f44e31ec9a8623fa Mon Sep 17 00:00:00 2001 From: Tomi Ollila Date: Wed, 2 Apr 2014 19:50:14 +0300 Subject: [PATCH] Re: [Patch v5 1/6] dump: support gzipped output --- 64/f65aca0444ef085c30d5b9365e2dea8bcab9e9 | 411 ++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 64/f65aca0444ef085c30d5b9365e2dea8bcab9e9 diff --git a/64/f65aca0444ef085c30d5b9365e2dea8bcab9e9 b/64/f65aca0444ef085c30d5b9365e2dea8bcab9e9 new file mode 100644 index 000000000..706de0a9c --- /dev/null +++ b/64/f65aca0444ef085c30d5b9365e2dea8bcab9e9 @@ -0,0 +1,411 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by olra.theworths.org (Postfix) with ESMTP id 57A5D431FBD + for ; Wed, 2 Apr 2014 09:50:23 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at olra.theworths.org +X-Spam-Flag: NO +X-Spam-Score: 0 +X-Spam-Level: +X-Spam-Status: No, score=0 tagged_above=-999 required=5 tests=[none] + autolearn=disabled +Received: from olra.theworths.org ([127.0.0.1]) + by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id e4bw9X63sQp6 for ; + Wed, 2 Apr 2014 09:50:19 -0700 (PDT) +Received: from guru.guru-group.fi (guru.guru-group.fi [46.183.73.34]) + by olra.theworths.org (Postfix) with ESMTP id D66FE431FAE + for ; Wed, 2 Apr 2014 09:50:18 -0700 (PDT) +Received: from guru.guru-group.fi (localhost [IPv6:::1]) + by guru.guru-group.fi (Postfix) with ESMTP id 128DE100051; + Wed, 2 Apr 2014 19:50:14 +0300 (EEST) +From: Tomi Ollila +To: David Bremner , notmuch@notmuchmail.org +Subject: Re: [Patch v5 1/6] dump: support gzipped output +In-Reply-To: <1396401381-18128-2-git-send-email-david@tethera.net> +References: <1396401381-18128-1-git-send-email-david@tethera.net> + <1396401381-18128-2-git-send-email-david@tethera.net> +User-Agent: Notmuch/0.17+174~gef82849 (http://notmuchmail.org) Emacs/24.3.1 + (x86_64-unknown-linux-gnu) +X-Face: HhBM'cA~ +MIME-Version: 1.0 +Content-Type: text/plain +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.13 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Wed, 02 Apr 2014 16:50:23 -0000 + +On Wed, Apr 02 2014, David Bremner wrote: + +> The main goal is to support gzipped output for future internal +> calls (e.g. from notmuch-new) to notmuch_database_dump. +> +> The additional dependency is not very heavy since xapian already pulls +> in zlib. +> --- +> INSTALL | 17 +++++++++++---- +> Makefile.local | 2 +- +> configure | 28 +++++++++++++++++++++--- +> doc/man1/notmuch-dump.rst | 3 +++ +> notmuch-client.h | 4 +++- +> notmuch-dump.c | 54 ++++++++++++++++++++++++++++++----------------- +> test/T240-dump-restore.sh | 12 +++++++++++ +> 7 files changed, 92 insertions(+), 28 deletions(-) +> +> diff --git a/INSTALL b/INSTALL +> index 690b0ef..df318fa 100644 +> --- a/INSTALL +> +++ b/INSTALL +> @@ -20,8 +20,8 @@ configure stage. +> +> Dependencies +> ------------ +> -Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and +> -Talloc which are each described below: +> +Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6, +> +Talloc, and zlib which are each described below: +> +> Xapian +> ------ +> @@ -60,6 +60,15 @@ Talloc which are each described below: +> +> Talloc is available from http://talloc.samba.org/ +> +> + zlib +> + ---- +> + +> + zlib is an extremely popular compression library. It is used +> + by Xapian, so if you installed that you will already have +> + zlib. You may need to install the zlib headers separately. +> + +> + zlib is available from http://zlib.net +> + +> Building Documentation +> ---------------------- +> +> @@ -79,11 +88,11 @@ dependencies with a simple simple command line. For example: +> +> For Debian and similar: +> +> - sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx +> + sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx +> +> For Fedora and similar: +> +> - sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx +> + sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx +> +> On other systems, a similar command can be used, but the details of +> the package names may be different. +> diff --git a/Makefile.local b/Makefile.local +> index cb7b106..e5a20a7 100644 +> --- a/Makefile.local +> +++ b/Makefile.local +> @@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py +> # Smash together user's values with our extra values +> FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS) +> FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS) +> -FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) +> +FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS) +> FINAL_NOTMUCH_LINKER = CC +> ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1) +> FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS) +> diff --git a/configure b/configure +> index 1d430b9..d685ab3 100755 +> --- a/configure +> +++ b/configure +> @@ -340,6 +340,18 @@ else +> errors=$((errors + 1)) +> fi +> +> +printf "Checking for zlib development files... " +> +have_zlib=0 +> +if pkg-config --exists zlib; then +> + printf "Yes.\n" +> + have_zlib=1 +> + zlib_cflags=$(pkg-config --cflags zlib) +> + zlib_ldflags=$(pkg-config --libs zlib) +> +else +> + printf "No.\n" +> + errors=$((errors + 1)) +> +fi + +Like Austin mentioned on IRC, zlib 1.2.5.2 (Dec 2011) is the first version +supporting 'wT', the test above should use: + +pkg-config --atleast-version=1.2.5.2 zlib + +And the related documentation should mention this version dependency... + +(maybe we can go with this and somebody(tm) who wants support to older +zlibs may provide tolerable patches for that to be done). + +For example Scientific Linux 6.2 (RHEL 6.2-compatible) has zlib 1.2.3 (released +2005). Maybe there are backports and maybe 6.3 has newer... for me it is +just easier to compile newer zlib :D) + + +Tomi + +> + +> printf "Checking for talloc development files... " +> if pkg-config --exists talloc; then +> printf "Yes.\n" +> @@ -496,6 +508,11 @@ EOF +> echo " Xapian library (including development files such as headers)" +> echo " http://xapian.org/" +> fi +> + if [ $have_zlib -eq 0 ]; then +> + echo " zlib library (including development files such as headers)" +> + echo " http://zlib.net/" +> + echo +> + fi +> if [ $have_gmime -eq 0 ]; then +> echo " Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR +> echo " (including development files such as headers)" +> @@ -519,11 +536,11 @@ case a simple command will install everything you need. For example: +> +> On Debian and similar systems: +> +> - sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev +> + sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev +> +> Or on Fedora and similar systems: +> +> - sudo yum install xapian-core-devel gmime-devel libtalloc-devel +> + sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel +> +> On other systems, similar commands can be used, but the details of the +> package names may be different. +> @@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags} +> GMIME_CFLAGS = ${gmime_cflags} +> GMIME_LDFLAGS = ${gmime_ldflags} +> +> +# Flags needed to compile and link against zlib +> +ZLIB_CFLAGS = ${zlib_cflags} +> +ZLIB_LDFLAGS = ${zlib_ldflags} +> + +> # Flags needed to compile and link against talloc +> TALLOC_CFLAGS = ${talloc_cflags} +> TALLOC_LDFLAGS = ${talloc_ldflags} +> @@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ +> -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER) +> +> CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ +> + \$(ZLIB_CFLAGS) \\ +> \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\ +> \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS) \\ +> -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR) \\ +> @@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\ +> -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT) \\ +> -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER) +> +> -CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS) +> +CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS) +> EOF +> diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst +> index 17d1da5..d94cb4f 100644 +> --- a/doc/man1/notmuch-dump.rst +> +++ b/doc/man1/notmuch-dump.rst +> @@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is +> therefore the only critical thing to backup (and much more friendly to +> incremental backup than the native database files.) +> +> +``--gzip`` +> + Compress the output in a format compatible with **gzip(1)**. +> + +> ``--format=(sup|batch-tag)`` +> Notmuch restore supports two plain text dump formats, both with one +> message-id per line, followed by a list of tags. +> diff --git a/notmuch-client.h b/notmuch-client.h +> index d110648..e1efbe0 100644 +> --- a/notmuch-client.h +> +++ b/notmuch-client.h +> @@ -450,7 +450,9 @@ typedef enum dump_formats { +> int +> notmuch_database_dump (notmuch_database_t *notmuch, +> const char *output_file_name, +> - const char *query_str, dump_format_t output_format); +> + const char *query_str, +> + dump_format_t output_format, +> + notmuch_bool_t gzip_output); +> +> #include "command-line-arguments.h" +> #endif +> diff --git a/notmuch-dump.c b/notmuch-dump.c +> index 21702d7..28342b7 100644 +> --- a/notmuch-dump.c +> +++ b/notmuch-dump.c +> @@ -21,9 +21,11 @@ +> #include "notmuch-client.h" +> #include "hex-escape.h" +> #include "string-util.h" +> +#include +> + +> +> static int +> -database_dump_file (notmuch_database_t *notmuch, FILE *output, +> +database_dump_file (notmuch_database_t *notmuch, gzFile output, +> const char *query_str, int output_format) +> { +> notmuch_query_t *query; +> @@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, +> } +> +> if (output_format == DUMP_FORMAT_SUP) { +> - fprintf (output, "%s (", message_id); +> + gzprintf (output, "%s (", message_id); +> } +> +> for (tags = notmuch_message_get_tags (message); +> @@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, +> const char *tag_str = notmuch_tags_get (tags); +> +> if (! first) +> - fputs (" ", output); +> + gzputs (output, " "); +> +> first = 0; +> +> if (output_format == DUMP_FORMAT_SUP) { +> - fputs (tag_str, output); +> + gzputs (output, tag_str); +> } else { +> if (hex_encode (notmuch, tag_str, +> &buffer, &buffer_size) != HEX_SUCCESS) { +> @@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, +> tag_str); +> return EXIT_FAILURE; +> } +> - fprintf (output, "+%s", buffer); +> + gzprintf (output, "+%s", buffer); +> } +> } +> +> if (output_format == DUMP_FORMAT_SUP) { +> - fputs (")\n", output); +> + gzputs (output, ")\n"); +> } else { +> if (make_boolean_term (notmuch, "id", message_id, +> &buffer, &buffer_size)) { +> @@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, +> message_id, strerror (errno)); +> return EXIT_FAILURE; +> } +> - fprintf (output, " -- %s\n", buffer); +> + gzprintf (output, " -- %s\n", buffer); +> } +> +> notmuch_message_destroy (message); +> @@ -121,24 +123,36 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output, +> int +> notmuch_database_dump (notmuch_database_t *notmuch, +> const char *output_file_name, +> - const char *query_str, dump_format_t output_format) +> + const char *query_str, +> + dump_format_t output_format, +> + notmuch_bool_t gzip_output) +> { +> - FILE *output = stdout; +> + gzFile output; +> + const char *mode = gzip_output ? "w9" : "wT"; +> + +> int ret; +> +> - if (output_file_name) { +> - output = fopen (output_file_name, "w"); +> - if (output == NULL) { +> - fprintf (stderr, "Error opening %s for writing: %s\n", +> - output_file_name, strerror (errno)); +> - return EXIT_FAILURE; +> - } +> + if (output_file_name) +> + output = gzopen (output_file_name, mode); +> + else +> + output = gzdopen (fileno (stdout), mode); +> + +> + if (output == NULL) { +> + fprintf (stderr, "Error opening %s for (gzip) writing: %s\n", +> + output_file_name ? output_file_name : "stdout", strerror (errno)); +> + return EXIT_FAILURE; +> } +> +> ret = database_dump_file (notmuch, output, query_str, output_format); +> +> - if (output != stdout) +> - fclose (output); +> + if (gzflush (output, Z_FINISH)) { +> + fprintf (stderr, "Error flushing output: %s\n", +> + gzerror (output, NULL)); +> + return EXIT_FAILURE; +> + } +> + +> + if (output_file_name) +> + gzclose_w (output); +> +> return ret; +> } +> @@ -158,6 +172,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) +> int opt_index; +> +> int output_format = DUMP_FORMAT_BATCH_TAG; +> + notmuch_bool_t gzip_output = 0; +> +> notmuch_opt_desc_t options[] = { +> { NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f', +> @@ -165,6 +180,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) +> { "batch-tag", DUMP_FORMAT_BATCH_TAG }, +> { 0, 0 } } }, +> { NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0 }, +> + { NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 }, +> { 0, 0, 0, 0, 0 } +> }; +> +> @@ -181,7 +197,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) +> } +> +> ret = notmuch_database_dump (notmuch, output_file_name, query_str, +> - output_format); +> + output_format, gzip_output); +> +> notmuch_database_destroy (notmuch); +> +> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh +> index 0004438..d79aca8 100755 +> --- a/test/T240-dump-restore.sh +> +++ b/test/T240-dump-restore.sh +> @@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --" +> notmuch dump --output=dump-1-arg-dash.actual -- +> test_expect_equal_file dump.expected dump-1-arg-dash.actual +> +> +# gzipped output +> + +> +test_begin_subtest "dump --gzip" +> +notmuch dump --gzip > dump-gzip.gz +> +gunzip dump-gzip.gz +> +test_expect_equal_file dump.expected dump-gzip +> + +> +test_begin_subtest "dump --gzip --output=outfile" +> +notmuch dump --gzip --output=dump-gzip-outfile.gz +> +gunzip dump-gzip-outfile.gz +> +test_expect_equal_file dump.expected dump-gzip-outfile +> + +> # Note, we assume all messages from cworth have a message-id +> # containing cworth.org +> +> -- +> 1.9.0 -- 2.26.2