From: Jani Nikula Date: Fri, 15 Apr 2016 19:29:18 +0000 (+0300) Subject: [RFC PATCH 4/5] cli: use homebrew scandir in notmuch new add_files X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=36f543fae656c99b2719f85460369a05240082de;p=notmuch-archives.git [RFC PATCH 4/5] cli: use homebrew scandir in notmuch new add_files --- diff --git a/55/94e10a4c78b63c8bf98df4d9c8fb58b2a995ce b/55/94e10a4c78b63c8bf98df4d9c8fb58b2a995ce new file mode 100644 index 000000000..c05af716f --- /dev/null +++ b/55/94e10a4c78b63c8bf98df4d9c8fb58b2a995ce @@ -0,0 +1,498 @@ +Return-Path: +X-Original-To: notmuch@notmuchmail.org +Delivered-To: notmuch@notmuchmail.org +Received: from localhost (localhost [127.0.0.1]) + by arlo.cworth.org (Postfix) with ESMTP id 195076DE0360 + for ; Fri, 15 Apr 2016 12:31:02 -0700 (PDT) +X-Virus-Scanned: Debian amavisd-new at cworth.org +X-Amavis-Alert: BAD HEADER SECTION, Duplicate header field: "References" +X-Spam-Flag: NO +X-Spam-Score: -0.557 +X-Spam-Level: +X-Spam-Status: No, score=-0.557 tagged_above=-999 required=5 tests=[AWL=0.163, + DKIM_SIGNED=0.1, DKIM_VALID=-0.1, RCVD_IN_DNSWL_LOW=-0.7, + RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01] autolearn=disabled +Received: from arlo.cworth.org ([127.0.0.1]) + by localhost (arlo.cworth.org [127.0.0.1]) (amavisd-new, port 10024) + with ESMTP id KjVxt2cZ6-O5 for ; + Fri, 15 Apr 2016 12:30:54 -0700 (PDT) +Received: from mail-wm0-f67.google.com (mail-wm0-f67.google.com + [74.125.82.67]) by arlo.cworth.org (Postfix) with ESMTPS id 8B0FA6DE02C6 for + ; Fri, 15 Apr 2016 12:30:42 -0700 (PDT) +Received: by mail-wm0-f67.google.com with SMTP id n3so8629843wmn.1 + for ; Fri, 15 Apr 2016 12:30:42 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=nikula-org.20150623.gappssmtp.com; s=20150623; + h=from:to:cc:subject:date:message-id:in-reply-to:references + :in-reply-to:references:mime-version:content-transfer-encoding; + bh=Xp5rFuNK1ThMoMMAw6AJlxsBrC7L1Et3ctHWU3hNPDE=; + b=QFJ5fn9fxYCv4VvdbDI6s009HxS2Bfsre0L8s7aX+MhpSC7u4VCD7tU+0ScofCIiJn + senN73w3W+7UDlNlwIaUquxjqosBWx1m41tG9zFjkZacQE456xRKYpzaM4C6UE+bF6xa + t7CQtl5xZoZR1GmZkNaOsuZPmzVqPQrsKqRRmDJz7Io4vKclsa5W6o/FTE6srlWaR7r3 + 3zFrt1KFOUoIAFFLbUB6+mrfFs4Z2kX6UbQSRO1IrAorF9Pah9HjmjwAyh4N1idEM7mZ + CI9cc/iH2bJVZ6qn4TF0E+sOhmIfjHZUPDYQ9PdjDHv+cOWAfVmEDaRjImUO3Uh1j6OP + 27/Q== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20130820; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:in-reply-to:references:mime-version + :content-transfer-encoding; + bh=Xp5rFuNK1ThMoMMAw6AJlxsBrC7L1Et3ctHWU3hNPDE=; + b=ai2j4a1fyp5znB5uQhUhlkEtR7TipHCFKMIWY0T8o1a3b+a3Iek87M7v15KPZnyNP0 + 0mFu16vAbqMdlurwU9n+Ta8x736NBg0kpMToB8q+3YHepYhM7fhol5/5WSWv2idEaUPm + MOA4vKvtAtG+m1Br3RHRKvtXNAVMrbXuLmmfcqCAF0LiRi8wYN0YuzO4q1M0VtQLKfxd + 6ObMsjBKYnoiLrK1TA45l47Ao8g/n4tjGKMl6Zafl610vkbeNDMPhEChrxI8eqbgdEQu + FKHazxlQNJCmZunmDlndizEpEbqhRBSjYtl5u4ANtfMcrBwFOymuSF41gbWKliV3VBE7 + D1Dg== +X-Gm-Message-State: + AOPr4FVnZF7kdP15dOCMpRIQ2wuAk2jCXpl7CJhfX3miCDSvcPzah2IJArqzZsRtSfNOjQ== +X-Received: by 10.195.13.135 with SMTP id ey7mr23435188wjd.161.1460748641236; + Fri, 15 Apr 2016 12:30:41 -0700 (PDT) +Received: from localhost (mobile-access-bcee7f-102.dhcp.inet.fi. + [188.238.127.102]) + by smtp.gmail.com with ESMTPSA id 202sm20676452wmw.5.2016.04.15.12.30.40 + (version=TLSv1/SSLv3 cipher=OTHER); + Fri, 15 Apr 2016 12:30:40 -0700 (PDT) +From: Jani Nikula +To: notmuch@notmuchmail.org +Subject: [RFC PATCH 4/5] cli: use homebrew scandir in notmuch new add_files +Date: Fri, 15 Apr 2016 22:29:18 +0300 +Message-Id: + <6e67ca7c973419213f2b9d47711b24cf7cb35d7a.1460748142.git.jani@nikula.org> +X-Mailer: git-send-email 2.1.4 +In-Reply-To: +References: +In-Reply-To: +References: +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +X-BeenThere: notmuch@notmuchmail.org +X-Mailman-Version: 2.1.20 +Precedence: list +List-Id: "Use and development of the notmuch mail system." + +List-Unsubscribe: , + +List-Archive: +List-Post: +List-Help: +List-Subscribe: , + +X-List-Received-Date: Fri, 15 Apr 2016 19:31:02 -0000 + +Split the scanning to files and subdirectories. Filter out ignored +files etc. in the scandir filter callback. + +The end result is perhaps slightly easier to follow than before and it +should be easier to add clever ignore mechanisms like globbing in the +scandir filter callback, but it's not at all certain if this is worth +the trouble. + +Some tests will fail due to changes in where files are ignored and +what debug logging is done; I didn't bother updating the logging or +the tests much at this point. +--- + notmuch-new.c | 275 +++++++++++++++++++++++++++++++--------------------------- + 1 file changed, 148 insertions(+), 127 deletions(-) + +diff --git a/notmuch-new.c b/notmuch-new.c +index 930cbbc9b86f..262895d466ae 100644 +--- a/notmuch-new.c ++++ b/notmuch-new.c +@@ -20,6 +20,7 @@ + + #include "notmuch-client.h" + #include "tag-util.h" ++#include "scandir.h" + + #include + +@@ -151,9 +152,12 @@ generic_print_progress (const char *action, const char *object, + } + + static int +-dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b) ++dirent_sort_strcmp_name (const void *_a, const void *_b) + { +- return strcmp ((*a)->d_name, (*b)->d_name); ++ char * const *a = _a; ++ char * const *b = _b; ++ ++ return strcmp (*a, *b); + } + + /* Return the type of a directory entry relative to path as a stat(2) +@@ -206,18 +210,14 @@ dirent_type (const char *path, const struct dirent *entry) + * Return 1 if the directory looks like a Maildir and 0 otherwise. + */ + static int +-_entries_resemble_maildir (const char *path, struct dirent **entries, int count) ++_entries_resemble_maildir (char **subdirs, int count) + { + int i, found = 0; + + for (i = 0; i < count; i++) { +- if (dirent_type (path, entries[i]) != S_IFDIR) +- continue; +- +- if (strcmp(entries[i]->d_name, "new") == 0 || +- strcmp(entries[i]->d_name, "cur") == 0 || +- strcmp(entries[i]->d_name, "tmp") == 0) +- { ++ if (strcmp(subdirs[i], "new") == 0 || ++ strcmp(subdirs[i], "cur") == 0 || ++ strcmp(subdirs[i], "tmp") == 0) { + found++; + if (found == 3) + return 1; +@@ -241,6 +241,49 @@ _entry_in_ignore_list (const char *entry, add_files_state_t *state) + return FALSE; + } + ++struct filter { ++ const char *path; ++ add_files_state_t *state; ++ int entry_type; ++}; ++ ++static int filter_fn (const struct dirent *entry, void *context) ++{ ++ struct filter *filter = context; ++ int entry_type; ++ ++ /* Ignore any files/directories the user has configured to ++ * ignore. We do this before dirent_type both for performance ++ * and because we don't care if dirent_type fails on entries ++ * that are explicitly ignored. ++ */ ++ if (_entry_in_ignore_list (entry->d_name, filter->state)) { ++ if (filter->state->debug) ++ printf ("(D) add_files, pass 1: explicitly ignoring %s/%s\n", ++ filter->path, entry->d_name); ++ return 0; ++ } ++ ++ /* We only want to descend into directories (and symlinks to ++ * directories). */ ++ entry_type = dirent_type (filter->path, entry); ++ if (entry_type == -1) { ++ /* Be pessimistic, e.g. so we don't lose lots of mail just ++ * because a user broke a symlink. */ ++ fprintf (stderr, "Error reading file %s/%s: %s\n", ++ filter->path, entry->d_name, strerror (errno)); ++ return -1; ++ } ++ ++ if (entry_type != filter->entry_type) ++ return 0; ++ ++ if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0) ++ return 0; ++ ++ return 1; ++} ++ + /* Add a single file to the database. */ + static notmuch_status_t + add_file (notmuch_database_t *notmuch, const char *filename, +@@ -345,18 +388,22 @@ add_files (notmuch_database_t *notmuch, + const char *path, + add_files_state_t *state) + { +- struct dirent *entry = NULL; + char *next = NULL; + time_t fs_mtime, db_mtime; + notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; +- struct dirent **fs_entries = NULL; +- int i, num_fs_entries = 0, entry_type; ++ char **fs_files = NULL, **fs_subdirs = NULL; ++ int num_fs_files = 0, num_fs_subdirs = 0; ++ int i; + notmuch_directory_t *directory; + notmuch_filenames_t *db_files = NULL; + notmuch_filenames_t *db_subdirs = NULL; + time_t stat_time; + struct stat st; + notmuch_bool_t is_maildir; ++ struct filter filter = { ++ .path = path, ++ .state = state, ++ }; + + if (stat (path, &st)) { + fprintf (stderr, "Error reading directory %s: %s\n", +@@ -410,11 +457,11 @@ add_files (notmuch_database_t *notmuch, + + /* If the database knows about this directory, then we sort based + * on strcmp to match the database sorting. */ +- num_fs_entries = scandir (path, &fs_entries, 0, +- directory ? +- dirent_sort_strcmp_name : NULL); +- +- if (num_fs_entries == -1) { ++ filter.entry_type = S_IFDIR; ++ num_fs_subdirs = scandirx (path, &fs_subdirs, filter_fn, ++ directory ? dirent_sort_strcmp_name : NULL, ++ &filter); ++ if (num_fs_subdirs == -1) { + fprintf (stderr, "Error opening directory %s: %s\n", + path, strerror (errno)); + /* We consider this a fatal error because, if a user moved a +@@ -426,50 +473,20 @@ add_files (notmuch_database_t *notmuch, + } + + /* Pass 1: Recurse into all sub-directories. */ +- is_maildir = _entries_resemble_maildir (path, fs_entries, num_fs_entries); +- +- for (i = 0; i < num_fs_entries; i++) { +- if (interrupted) +- break; ++ is_maildir = _entries_resemble_maildir (fs_subdirs, num_fs_subdirs); + +- entry = fs_entries[i]; ++ for (i = 0; i < num_fs_subdirs && !interrupted; i++) { ++ const char *name = fs_subdirs[i]; + +- /* Ignore any files/directories the user has configured to +- * ignore. We do this before dirent_type both for performance +- * and because we don't care if dirent_type fails on entries +- * that are explicitly ignored. ++ /* ++ * Ignore the .notmuch directory and any "tmp" directory that ++ * appears within a maildir. + */ +- if (_entry_in_ignore_list (entry->d_name, state)) { +- if (state->debug) +- printf ("(D) add_files, pass 1: explicitly ignoring %s/%s\n", +- path, entry->d_name); ++ if ((is_maildir && strcmp (name, "tmp") == 0) || ++ strcmp (name, ".notmuch") == 0) + continue; +- } + +- /* We only want to descend into directories (and symlinks to +- * directories). */ +- entry_type = dirent_type (path, entry); +- if (entry_type == -1) { +- /* Be pessimistic, e.g. so we don't lose lots of mail just +- * because a user broke a symlink. */ +- fprintf (stderr, "Error reading file %s/%s: %s\n", +- path, entry->d_name, strerror (errno)); +- return NOTMUCH_STATUS_FILE_ERROR; +- } else if (entry_type != S_IFDIR) { +- continue; +- } +- +- /* Ignore special directories to avoid infinite recursion. +- * Also ignore the .notmuch directory and any "tmp" directory +- * that appears within a maildir. +- */ +- if (strcmp (entry->d_name, ".") == 0 || +- strcmp (entry->d_name, "..") == 0 || +- (is_maildir && strcmp (entry->d_name, "tmp") == 0) || +- strcmp (entry->d_name, ".notmuch") == 0) +- continue; +- +- next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); ++ next = talloc_asprintf (notmuch, "%s/%s", path, name); + status = add_files (notmuch, next, state); + if (status) { + ret = status; +@@ -498,27 +515,71 @@ add_files (notmuch_database_t *notmuch, + db_subdirs = notmuch_directory_get_child_directories (directory); + } + +- /* Pass 2: Scan for new files, removed files, and removed directories. */ +- for (i = 0; i < num_fs_entries; i++) +- { +- if (interrupted) +- break; ++ /* Pass 1½: Scan for removed directories. */ ++ for (i = 0; i < num_fs_subdirs && !interrupted; i++) { ++ const char *name = fs_subdirs[i]; + +- entry = fs_entries[i]; ++ /* Check if we've walked past any names in db_subdirs. If so, ++ * these have been deleted. */ ++ while (notmuch_filenames_valid (db_subdirs) && ++ strcmp (notmuch_filenames_get (db_subdirs), name) < 0) { ++ char *absolute = talloc_asprintf (state->removed_directories, ++ "%s/%s", path, ++ notmuch_filenames_get (db_subdirs)); + +- /* Ignore files & directories user has configured to be ignored */ +- if (_entry_in_ignore_list (entry->d_name, state)) { + if (state->debug) +- printf ("(D) add_files, pass 2: explicitly ignoring %s/%s\n", +- path, entry->d_name); +- continue; ++ printf ("(D) add_files, pass 2: queuing passed directory %s for deletion from database\n", ++ absolute); ++ ++ _filename_list_add (state->removed_directories, absolute); ++ ++ notmuch_filenames_move_to_next (db_subdirs); + } + +- /* Check if we've walked past any names in db_files or +- * db_subdirs. If so, these have been deleted. */ ++ if (notmuch_filenames_valid (db_subdirs) && ++ strcmp (notmuch_filenames_get (db_subdirs), name) == 0) ++ notmuch_filenames_move_to_next (db_subdirs); ++ } ++ ++ /* Now that we've walked the whole filesystem subdir list, subdirs ++ * left over in the database list have been deleted. */ ++ while (notmuch_filenames_valid (db_subdirs)) { ++ char *absolute = talloc_asprintf (state->removed_directories, ++ "%s/%s", path, ++ notmuch_filenames_get (db_subdirs)); ++ ++ if (state->debug) ++ printf ("(D) add_files, pass 3: queuing leftover directory %s for deletion from database\n", ++ absolute); ++ ++ _filename_list_add (state->removed_directories, absolute); ++ ++ notmuch_filenames_move_to_next (db_subdirs); ++ } ++ ++ /* Pass 2: Scan for new and removed files. */ ++ filter.entry_type = S_IFREG; ++ num_fs_files = scandirx (path, &fs_files, filter_fn, ++ directory ? dirent_sort_strcmp_name : NULL, ++ &filter); ++ if (num_fs_files == -1) { ++ fprintf (stderr, "Error opening directory %s: %s\n", ++ path, strerror (errno)); ++ /* We consider this a fatal error because, if a user moved a ++ * message from another directory that we were able to scan ++ * into this directory, skipping this directory will cause ++ * that message to be lost. */ ++ ret = NOTMUCH_STATUS_FILE_ERROR; ++ goto DONE; ++ } ++ ++ for (i = 0; i < num_fs_files && !interrupted; i++) { ++ const char *name = fs_files[i]; ++ ++ /* Check if we've walked past any names in db_files. If so, ++ * these have been deleted. */ + while (notmuch_filenames_valid (db_files) && +- strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0) +- { ++ strcmp (notmuch_filenames_get (db_files), name) < 0) { + char *absolute = talloc_asprintf (state->removed_files, + "%s/%s", path, + notmuch_filenames_get (db_files)); +@@ -532,46 +593,16 @@ add_files (notmuch_database_t *notmuch, + notmuch_filenames_move_to_next (db_files); + } + +- while (notmuch_filenames_valid (db_subdirs) && +- strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0) +- { +- const char *filename = notmuch_filenames_get (db_subdirs); +- +- if (strcmp (filename, entry->d_name) < 0) +- { +- char *absolute = talloc_asprintf (state->removed_directories, +- "%s/%s", path, filename); +- if (state->debug) +- printf ("(D) add_files, pass 2: queuing passed directory %s for deletion from database\n", +- absolute); +- +- _filename_list_add (state->removed_directories, absolute); +- } +- +- notmuch_filenames_move_to_next (db_subdirs); +- } +- +- /* Only add regular files (and symlinks to regular files). */ +- entry_type = dirent_type (path, entry); +- if (entry_type == -1) { +- fprintf (stderr, "Error reading file %s/%s: %s\n", +- path, entry->d_name, strerror (errno)); +- return NOTMUCH_STATUS_FILE_ERROR; +- } else if (entry_type != S_IFREG) { +- continue; +- } +- + /* Don't add a file that we've added before. */ + if (notmuch_filenames_valid (db_files) && +- strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0) +- { ++ strcmp (notmuch_filenames_get (db_files), name) == 0) { + notmuch_filenames_move_to_next (db_files); + continue; + } + + /* We're now looking at a regular file that doesn't yet exist + * in the database, so add it. */ +- next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); ++ next = talloc_asprintf (notmuch, "%s/%s", path, name); + + state->processed_files++; + +@@ -605,10 +636,9 @@ add_files (notmuch_database_t *notmuch, + if (interrupted) + goto DONE; + +- /* Now that we've walked the whole filesystem list, anything left +- * over in the database lists has been deleted. */ +- while (notmuch_filenames_valid (db_files)) +- { ++ /* Now that we've walked the whole filesystem file list, files ++ * left over in the database list have been deleted. */ ++ while (notmuch_filenames_valid (db_files)) { + char *absolute = talloc_asprintf (state->removed_files, + "%s/%s", path, + notmuch_filenames_get (db_files)); +@@ -621,21 +651,6 @@ add_files (notmuch_database_t *notmuch, + notmuch_filenames_move_to_next (db_files); + } + +- while (notmuch_filenames_valid (db_subdirs)) +- { +- char *absolute = talloc_asprintf (state->removed_directories, +- "%s/%s", path, +- notmuch_filenames_get (db_subdirs)); +- +- if (state->debug) +- printf ("(D) add_files, pass 3: queuing leftover directory %s for deletion from database\n", +- absolute); +- +- _filename_list_add (state->removed_directories, absolute); +- +- notmuch_filenames_move_to_next (db_subdirs); +- } +- + /* If the directory's mtime is the same as the wall-clock time + * when we stat'ed the directory, we skip updating the mtime in + * the database because a message could be delivered later in this +@@ -647,11 +662,17 @@ add_files (notmuch_database_t *notmuch, + DONE: + if (next) + talloc_free (next); +- if (fs_entries) { +- for (i = 0; i < num_fs_entries; i++) +- free (fs_entries[i]); ++ if (fs_subdirs) { ++ for (i = 0; i < num_fs_subdirs; i++) ++ free (fs_subdirs[i]); ++ ++ free (fs_subdirs); ++ } ++ if (fs_files) { ++ for (i = 0; i < num_fs_files; i++) ++ free (fs_files[i]); + +- free (fs_entries); ++ free (fs_files); + } + if (db_subdirs) + notmuch_filenames_destroy (db_subdirs); +-- +2.1.4 +