From 03d5175001185a7eb0b3219665ea0974147da8cc Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 5 Jan 2010 17:43:03 -0800 Subject: [PATCH] notmuch new: Detect deleted (renamed) files and directories. This takes advantage of the notmuch_directory_t interfaces added recently (with cooresponding storage of directory documents in the database) to detect when files or entire directories are deleted or renamed within the mail store. This also fixes the recent regression where *all* files would be processed by every run of "notmuch new", (now only new files are processed once again). The deleted files and directories are only detected so far. They aren't properly removed from the database. --- notmuch-new.c | 104 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 17 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index af742977..b2b6043a 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -87,11 +87,18 @@ add_files_print_progress (add_files_state_t *state) fflush (stdout); } -static int ino_cmp(const struct dirent **a, const struct dirent **b) +static int +dirent_sort_inode (const struct dirent **a, const struct dirent **b) { return ((*a)->d_ino < (*b)->d_ino) ? -1 : 1; } +static int +dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b) +{ + return strcmp ((*a)->d_name, (*b)->d_name); +} + /* Test if the directory looks like a Maildir directory. * * Search through the array of directory entries to see if we can find all @@ -124,21 +131,37 @@ _entries_resemble_maildir (struct dirent **entries, int count) /* Examine 'path' recursively as follows: * * o Ask the filesystem for the mtime of 'path' (fs_mtime) - * * o Ask the database for its timestamp of 'path' (db_mtime) * - * o For each sub-directory of path, recursively call into this - * same function. + * o Ask the filesystem for files and directories within 'path' + * (via scandir and stored in fs_entries) + * o Ask the database for files and directories within 'path' + * (db_files and db_subdirs) * - * o If 'fs_mtime' > 'db_mtime' + * o Pass 1: For each directory in fs_entries, recursively call into + * this same function. * - * o For each regular file directly within 'path', call - * add_message to add the file to the database. + * o Pass 2: If 'fs_mtime' > 'db_mtime', then walk fs_entries + * simultaneously with db_files and db_subdirs. Look for one of + * three interesting cases: * - * o Tell the database to update its time of 'path' to 'fs_mtime' + * 1. Regular file in fs_entries and not in db_files + * This is a new file to add_message into the database. * - * The 'struct stat *st' must point to a structure that has already - * been initialized for 'path' by calling stat(). + * 2. Filename in db_files not in fs_entries. + * This is a file that has been removed from the mail store. + * + * 3. Directory in db_subdirs not in fs_entries + * This is a directory that has been removed from the mail store. + * + * Note that the addition of a directory is not interesting here, + * since that will have been taken care of in pass 1. Also, we + * don't immediately act on file/directory removal since we must + * ensure that in the case of a rename that the new filename is + * added before the old filename is removed, (so that no + * information is lost from the database). + * + * o Tell the database to update its time of 'path' to 'fs_mtime' */ static notmuch_status_t add_files_recursive (notmuch_database_t *notmuch, @@ -154,6 +177,8 @@ add_files_recursive (notmuch_database_t *notmuch, struct dirent **fs_entries = NULL; int i, num_fs_entries; notmuch_directory_t *directory; + notmuch_filenames_t *db_files = NULL; + notmuch_filenames_t *db_subdirs = NULL; struct stat st; notmuch_bool_t is_maildir; @@ -173,7 +198,12 @@ add_files_recursive (notmuch_database_t *notmuch, directory = notmuch_database_get_directory (notmuch, path); db_mtime = notmuch_directory_get_mtime (directory); - num_fs_entries = scandir (path, &fs_entries, 0, ino_cmp); + /* If the database knows about this directory, then we sort based + * on strcmp to match the database sorting. Otherwise, we can do + * inode-based sorting for faster filesystem operation. */ + num_fs_entries = scandir (path, &fs_entries, 0, + db_mtime ? + dirent_sort_strcmp_name : dirent_sort_inode); if (num_fs_entries == -1) { fprintf (stderr, "Error opening directory %s: %s\n", @@ -182,7 +212,7 @@ add_files_recursive (notmuch_database_t *notmuch, goto DONE; } - /* First, recurse into all sub-directories. */ + /* Pass 1: Recurse into all sub-directories. */ is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries); for (i = 0; i < num_fs_entries; i++) { @@ -217,21 +247,55 @@ add_files_recursive (notmuch_database_t *notmuch, } /* If this directory hasn't been modified since the last - * add_files, then we can skip the second pass where we look for - * new files in this directory. */ + * "notmuch new", then we can skip the second pass entirely. */ if (fs_mtime <= db_mtime) goto DONE; - /* Second, scan the regular files in this directory. */ - for (i = 0; i < num_fs_entries; i++) { + /* Pass 2: Scan for new files, removed files, and removed directories. */ + db_files = notmuch_directory_get_child_files (directory); + db_subdirs = notmuch_directory_get_child_directories (directory); + + for (i = 0; i < num_fs_entries; i++) + { if (interrupted) break; entry = fs_entries[i]; + /* Check if we've walked past any names in db_files or + * db_subdirs. If so, these have been deleted. */ + while (notmuch_filenames_has_more (db_files) && + strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0) + { + printf ("Detected deleted file %s/%s\n", path, + notmuch_filenames_get (db_files)); + + notmuch_filenames_advance (db_files); + } + + while (notmuch_filenames_has_more (db_subdirs) && + strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0) + { + if (strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) < 0) + printf ("Detected deleted directory %s/%s", path, + notmuch_filenames_get (db_subdirs)); + + notmuch_filenames_advance (db_subdirs); + } + if (entry->d_type != DT_REG) continue; + /* Don't add a file that we've added before. */ + if (notmuch_filenames_has_more (db_files) && + strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0) + { + notmuch_filenames_advance (db_files); + continue; + } + + /* We're not looking at a regular file that doesn't yet exist + * in the database, so add it. */ next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name); state->processed_files++; @@ -311,6 +375,12 @@ add_files_recursive (notmuch_database_t *notmuch, closedir (dir); if (fs_entries) free (fs_entries); + if (db_subdirs) + notmuch_filenames_destroy (db_subdirs); + if (db_files) + notmuch_filenames_destroy (db_files); + if (directory) + notmuch_directory_destroy (directory); return ret; } @@ -377,7 +447,7 @@ count_files (const char *path, int *count) char *next; struct stat st; struct dirent **fs_entries = NULL; - int num_fs_entries = scandir (path, &fs_entries, 0, ino_cmp); + int num_fs_entries = scandir (path, &fs_entries, 0, dirent_sort_inode); int i = 0; if (num_fs_entries == -1) { -- 2.26.2