[PATCH v2] new: Don't scan unchanged directories with no sub-directories
author Austin Clements <amdragon@MIT.EDU>
Thu, 24 Oct 2013 21:38:59 +0000 (17:38 -0400)
committer W. Trevor King <wking@tremily.us>
Fri, 7 Nov 2014 17:57:37 +0000 (09:57 -0800)
50/c1d3b65cedca67accd73659a1d30ab3005287d [new file with mode: 0644]

diff --git a/50/c1d3b65cedca67accd73659a1d30ab3005287d b/50/c1d3b65cedca67accd73659a1d30ab3005287d
new file mode 100644 (file)
index 0000000..84da2db
--- /dev/null
@@ -0,0 +1,126 @@
+Return-Path: <amdragon@mit.edu>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+       by olra.theworths.org (Postfix) with ESMTP id CC891431FC2\r
+       for <notmuch@notmuchmail.org>; Thu, 24 Oct 2013 14:39:18 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: -0.7\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=-0.7 tagged_above=-999 required=5\r
+       tests=[RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+       by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+       with ESMTP id R3cJTEIAWxAz for <notmuch@notmuchmail.org>;\r
+       Thu, 24 Oct 2013 14:39:11 -0700 (PDT)\r
+Received: from dmz-mailsec-scanner-2.mit.edu (dmz-mailsec-scanner-2.mit.edu\r
+       [18.9.25.13])\r
+       by olra.theworths.org (Postfix) with ESMTP id 26FD1431FB6\r
+       for <notmuch@notmuchmail.org>; Thu, 24 Oct 2013 14:39:11 -0700 (PDT)\r
+X-AuditID: 1209190d-b7f528e0000009b4-08-5269937ead0a\r
+Received: from mailhub-auth-1.mit.edu ( [18.9.21.35])\r
+       by dmz-mailsec-scanner-2.mit.edu (Symantec Messaging Gateway) with SMTP\r
+       id 06.26.02484.E7399625; Thu, 24 Oct 2013 17:39:10 -0400 (EDT)\r
+Received: from outgoing.mit.edu (outgoing-auth-1.mit.edu [18.9.28.11])\r
+       by mailhub-auth-1.mit.edu (8.13.8/8.9.2) with ESMTP id r9OLd825031538; \r
+       Thu, 24 Oct 2013 17:39:08 -0400\r
+Received: from drake.dyndns.org (26-4-172.dynamic.csail.mit.edu [18.26.4.172])\r
+       (authenticated bits=0)\r
+       (User authenticated as amdragon@ATHENA.MIT.EDU)\r
+       by outgoing.mit.edu (8.13.8/8.12.4) with ESMTP id r9OLd6qB010508\r
+       (version=TLSv1/SSLv3 cipher=AES256-SHA bits=256 verify=NOT);\r
+       Thu, 24 Oct 2013 17:39:07 -0400\r
+Received: from amthrax by drake.dyndns.org with local (Exim 4.77)\r
+       (envelope-from <amdragon@mit.edu>)\r
+       id 1VZScU-0003Gj-89; Thu, 24 Oct 2013 17:39:06 -0400\r
+From: Austin Clements <amdragon@MIT.EDU>\r
+To: notmuch@notmuchmail.org\r
+Subject: [PATCH v2] new: Don't scan unchanged directories with no\r
+       sub-directories\r
+Date: Thu, 24 Oct 2013 17:38:59 -0400\r
+Message-Id: <1382650739-12438-1-git-send-email-amdragon@mit.edu>\r
+X-Mailer: git-send-email 1.8.4.rc3\r
+In-Reply-To: <20131024210837.GH20337@mit.edu>\r
+References: <20131024210837.GH20337@mit.edu>\r
+X-Brightmail-Tracker:\r
+ H4sIAAAAAAAAA+NgFtrLIsWRmVeSWpSXmKPExsUixCmqrFs3OTPIYPNZdYvrN2cyW3Tc3s3m\r
+       wOTxbNUtZo+PT2+xBDBFcdmkpOZklqUW6dslcGXM6q0uuCBU8ePMdPYGxvV8XYycHBICJhJb\r
+       53WyQ9hiEhfurWfrYuTiEBLYxyjxpfUHE4SzkVHi1OW3jBDOESaJpzunsEM4cxkldr3eyAjS\r
+       zyagIbFt/3IwW0RAWmLn3dmsIDazgKPEngPLWUBsYYFAidnHb4DVsAioSiy8PwWshlfAQaLn\r
+       0lY2iDuUJBae2gYW5xTQlTi07RATiC0koCNxo+kN4wRG/gWMDKsYZVNyq3RzEzNzilOTdYuT\r
+       E/PyUot0jfRyM0v0UlNKNzGCQ0mSdwfju4NKhxgFOBiVeHg1PqQHCbEmlhVX5h5ilORgUhLl\r
+       NZ6UGSTEl5SfUpmRWJwRX1Sak1p8iFGCg1lJhHeaHlCONyWxsiq1KB8mJc3BoiTOe5PDPkhI\r
+       ID2xJDU7NbUgtQgmK8PBoSTBGw8yVLAoNT21Ii0zpwQhzcTBCTKcB2h4FkgNb3FBYm5xZjpE\r
+       /hSjopQ47waQhABIIqM0D64XFuuvGMWBXhHmrQSp4gGmCbjuV0CDmYAGT1mSBjK4JBEhJdXA\r
+       KJlbWCsr83PiVstOn8wlwgvv9aa/W/uyUYBb9cuBh0dTtzFudSuwnBcgs2ubzd6F0SYsHGaJ\r
+       VmsVytnaDJt1nzp/fpFc6M5323NL0UKtogMWs/k8eI63cpxet3ypaXBr9e2TM51fCd46f8/y\r
+       mf6EMxcSlOdkJxktfKETz9Rh8WVpUo/d5G9LlFiKMxINtZiLihMBqPKXbNACAAA=\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+       <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+       <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Thu, 24 Oct 2013 21:39:18 -0000\r
+\r
+This can substantially reduce the cost of notmuch new in some\r
+situations, such as when the file system cache is cold or when the\r
+Maildir is on NFS.\r
+---\r
+\r
+This should fix the problem with directories containing symlinks to\r
+other directories, but no actual sub-directories.\r
+\r
+ notmuch-new.c | 29 +++++++++++++++++++++++++++++\r
+ 1 file changed, 29 insertions(+)\r
+\r
+diff --git a/notmuch-new.c b/notmuch-new.c\r
+index faa33f1..ba05cb4 100644\r
+--- a/notmuch-new.c\r
++++ b/notmuch-new.c\r
+@@ -323,6 +323,35 @@ add_files (notmuch_database_t *notmuch,\r
+     }\r
+     db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;\r
\r
++    /* If the directory is unchanged from our last scan and has no\r
++     * sub-directories, then return without scanning it at all.  In\r
++     * some situations, skipping the scan can substantially reduce the\r
++     * cost of notmuch new, especially since the huge numbers of files\r
++     * in Maildirs make scans expensive, but all files live in leaf\r
++     * directories.\r
++     *\r
++     * To check for sub-directories, we borrow a trick from find,\r
++     * kpathsea, and many other UNIX tools: a directory's link\r
++     * count is the number of sub-directories (specifically, their\r
++     * '..' entries) plus 2 (the link from the parent and the link for\r
++     * '.').  This check is safe even on weird file systems, since\r
++     * file systems that can't compute this will return 0 or 1.  This\r
++     * is safe even on *really* weird file systems like HFS+ that\r
++     * mistakenly return the total number of directory entries, since\r
++     * that only inflates the count beyond 2.\r
++     */\r
++    if (directory && fs_mtime == db_mtime && st.st_nlink == 2) {\r
++      /* There's one catch: pass 1 below considers symlinks to\r
++       * directories to be directories, but these don't increase the\r
++       * file system link count.  So, only bail early if the\r
++       * database agrees that there are no sub-directories. */\r
++      db_subdirs = notmuch_directory_get_child_directories (directory);\r
++      if (!notmuch_filenames_valid (db_subdirs))\r
++          goto DONE;\r
++      notmuch_filenames_destroy (db_subdirs);\r
++      db_subdirs = NULL;\r
++    }\r
++\r
+     /* If the database knows about this directory, then we sort based\r
+      * on strcmp to match the database sorting. Otherwise, we can do\r
+      * inode-based sorting for faster filesystem operation. */\r
+-- \r
+1.8.4.rc3\r
+\r