--- /dev/null
+Return-Path: <ethan.glasser.camp@gmail.com>\r
+X-Original-To: notmuch@notmuchmail.org\r
+Delivered-To: notmuch@notmuchmail.org\r
+Received: from localhost (localhost [127.0.0.1])\r
+ by olra.theworths.org (Postfix) with ESMTP id 22889431FAF\r
+ for <notmuch@notmuchmail.org>; Sun, 6 Oct 2013 21:49:52 -0700 (PDT)\r
+X-Virus-Scanned: Debian amavisd-new at olra.theworths.org\r
+X-Spam-Flag: NO\r
+X-Spam-Score: -0.799\r
+X-Spam-Level: \r
+X-Spam-Status: No, score=-0.799 tagged_above=-999 required=5\r
+ tests=[DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1,\r
+ FREEMAIL_FROM=0.001, RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled\r
+Received: from olra.theworths.org ([127.0.0.1])\r
+ by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)\r
+ with ESMTP id l0dk1rHivO6h for <notmuch@notmuchmail.org>;\r
+ Sun, 6 Oct 2013 21:49:44 -0700 (PDT)\r
+Received: from mail-qe0-f53.google.com (mail-qe0-f53.google.com\r
+ [209.85.128.53]) (using TLSv1 with cipher RC4-SHA (128/128 bits))\r
+ (No client certificate requested)\r
+ by olra.theworths.org (Postfix) with ESMTPS id 83F48431FAE\r
+ for <notmuch@notmuchmail.org>; Sun, 6 Oct 2013 21:49:44 -0700 (PDT)\r
+Received: by mail-qe0-f53.google.com with SMTP id cy11so956017qeb.26\r
+ for <notmuch@notmuchmail.org>; Sun, 06 Oct 2013 21:49:42 -0700 (PDT)\r
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113;\r
+ h=from:to:subject:in-reply-to:references:user-agent:date:message-id\r
+ :mime-version:content-type;\r
+ bh=djCxR7kr3DGkKxSNNOoNrBe3lo2uI3tzo52A5puB6eI=;\r
+ b=W9YJGZc3BuQI5wlBCt6FgoIy59LdhOiQsMaW1yejCmq8RkflhNV3kWbL6q0NPI7P6/\r
+ 8LBCDceH4hfL6x/wTr7q2bYrWlBgRkkyrBBVhax7MpiY0aGL1+pzuM5tpN01d0P65R9J\r
+ /6DXChty02CS4Z/fenIyWN1nWlkkZhuBYJpPjgNzpMpwNC3vZu11w56WD5u/xx7gKT5J\r
+ L3imy4r2lwNHTjWSxDD0yPDYuxQkxYWfgWCDQtBIjZtsKD7PGA3Sq9HT2a+BuOZ1tQIt\r
+ oHD28zD4wPfjyYD+f9Q3v4FH+Rhx0RQEGRKdijahPqJ4tsv1VBDVikKUw6j2wAWPIuug\r
+ 3UmQ==\r
+X-Received: by 10.224.11.133 with SMTP id t5mr35139503qat.34.1381121382839;\r
+ Sun, 06 Oct 2013 21:49:42 -0700 (PDT)\r
+Received: from smtp.gmail.com ([66.114.71.21])\r
+ by mx.google.com with ESMTPSA id g2sm58448024qaf.12.1969.12.31.16.00.00\r
+ (version=TLSv1.2 cipher=RC4-SHA bits=128/128);\r
+ Sun, 06 Oct 2013 21:49:41 -0700 (PDT)\r
+From: Ethan Glasser-Camp <ethan.glasser.camp@gmail.com>\r
+To: David Bremner <david@tethera.net>,\r
+ notmuch mailing list <notmuch@notmuchmail.org>\r
+Subject: Re: On disk tag storage format\r
+In-Reply-To: <87fvsgh5g5.fsf@betacantrips.com>\r
+References: <874nk8v9zw.fsf@zancas.localnet> <87vc9mtpxh.fsf@zancas.localnet>\r
+ <87fvsgh5g5.fsf@betacantrips.com>\r
+User-Agent: Notmuch/0.16+80~g81ee785 (http://notmuchmail.org) Emacs/24.2.1\r
+ (x86_64-pc-linux-gnu)\r
+Date: Mon, 07 Oct 2013 00:49:39 -0400\r
+Message-ID: <87bo31heho.fsf@betacantrips.com>\r
+MIME-Version: 1.0\r
+Content-Type: multipart/mixed; boundary="=-=-="\r
+X-BeenThere: notmuch@notmuchmail.org\r
+X-Mailman-Version: 2.1.13\r
+Precedence: list\r
+List-Id: "Use and development of the notmuch mail system."\r
+ <notmuch.notmuchmail.org>\r
+List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>\r
+List-Archive: <http://notmuchmail.org/pipermail/notmuch>\r
+List-Post: <mailto:notmuch@notmuchmail.org>\r
+List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>\r
+List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,\r
+ <mailto:notmuch-request@notmuchmail.org?subject=subscribe>\r
+X-List-Received-Date: Mon, 07 Oct 2013 04:49:52 -0000\r
+\r
+--=-=-=\r
+Content-Type: text/plain\r
+\r
+Ethan Glasser-Camp <ethan.glasser.camp@gmail.com> writes:\r
+\r
+> I've modified the script so that it would run by mangling filenames,\r
+> which is irreversible (the original tried to encode/decode filenames\r
+> reversibly). Then I got a little carried away, adding --verbose and\r
+> --dry-run options as well as removing a couple trailing\r
+> semicolons. Here's my version, in case it should interest anyone else.\r
+\r
+Hi guys,\r
+\r
+There was a bug in the previous version I sent. It didn't handle\r
+unlinking tags correctly. Also, I spotted a bug in syncing to untagged\r
+messages. Maybe I should stop using emails as version control.\r
+\r
+---- 8< ----\r
+\r
+\r
+\r
+--=-=-=\r
+Content-Type: text/x-python\r
+Content-Disposition: inline; filename=linksync.py\r
+Content-Description: slightly more tested this time\r
+\r
+# Copyright 2013, David Bremner <david@tethera.net>\r
+\r
+# Licensed under the same terms as notmuch.\r
+\r
+import notmuch\r
+import re\r
+import os, errno\r
+import sys\r
+from collections import defaultdict\r
+import argparse\r
+import hashlib\r
+\r
# Skip automatic and maildir tags: they are neither exported to nor imported
# from the link farm.  The automatic tag is spelled "attachment"; the older
# misspelling "attachement" is still accepted so that any tag carrying that
# spelling keeps being skipped as before.
skiptags = re.compile(r"^(attachment|attachement|signed|encrypted|draft|flagged|passed|replied|unread)$")
+\r
+# some random person on stack overflow suggests:\r
+\r
def mkdir_p(path):
    """Create ``path`` together with any missing parents.

    A directory that already exists is not an error.  Every other failure,
    including ``path`` existing as something that is not a directory, is
    re-raised unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_there = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_there:
            raise
+\r
# Set from the --verbose command line option by the main program.
VERBOSE = False


def log(msg):
    """Print ``msg`` when the module-level VERBOSE flag is set."""
    if not VERBOSE:
        return
    print(msg)
+\r
# Characters that are left unescaped when a string is encoded for use as a
# file or directory name.
CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'

# Pattern matching any single character that is NOT in CHARSET.
encode_re = '([^{0}])'.format(CHARSET)

# Pattern matching a '%' followed by two hex digits whose first digit is 0-7,
# i.e. only escapes for values below 0x80 are recognised.
decode_re = '[%]([0-7][0-9A-Fa-f])'
+\r
def encode_one_char(match):
    """Return the ``%xx`` escape for the character captured in group 1."""
    code_point = ord(match.group(1))
    return '%{:02x}'.format(code_point)
+\r
def encode_for_fs(str):
    """Return ``str`` with every character matched by encode_re %-escaped."""
    escaped = re.sub(encode_re, encode_one_char, str)
    return escaped
+\r
def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a filename.

    The id is first escaped with encode_for_fs.  If the escaped id is short
    enough it is returned as is.  Otherwise it is shortened to a prefix and
    a suffix of the escaped id joined by '...', followed by the first
    SHA_LENGTH hex digits of the SHA-256 of the whole escaped id, so that
    distinct long ids are unlikely to collide.  The result is not meant to
    be decoded back into a message id.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8    # :2,S...??
    encoded = encode_for_fs(msg_id)
    if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
        return encoded

    SHA_LENGTH = 8
    TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
    PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
    prefix = encoded[:PREFIX_LENGTH]
    suffix = encoded[-SUFFIX_LENGTH:]
    # hashlib digests bytes; text must be encoded explicitly (passing text
    # raises TypeError on Python 3).  Byte strings are passed through as is.
    digest_input = encoded
    if not isinstance(digest_input, bytes):
        digest_input = digest_input.encode('utf-8')
    sha = hashlib.sha256(digest_input)
    return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]
+\r
def decode_one_char(match):
    """Return the character whose hexadecimal code is captured in group 1."""
    hex_digits = match.group(1)
    return chr(int(hex_digits, 16))
+\r
def decode_from_fs(str):
    """Return ``str`` with every escape matched by decode_re decoded."""
    decoded = re.sub(decode_re, decode_one_char, str)
    return decoded
+\r
def mk_tag_dir(tagdir):
    """Create the 'cur', 'new' and 'tmp' subdirectories of ``tagdir``."""
    for subdir in ('cur', 'new', 'tmp'):
        mkdir_p(os.path.join(tagdir, subdir))
+\r
+\r
# A ':2,' marker followed by any run of characters other than ':'.
flagpart = '(:2,[^:]*)'
# The same pattern, anchored at the end of the string.
flagre = re.compile('{0}$'.format(flagpart))
+\r
def path_for_msg(dir, msg):
    """Return the path inside ``dir``/cur at which ``msg`` is linked.

    The file name is the mangled message id followed by whatever flagre
    finds at the end of the message's own file name (nothing if it does
    not match).
    """
    found = flagre.search(msg.get_filename())
    flags = found.group(1) if found is not None else ''
    leaf = mangle_message_id(msg.get_message_id()) + flags
    return os.path.join(dir, 'cur', leaf)
+\r
+\r
def unlink_message(dir, msg):
    """Remove every link to ``msg`` from ``dir``/cur.

    A link is any entry whose name is the mangled message id, optionally
    followed by a flag suffix.  With --dry-run the removals are only logged.
    """
    curdir = os.path.join(dir, 'cur')
    mangled_id = mangle_message_id(msg.get_message_id())
    matcher = re.compile('^' + re.escape(mangled_id) + flagpart + '?$')

    for entry in os.listdir(curdir):
        if not matcher.match(entry):
            continue
        victim = os.path.join(curdir, entry)
        log("Unlinking {}".format(victim))
        if not opts.dry_run:
            os.unlink(victim)
+\r
def dir_for_tag(tag):
    """Return the directory below tagroot that holds the links for ``tag``."""
    return os.path.join(tagroot, encode_for_fs(tag))
+\r
# Tags found on disk, keyed by mangled message id.
disk_tags = defaultdict(set)
# Every mangled message id found on disk.
disk_ids = set()

def read_tags_from_disk(rootdir):
    """Record the tags found below ``rootdir`` in disk_tags and disk_ids.

    Files are expected at <rootdir>/<encoded tag>/{cur,new,tmp}/<file>.
    The mangled id is the part of the file name before the first ':', and
    the tag is the decoded name of the directory containing cur/new/tmp.

    Files that do not sit in a cur, new or tmp directory are ignored; the
    path is taken apart with os.path rather than by counting '/' separated
    components, which mis-attributed such files to an unrelated directory
    name (or raised IndexError for a single-component ``rootdir``).
    """
    for root, _subdirs, files in os.walk(rootdir):
        tagdir, leaf = os.path.split(root)
        if leaf not in ('cur', 'new', 'tmp'):
            continue
        tag = decode_from_fs(os.path.basename(tagdir))
        for filename in files:
            mangled_id = filename.split(':')[0]
            disk_ids.add(mangled_id)
            disk_tags[mangled_id].add(tag)
+\r
# Main program
#
# Compares the tags stored in the notmuch database with the tags found in
# the link farm below TAGROOT and updates whichever side --destination
# names.  With --dry-run nothing is modified; combine it with --verbose to
# see what would be done.

parser = argparse.ArgumentParser(description='Sync notmuch tag database to/from link farm')
parser.add_argument('-l','--link-style',choices=['hard','symbolic', 'adaptive'],
                    default='adaptive')
parser.add_argument('-d','--destination',choices=['disk','notmuch'], default='disk')
# A plain integer literal: the "L" suffix is a syntax error on Python 3.
parser.add_argument('-t','--threshold', default=50000, type=int)
parser.add_argument('-n','--dry-run', default=False, action='store_true')
parser.add_argument('-v','--verbose', default=False, action='store_true')

parser.add_argument('tagroot')

opts=parser.parse_args()
VERBOSE = opts.verbose

tagroot=opts.tagroot

sync_from_links = (opts.destination == 'notmuch')

read_tags_from_disk(tagroot)

if sync_from_links:
    db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)
else:
    db = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)

dbtags = filter (lambda tag: not skiptags.match(tag), db.get_all_tags())

if sync_from_links:
    # have to iterate over even untagged messages
    querystr = '*'
else:
    querystr = ' OR '.join(map (lambda tag: 'tag:'+tag, dbtags))

q_new = notmuch.Query(db, querystr)
q_new.set_sort(notmuch.Query.SORT.UNSORTED)
for msg in q_new.search_messages():

    # silently ignore empty tags
    db_tags = set(filter (lambda tag: tag != '' and not skiptags.match(tag),
                          msg.get_tags()))

    message_id = msg.get_message_id()

    mangled_id = mangle_message_id(message_id)

    # Whatever is left in disk_ids after the loop has no message in the
    # query result.
    disk_ids.discard(mangled_id)

    missing_on_disk = db_tags.difference(disk_tags[mangled_id])
    missing_in_db = disk_tags[mangled_id].difference(db_tags)

    if sync_from_links:
        msg.freeze()

    filename = msg.get_filename()

    if len(missing_on_disk) > 0:
        if opts.link_style == 'adaptive':
            # adaptive: symlink files larger than the threshold, hard link
            # the rest
            statinfo = os.stat (filename)
            symlink = (statinfo.st_size > opts.threshold)
        else:
            symlink = opts.link_style == 'symbolic'

        for tag in missing_on_disk:

            if sync_from_links:
                log("Removing tag {} from {}".format(tag, message_id))
                if not opts.dry_run:
                    msg.remove_tag(tag,sync_maildir_flags=False)
            else:
                tagdir = dir_for_tag (tag)

                if not opts.dry_run:
                    mk_tag_dir (tagdir)

                newlink = path_for_msg (tagdir, msg)

                log("Linking {} to {}".format(filename, newlink))
                if not opts.dry_run:
                    if symlink:
                        os.symlink(filename, newlink)
                    else:
                        os.link(filename, newlink)


    for tag in missing_in_db:
        if sync_from_links:
            log("Adding {} to message {}".format(tag, message_id))
            if not opts.dry_run:
                msg.add_tag(tag,sync_maildir_flags=False)
        else:
            tagdir = dir_for_tag (tag)
            unlink_message(tagdir,msg)

    if sync_from_links:
        msg.thaw()

# everything remaining in disk_ids is a deleted message
# unless we are syncing back to the database, in which case
# it just might not currently have any non maildir tags.

if not sync_from_links:
    for root, subFolders, files in os.walk(tagroot):
        for filename in files:
            mangled_id = filename.split(':')[0]
            if mangled_id in disk_ids:
                stale = os.path.join(root, filename)
                # Log and honour --dry-run like every other modification;
                # previously these links were removed even in a dry run.
                log("Unlinking {}".format(stale))
                if not opts.dry_run:
                    os.unlink(stale)


db.close()
+\r
+# currently empty directories are not pruned.\r
+\r
+--=-=-=\r
+Content-Type: text/plain\r
+\r
+\r
+---- 8< ----\r
+\r
+Of course, the next step is to sync using this mechanism. Rsync doesn't\r
+really have a concept of history, which basically makes it unusable for\r
+this purpose [1]. Unison doesn't really understand renames, so it gets\r
+confused when you mark a message as read (which might move it from new\r
+to cur, and definitely changes its tags). Bremner suggested\r
+syncmaildir. Syncmaildir doesn't understand links at all. Bremner\r
+suggested that we could use some parts of syncmaildir to implement the\r
+tag syncing we need.\r
+\r
+I didn't have anything else going on this weekend so I tried to\r
+prototype the approach. It turns out to be possible to leverage some\r
+parts of syncmaildir. I translated a bunch of smd-client into a new\r
+program, tagsync-client, that links to messages in an existing notmuch\r
+DB. It seems like it's possible to use it in place of the existing\r
+smd-client by putting lines like this in your config:\r
+\r
+SMDCLIENT=~/src/tagsync.git/tagsync-client.py\r
+REMOTESMDCLIENT=~/src/tagsync.git/tagsync-client.py\r
+\r
+The sequence of commands I ran:\r
+\r
+- linksync.py to dump tags to ~/Mail/.notmuch/exported-tags\r
+- smd-pull mail to sync ~/Mail but excluding .notmuch\r
+- notmuch new\r
+- smd-pull tagsync (using the above client) to sync ~/Mail/.notmuch/exported-tags\r
+- linksync.py to pull tags from ~/Mail/.notmuch/exported-tags\r
+\r
+syncmaildir doesn't cope well with drafts, so it might choke on those,\r
+and it doesn't like symlinks (it thinks they always point to directories),\r
+so be sure to run linksync with -l hard.\r
+\r
+Here's the script. It's a work in progress; I have only tested it once in one direction.\r
+\r
+---- 8< ----\r
+\r
+\r
+--=-=-=\r
+Content-Type: text/x-python\r
+Content-Disposition: inline; filename=tagsync-client.py\r
+Content-Description: client script\r
+\r
+#! /usr/bin/env python\r
+import sys\r
+from sys import stdout, stdin, stderr\r
+import stat\r
+import urllib\r
+import hashlib\r
+import re\r
+import os.path\r
+import argparse\r
+import subprocess\r
+import traceback\r
+import notmuch\r
+import time\r
# Protocol version string sent to, and required from, the other endpoint.
PROTOCOL_VERSION = "1.1"

# Not reproducing the autoconf logic
# (the helper programs are referred to by bare name).
XDELTA = 'xdelta'
MDDIFF = 'mddiff'
+\r
VERBOSE = False


def log(msg):
    """Write ``msg`` to stderr with an "INFO: " prefix when VERBOSE is set."""
    if not VERBOSE:
        return
    stderr.write("INFO: "+msg+"\n")
+\r
def __error(msg):
    """Abort the current operation by raising ValueError carrying ``msg``."""
    failure = ValueError(msg)
    raise failure
+\r
def log_tags_and_fail(msg, *args):
    """Report a failure via log_tags(*args), then raise ValueError(msg)."""
    log_tags(*args)
    raise ValueError(msg)
+\r
def log_internal_error_and_fail(msg, *args):
    """Report an internal error, then raise ValueError(msg).

    ``args`` are handed to log_internal_error_tags after ``msg``.
    """
    log_internal_error_tags(msg, *args)
    raise ValueError(msg)
+\r
def log_error(msg):
    """Write ``msg`` to stderr as an "ERROR: " line; return write()'s result."""
    line = "ERROR: {}\n".format(msg)
    return stderr.write(line)
+\r
def log_tag(tag):
    """Write ``tag`` to stderr as a "TAGS: " line; return write()'s result."""
    line = "TAGS: {}\n".format(tag)
    return stderr.write(line)
+\r
def log_progress(msg):
    """Placeholder for progress reporting; does nothing with ``msg``."""
    return None
+\r
def log_tags(context='unknown', cause='unknown', human=False, *args):
    """Emit one machine-readable error line through log_tag.

    ``context`` and ``cause`` are inserted verbatim.  ``human`` selects
    between "necessary" and "avoidable" human intervention.  Any further
    positional arguments are listed, space separated, as suggested actions.
    Returns whatever log_tag returns.
    """
    intervention = "necessary" if human else "avoidable"

    suffix = ""
    if len(args):
        suffix = ' suggested-actions({})'.format(' '.join(args))

    report = "error::context({}) probable-cause({}) human-intervention({})".format(
        context, cause, intervention)
    return log_tag(report + suffix)
+\r
def mkdir_p(filename):
    """Maildir-aware mkdir.

    Creates a directory and all parent directories.

    Moreover, if the last component is 'tmp', 'cur' or 'new', the
    others are created too.

    If ``filename`` does not end in '/', its last component is taken to be
    a file name and only the directory containing it is created.  Relative
    paths are passed through translate() and then resolved against the
    home directory.

    Raises OSError if the directory cannot be created and does not already
    exist (previously every OSError was silently ignored).
    """

    # The Lua function throws away the last path component if it
    # doesn't end in /. This allows you to just call mkdir_p on any
    # file and a directory for it to live will be created.
    if not filename.endswith('/'):
        filename, _ = os.path.split(filename)

    if not filename.startswith('/'):
        # This path is relative to HOME, and needs to be translated
        # too.
        filename = translate(filename)
        filename = os.path.expanduser('~/'+filename)

    # Strip trailing slashes before splitting so that "box/cur/" is still
    # recognised as a maildir subdirectory.
    dirname, basename = os.path.split(filename.rstrip('/'))
    try:
        os.makedirs(filename)
    except OSError:
        # An already existing directory is the expected case; anything
        # else (permissions, a file in the way, ...) is a real failure.
        if not os.path.isdir(filename):
            raise
    MAILDIR_SUBDIRS = ['tmp', 'cur', 'new']
    if basename in MAILDIR_SUBDIRS:
        for subdir in MAILDIR_SUBDIRS:
            to_create = os.path.join(dirname, subdir)
            if not os.path.exists(to_create):
                os.mkdir(to_create)
+\r
+\r
class FakeSubprocess(object):
    """File-like pair of streams that is set up on the first write.

    ``init_function`` is called once, on the first write(), with a dict
    that it must fill with the keys 'inf' (stream to read from), 'outf'
    (stream to write to) and 'pipe' (path removed after the first read
    that follows a write).
    """

    def __init__(self, init_function):
        self.init_function = init_function
        self.input = None
        self.output = None
        self.pipe_name = None
        self.removed = None
        self.did_write = None
        self.filter = {}

    def readline(self):
        """Return the next line from the input stream.

        Fails through log_internal_error_and_fail if nothing has been
        written yet.  The first read after a write also unlinks the pipe.
        """
        if not self.input:
            log_internal_error_and_fail("read called before write",
                                        "make_slave_filter_process")

        if not self.removed and self.did_write:
            self.removed = True
            rc = self.input.readline()
            os.unlink(self.pipe_name)
            return rc
        else:
            return self.input.readline()

    def write(self, *args):
        """Write to the output stream, running the initialiser if needed."""
        if not self.output:
            self.init_function(self.filter)
            self.input = self.filter['inf']
            self.output = self.filter['outf']
            self.pipe_name = self.filter['pipe']

        self.did_write = True
        self.output.write(*args)

    def flush(self):
        """Flush the output stream."""
        self.output.flush()

    def readlines(self):
        """Return all remaining input lines as a list.

        Provided under the usual file-object name because callers in this
        file use it; lines() is kept as an alias.
        """
        return self.input.readlines()

    def lines(self):
        """Alias of readlines()."""
        return self.readlines()
+\r
def make_slave_filter_process(cmd, seed="no seed"):
    """Return a FakeSubprocess wired to a helper through a named pipe.

    ``cmd`` is called with the path of the freshly created FIFO and must
    return an object with a ``stdout`` attribute, which becomes the stream
    to read from.  ``seed`` is mangled and made part of the FIFO name.

    The setup function defined here is not run by this call; it is handed
    to FakeSubprocess.  It fails through log_internal_error_and_fail if the
    FIFO directory or the FIFO itself cannot be created.
    """
    def init(filter):
        # Do nothing if this filter dict has already been set up.
        if 'inf' not in filter:
            home = os.getenv('HOME')
            user = os.getenv('USER') or 'nobody'
            # Replace space, '%', '.' and '/' so the seed is safe in a name.
            mangled_name = re.compile('[ %./]').sub('-', seed)
            attempt = 0
            if home:
                base_dir = home + '/.smd/fifo/'
            else:
                base_dir = '/tmp/'

            rc = subprocess.call([MDDIFF, '--mkdir-p', base_dir])
            if rc != 0:
                log_internal_error_and_fail('unable to create directory',
                                            'make_slave_filter_process')

            # Try names with attempt numbers 0..10 until mkfifo succeeds.
            while True:
                pipe_name = ''.join([base_dir, 'smd-', user, str(int(time.time())),
                                     mangled_name, str(attempt)])
                attempt += 1
                rc = subprocess.call([MDDIFF, '--mkfifo', pipe_name])
                if rc == 0 or attempt > 10:
                    break
            if rc != 0:
                log_internal_error_and_fail('unable to create fifo',
                                            "make_slave_filter_process")

            # Start the helper first, then open the FIFO for writing.
            inferior = cmd(pipe_name)
            filter['inf'] = inferior.stdout
            filter['outf'] = file(pipe_name, 'w')
            filter['pipe'] = pipe_name

    return FakeSubprocess(init)
+\r
_translator = None
def set_translator(p):
    """Install the mailbox name translator used by translate().

    ``p`` is the translator program.  The special value 'cat' installs the
    identity function.  Any other program is fed one name per line and must
    answer with one translated name per line; an empty answer, the answer
    'ERROR', or an answer containing '..' aborts through log_tags_and_fail.
    The translated name is returned without its line terminator.
    """
    global _translator
    # NOTE(review): open(pipe) opens the FIFO for reading in this process
    # before make_slave_filter_process opens it for writing; on POSIX that
    # open is expected to block until a writer appears.  Verify that
    # non-'cat' translators actually start.
    translator_filter = make_slave_filter_process(
        lambda pipe: subprocess.Popen(p, stdin=open(pipe), stdout=subprocess.PIPE),
        "translate")
    if p == 'cat':
        _translator = lambda x: x
    else:
        def translator_fn(x):
            translator_filter.write(x + '\n')
            translator_filter.flush()
            line = translator_filter.readline()
            if not line or line.strip() == 'ERROR':
                log_error("Translator {} on input {} gave an error".format(
                    p, x))
                # FakeSubprocess names this method lines(); the previous
                # readlines() call raised AttributeError here.
                for l in translator_filter.lines():
                    log_error(l)
                log_tags_and_fail("Unable to translate mailbox",
                                  'translate', 'bad-translator', True)
            if '..' in line:
                log_error("Translator {} on input {} returned a path containing ..".format(
                    p, x))
                log_tags_and_fail('Translator returned a path containing ..',
                                  'translate', 'bad-translator', True)

            # readline() keeps the line terminator, which must not become
            # part of the translated path.
            return line.rstrip('\r\n')

        _translator = translator_fn
+\r
def translate(x):
    """Return ``x`` run through the installed translator, if there is one."""
    if not _translator:
        return x
    return _translator(x)
+\r
+\r
# Filter used by sha_file(): file names are written to it and one answer
# line per name is read back from an MDDIFF process started on the pipe.
mddiff_sha_handler = make_slave_filter_process(
    lambda pipe: subprocess.Popen([MDDIFF, pipe], stdout=subprocess.PIPE),
    "sha_file")
+\r
def sha_file(name):
    """Return the (header sha, body sha) pair reported for file ``name``.

    The name is sent to the mddiff filter and one answer line is read
    back.  An answer starting with 'ERROR' aborts through
    log_tags_and_fail; an answer that does not consist of exactly two
    fields aborts through log_internal_error_and_fail.  Both raise
    ValueError.
    """
    mddiff_sha_handler.write(name+'\n')
    mddiff_sha_handler.flush()

    data = mddiff_sha_handler.readline()
    if data.startswith('ERROR'):
        log_tags_and_fail("Failed to sha1 message: " + (name or "nil"),
                          'sha_file', 'modify-while-update', False, 'retry')

    # Check the field count before unpacking: unpacking a malformed answer
    # raised a bare ValueError and the intended error report never ran.
    fields = data.split()
    if len(fields) != 2:
        log_internal_error_and_fail('mddiff incorrect behavior', 'mddiff')

    hsha, bsha = fields
    return hsha, bsha
+\r
def exists_and_sha(name):
    """Return (True, header sha, body sha) if ``name`` exists.

    Returns (False, False, False) otherwise.
    """
    if not os.path.exists(name):
        return False, False, False

    header_sha, body_sha = sha_file(name)
    return True, header_sha, body_sha
+\r
+\r
def touch(f):
    """Make sure file ``f`` exists, creating it empty if necessary.

    An existing readable file is left untouched.  If the file can be
    neither read nor created, the problem is logged and ValueError is
    raised.
    """
    try:
        # Readable already: nothing to do.  The handle is closed at once.
        with open(f, 'r'):
            return
    except IOError:
        pass

    try:
        with open(f, 'w'):
            pass
    except IOError:
        log_error('Unable to touch ' + quote(f))
        log_tags("touch", "bad-permissions", True,
                 "display-permissions(" + quote(f) + ")")
        # The helper is called __error; the bare name error is not defined.
        __error("Unable to touch a file")
+\r
def quote(s):
    """Return the repr() of ``s``, for embedding in log messages."""
    return '{!r}'.format(s)
+\r
+\r
def assert_exists(name):
    """Raise AssertionError unless the path ``name`` exists."""
    # os.exists does not exist; the check lives in os.path.  Raising
    # explicitly keeps the check active under "python -O".
    if not os.path.exists(name):
        raise AssertionError("Not found: "+repr(name))
+\r
def url_quote(txt):
    """Return ``txt`` percent-encoded, with no character treated as safe."""
    # urllib.quote moved to urllib.parse.quote in Python 3; accept either.
    try:
        quote_fn = urllib.quote
    except AttributeError:
        from urllib.parse import quote as quote_fn
    return quote_fn(txt, safe='')
+\r
def url_decode(s):
    """Return ``s`` with percent-escapes decoded."""
    # urllib.unquote moved to urllib.parse.unquote in Python 3; accept either.
    try:
        unquote_fn = urllib.unquote
    except AttributeError:
        from urllib.parse import unquote as unquote_fn
    return unquote_fn(s)
+\r
def log_internal_error_tags(msg, ctx):
    """Emit an 'internal-error' tag line naming ``ctx`` as probable cause.

    ``msg`` is accepted but not used.
    """
    log_tags(context='internal-error', cause=ctx, human=True)
    # Blob of "run gnome-open" junk not copied
+\r
def receive(inf, outfile):
    """Copy one length-prefixed chunk from ``inf`` into file ``outfile``.

    The first line read from ``inf`` must be "chunk <n>"; exactly <n>
    characters are then copied into ``outfile``.  Returns <n>.

    Raises ValueError (after logging) if ``outfile`` cannot be opened, if
    the peer sends ABORT or closes the connection, if the header line is
    malformed, or if the stream ends before <n> characters arrived.
    """
    try:
        outf = open(outfile, 'w')
    except IOError:
        log_error("Unable to open " + outfile + " for writing.")
        log_error('It may be caused by bad directory permissions, '+
                  'please check.')
        log_tags("receive", "non-writeable-file", True,
                 "display-permissions(" + quote(outfile) +")")
        # The helper is called __error; the bare name error is not defined.
        __error("Unable to write incoming data")

    # Close the output file on every path, including failures.
    try:
        line = inf.readline()
        if not line or line.strip() == "ABORT":
            log_error("Data transmission failed.")
            log_error("This problem is transient, please retry.")
            log_tags_and_fail('server sent ABORT or connection died',
                              "receive", "network", False, "retry")

        header = re.compile(r'^chunk (\d+)').match(line)
        if header is None:
            log_error("Unexpected chunk header: " + repr(line))
            log_tags_and_fail('malformed chunk header',
                              "receive", "network", False, "retry")

        # In the Lua version, this is called "len", but that's a builtin
        # in Python
        chunk_len = int(header.group(1))
        total = chunk_len
        while chunk_len:
            data = inf.read(min(chunk_len, 16384))
            if not data:
                # End of stream before the announced length: without this
                # check the loop would spin forever on empty reads.
                log_error("Data transmission failed.")
                log_error("This problem is transient, please retry.")
                log_tags_and_fail('connection died while receiving data',
                                  "receive", "network", False, "retry")
            chunk_len -= len(data)
            outf.write(data)
    finally:
        outf.close()

    return total
+\r
def handshake(dbfile):
    """Exchange protocol version and db checksum with the other endpoint.

    Writes "protocol <version>" and "dbfile <sha>" to stdout, then reads
    the two matching lines from stdin.  ``dbfile`` is created empty if it
    does not exist; its checksum comes from "MDDIFF --sha1sum".

    Raises ValueError (after logging) if the db file cannot be checksummed,
    if the peer sends nothing, or if the protocol version or the db
    checksum differ.
    """
    stdout.write("protocol {}\n".format(PROTOCOL_VERSION))
    touch(dbfile)
    sha_output = subprocess.check_output([MDDIFF, '--sha1sum', dbfile])
    # Only the first field is needed.  Empty output is treated like an
    # ERROR answer instead of failing with IndexError, and the old unused
    # err_msg slice (which raised ValueError when the output contained no
    # space) is gone.
    fields = sha_output.split()
    db_sha = fields[0] if fields else 'ERROR'

    if db_sha == 'ERROR':
        log_internal_error_and_fail('unreadable db file: '+quote(dbfile), 'handshake')

    stdout.write("dbfile {}\n".format(db_sha))
    stdout.flush()

    line = stdin.readline()
    if not line:
        log_error("Network error.")
        log_error("Unable to get any data from the other endpoint.")
        log_error("This problem may be transient, please retry.")
        log_error("Hint: did you correctly setup the SERVERNAME variable")
        log_error("on your client? Did you add an entry for it in your ssh")
        log_error("configuration file?")
        log_tags_and_fail('Network error', "handshake", "network", False, "retry")

    protocol = re.compile('^protocol (.+)$').match(line)
    if not protocol or protocol.group(1) != PROTOCOL_VERSION:
        log_error('Wrong protocol version.')
        log_error('The same version of syncmaildir must be used on '+
                  'both endpoints')
        log_tags_and_fail('Protocol version mismatch', "handshake", "protocol-mismatch", True)

    line = stdin.readline()
    if not line:
        log_error("The client disconnected during handshake")
        log_tags_and_fail('Network error', "handshake", "network", False, "retry")

    sha = re.compile(r'^dbfile (\S+)$').match(line)
    if not sha or sha.group(1) != db_sha:
        log_error('Local dbfile and remote db file differ.')
        log_error('Remove both files and push/pull again.')
        log_tags_and_fail('Database mismatch', "handshake", "db-mismatch", True, "run(rm "+
                          quote(dbfile)+")")
+\r
def dbfile_name(endpoint, mailboxes):
    """Return the path of the db file for ``endpoint`` and ``mailboxes``.

    Trailing slashes are dropped from both arguments, '/' is replaced by
    '_' in both, and '%' is additionally replaced by '_' in ``mailboxes``.
    The ~/.smd/ directory is created through MDDIFF as a side effect.
    """
    endpoint_part = endpoint.rstrip('/')
    mailboxes_part = mailboxes.rstrip('/')
    state_dir = os.path.expanduser('~/.smd/')
    subprocess.check_call([MDDIFF, '--mkdir-p', state_dir])
    endpoint_part = endpoint_part.replace('/', '_')
    mailboxes_part = mailboxes_part.replace('/', '_').replace('%', '_')
    template = '~/.smd/{}__{}.db.txt'
    return os.path.expanduser(template.format(endpoint_part, mailboxes_part))
+\r
def receive_delta(inf):
    """Read command lines from ``inf`` up to the terminating "END" line.

    Returns the lines before "END" as a list, line terminators included.
    Raises ValueError (after logging) if the stream ends before "END".
    """
    cmds = []
    while True:
        line = inf.readline()
        if not line:
            # End of stream without the END marker: the delta is incomplete.
            log_error('Unable to receive a complete diff')
            log_tags("receive-delta", "network", False, "retry")
            # The helper is called __error; the bare name error is not
            # defined, so this path used to die with NameError.
            __error("network error while receiving delta")
        if line.strip() == "END":
            return cmds
        cmds.append(line)
+\r
def homefy(filename):
    """Return ``filename`` resolved relative to the user's home directory."""
    tilde_path = "".join(("~/", filename))
    return os.path.expanduser(tilde_path)
+\r
def execute_add(name, hsha, bsha):
    """Handle an ADD command by hard linking an existing message.

    ``name`` is the home-relative path to create; ``hsha`` and ``bsha``
    are the expected header and body checksums.

    Returns True if the target already exists with matching checksums or
    was linked from one of the files known for the message, and False if
    the target exists with different content.  Raises ValueError (after
    logging) if none of the known files matches, and KeyError if the
    message is not known at all.
    """
    _, basename = os.path.split(name)
    # The real smd creates symlinks from workarea to the target
    # directory, I dunno why.
    dest = homefy(name)
    ex, hsha_1, bsha_1 = exists_and_sha(dest)
    if ex:
        if hsha_1 != hsha or bsha_1 != bsha:
            log_error("Failed to add {} since a file with the same name".format(
                dest))
            log_error('exists but its content is different.')
            log_error("Current hash {}/{}, requested hash {}/{}".format(
                hsha_1, bsha_1, hsha, bsha))
            log_error('To fix this problem you should rename '+dest)
            log_error('Executing `cd; mv -n '+quote(name)+' '+
                      'FIXME: tmp_for' +'` should work.')
            log_tags("mail-addition", "concurrent-mailbox-edit", True,
                     )
            #mk_act("mv", name))
            return False

        return True # already there
    # The part before the ':2,' flag marker is the lookup key.
    if ':2,' in basename:
        basename = basename[:basename.index(':2,')]
    filenames = original_message_filenames(basename)
    for filename in filenames:
        orig_exists, hsha_orig, bsha_orig = exists_and_sha(filename)
        assert orig_exists
        # NOTE(review): a match on either checksum is accepted here, while
        # the "already there" test above requires both - confirm intent.
        if hsha_orig == hsha or bsha_orig == bsha:
            os.link(filename, dest)
            return True

    # With no candidate at all there is no file name to report; fall back
    # to the lookup key instead of failing on an unbound loop variable.
    last_tried = filenames[-1] if filenames else basename
    log_error("Something seriously wrong here: we tried to link {}".format(
        last_tried))
    log_error("to {} but the hashes were wrong. We wanted {}/{}".format(
        dest, hsha, bsha))
    log_error("but we didn't see that in {}".format(filenames))
    log_tags_and_fail('Mail corpus wrong')
    # FIXME: How do we decide whether to use symlinks or not?
    # Seems like syncmaildir can't cope with symlinks, so let's just
    # always use hard links
    return False
+\r
def execute_delete(name, hsha, bsha):
    """Handle a DELETE command: remove the home-relative file ``name``.

    Asserts that the file exists and that its checksums are ``hsha`` and
    ``bsha`` before unlinking it.  Returns True.
    """
    target = homefy(name)
    present, current_hsha, current_bsha = exists_and_sha(target)
    assert present
    assert current_hsha == hsha
    assert current_bsha == bsha

    os.unlink(target)
    return True
+\r
def execute_copy(name_src, hsha, bsha, name_tgt):
    """Handle a COPY command between two home-relative paths.

    Asserts that the source exists with checksums ``hsha``/``bsha`` and
    that the target does not exist.  If the source is a symbolic link, a
    symbolic link to the same destination is created at the target;
    otherwise the target is hard linked to the source.  Returns True.
    """
    name_src = homefy(name_src)
    name_tgt = homefy(name_tgt)
    ex_src, hsha_src, bsha_src = exists_and_sha(name_src)
    ex_tgt, hsha_tgt, bsha_tgt = exists_and_sha(name_tgt)

    # Not reproducing all logic
    assert ex_src
    assert not ex_tgt

    assert hsha == hsha_src
    assert bsha == bsha_src
    # os.stat() follows symbolic links and therefore never reports one;
    # os.path.islink() examines the link itself.
    if os.path.islink(name_src):
        link_tgt = os.readlink(name_src)
        os.symlink(link_tgt, name_tgt)
    else:
        os.link(name_src, name_tgt)
    return True
+\r
def execute_error(msg):
    """Handle an ERROR command: log ``msg`` and return False.

    A message starting with "Unable to open directory" is tagged as an
    avoidable 'directory-disappeared' problem; anything else as an unknown
    problem that needs human intervention.
    """
    log_error('mddiff failed: '+msg)
    # The Python constants are False and True; the lowercase names left
    # over from the Lua original raised NameError.
    if msg.startswith("Unable to open directory"):
        log_tags("mddiff", "directory-disappeared", False)
    else:
        log_tags("mddiff", "unknown", True)

    # return (trace(false))
    return False
+\r
+\r
def execute(cmd):
    """The main switch, dispatching actions.

    ``cmd`` is one command line.  ADD, DELETE and COPY are handed to their
    execute_* function after URL-decoding the paths and creating the
    directories they live in; ERROR is handed to execute_error.  Returns
    that function's result.

    Raises ValueError for REPLACEHEADER, COPYBODY and REPLACE, for unknown
    opcodes and for commands whose arguments cannot be parsed.
    """
    words = cmd.split()
    # An empty line has no opcode; let it fall through to "unknown".
    opcode = words[0] if words else ''

    if opcode == "ADD":
        parsed = re.compile(r'^ADD (\S+) (\S+) (\S+)$').match(cmd)
        if parsed is None:
            __error("Malformed ADD command: " + cmd.strip())
        name, hsha, bsha = parsed.groups()
        name = url_decode(name)
        mkdir_p(name)
        return execute_add(name, hsha, bsha)

    elif opcode == "DELETE":
        parsed = re.compile(r'^DELETE (\S+) (\S+) (\S+)$').match(cmd)
        if parsed is None:
            __error("Malformed DELETE command: " + cmd.strip())
        name, hsha, bsha = parsed.groups()
        name = url_decode(name)
        mkdir_p(name)
        return execute_delete(name, hsha, bsha)

    elif opcode == "COPY":
        parsed = re.compile(
            r'COPY (\S+) (\S+) (\S+) TO (\S+)$').match(cmd)
        if parsed is None:
            __error("Malformed COPY command: " + cmd.strip())
        name_src, hsha, bsha, name_tgt = parsed.groups()
        name_src = url_decode(name_src)
        name_tgt = url_decode(name_tgt)
        mkdir_p(name_src)
        mkdir_p(name_tgt)
        return execute_copy(name_src, hsha, bsha, name_tgt)

    elif opcode in ['REPLACEHEADER', 'COPYBODY', 'REPLACE']:
        # The context argument is required; without it this call failed
        # with TypeError instead of reporting the unsupported opcode.
        log_internal_error_and_fail(opcode + ' was called: ' + cmd, 'execute')
        return False

    elif opcode == "ERROR":
        msg = cmd[cmd.index(' ')+1:]
        return execute_error(msg)

    else:
        # The helper is called __error; the bare name error is not defined.
        __error("Unknown opcode " + opcode)
        return False
+\r
def main():
    """Run the client side of one synchronisation.

    Parses the command line, performs the handshake, executes every
    received command, then commits: the db delta is received, applied
    with XDELTA and moved into place.

    Exits with status 3 if a command fails, 4 if the delta cannot be
    applied and 5 if the new db file cannot be renamed; "ABORT" is sent to
    the peer first in each case.
    """
    global VERBOSE

    parser = argparse.ArgumentParser(description="")
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    parser.add_argument('-d', '--dry-run', action='store_true', default=False)
    parser.add_argument('-t', '--translator', type=str, default='cat')
    parser.add_argument('endpoint')
    parser.add_argument('mailboxes')

    opts = parser.parse_args()
    # --verbose was parsed but never applied, so log() stayed silent.
    VERBOSE = opts.verbose

    set_translator(opts.translator)
    read_message_ids()

    dbfile = dbfile_name(opts.endpoint, opts.mailboxes)
    xdelta = dbfile + '.xdelta'
    newdb = dbfile + '.new'

    # startswith() also copes with an empty mailboxes argument.
    if opts.mailboxes.startswith('/'):
        log_error("Absolute paths are not supported: " + opts.mailboxes)
        log_tags_and_fail("Absolute path detected", "main", "mailbox-has--absolute-path", True)

    handshake(dbfile)
    commands = receive_delta(stdin)
    for cmd in commands:
        try:
            rc = execute(cmd)
        # Just wrap the whole thing in try-except to abort "cleanly"
        except Exception as e:
            log_error("Got an exception when processing {}: {}".format(cmd.strip(), str(e)))
            log_error(traceback.format_exc())
            rc = False
        if not rc:
            stdout.write('ABORT\n')
            stdout.flush()
            sys.exit(3)

        # if len(get_full_email_queue) > queue_max_len:
        #     process_pending_queue()

    # some commands may still be in the queue, we fire them now
    # process_pending_queue()

    # we commit and update the dbfile
    stdout.write('COMMIT\n')
    stdout.flush()
    receive(stdin, xdelta)

    rc = subprocess.call([XDELTA, 'patch', xdelta, dbfile, newdb])
    if rc != 0 and rc != 256:
        log_error('Unable to apply delta to dbfile.')
        stdout.write('ABORT\n')
        stdout.flush()
        sys.exit(4)

    try:
        os.rename(newdb, dbfile)
    except OSError:
        log_error('Unable to rename ' + newdb + ' to ' + dbfile)
        stdout.write('ABORT\n')
        stdout.flush()
        sys.exit(5)

    os.unlink(xdelta)
    stdout.write('DONE\n')
    stdout.flush()

    #log_tag('stats::new-mails(' + statistics.added +
    #'), del-mails(' + statistics.removed + ')')
+\r
# Characters that are left unescaped when a string is encoded for use as a
# file name.
CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'

# Pattern matching any single character that is NOT in CHARSET.
encode_re = '([^{0}])'.format(CHARSET)
+\r
def encode_one_char(match):
    """Return the ``%xx`` escape for the character captured in group 1."""
    code_point = ord(match.group(1))
    return '%{:02x}'.format(code_point)
+\r
def encode_for_fs(str):
    """Return ``str`` with every character matched by encode_re %-escaped."""
    escaped = re.sub(encode_re, encode_one_char, str)
    return escaped
+\r
def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a filename.

    The id is first escaped with encode_for_fs.  If the escaped id is short
    enough it is returned as is.  Otherwise it is shortened to a prefix and
    a suffix of the escaped id joined by '...', followed by the first
    SHA_LENGTH hex digits of the SHA-256 of the whole escaped id.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8    # :2,S...??
    encoded = encode_for_fs(msg_id)
    if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
        return encoded

    SHA_LENGTH = 8
    TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
    PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
    prefix = encoded[:PREFIX_LENGTH]
    suffix = encoded[-SUFFIX_LENGTH:]
    # hashlib digests bytes; text must be encoded explicitly (passing text
    # raises TypeError on Python 3).  Byte strings are passed through as is.
    digest_input = encoded
    if not isinstance(digest_input, bytes):
        digest_input = digest_input.encode('utf-8')
    sha = hashlib.sha256(digest_input)
    return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]
+\r
# Maps a mangled message id to the list of file names notmuch reports for
# that message; filled by read_message_ids().
MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES = {}
# Read-only database handle, opened when this module is loaded.
DB = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)
def read_message_ids():
    """Record the file names of every message in the notmuch database.

    Fills MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES, keyed by the
    mangled message id.
    """
    # We can't base this on tags at all because tags aren't applied yet
    querystr = '*'

    q_new = notmuch.Query(DB, querystr)
    q_new.set_sort(notmuch.Query.SORT.UNSORTED)
    for msg in q_new.search_messages():
        mangled_id = mangle_message_id(msg.get_message_id())
        fiter = msg.get_filenames()
        # list(fiter) gives me a NotInitializedException????
        # so the iterator is drained by hand with next() instead.
        filenames = []
        while True:
            try:
                filename = next(fiter)
                filenames.append(filename)
            except StopIteration:
                break

        MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES[mangled_id] = filenames
+\r
def original_message_filenames(mangled_filename):
    """Return the file names recorded for ``mangled_filename``.

    An unknown key is logged as an error and then raises KeyError.
    """
    known = MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES
    if mangled_filename not in known:
        log_error("{} not in notmuch. Trying to tag nonexistant message?".format(
            mangled_filename))
    return known[mangled_filename]
+\r
# Script entry point: run main() and turn any exception into a logged
# error plus exit status 6.
if __name__ == '__main__':
    try:
        main()
    except Exception as exc:
        description = str(exc)
        log_error(description)
        log_error(traceback.format_exc())
        sys.exit(6)
+\r
+--=-=-=--\r