1 Return-Path: <ethan.glasser.camp@gmail.com>
\r
2 X-Original-To: notmuch@notmuchmail.org
\r
3 Delivered-To: notmuch@notmuchmail.org
\r
4 Received: from localhost (localhost [127.0.0.1])
\r
5 by olra.theworths.org (Postfix) with ESMTP id 22889431FAF
\r
6 for <notmuch@notmuchmail.org>; Sun, 6 Oct 2013 21:49:52 -0700 (PDT)
\r
7 X-Virus-Scanned: Debian amavisd-new at olra.theworths.org
\r
11 X-Spam-Status: No, score=-0.799 tagged_above=-999 required=5
\r
12 tests=[DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1,
\r
13 FREEMAIL_FROM=0.001, RCVD_IN_DNSWL_LOW=-0.7] autolearn=disabled
\r
14 Received: from olra.theworths.org ([127.0.0.1])
\r
15 by localhost (olra.theworths.org [127.0.0.1]) (amavisd-new, port 10024)
\r
16 with ESMTP id l0dk1rHivO6h for <notmuch@notmuchmail.org>;
\r
17 Sun, 6 Oct 2013 21:49:44 -0700 (PDT)
\r
18 Received: from mail-qe0-f53.google.com (mail-qe0-f53.google.com
\r
19 [209.85.128.53]) (using TLSv1 with cipher RC4-SHA (128/128 bits))
\r
20 (No client certificate requested)
\r
21 by olra.theworths.org (Postfix) with ESMTPS id 83F48431FAE
\r
22 for <notmuch@notmuchmail.org>; Sun, 6 Oct 2013 21:49:44 -0700 (PDT)
\r
23 Received: by mail-qe0-f53.google.com with SMTP id cy11so956017qeb.26
\r
24 for <notmuch@notmuchmail.org>; Sun, 06 Oct 2013 21:49:42 -0700 (PDT)
\r
25 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113;
\r
26 h=from:to:subject:in-reply-to:references:user-agent:date:message-id
\r
27 :mime-version:content-type;
\r
28 bh=djCxR7kr3DGkKxSNNOoNrBe3lo2uI3tzo52A5puB6eI=;
\r
29 b=W9YJGZc3BuQI5wlBCt6FgoIy59LdhOiQsMaW1yejCmq8RkflhNV3kWbL6q0NPI7P6/
\r
30 8LBCDceH4hfL6x/wTr7q2bYrWlBgRkkyrBBVhax7MpiY0aGL1+pzuM5tpN01d0P65R9J
\r
31 /6DXChty02CS4Z/fenIyWN1nWlkkZhuBYJpPjgNzpMpwNC3vZu11w56WD5u/xx7gKT5J
\r
32 L3imy4r2lwNHTjWSxDD0yPDYuxQkxYWfgWCDQtBIjZtsKD7PGA3Sq9HT2a+BuOZ1tQIt
\r
33 oHD28zD4wPfjyYD+f9Q3v4FH+Rhx0RQEGRKdijahPqJ4tsv1VBDVikKUw6j2wAWPIuug
\r
35 X-Received: by 10.224.11.133 with SMTP id t5mr35139503qat.34.1381121382839;
\r
36 Sun, 06 Oct 2013 21:49:42 -0700 (PDT)
\r
37 Received: from smtp.gmail.com ([66.114.71.21])
\r
38 by mx.google.com with ESMTPSA id g2sm58448024qaf.12.1969.12.31.16.00.00
\r
39 (version=TLSv1.2 cipher=RC4-SHA bits=128/128);
\r
40 Sun, 06 Oct 2013 21:49:41 -0700 (PDT)
\r
41 From: Ethan Glasser-Camp <ethan.glasser.camp@gmail.com>
\r
42 To: David Bremner <david@tethera.net>,
\r
43 notmuch mailing list <notmuch@notmuchmail.org>
\r
44 Subject: Re: On disk tag storage format
\r
45 In-Reply-To: <87fvsgh5g5.fsf@betacantrips.com>
\r
46 References: <874nk8v9zw.fsf@zancas.localnet> <87vc9mtpxh.fsf@zancas.localnet>
\r
47 <87fvsgh5g5.fsf@betacantrips.com>
\r
48 User-Agent: Notmuch/0.16+80~g81ee785 (http://notmuchmail.org) Emacs/24.2.1
\r
49 (x86_64-pc-linux-gnu)
\r
50 Date: Mon, 07 Oct 2013 00:49:39 -0400
\r
51 Message-ID: <87bo31heho.fsf@betacantrips.com>
\r
53 Content-Type: multipart/mixed; boundary="=-=-="
\r
54 X-BeenThere: notmuch@notmuchmail.org
\r
55 X-Mailman-Version: 2.1.13
\r
57 List-Id: "Use and development of the notmuch mail system."
\r
58 <notmuch.notmuchmail.org>
\r
59 List-Unsubscribe: <http://notmuchmail.org/mailman/options/notmuch>,
\r
60 <mailto:notmuch-request@notmuchmail.org?subject=unsubscribe>
\r
61 List-Archive: <http://notmuchmail.org/pipermail/notmuch>
\r
62 List-Post: <mailto:notmuch@notmuchmail.org>
\r
63 List-Help: <mailto:notmuch-request@notmuchmail.org?subject=help>
\r
64 List-Subscribe: <http://notmuchmail.org/mailman/listinfo/notmuch>,
\r
65 <mailto:notmuch-request@notmuchmail.org?subject=subscribe>
\r
66 X-List-Received-Date: Mon, 07 Oct 2013 04:49:52 -0000
\r
69 Content-Type: text/plain
\r
71 Ethan Glasser-Camp <ethan.glasser.camp@gmail.com> writes:
\r
73 > I've modified the script so that it would run by mangling filenames,
\r
74 > which is irreversible (the original tried to encode/decode filenames
\r
75 > reversibly). Then I got a little carried away, adding --verbose and
\r
76 > --dry-run options as well as removing a couple trailing
\r
77 > semicolons. Here's my version, in case it should interest anyone else.
\r
81 There was a bug in the previous version I sent. It didn't handle
\r
82 unlinking tags correctly. Also, I spotted a bug in syncing to untagged
\r
83 messages. Maybe I should stop using emails as version control.
\r
90 Content-Type: text/x-python
\r
91 Content-Disposition: inline; filename=linksync.py
\r
92 Content-Description: slightly more tested this time
\r
94 # Copyright 2013, David Bremner <david@tethera.net>
\r
96 # Licensed under the same terms as notmuch.
\r
102 from collections import defaultdict
\r
106 # skip automatic and maildir tags
\r
108 skiptags = re.compile(r"^(attachement|signed|encrypted|draft|flagged|passed|replied|unread)$")
\r
110 # some random person on stack overflow suggests:
\r
115 except OSError as exc: # Python >2.5
\r
116 if exc.errno == errno.EEXIST and os.path.isdir(path):
\r
126 CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'
\r
128 encode_re = '([^{0}])'.format(CHARSET)
\r
130 decode_re = '[%]([0-7][0-9A-Fa-f])'
\r
132 def encode_one_char(match):
\r
133 return('%{:02x}'.format(ord(match.group(1))))
\r
135 def encode_for_fs(str):
\r
136 return re.sub(encode_re,encode_one_char, str,0)
\r
138 def mangle_message_id(msg_id):
\r
140 Return a mangled version of the message id, suitable for use as a filename.
\r
143 FLAGS_LENGTH = 8 # :2,S...??
\r
144 encoded = encode_for_fs(msg_id)
\r
145 if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
\r
149 TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
\r
150 PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
\r
151 prefix = encoded[:PREFIX_LENGTH]
\r
152 suffix = encoded[-SUFFIX_LENGTH:]
\r
153 sha = hashlib.sha256()
\r
154 sha.update(encoded)
\r
155 return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]
\r
157 def decode_one_char(match):
\r
158 return chr(int(match.group(1),16))
\r
160 def decode_from_fs(str):
\r
161 return re.sub(decode_re,decode_one_char, str, 0)
\r
163 def mk_tag_dir(tagdir):
\r
165 mkdir_p (os.path.join(tagdir, 'cur'))
\r
166 mkdir_p (os.path.join(tagdir, 'new'))
\r
167 mkdir_p (os.path.join(tagdir, 'tmp'))
\r
170 flagpart = '(:2,[^:]*)'
\r
171 flagre = re.compile(flagpart + '$');
\r
173 def path_for_msg (dir, msg):
\r
174 filename = msg.get_filename()
\r
175 flagsmatch = flagre.search(filename)
\r
176 if flagsmatch == None:
\r
179 flags = flagsmatch.group(1)
\r
181 return os.path.join(dir, 'cur', mangle_message_id(msg.get_message_id()) + flags)
\r
184 def unlink_message(dir, msg):
\r
186 dir = os.path.join(dir, 'cur')
\r
188 mangled_id = mangle_message_id(msg.get_message_id())
\r
189 filepattern = '^' + re.escape(mangled_id) + flagpart +'?$'
\r
191 filere = re.compile(filepattern)
\r
193 for file in os.listdir(dir):
\r
194 if filere.match(file):
\r
195 log("Unlinking {}".format(os.path.join(dir, file)))
\r
196 if not opts.dry_run:
\r
197 os.unlink(os.path.join(dir, file))
\r
199 def dir_for_tag(tag):
\r
200 enc_tag = encode_for_fs (tag)
\r
201 return os.path.join(tagroot, enc_tag)
\r
203 disk_tags = defaultdict(set)
\r
206 def read_tags_from_disk(rootdir):
\r
208 for root, subFolders, files in os.walk(rootdir):
\r
209 for filename in files:
\r
210 mangled_id = filename.split(':')[0]
\r
211 tag = root.split('/')[-2]
\r
212 disk_ids.add(mangled_id)
\r
213 disk_tags[mangled_id].add(decode_from_fs(tag))
\r
217 parser = argparse.ArgumentParser(description='Sync notmuch tag database to/from link farm')
\r
218 parser.add_argument('-l','--link-style',choices=['hard','symbolic', 'adaptive'],
\r
219 default='adaptive')
\r
220 parser.add_argument('-d','--destination',choices=['disk','notmuch'], default='disk')
\r
221 parser.add_argument('-t','--threshold', default=50000L, type=int)
\r
222 parser.add_argument('-n','--dry-run', default=False, action='store_true')
\r
223 parser.add_argument('-v','--verbose', default=False, action='store_true')
\r
225 parser.add_argument('tagroot')
\r
227 opts=parser.parse_args()
\r
228 VERBOSE = opts.verbose
\r
230 tagroot=opts.tagroot
\r
232 sync_from_links = (opts.destination == 'notmuch')
\r
234 read_tags_from_disk(tagroot)
\r
236 if sync_from_links:
\r
237 db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)
\r
239 db = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)
\r
241 dbtags = filter (lambda tag: not skiptags.match(tag), db.get_all_tags())
\r
243 if sync_from_links:
\r
244 # have to iterate over even untagged messages
\r
247 querystr = ' OR '.join(map (lambda tag: 'tag:'+tag, dbtags))
\r
249 q_new = notmuch.Query(db, querystr)
\r
250 q_new.set_sort(notmuch.Query.SORT.UNSORTED)
\r
251 for msg in q_new.search_messages():
\r
253 # silently ignore empty tags
\r
254 db_tags = set(filter (lambda tag: tag != '' and not skiptags.match(tag),
\r
257 message_id = msg.get_message_id()
\r
259 mangled_id = mangle_message_id(message_id)
\r
261 disk_ids.discard(mangled_id)
\r
263 missing_on_disk = db_tags.difference(disk_tags[mangled_id])
\r
264 missing_in_db = disk_tags[mangled_id].difference(db_tags)
\r
266 if sync_from_links:
\r
269 filename = msg.get_filename()
\r
271 if len(missing_on_disk) > 0:
\r
272 if opts.link_style == 'adaptive':
\r
273 statinfo = os.stat (filename)
\r
274 symlink = (statinfo.st_size > opts.threshold)
\r
276 symlink = opts.link_style == 'symbolic'
\r
278 for tag in missing_on_disk:
\r
280 if sync_from_links:
\r
281 log("Removing tag {} from {}".format(tag, message_id))
\r
282 if not opts.dry_run:
\r
283 msg.remove_tag(tag,sync_maildir_flags=False)
\r
285 tagdir = dir_for_tag (tag)
\r
287 if not opts.dry_run:
\r
288 mk_tag_dir (tagdir)
\r
290 newlink = path_for_msg (tagdir, msg)
\r
292 log("Linking {} to {}".format(filename, newlink))
\r
293 if not opts.dry_run:
\r
295 os.symlink(filename, newlink)
\r
297 os.link(filename, newlink)
\r
300 for tag in missing_in_db:
\r
301 if sync_from_links:
\r
302 log("Adding {} to message {}".format(tag, message_id))
\r
303 if not opts.dry_run:
\r
304 msg.add_tag(tag,sync_maildir_flags=False)
\r
306 tagdir = dir_for_tag (tag)
\r
307 unlink_message(tagdir,msg)
\r
309 if sync_from_links:
\r
312 # everything remaining in disk_ids is a deleted message
\r
313 # unless we are syncing back to the database, in which case
\r
314 # it just might not currently have any non-maildir tags.
\r
316 if not sync_from_links:
\r
317 for root, subFolders, files in os.walk(tagroot):
\r
318 for filename in files:
\r
319 mangled_id = filename.split(':')[0]
\r
320 if mangled_id in disk_ids:
\r
321 os.unlink(os.path.join(root, filename))
\r
326 # currently empty directories are not pruned.
\r
329 Content-Type: text/plain
\r
334 Of course, the next step is to sync using this mechanism. Rsync doesn't
\r
335 really have a concept of history, which basically makes it unusable for
\r
336 this purpose [1]. Unison doesn't really understand renames, so it gets
\r
337 confused when you mark a message as read (which might move it from new
\r
338 to cur, and definitely changes its tags). Bremner suggested
\r
339 syncmaildir. Syncmaildir doesn't understand links at all. Bremner
\r
340 suggested that we could use some parts of syncmaildir to implement the
\r
341 tag syncing we need.
\r
343 I didn't have anything else going on this weekend so I tried to
\r
344 prototype the approach. It turns out to be possible to leverage some
\r
345 parts of syncmaildir. I translated a bunch of smd-client into a new
\r
346 program, tagsync-client, that links to messages in an existing notmuch
\r
347 DB. It seems like it's possible to use it in place of the existing
\r
348 smd-client by putting lines like this in your config:
\r
350 SMDCLIENT=~/src/tagsync.git/tagsync-client.py
\r
351 REMOTESMDCLIENT=~/src/tagsync.git/tagsync-client.py
\r
353 The sequence of commands I ran:
\r
355 - linksync.py to dump tags to ~/Mail/.notmuch/exported-tags
\r
356 - smd-pull mail to sync ~/Mail but excluding .notmuch
\r
358 - smd-pull tagsync (using the above client) to sync ~/Mail/.notmuch/exported-tags
\r
359 - linksync.py to pull tags from ~/Mail/.notmuch/exported-tags
\r
361 syncmaildir doesn't cope well with drafts, so it might choke on that,
\r
362 and it doesn't like symlinks (it thinks they're always to directories),
\r
363 so be sure to run linksync with -l hard.
\r
365 Here's the script. It's a work in progress; I have only tested it once in one direction.
\r
371 Content-Type: text/x-python
\r
372 Content-Disposition: inline; filename=tagsync-client.py
\r
373 Content-Description: client script
\r
375 #! /usr/bin/env python
\r
377 from sys import stdout, stdin, stderr
\r
388 PROTOCOL_VERSION = "1.1"
\r
390 # Not reproducing the autoconf logic
\r
398 stderr.write("INFO: "+msg+"\n")
\r
401 raise ValueError(msg)
\r
403 def log_tags_and_fail(msg, *args):
\r
407 def log_internal_error_and_fail(msg, *args):
\r
408 log_internal_error_tags(msg, *args)
\r
411 def log_error(msg):
\r
412 return stderr.write("ERROR: {}\n".format(msg))
\r
415 return stderr.write("TAGS: {}\n".format(tag))
\r
417 def log_progress(msg):
\r
420 def log_tags(context='unknown', cause='unknown', human=False, *args):
\r
422 human = "necessary"
\r
424 human = "avoidable"
\r
427 suggestions_string = ""
\r
429 suggestions_string = ' suggested-actions({})'.format(' '.join(args))
\r
431 return log_tag("error::context({}) probable-cause({}) human-intervention({})".format(
\r
432 context, cause, human) + suggestions_string)
\r
434 def mkdir_p(filename):
\r
435 """Maildir-aware mkdir.
\r
437 Creates a directory and all parent directories.
\r
439 Moreover, if the last component is 'tmp', 'cur' or 'new', the
\r
440 others are created too."""
\r
442 # The Lua function throws away the last path component if it
\r
443 # doesn't end in /. This allows you to just call mkdir_p on any
\r
444 # file and a directory for it to live in will be created.
\r
445 if not filename.endswith('/'):
\r
446 filename, _ = os.path.split(filename)
\r
448 if not filename.startswith('/'):
\r
449 # This path is relative to HOME, and needs to be translated
\r
451 filename = translate(filename)
\r
452 filename = os.path.expanduser('~/'+filename)
\r
454 dirname, basename = os.path.split(filename)
\r
456 os.makedirs(filename)
\r
458 pass # probably "File exists"
\r
459 MAILDIR_SUBDIRS = ['tmp', 'cur', 'new']
\r
460 if basename in MAILDIR_SUBDIRS:
\r
461 for subdir in MAILDIR_SUBDIRS:
\r
462 to_create = os.path.join(dirname, subdir)
\r
463 if not os.path.exists(to_create):
\r
464 os.mkdir(to_create)
\r
467 class FakeSubprocess(object):
\r
468 def __init__(self, init_function):
\r
469 self.init_function = init_function
\r
472 self.pipe_name = None
\r
473 self.removed = None
\r
474 self.did_write = None
\r
477 def readline(self):
\r
479 log_internal_error_and_fail("read called before write",
\r
480 "make_slave_filter_process")
\r
483 if not self.removed and self.did_write:
\r
484 self.removed = True
\r
485 rc = self.input.readline()
\r
486 os.unlink(self.pipe_name)
\r
489 return self.input.readline()
\r
491 def write(self, *args):
\r
492 if not self.output:
\r
493 self.init_function(self.filter)
\r
494 self.input = self.filter['inf']
\r
495 self.output = self.filter['outf']
\r
496 self.pipe_name = self.filter['pipe']
\r
498 self.did_write = True
\r
499 self.output.write(*args)
\r
502 self.output.flush()
\r
505 return self.input.readlines()
\r
507 def make_slave_filter_process(cmd, seed="no seed"):
\r
509 if 'inf' not in filter:
\r
510 home = os.getenv('HOME')
\r
511 user = os.getenv('USER') or 'nobody'
\r
512 mangled_name = re.compile('[ %./]').sub('-', seed)
\r
515 base_dir = home + '/.smd/fifo/'
\r
519 rc = subprocess.call([MDDIFF, '--mkdir-p', base_dir])
\r
521 log_internal_error_and_fail('unable to create directory',
\r
522 'make_slave_filter_process')
\r
525 pipe_name = ''.join([base_dir, 'smd-', user, str(int(time.time())),
\r
526 mangled_name, str(attempt)])
\r
528 rc = subprocess.call([MDDIFF, '--mkfifo', pipe_name])
\r
529 if rc == 0 or attempt > 10:
\r
532 log_internal_error_and_fail('unable to create fifo',
\r
533 "make_slave_filter_process")
\r
535 inferior = cmd(pipe_name)
\r
536 filter['inf'] = inferior.stdout
\r
537 filter['outf'] = file(pipe_name, 'w')
\r
538 filter['pipe'] = pipe_name
\r
540 return FakeSubprocess(init)
\r
543 def set_translator(p):
\r
545 translator_filter = make_slave_filter_process(
\r
546 lambda pipe: subprocess.Popen(p, stdin=file(pipe), stdout=subprocess.PIPE),
\r
549 _translator = lambda x: x
\r
551 def translator_fn(x):
\r
552 translator_filter.write(x + '\n')
\r
553 translator_filter.flush()
\r
554 line = translator_filter.readline()
\r
555 if not line or line.strip() == 'ERROR':
\r
556 log_error("Translator {} on input {} gave an error".format(
\r
558 for l in translator_filter.readlines():
\r
560 log_tags_and_fail("Unable to translate mailbox",
\r
561 'translate', 'bad-translator', True)
\r
563 log_error("Translator {} on input {} returned a path containing ..".format(
\r
565 log_tags_and_fail('Translator returned a path containing ..',
\r
566 'translate', 'bad-translator', True)
\r
570 _translator = translator_fn
\r
574 return _translator(x)
\r
578 mddiff_sha_handler = make_slave_filter_process(
\r
579 lambda pipe: subprocess.Popen([MDDIFF, pipe], stdout=subprocess.PIPE),
\r
582 def sha_file(name):
\r
583 mddiff_sha_handler.write(name+'\n')
\r
584 mddiff_sha_handler.flush()
\r
586 data = mddiff_sha_handler.readline()
\r
587 if data.startswith('ERROR'):
\r
588 log_tags_and_fail("Failed to sha1 message: " + (name or "nil"),
\r
589 'sha_file', 'modify-while-update', False, 'retry')
\r
591 hsha, bsha = data.split()
\r
592 if not hsha or not bsha:
\r
593 log_internal_error_and_fail('mddiff incorrect behavior', 'mddiff')
\r
597 def exists_and_sha(name):
\r
598 if os.path.exists(name):
\r
599 h, b = sha_file(name)
\r
602 return False, False, False
\r
612 log_error('Unable to touch ' + quote(f))
\r
613 log_tags("touch", "bad-permissions", True,
\r
614 "display-permissions(" + quote(f) + ")")
\r
615 error("Unable to touch a file")
\r
621 def assert_exists(name):
\r
622 assert os.exists(name), "Not found: "+repr(name)
\r
624 def url_quote(txt):
\r
625 return urllib.quote(txt, safe='')
\r
628 return urllib.unquote(s)
\r
630 def log_internal_error_tags(msg, ctx):
\r
631 log_tags('internal-error', ctx, True)
\r
632 # Blob of "run gnome-open" junk not copied
\r
634 def receive(inf, outfile):
\r
636 outf = file(outfile, 'w')
\r
638 log_error("Unable to open " + outfile + " for writing.")
\r
639 log_error('It may be caused by bad directory permissions, '+
\r
641 log_tags("receive", "non-writeable-file", True,
\r
642 "display-permissions(" + quote(outfile) +")")
\r
643 error("Unable to write incoming data")
\r
645 line = inf.readline()
\r
646 if not line or line.strip() == "ABORT":
\r
647 log_error("Data transmission failed.")
\r
648 log_error("This problem is transient, please retry.")
\r
649 log_tags_and_fail('server sent ABORT or connection died',
\r
650 "receive", "network", False, "retry")
\r
652 # In the Lua version, this is called "len", but that's a builtin
\r
654 chunk_len = int(re.compile(r'^chunk (\d+)').match(line).group(1))
\r
658 if chunk_len < next_chunk:
\r
659 next_chunk = chunk_len
\r
660 data = inf.read(next_chunk)
\r
661 chunk_len -= len(data)
\r
664 # Probably not strictly speaking necessary in Python
\r
669 def handshake(dbfile):
\r
670 stdout.write("protocol {}\n".format(PROTOCOL_VERSION))
\r
672 sha_output = subprocess.check_output([MDDIFF, '--sha1sum', dbfile])
\r
673 db_sha = sha_output.split()[0]
\r
674 err_msg = sha_output[sha_output.index(' ')+1:]
\r
676 if db_sha == 'ERROR':
\r
677 log_internal_error_and_fail('unreadable db file: '+quote(dbfile), 'handshake')
\r
679 stdout.write("dbfile {}\n".format(db_sha))
\r
682 line = stdin.readline()
\r
684 log_error("Network error.")
\r
685 log_error("Unable to get any data from the other endpoint.")
\r
686 log_error("This problem may be transient, please retry.")
\r
687 log_error("Hint: did you correctly setup the SERVERNAME variable")
\r
688 log_error("on your client? Did you add an entry for it in your ssh")
\r
689 log_error("configuration file?")
\r
690 log_tags_and_fail('Network error', "handshake", "network", False, "retry")
\r
692 protocol = re.compile('^protocol (.+)$').match(line)
\r
693 if not protocol or protocol.group(1) != PROTOCOL_VERSION:
\r
694 log_error('Wrong protocol version.')
\r
695 log_error('The same version of syncmaildir must be used on '+
\r
697 log_tags_and_fail('Protocol version mismatch', "handshake", "protocol-mismatch", True)
\r
699 line = stdin.readline()
\r
701 log_error("The client disconnected during handshake")
\r
702 log_tags_and_fail('Network error', "handshake", "network", False, "retry")
\r
704 sha = re.compile(r'^dbfile (\S+)$').match(line)
\r
705 if not sha or sha.group(1) != db_sha:
\r
706 log_error('Local dbfile and remote db file differ.')
\r
707 log_error('Remove both files and push/pull again.')
\r
708 log_tags_and_fail('Database mismatch', "handshake", "db-mismatch", True, "run(rm "+
\r
711 def dbfile_name(endpoint, mailboxes):
\r
712 endpoint = endpoint.rstrip('/')
\r
713 mailboxes = mailboxes.rstrip('/')
\r
714 subprocess.check_call([MDDIFF, '--mkdir-p', os.path.expanduser('~/.smd/')])
\r
715 return os.path.expanduser('~/.smd/{}__{}.db.txt'.format(
\r
716 endpoint.replace('/', '_'),
\r
717 mailboxes.replace('/', '_').replace('%', '_')
\r
720 def receive_delta(inf):
\r
723 line = inf.readline()
\r
724 if line and line.strip() != "END":
\r
727 if not line or line.strip() == "END":
\r
730 if line.strip() != "END":
\r
731 log_error('Unable to receive a complete diff')
\r
732 log_tags("receive-delta", "network", False, "retry")
\r
733 error("network error while receiving delta")
\r
737 def homefy(filename):
\r
738 return os.path.expanduser("~/"+filename)
\r
740 def execute_add(name, hsha, bsha):
\r
741 dir, basename = os.path.split(name)
\r
742 # The real smd creates symlinks from workarea to the target
\r
743 # directory, I dunno why.
\r
744 dest = homefy(name)
\r
745 ex, hsha_1, bsha_1 = exists_and_sha(dest)
\r
747 if hsha_1 != hsha or bsha_1 != bsha:
\r
748 log_error("Failed to add {} since a file with the same name".format(
\r
750 log_error('exists but its content is different.')
\r
751 log_error("Current hash {}/{}, requested hash {}/{}".format(
\r
752 hsha_1, bsha_1, hsha, bsha))
\r
753 log_error('To fix this problem you should rename '+dest)
\r
754 log_error('Executing `cd; mv -n '+quote(name)+' '+
\r
755 'FIXME: tmp_for' +'` should work.')
\r
756 log_tags("mail-addition", "concurrent-mailbox-edit", True,
\r
758 #mk_act("mv", name))
\r
761 return True # already there
\r
762 if ':2,' in basename:
\r
763 basename = basename[:basename.index(':2,')]
\r
764 filenames = original_message_filenames(basename)
\r
765 for filename in filenames:
\r
766 orig_exists, hsha_orig, bsha_orig = exists_and_sha(filename)
\r
768 if hsha_orig == hsha or bsha_orig == bsha:
\r
769 os.link(filename, dest)
\r
773 log_error("Something seriously wrong here: we tried to link {}".format(
\r
775 log_error("to {} but the hashes were wrong. We wanted {}/{}".format(
\r
777 log_error("but we didn't see that in {}".format(filenames))
\r
778 log_tags_and_fail('Mail corpus wrong')
\r
779 # FIXME: How do we decide whether to use symlinks or not?
\r
780 # Seems like syncmaildir can't cope with symlinks, so let's just
\r
781 # always use hard links
\r
784 def execute_delete(name, hsha, bsha):
\r
785 name = homefy(name)
\r
786 ex, hsha_1, bsha_1 = exists_and_sha(name)
\r
788 assert hsha_1 == hsha
\r
789 assert bsha_1 == bsha
\r
794 def execute_copy(name_src, hsha, bsha, name_tgt):
\r
795 name_src = homefy(name_src)
\r
796 name_tgt = homefy(name_tgt)
\r
797 ex_src, hsha_src, bsha_src = exists_and_sha(name_src)
\r
798 ex_tgt, hsha_tgt, bsha_tgt = exists_and_sha(name_tgt)
\r
800 # Not reproducing all logic
\r
804 assert hsha == hsha_src
\r
805 assert bsha == bsha_src
\r
806 if stat.S_ISLNK(os.stat(name_src).st_mode):
\r
807 link_tgt = os.readlink(name_src)
\r
808 os.symlink(link_tgt, name_tgt)
\r
810 os.link(name_src, name_tgt)
\r
813 def execute_error(msg):
\r
814 log_error('mddiff failed: '+msg)
\r
815 if msg.startswith("Unable to open directory"):
\r
816 log_tags("mddiff", "directory-disappeared", false)
\r
818 log_tags("mddiff", "unknown", true)
\r
820 # return (trace(false))
\r
825 """The main switch, dispatching actions."""
\r
826 opcode = cmd.split()[0]
\r
828 if opcode == "ADD":
\r
829 name, hsha, bsha = re.compile(r'^ADD (\S+) (\S+) (\S+)$').match(cmd).groups()
\r
830 name = url_decode(name)
\r
832 return execute_add(name, hsha, bsha)
\r
834 elif opcode == "DELETE":
\r
835 name, hsha, bsha = re.compile(r'^DELETE (\S+) (\S+) (\S+)$').match(cmd).groups()
\r
836 name = url_decode(name)
\r
838 return execute_delete(name, hsha, bsha)
\r
840 elif opcode == "COPY":
\r
841 name_src, hsha, bsha, name_tgt = re.compile(
\r
842 r'COPY (\S+) (\S+) (\S+) TO (\S+)$').match(cmd).groups()
\r
843 name_src = url_decode(name_src)
\r
844 name_tgt = url_decode(name_tgt)
\r
847 return execute_copy(name_src, hsha, bsha, name_tgt)
\r
849 elif opcode in ['REPLACEHEADER', 'COPYBODY', 'REPLACE']:
\r
850 log_internal_error_and_fail(opcode + ' was called: ' + cmd)
\r
853 elif opcode == "ERROR":
\r
854 msg = cmd[cmd.index(' ')+1:]
\r
855 return execute_error(msg)
\r
858 error("Unknown opcode " + opcode)
\r
862 parser = argparse.ArgumentParser(description="")
\r
863 parser.add_argument('-v', '--verbose', action='store_true', default=False)
\r
864 parser.add_argument('-d', '--dry-run', action='store_true', default=False)
\r
865 parser.add_argument('-t', '--translator', type=str, default='cat')
\r
866 parser.add_argument('endpoint')
\r
867 parser.add_argument('mailboxes')
\r
869 opts = parser.parse_args()
\r
871 set_translator(opts.translator)
\r
874 dbfile = dbfile_name(opts.endpoint, opts.mailboxes)
\r
875 xdelta = dbfile + '.xdelta'
\r
876 newdb = dbfile + '.new'
\r
878 if opts.mailboxes[0] == '/':
\r
879 log_error("Absolute paths are not supported: " + opts.mailboxes)
\r
880 log_tags_and_fail("Absolute path detected", "main", "mailbox-has--absolute-path", True)
\r
883 commands = receive_delta(stdin)
\r
884 for cmd in commands:
\r
887 # Just wrap the whole thing in try-except to abort "cleanly"
\r
888 except Exception as e:
\r
889 log_error("Got an exception when processing {}: {}".format(cmd.strip(), str(e)))
\r
890 log_error(traceback.format_exc())
\r
893 stdout.write('ABORT\n')
\r
897 # if len(get_full_email_queue) > queue_max_len:
\r
898 # process_pending_queue()
\r
900 # some commands may still be in the queue, we fire them now
\r
901 # process_pending_queue()
\r
903 # we commit and update the dbfile
\r
904 stdout.write('COMMIT\n')
\r
906 receive(stdin, xdelta)
\r
908 rc = subprocess.call([XDELTA, 'patch', xdelta, dbfile, newdb])
\r
909 if rc != 0 and rc != 256:
\r
910 log_error('Unable to apply delta to dbfile.')
\r
911 stdout.write('ABORT\n')
\r
916 os.rename(newdb, dbfile)
\r
918 log_error('Unable to rename ' + newdb + ' to ' + dbfile)
\r
919 stdout.write('ABORT\n')
\r
924 stdout.write('DONE\n')
\r
927 #log_tag('stats::new-mails(' + statistics.added +
\r
928 #'), del-mails(' + statistics.removed + ')')
\r
930 CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'
\r
932 encode_re = '([^{0}])'.format(CHARSET)
\r
934 def encode_one_char(match):
\r
935 return('%{:02x}'.format(ord(match.group(1))))
\r
937 def encode_for_fs(str):
\r
938 return re.sub(encode_re,encode_one_char, str,0)
\r
940 def mangle_message_id(msg_id):
\r
942 Return a mangled version of the message id, suitable for use as a filename.
\r
945 FLAGS_LENGTH = 8 # :2,S...??
\r
946 encoded = encode_for_fs(msg_id)
\r
947 if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
\r
951 TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
\r
952 PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
\r
953 prefix = encoded[:PREFIX_LENGTH]
\r
954 suffix = encoded[-SUFFIX_LENGTH:]
\r
955 sha = hashlib.sha256()
\r
956 sha.update(encoded)
\r
957 return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]
\r
959 MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES = {}
\r
960 DB = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)
\r
961 def read_message_ids():
\r
962 # We can't base this on tags at all because tags aren't applied yet
\r
965 q_new = notmuch.Query(DB, querystr)
\r
966 q_new.set_sort(notmuch.Query.SORT.UNSORTED)
\r
967 for msg in q_new.search_messages():
\r
968 mangled_id = mangle_message_id(msg.get_message_id())
\r
969 fiter = msg.get_filenames()
\r
970 # list(fiter) gives me a NotInitializedException????
\r
974 filename = next(fiter)
\r
975 filenames.append(filename)
\r
976 except StopIteration:
\r
979 MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES[mangled_id] = filenames
\r
981 def original_message_filenames(mangled_filename):
\r
982 if mangled_filename not in MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES:
\r
983 log_error("{} not in notmuch. Trying to tag nonexistant message?".format(
\r
985 return MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES[mangled_filename]
\r
987 if __name__ == '__main__':
\r
990 except Exception as e:
\r
992 log_error(traceback.format_exc())
\r