From a620a8672601f713edfcb34a54e5fcc96608fcda Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 8 Apr 2014 23:48:31 +0000 Subject: [PATCH] mid2path ignores leading '<' and trailing '>' This simplifies our code a bit, and hopefully in public-inbox, too. There is little practical danger of a Message-ID not having '<>', and having '<>' in all URLs is annoying. This breaks compatibility. Fortunately, this project is not publicly announced, yet. --- Documentation/ssoma_repository.txt | 8 ++++---- lib/Ssoma/Extractor.pm | 14 +------------- lib/Ssoma/Git.pm | 2 ++ t/mda-badheaders.t | 2 +- t/mda-conflict.t | 1 + 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/Documentation/ssoma_repository.txt b/Documentation/ssoma_repository.txt index 7458bbe..f7b24ad 100644 --- a/Documentation/ssoma_repository.txt +++ b/Documentation/ssoma_repository.txt @@ -15,13 +15,13 @@ identifier is used by ssoma clients to track synchronization state. A Message-ID may be extremely long and also contain slashes, so using them as a path name is challenging. Instead we use the SHA-1 hexdigest -of the Message-ID (including the "<" and ">") to generate a path name. -Leading and trailing white space in the Message-ID header is ignored -for hashing. +of the Message-ID (excluding the leading "<" and trailing ">") to +generate a path name. Leading and trailing white space in the +Message-ID header is ignored for hashing. A message with Message-ID of: <20131106023245.GA20224@dcvr.yhbt.net> -Would be stored as: 21/4527ce3741f50bb9afa65e7c5003c8a8ddc4b1 +Would be stored as: f2/8c6cfd2b0a65f994c3e1be266105413b3d3f63 Thus it is easy to look up the contents of a message matching a given a Message-ID. 
diff --git a/lib/Ssoma/Extractor.pm b/lib/Ssoma/Extractor.pm index 8f05377..afe45ce 100644 --- a/lib/Ssoma/Extractor.pm +++ b/lib/Ssoma/Extractor.pm @@ -139,19 +139,7 @@ sub _deliver_die { sub midextract { my ($self, $message_id, $mbox) = @_; $self->{git}->tmp_git_do(sub { - # leaving <> out of Message-IDs on the command-line is - # common and practical since it frees the user from - # quoting/escaping in most cases, so do not require - # Message-IDs have <> around them - if ($message_id =~ /\A<.+>\z/) { # rare - $self->_midextract($message_id, $mbox); - } else { # common - eval { # try with additional <> first - my $tmpid = "<$message_id>"; - $self->_midextract($tmpid, $mbox); - }; - $self->_midextract($message_id, $mbox) if $@; - } + $self->_midextract($message_id, $mbox); }); } diff --git a/lib/Ssoma/Git.pm b/lib/Ssoma/Git.pm index 87bf868..9a9d82f 100644 --- a/lib/Ssoma/Git.pm +++ b/lib/Ssoma/Git.pm @@ -200,6 +200,8 @@ sub stripws { sub mid2path { my ($self, $message_id) = @_; stripws($message_id); + $message_id =~ s/\A<//; + $message_id =~ s/>\z//; my $hex = sha1_hex($message_id); $hex =~ /\A([a-f0-9]{2})([a-f0-9]{38})\z/i or die "BUG: not a SHA-1 hex: $hex"; diff --git a/t/mda-badheaders.t b/t/mda-badheaders.t index 22571e2..c363035 100644 --- a/t/mda-badheaders.t +++ b/t/mda-badheaders.t @@ -36,7 +36,7 @@ $mda->deliver($email); local $ENV{GIT_DIR} = "$tmpdir/gittest"; -my $blob_id = sha1_hex("<666\@example.com>"); +my $blob_id = sha1_hex("666\@example.com"); my ($dir, $base) = ($blob_id =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!); ok(defined $dir && defined $base, "bad sha1: $blob_id"); diff --git a/t/mda-conflict.t b/t/mda-conflict.t index 09bd5c1..54b3541 100644 --- a/t/mda-conflict.t +++ b/t/mda-conflict.t @@ -50,6 +50,7 @@ foreach my $line (@tree) { my $mid = $simple->header("message-id"); my $path_sha1 = $path; $path_sha1 =~ tr!/!!d; + $mid =~ tr/<>//d; is($path_sha1, sha1_hex($mid), "path mapping works $mid"); } -- 2.26.2