1 # Copyright (C) 2008-2010 Gianluca Montecchi <gian@grys.it>
2 # W. Trevor King <wking@drexel.edu>
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 """Handle ID creation and parsing.
23 BE IDs are formatted::
25 <bug-directory>[/<bug>[/<comment>]]
27 where each ``<..>`` is a UUID. For example::
29 bea86499-824e-4e77-b085-2d581fa9ccab/3438b72c-6244-4f1d-8722-8c8d41484e35
31 refers to bug ``3438b72c-6244-4f1d-8722-8c8d41484e35`` which is
32 located in bug directory ``bea86499-824e-4e77-b085-2d581fa9ccab``.
33 This is a bit of a mouthful, so you can truncate each UUID so long as
34 it remains unique. For example::
38 If there were two bugs ``3438...`` and ``343a...`` in ``bea``, you'd
43 BE will only truncate each UUID down to three characters to slightly
44 future-proof the short user ids. However, if you want to save keystrokes
45 and you *know* there is only one bug directory, feel free to truncate
46 all the way to zero characters::
53 To refer to other bug-directories/bugs/comments from bug comments, simply
54 enclose the ID in pound signs (``#``). BE will automatically expand the
55 truncations to the full UUIDs before storing the comment, and the reference
56 will be appropriately truncated (and hyperlinked, if possible) when the
62 Although bug and comment IDs always appear in compound references,
63 UUIDs at each level are globally unique. For example, comment
64 ``bea/343/ba96f1c0-ba48-4df8-aaf0-4e3a3144fc46`` will *only* appear
65 under ``bea/343``. The prefix (``bea/343``) allows BE to reduce
66 caching global comment-lookup tables and enables easy error messages
67 ("I couldn't find ``bea/343/ba9`` because I don't know where the
68 ``bea`` bug directory is located").
76 if libbe.TESTING == True:
82 from uuid import uuid4 # Python >= 2.5
87 assert idstr.startswith(start)
88 return idstr[len(start):]
92 from subprocess import Popen, PIPE
95 # Shell-out to system uuidgen
96 args = ['uuidgen', 'r']
98 if sys.platform != "win32":
99 q = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE)
101 # win32 doesn't have os.execvp(), so we have to run the command in a shell
102 q = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE,
105 strerror = "%s\nwhile executing %s" % (e.args[1], args)
106 raise OSError, strerror
107 output, error = q.communicate()
110 strerror = "%s\nwhile executing %s" % (status, args)
111 raise Exception, strerror
112 return output.rstrip('\n')
115 HIERARCHY = ['bugdir', 'bug', 'comment']
116 """Keep track of the object type hierarchy.
119 class MultipleIDMatches (ValueError):
120 """Multiple IDs match the given user ID.
125 The not-specific-enough truncated UUID.
127 The initial characters common to all matching UUIDs.
128 matches : list of str
129 The list of possibly matching UUIDs.
131 def __init__(self, id, common, matches):
132 msg = ('More than one id matches %s. '
133 'Please be more specific (%s*).\n%s' % (id, common, matches))
134 ValueError.__init__(self, msg)
137 self.matches = matches
139 class NoIDMatches (KeyError):
140 """No IDs match the given user ID.
145 The not-matching, possibly truncated UUID.
146 possible_ids : list of str
147 The list of potential UUIDs at that level.
149 A helpful message explaining what went wrong.
151 def __init__(self, id, possible_ids, msg=None):
152 KeyError.__init__(self, id)
154 self.possible_ids = possible_ids
158 return 'No id matches %s.\n%s' % (self.id, self.possible_ids)
161 class InvalidIDStructure (KeyError):
162 """A purported ID does not have the appropriate syntax.
169 A helpful message explaining what went wrong.
171 def __init__(self, id, msg=None):
172 KeyError.__init__(self, id)
177 return 'Invalid id structure "%s"' % self.id
180 def _assemble(args, check_length=False):
181 """Join a bunch of level UUIDs into a single ID.
188 for i,arg in enumerate(args):
192 if check_length == True:
193 assert len(args) > 0, args
194 if len(args) > len(HIERARCHY):
195 raise InvalidIDStructure(
196 id, '%d > %d levels in "%s"' % (len(args), len(HIERARCHY), id))
199 def _split(id, check_length=False):
200 """Split an ID into a list of level UUIDs.
207 for i,arg in enumerate(args):
210 if check_length == True:
211 assert len(args) > 0, args
212 if len(args) > len(HIERARCHY):
213 raise InvalidIDStructure(
214 id, '%d > %d levels in "%s"' % (len(args), len(HIERARCHY), id))
217 def _truncate(uuid, other_uuids, min_length=3):
218 """Truncate a UUID to the shortest length >= `min_length` such that it
219 is *not* a truncated form of a UUID in `other_uuids`.
224 The UUID to truncate.
225 other_uuids : list of str
226 The other UUIDs which the truncation *might* (but doesn't) refer
229 Avoid rapidly outdated truncations, even if they are unique now.
238 for id in other_uuids:
241 while (id[:chars] == uuid[:chars]):
245 def _expand(truncated_id, common, other_ids):
246 """Expand a truncated UUID.
253 The common portion `truncated_id` shares with the UUIDs in
254 `other_ids`. Not used by ``_expand``, but passed on to the
255 matching exceptions if they occur.
256 other_ids : list of str
257 The other UUIDs which the truncation *might* (but doesn't) refer
269 other_ids = list(other_ids)
270 if len(other_ids) == 0:
271 raise NoIDMatches(truncated_id, other_ids)
272 if truncated_id == None:
273 if len(other_ids) == 1:
275 raise MultipleIDMatches(truncated_id, common, other_ids)
277 other_ids = list(other_ids)
279 if id.startswith(truncated_id):
280 if id == truncated_id:
284 raise MultipleIDMatches(truncated_id, common, matches)
285 if len(matches) == 0:
286 raise NoIDMatches(truncated_id, other_ids)
291 """Store an object ID and produce various representations.
295 object : :class:`~libbe.bugdir.BugDir` or :class:`~libbe.bug.Bug` or :class:`~libbe.comment.Comment`
296 The object that the ID applies to.
297 type : 'bugdir' or 'bug' or 'comment'
298 The type of the object.
303 IDs have several formats specialized for different uses.
305 In storage, all objects are represented by their uuid alone,
306 because that is the simplest globally unique identifier. You can
307 generate ids of this sort with the .storage() method. Because an
308 object's storage may be distributed across several chunks, and the
309 chunks may not have their own uuid, we generate chunk ids by
310 prepending the objects uuid to the chunk name. The user id types
311 do not support this chunk extension feature.
313 For users, the full uuids are a bit overwhelming, so we truncate
314 them while retaining local uniqueness (with regards to the other
315 objects currently in storage). We also prepend truncated parent
318 1. So that a user can locate the repository containing the
319 referenced object. It would be hard to find bug ``XYZ`` if
320 that's all you knew. Much easier with ``ABC/XYZ``, where
321 ``ABC`` is the bugdir. Each project can publish a list of
322 bugdir-id-to-location mappings, e.g.::
324 ABC...(full uuid)...DEF https://server.com/projectX/be/
326 which is easier than publishing all-object-ids-to-location
329 2. Because it's easier to generate and parse truncated ids if you
330 don't have to fetch all the ids in the storage repository but
331 can restrict yourself to a specific branch.
333 You can generate ids of this sort with the :meth:`user` method,
334 although in order to perform the truncation, your object (and its
335 parents) must define a `sibling_uuids` method.
337 While users can use the convenient short user ids in the short
338 term, the truncation will inevitably lead to name collision. To
339 avoid that, we provide a non-truncated form of the short user ids
340 via the :meth:`long_user` method. These long user ids should be
341 converted to short user ids by intelligent user interfaces.
345 parse_user : get uuids back out of the user ids.
346 short_to_long_user : convert a single short user id to a long user id.
347 long_to_short_user : convert a single long user id to a short user id.
348 short_to_long_text : scan text for user ids & convert to long user ids.
349 long_to_short_text : scan text for long user ids & convert to short user ids.
351 def __init__(self, object, type):
352 self._object = object
354 assert self._type in HIERARCHY, self._type
356 def storage(self, *args):
357 return _assemble([self._object.uuid]+list(args))
359 def _ancestors(self):
361 index = HIERARCHY.index(self._type)
365 for i in range(index, 0, -1):
366 parent_name = HIERARCHY[i-1]
367 o = getattr(o, parent_name, None)
372 return _assemble([o.uuid for o in self._ancestors()],
377 for o in self._ancestors():
381 ids.append(_truncate(o.uuid, o.sibling_uuids()))
382 return _assemble(ids, check_length=True)
384 def child_uuids(child_storage_ids):
385 """Extract uuid children from other children generated by
388 This is useful for separating data belonging to a particular
389 object directly from entries for its child objects. Since the
390 :class:`~libbe.storage.base.Storage` backend doesn't distinguish
396 >>> list(child_uuids(['abc123/values', '123abc', '123def']))
399 for id in child_storage_ids:
404 def long_to_short_user(bugdirs, id):
405 """Convert a long user ID to a short user ID (see :class:`ID`).
406 The list of bugdirs allows uniqueness-maintaining truncation of
407 the bugdir portion of the ID.
411 short_to_long_user : inverse
412 long_to_short_text : conversion on a block of text
414 ids = _split(id, check_length=True)
415 matching_bugdirs = [bd for bd in bugdirs if bd.uuid == ids[0]]
416 if len(matching_bugdirs) == 0:
417 raise NoIDMatches(id, [bd.uuid for bd in bugdirs])
418 elif len(matching_bugdirs) > 1:
419 raise MultipleIDMatches(id, '', [bd.uuid for bd in bugdirs])
420 bugdir = matching_bugdirs[0]
423 bug = bugdir.bug_from_uuid(ids[1])
426 comment = bug.comment_from_uuid(ids[2])
427 objects.append(comment)
428 for i,obj in enumerate(objects):
429 ids[i] = _truncate(ids[i], obj.sibling_uuids())
430 return _assemble(ids)
432 def short_to_long_user(bugdirs, id):
433 """Convert a short user ID to a long user ID (see :class:`ID`). The
434 list of bugdirs allows uniqueness-checking during expansion of the
435 bugdir portion of the ID.
439 long_to_short_user : inverse
440 short_to_long_text : conversion on a block of text
442 ids = _split(id, check_length=True)
443 ids[0] = _expand(ids[0], common=None,
444 other_ids=[bd.uuid for bd in bugdirs])
446 return _assemble(ids)
447 bugdir = [bd for bd in bugdirs if bd.uuid == ids[0]][0]
448 ids[1] = _expand(ids[1], common=bugdir.id.user(),
449 other_ids=bugdir.uuids())
451 return _assemble(ids)
452 bug = bugdir.bug_from_uuid(ids[1])
453 ids[2] = _expand(ids[2], common=bug.id.user(),
454 other_ids=bug.uuids())
455 return _assemble(ids)
# UUIDs are hexadecimal, so every level (bugdir, bug, comment) accepts
# only the characters 0-9, a-f and '-'.  The bug and comment groups
# keep their leading '/' so the matched groups can be re-joined into
# the original ID.
REGEXP = '#([-a-f0-9]*)(/[-a-f0-9]*)?(/[-a-f0-9]*)?#'
459 """Regular expression for matching IDs (both short and long) in text.
462 class IDreplacer (object):
463 """Helper class for ID replacement in text.
465 Reassembles the match elements from :data:`REGEXP` matching
466 into the original ID, for easier replacement.
470 short_to_long_text, long_to_short_text
472 def __init__(self, bugdirs, replace_fn, wrap=True):
473 self.bugdirs = bugdirs
474 self.replace_fn = replace_fn
476 def __call__(self, match):
478 for m in match.groups():
482 replacement = self.replace_fn(self.bugdirs, ''.join(ids))
483 if self.wrap == True:
484 return '#%s#' % replacement
def short_to_long_text(bugdirs, text):
    """Expand every short user ID embedded in `text` to its long form
    (see :class:`ID`).

    Each substring of `text` matching :data:`REGEXP` is rewritten by an
    :class:`IDreplacer` wrapping :func:`short_to_long_user`.  The list
    of bugdirs allows uniqueness-checking during expansion of the
    bugdir portion of each ID.

    See Also
    --------
    short_to_long_user : conversion on a single ID
    long_to_short_text : inverse
    """
    expander = IDreplacer(bugdirs, short_to_long_user)
    return re.sub(REGEXP, expander, text)
def long_to_short_text(bugdirs, text):
    """Truncate every long user ID embedded in `text` to its short
    form (see :class:`ID`).

    Each substring of `text` matching :data:`REGEXP` is rewritten by an
    :class:`IDreplacer` wrapping :func:`long_to_short_user`.  The list
    of bugdirs allows uniqueness-maintaining truncation of the bugdir
    portion of each ID.

    See Also
    --------
    long_to_short_user : conversion on a single ID
    short_to_long_text : inverse
    """
    truncator = IDreplacer(bugdirs, long_to_short_user)
    return re.sub(REGEXP, truncator, text)
def residual(base, fragment):
    """Divide the short ID `fragment` at the depth of `base`.

    Returns a ``(root, rest)`` tuple of strings.  `root` holds the
    leading levels of `fragment` that line up with the levels of
    `base` and always ends in ``/``; `rest` holds the remaining levels
    of `fragment`, i.e. the portion located inside `base`.

    Examples
    --------

    >>> residual('ABC/DEF/', '//GHI')
    ('//', 'GHI')
    >>> residual('ABC/DEF/', '/D/GHI')
    ('/D/', 'GHI')
    >>> residual('ABC/DEF', 'A/D/GHI')
    ('A/D/', 'GHI')
    >>> residual('ABC/DEF', 'A/D/GHI/JKL')
    ('A/D/', 'GHI/JKL')
    """
    # Normalize to exactly one trailing slash, so that each level of
    # `base` contributes one '/' to the count.
    depth = (base.rstrip('/') + '/').count('/')
    levels = fragment.split('/')
    # The appended empty string makes the joined root end in '/'.
    root = '/'.join(levels[:depth] + [''])
    rest = '/'.join(levels[depth:])
    return (root, rest)
535 """Parse a user ID (see :class:`ID`), returning a dict of parsed
538 The returned dict will contain a value for "type" (from
539 :data:`HIERARCHY`) and values for the levels that are defined.
544 >>> _parse_user('ABC/DEF/GHI') == \\
545 ... {'bugdir':'ABC', 'bug':'DEF', 'comment':'GHI', 'type':'comment'}
547 >>> _parse_user('ABC/DEF') == \\
548 ... {'bugdir':'ABC', 'bug':'DEF', 'type':'bug'}
550 >>> _parse_user('ABC') == \\
551 ... {'bugdir':'ABC', 'type':'bugdir'}
553 >>> _parse_user('') == \\
554 ... {'bugdir':None, 'type':'bugdir'}
556 >>> _parse_user('/') == \\
557 ... {'bugdir':None, 'bug':None, 'type':'bug'}
559 >>> _parse_user('/DEF/') == \\
560 ... {'bugdir':None, 'bug':'DEF', 'comment':None, 'type':'comment'}
562 >>> _parse_user('a/b/c/d')
563 Traceback (most recent call last):
565 InvalidIDStructure: 4 > 3 levels in "a/b/c/d"
568 args = _split(id, check_length=True)
569 for i,(type,arg) in enumerate(zip(HIERARCHY, args)):
570 if arg != None and len(arg) == 0:
571 raise InvalidIDStructure(
572 id, 'Invalid %s part %d "%s" of id "%s"' % (type, i, arg, id))
def parse_user(bugdir, id):
    """Parse a short or long user ID (see :class:`ID`) into a dict.

    The ID is first expanded with :func:`short_to_long_user`, using
    `bugdir` as the only candidate bug directory, and the resulting
    long ID is then handed to :func:`_parse_user`.  Because of the
    expansion step, both short and long IDs are handled successfully.

    The returned dict will contain a value for "type" (from
    :data:`HIERARCHY`) and values for the levels that are defined.

    Exceptions raised during expansion or parsing (for example
    :class:`NoIDMatches`, :class:`MultipleIDMatches` or
    :class:`InvalidIDStructure`) are not caught here.
    """
    return _parse_user(short_to_long_user([bugdir], id))
592 if libbe.TESTING == True:
593 class UUIDtestCase(unittest.TestCase):
594 def testUUID_gen(self):
596 self.failUnless(len(id) == 36, 'invalid UUID "%s"' % id)
598 class DummyObject (object):
599 def __init__(self, uuid, parent=None, siblings=[]):
601 self._siblings = siblings
605 assert parent.type in HIERARCHY, parent
606 setattr(self, parent.type, parent)
607 type_i = HIERARCHY.index(parent.type) + 1
608 self.type = HIERARCHY[type_i]
609 self.id = ID(self, self.type)
610 def sibling_uuids(self):
611 return self._siblings
613 class IDtestCase(unittest.TestCase):
615 self.bugdir = DummyObject('1234abcd')
616 self.bug = DummyObject('abcdef', self.bugdir, ['a1234', 'ab9876'])
617 self.comment = DummyObject('12345678', self.bug, ['1234abcd', '1234cdef'])
618 self.bd_id = self.bugdir.id
619 self.b_id = self.bug.id
620 self.c_id = self.comment.id
621 def test_storage(self):
622 self.failUnless(self.bd_id.storage() == self.bugdir.uuid,
623 self.bd_id.storage())
624 self.failUnless(self.b_id.storage() == self.bug.uuid,
626 self.failUnless(self.c_id.storage() == self.comment.uuid,
628 self.failUnless(self.bd_id.storage('x', 'y', 'z') == \
630 self.bd_id.storage('x', 'y', 'z'))
631 def test_long_user(self):
632 self.failUnless(self.bd_id.long_user() == self.bugdir.uuid,
633 self.bd_id.long_user())
634 self.failUnless(self.b_id.long_user() == \
635 '/'.join([self.bugdir.uuid, self.bug.uuid]),
636 self.b_id.long_user())
637 self.failUnless(self.c_id.long_user() ==
638 '/'.join([self.bugdir.uuid, self.bug.uuid,
642 self.failUnless(self.bd_id.user() == '123',
644 self.failUnless(self.b_id.user() == '123/abc',
646 self.failUnless(self.c_id.user() == '123/abc/12345',
649 class ShortLongParseTestCase(unittest.TestCase):
651 self.bugdir = DummyObject('1234abcd')
652 self.bug = DummyObject('abcdef', self.bugdir, ['a1234', 'ab9876'])
653 self.comment = DummyObject('12345678', self.bug, ['1234abcd', '1234cdef'])
654 self.bd_id = self.bugdir.id
655 self.b_id = self.bug.id
656 self.c_id = self.comment.id
657 self.bugdir.bug_from_uuid = lambda uuid: self.bug
658 self.bugdir.uuids = lambda : self.bug.sibling_uuids() + [self.bug.uuid]
659 self.bug.comment_from_uuid = lambda uuid: self.comment
660 self.bug.uuids = lambda : self.comment.sibling_uuids() + [self.comment.uuid]
661 self.short = 'bla bla #123/abc# bla bla #123/abc/12345# bla bla'
662 self.long = 'bla bla #1234abcd/abcdef# bla bla #1234abcd/abcdef/12345678# bla bla'
663 self.short_id_parse_pairs = [
664 ('', {'bugdir':'1234abcd', 'type':'bugdir'}),
665 ('123/abc', {'bugdir':'1234abcd', 'bug':'abcdef',
667 ('123/abc/12345', {'bugdir':'1234abcd', 'bug':'abcdef',
668 'comment':'12345678', 'type':'comment'}),
670 self.short_id_exception_pairs = [
671 ('z', NoIDMatches('z', ['1234abcd'])),
672 ('///', InvalidIDStructure(
673 '///', msg='4 > 3 levels in "///"')),
674 ('/', MultipleIDMatches(
675 None, '123', ['a1234', 'ab9876', 'abcdef'])),
676 ('123/', MultipleIDMatches(
677 None, '123', ['a1234', 'ab9876', 'abcdef'])),
678 ('123/abc/', MultipleIDMatches(
679 None, '123/abc', ['1234abcd','1234cdef','12345678'])),
681 def test_short_to_long_text(self):
682 self.failUnless(short_to_long_text([self.bugdir], self.short) == self.long,
683 '\n' + self.short + '\n' + short_to_long_text([self.bugdir], self.short) + '\n' + self.long)
684 def test_long_to_short_text(self):
685 self.failUnless(long_to_short_text([self.bugdir], self.long) == self.short,
686 '\n' + long_to_short_text([self.bugdir], self.long) + '\n' + self.short)
687 def test_parse_user(self):
688 for short_id,parsed in self.short_id_parse_pairs:
689 ret = parse_user(self.bugdir, short_id)
690 self.failUnless(ret == parsed,
691 'got %s\nexpected %s' % (ret, parsed))
692 def test_parse_user_exceptions(self):
693 for short_id,exception in self.short_id_exception_pairs:
695 ret = parse_user(self.bugdir, short_id)
696 self.fail('Expected parse_user(bugdir, "%s") to raise %s,'
697 '\n but it returned %s'
698 % (short_id, exception.__class__.__name__, ret))
699 except exception.__class__, e:
701 if attr.startswith('_') or attr == 'args':
703 value = getattr(e, attr)
704 expected = getattr(exception, attr)
707 'Expected parse_user(bugdir, "%s") %s.%s'
708 '\n to be %s, but it is %s\n\n%s'
709 % (short_id, exception.__class__.__name__,
710 attr, expected, value, e))
712 unitsuite =unittest.TestLoader().loadTestsFromModule(sys.modules[__name__])
713 suite = unittest.TestSuite([unitsuite, doctest.DocTestSuite()])