From: W. Trevor King Date: Sat, 16 Apr 2011 02:08:43 +0000 (-0400) Subject: Use builtin set() instead of our old unique() in entrez.py. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=06fd1a24e071ba6545a064cdf766147181ea8019;p=mw2txt.git Use builtin set() instead of our old unique() in entrez.py. --- diff --git a/posts/entrez/entrez.py b/posts/entrez/entrez.py index bdf6235..e026929 100755 --- a/posts/entrez/entrez.py +++ b/posts/entrez/entrez.py @@ -98,20 +98,6 @@ def urlencode(param_dict): return urllib.urlencode( [(k,v) for k,v in param_dict.iteritems() if v is not None]) -def unique(seq, keepstr=True): - """ - Return the sequence (list, tuple, etc) without repeating entries - by Paul Rubin and Jordan Callicoat. - http://groups.google.com/group/comp.lang.python/browse_thread/thread/40c6c455f4fd5154/744a1a338afe1331?lnk=gst&rnum=7#744a1a338afe1331 - - for example [1,2,3,1,2] -> [1,2,3] - """ - t = type(seq) - if t in (str, unicode): - t = (list, ''.join)[bool(keepstr)] - seen = [] - return t(c for c in seq if not (c in seen or seen.append(c))) - def get_text(node): """ Given a node ( in the following example), @@ -272,7 +258,7 @@ def database_list(page=None, parsed=None): def field_dict(db='pubmed', page=None, parsed=None): parsed,parsed_islocal = get_parsed_einfo(db, page, parsed) fields = [] - tags = [] + tags = set() field_info = {} fieldlists = parsed.getElementsByTagName("FieldList") assert len(fieldlists) == 1, '%s\n\n%d FieldLists!' % ( @@ -288,8 +274,8 @@ def field_dict(db='pubmed', page=None, parsed=None): 'Multiple field names %s' % str(field['Name'])) field = delist_dict(field) fields.append(field['Name']) - new_tags = unique(tags + new_tags) - if tags != []: + new_tags = tags.union(new_tags) + if tags: assert new_tags == tags, "Inconsistent tags" tags = new_tags field_info[field['Name']] = field @@ -299,7 +285,7 @@ def field_dict(db='pubmed', page=None, parsed=None): def link_dict(db='pubmed', page=None, parsed=None): parsed,parsed_islocal = get_parsed_einfo(db, page, parsed) links = [] - tags = [] + tags = set() link_info = [] linklists = parsed.getElementsByTagName("LinkList") assert len(linklists) == 1, ( @@ -315,8 +301,8 @@ def link_dict(db='pubmed', page=None, parsed=None): 'Multiple link names %s' % str(link['Name'])) link = delist_dict(link) links.append(link['Name']) - new_tags = unique(tags + new_tags) - if tags != []: + new_tags = tags.union(new_tags) + if tags: assert new_tags == tags, "Inconsistent tags" tags = new_tags link_info[link['Name']] = link