From: W. Trevor King
Date: Sat, 16 Apr 2011 01:23:17 +0000 (-0400)
Subject: Remove extra spaces from around colons in entrez.py.
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=08c21f31d857f9d4f87495b38325b6e2df4cd10c;p=blog.git

Remove extra spaces from around colons in entrez.py.
---

diff --git a/posts/entrez/entrez.py b/posts/entrez/entrez.py
index 73661b8..e83a26e 100755
--- a/posts/entrez/entrez.py
+++ b/posts/entrez/entrez.py
@@ -85,19 +85,19 @@ del _handler, _formatter
 
 ## XML and list utility functions
 
-def urlencode(param_dict) :
+def urlencode(param_dict):
     params = ""
-    for key,value in param_dict.items() :
-        if value == None :
+    for key,value in param_dict.items():
+        if value == None:
             continue # ignore unused parameter
         #if type(value)== : # convert True/False to 'y'/
-        #    if value == True :
+        #    if value == True:
         #        params += "%s=y&" % (key,)
-        #    #else :
+        #    #else:
         #    #    params += "%s=n&" % (key,)
-        if value != None :
+        if value != None:
             params += "%s=%s&" % (key, str(value))
-    if len(params) > 1 :
+    if len(params) > 1:
         params = params[:-1] # remove trailing &
     return params
 
@@ -115,7 +115,7 @@ def unique(seq, keepstr=True):
         seen = []
     return t(c for c in seq if not (c in seen or seen.append(c)))
 
-def get_text(node) :
+def get_text(node):
     """
     Given a node ( in the following example), extract some-text from
     'some-text'
@@ -126,7 +126,7 @@ def get_text(node) :
         data = node.childNodes[0].data
     elif len(node.childNodes) == 0: # empty node
         data = None
-    else :
+    else:
         raise Exception, "Node contains more than text"
     return data
 
@@ -139,7 +139,7 @@ def get_child_nodes(node, child_name):
     for n in node.childNodes:
         if n.nodeType != n.ELEMENT_NODE:
             continue # ignore text, comment, etc. nodes
-        if n.tagName == child_name :
+        if n.tagName == child_name:
             ret.append(n)
     return ret
 
@@ -193,7 +193,7 @@ def get_child_dict(node):
         tags.append(n.tagName)
     return (dict, tags)
 
-def delist_dict(dict) :
+def delist_dict(dict):
     """
     Given a dict
     e.g. {'some-tag':['some-text', 'some-other-text', ...],
@@ -202,8 +202,8 @@ def delist_dict(dict) :
     e.g. {'some-tag':['some-text', 'some-other-text', ...],
          'other-tag':'some-other-text', ...} ,
     """
-    for key,value in dict.items() :
-        if isinstance(value, list) and len(value) == 1 :
+    for key,value in dict.items():
+        if isinstance(value, list) and len(value) == 1:
             dict[key] = value[0]
     return dict
 
@@ -219,8 +219,8 @@ def _query_einfo(db=None):
     """
     params = urlencode ({
         'db': db,
-        'tool' : TOOL,
-        'email' : EMAIL})
+        'tool': TOOL,
+        'email': EMAIL})
 
     LOG.info("getting einfo from '%s?%s'" % (einfo_url, params))
     f = urllib.urlopen ("%s?%s" % (einfo_url, params))
@@ -241,11 +241,11 @@ def get_parsed_einfo(db=None, page=None, parsed=None):
     if page == None and parsed == None:
         LOG.info('downloading new einfo page')
         page = _query_einfo(db)
-    if parsed == None :
+    if parsed == None:
         LOG.info('parsing new einfo page')
         parsed = dom.parseString(page)
         parsed_islocal = True
-    else :
+    else:
         LOG.info('using old einfo parsing')
         parsed_islocal = False
     return (parsed, parsed_islocal)
@@ -255,7 +255,7 @@ def clean_parsed_einfo(parsed, parsed_islocal=True):
     Helper function for various einfo processing functions.
     Clean up the parsed xml structure if the calling function created it.
     """
-    if parsed_islocal == True :
+    if parsed_islocal == True:
         LOG.info('cleaning up einfo parsing')
         parsed.unlink() # clean up the DOM
 
@@ -279,7 +279,7 @@ def field_dict(db='pubmed', page=None, parsed=None):
     assert len(fieldlists) == 1, "%s\n\n%d FieldLists!" % (parsed.toxml(), len(fieldlists))
     fieldlist = fieldlists[0]
     for node in fieldlist.childNodes:
-        if node.nodeType != node.ELEMENT_NODE :
+        if node.nodeType != node.ELEMENT_NODE:
             continue # ignore text, comment, etc. nodes
         assert node.tagName == "Field", "Unrecognized tag '%s' in FieldList" % node.tagName
         field,new_tags = get_child_dict(node)
@@ -303,7 +303,7 @@ def link_dict(db='pubmed', page=None, parsed=None):
     assert len(linklists) == 1, "%s\n\n%d LinkLists!" % (parsed.toxml(), len(linklists))
     linklist = linklists[0]
     for node in linklist.childNodes:
-        if node.nodeType != node.ELEMENT_NODE :
+        if node.nodeType != node.ELEMENT_NODE:
             continue # ignore text, comment, etc. nodes
         assert node.tagName == "Link", "Unrecognized tag '%s' in LinkList" % node.tagName
         link,new_tags = get_child_dict(node)
@@ -328,7 +328,7 @@ def database_info(db='pubmed', page=None, parsed=None):
 
 def validate_field(field, fields):
     "Ensure that field is a valid field for the database db."
-    try :
+    try:
         fields.index(field.upper())
     except ValueError:
         raise Exception, "Field '%s' invalid\nValid fields are\n %s" \
@@ -339,17 +339,17 @@ def strip_fields_from_term(term):
     fields = []
     infield = False
     for i in range(len(term)):
-        if term[i] == '[' and infield == False :
+        if term[i] == '[' and infield == False:
             infield = True
             field_start = i+1
-        elif term[i] == ']' and infield == True :
+        elif term[i] == ']' and infield == True:
             infield = False
             fields.append(term[field_start:i])
     return fields
 
 def validate_search_term(term, fields):
     "Ensure that the fields in term are valid fields for the database db."
-    for field in strip_fields_from_term(term) :
+    for field in strip_fields_from_term(term):
         validate_field(field, fields)
 
 
@@ -358,7 +358,7 @@ def validate_search_term(term, fields):
 def _query_esearch(term, db='pubmed', field=None,
                    reldate=None, daterange=None, datetype=None,
                    retmax=None, rettype=None, sort=None,
-                   validate=False, valid_fields=None, debug=False) :
+                   validate=False, valid_fields=None, debug=False):
     """
     Search an Entrez database.
     http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
@@ -403,33 +403,33 @@ def _query_esearch(term, db='pubmed', field=None,
         PubMed values: author, last+author, journal, pub+date
     """
-    if daterange != None :
+    if daterange != None:
         assert len(daterange) == 2, "Invalid daterange '%s', should be e.g. ('2001', '2002/01/01')"
         reldate == None, "Specifying date with daterange AND reldate!"
         mindate = daterange[0]
         maxdate = daterange[1]
-    else :
+    else:
         mindate = None
         maxdate = None
 
-    if validate :
+    if validate:
         assert len(valid_fields) > 0, "Need a list of valid fields to validate"
-        if field != None :
+        if field != None:
             validate_field(field)
         validate_search_term(term, valid_fields)
     params = urlencode ({
-        'tool' : TOOL,
-        'email' : EMAIL,
-        'term' : term,
+        'tool': TOOL,
+        'email': EMAIL,
+        'term': term,
         'db': db,
-        'field' : field,
-        'reldate' : reldate,
-        'mindate' : mindate,
-        'maxdate' : maxdate,
-        'datetype' : datetype,
-        'maxdate' : maxdate,
-        'retmax' : retmax,
-        'rettype' : rettype,
-        'sort' : sort})
+        'field': field,
+        'reldate': reldate,
+        'mindate': mindate,
+        'maxdate': maxdate,
+        'datetype': datetype,
+        'maxdate': maxdate,
+        'retmax': retmax,
+        'rettype': rettype,
+        'sort': sort})
 
     LOG.info("getting esearch from '%s?%s'" % (esearch_url, params))
     f = urllib.urlopen ("%s?%s" % (esearch_url, params))
@@ -511,17 +511,17 @@ def _query_efetch(id, db='pubmed',
 
     """
     idstring = ""
-    for d in id :
+    for d in id:
         idstring += "%s," % d
     idstring = idstring[:-1] # remove trailing comma
     params = urlencode ({
-        'tool' : TOOL,
-        'email' : EMAIL,
-        'id' : idstring,
+        'tool': TOOL,
+        'email': EMAIL,
+        'id': idstring,
         'db': db,
-        'retmax' : retmax,
-        'retmode' : retmode,
-        'rettype' : rettype})
+        'retmax': retmax,
+        'retmode': retmode,
+        'rettype': rettype})
 
     LOG.info("getting efetch from '%s?%s'" % (efetch_url, params))
     f = urllib.urlopen ("%s?%s" % (efetch_url, params))
@@ -610,14 +610,14 @@ def _query_elink(id, term=None, db='all', dbfrom='pubmed',
 
     """
     idstring = ""
-    for d in id :
+    for d in id:
         idstring += "%s," % d
     idstring = idstring[:-1] # remove trailing comma
 
     params = urlencode ({
-        'tool' : TOOL,
-        'email' : EMAIL,
-        'id' : idstring,
+        'tool': TOOL,
+        'email': EMAIL,
+        'id': idstring,
         'term': term,
         'db': db,
         'dbfrom': dbfrom,
@@ -628,12 +628,12 @@ def _query_elink(id, term=None, db='all', dbfrom='pubmed',
         'reldate': reldate,
         'daterange': daterange,
         'datetype': datetype,
-        'retmode' : retmode})
+        'retmode': retmode})
 
     LOG.info("getting elink from '%s?%s'" % (elink_url, params))
     f = urllib.urlopen ("%s?%s" % (elink_url, params))
 
-    if cmd == 'prlinks' and retmode == 'ref' :
+    if cmd == 'prlinks' and retmode == 'ref':
         # Just get the link, we don't need the provider's webpage HTML.
         url = f.geturl()
         f.close()
@@ -828,7 +828,7 @@ class Pipe (object):
                 # we can write up to PIPE_BUF bytes without risk
                 # blocking. POSIX defines PIPE_BUF >= 512
                 LOG.debug('write to stdin for process 0')
-                chunk = input[input_offset : input_offset + 512]
+                chunk = input[input_offset:input_offset+512]
                 bytes_written = os.write(
                     self._procs[0].stdin.fileno(), chunk)
                 input_offset += bytes_written
@@ -950,7 +950,7 @@ def medline_xml_to_bibtex(fetch_page):
 
 ## Random
 
-def hints() :
+def hints():
     "Print Entrez search hints and exit"
     print """
 
@@ -961,7 +961,7 @@ free full text [sb]
 """
 
 ## Test with a mini-searching application
-if __name__ == "__main__" :
+if __name__ == "__main__":
     from optparse import OptionParser
 
     usage_string = """%prog [options] SEARCH_TERM (print medline xml matching search)
@@ -1002,9 +1002,9 @@ for more details.
     def set_mode(option, opt_str, value, parser):
         global mode
         long_option = option.get_opt_string()
-        if long_option == '--list-mode' :
+        if long_option == '--list-mode':
             mode = 'list'
-        elif long_option == '--explain-mode' :
+        elif long_option == '--explain-mode':
             mode = 'explain'
 
     parser.add_option('-L', '--list-mode', callback=set_mode,
@@ -1017,7 +1017,7 @@ for more details.
     def set_output(option, opt_str, value, parser):
         global output
         long_option = option.get_opt_string()
-        if long_option == '--output-link' :
+        if long_option == '--output-link':
             output = 'link'
     parser.add_option('-W', '--raw', dest="raw", action="store_true",
                       help="Output raw Entrez xml", default=False)
@@ -1064,17 +1064,17 @@ for more details.
     parser.destroy()
 
     # open the output file if specified
-    if options.filename == None :
+    if options.filename == None:
         outfile = sys.stdout
-    else :
+    else:
         outfile = file(options.filename, 'w')
 
-    if options.verbose :
+    if options.verbose:
         LOG.setLevel(logging.DEBUG)
 
     LOG.debug('operating in %s mode' % mode)
 
-    if mode == 'list' :
+    if mode == 'list':
         print >> outfile, "Available databases:"
         databases = database_list()
         for db in databases:
@@ -1082,25 +1082,25 @@ for more details.
 
     elif mode == 'explain':
         fields,tags,field_info = field_dict(db=options.database)
-        if options.field == None :
+        if options.field == None:
             print >> outfile, "Available fields in %s:" % options.database
             field_size = [0,0]
-            for field in fields :
-                if len(field) > field_size[0] :
+            for field in fields:
+                if len(field) > field_size[0]:
                     field_size[0] = len(field)
-                if len(field_info[field]['FullName']) > field_size[1] :
+                if len(field_info[field]['FullName']) > field_size[1]:
                     field_size[1] = len(field_info[field]['FullName'])
-            for field in fields :
+            for field in fields:
                 print >> outfile, "\t%*.*s\t%-*.*s" \
                     % (field_size[0], field_size[0], field,
                        field_size[1], field_size[1], field_info[field]['FullName'])
-        else :
+        else:
             print >> outfile, "Field %s in %s:" % (options.field,options.database)
             field_size = [0,0]
             for key in tags:
-                if len(key) > field_size[0] :
+                if len(key) > field_size[0]:
                     field_size[0] = len(key)
-                if len(field_info[options.field][key]) > field_size[1] :
+                if len(field_info[options.field][key]) > field_size[1]:
                     field_size[1] = len(field_info[options.field][key])
             for key in tags:
                 print >> outfile, "\t%*.*s\t%-*.*s" \
@@ -1111,7 +1111,7 @@ for more details.
         search_term = args[0]
         LOG.debug('output %s' % output)
 
-        if output == 'bibtex' :
+        if output == 'bibtex':
             medline_xml = search_fetch_xml(term=search_term,
                                            db=options.database,
                                            field=options.field,
@@ -1123,13 +1123,13 @@ for more details.
                                            retmode=options.retmode,
                                            rettype='medline')
             if medline_xml:
-                if options.raw :
+                if options.raw:
                     print outfile, medline_xml
                 else:
                     bibtex = medline_xml_to_bibtex(medline_xml)
                     print >> outfile, bibtex
 
-        elif output == 'link' :
+        elif output == 'link':
             # Assume that if you're looking for links
             # your search is already pretty refined,
             # so use the date options for link-limiting.
@@ -1155,5 +1155,5 @@ for more details.
                                    link_retmode=options.retmode,)
             print >> outfile, link_xml
 
-    if options.filename != None :
+    if options.filename != None:
         outfile.close()
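
A change this mechanical can mostly be scripted rather than made by
hand. The following filter is a minimal sketch (hypothetical; it is not
part of this commit or of blog.git) of the kind of pass that strips the
whitespace before a colon. It is a blind textual substitution, so a
colon inside a string literal would be rewritten too, and slices like
the one in the Pipe hunk above still need hand tightening; review the
resulting diff before committing.

    #!/usr/bin/env python
    # Hypothetical cleanup filter: drop whitespace before ':', so that
    # "else :" becomes "else:" and "'tool' : TOOL," becomes "'tool': TOOL,".
    # Blind substitution -- inspect the output as a diff before committing.
    import re
    import sys

    SPACE_COLON = re.compile(r'[ \t]+:')

    for line in sys.stdin:
        sys.stdout.write(SPACE_COLON.sub(':', line))

Running it as "python strip_colon_space.py < entrez.py > entrez.py.new"
(the script name is illustrative) and diffing the two files gives a
first cut at the hunks above.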