## XML and list utility functions
def urlencode(param_dict):
    """Build a URL query string ("key=value&key2=value2...") from param_dict.

    Parameters whose value is None are omitted entirely.  Values are
    converted with str(); no percent-escaping is performed, matching the
    original behaviour (callers pass pre-safe values).

    Returns the empty string for an empty (or all-None) param_dict.
    """
    # str.join replaces the original quadratic "+=" loop and its manual
    # trailing-'&' strip; the dead commented-out True/False conversion
    # and the redundant second None test are dropped.
    return "&".join("%s=%s" % (key, str(value))
                    for key, value in param_dict.items()
                    if value is not None)
seen = []
return t(c for c in seq if not (c in seen or seen.append(c)))
-def get_text(node) :
+def get_text(node):
"""
Given a node (<node-name> in the following example),
extract some-text from '<node-name>some-text</node-name>'
data = node.childNodes[0].data
elif len(node.childNodes) == 0: # empty node
data = None
- else :
+ else:
raise Exception, "Node contains more than text"
return data
for n in node.childNodes:
if n.nodeType != n.ELEMENT_NODE:
continue # ignore text, comment, etc. nodes
- if n.tagName == child_name :
+ if n.tagName == child_name:
ret.append(n)
return ret
tags.append(n.tagName)
return (dict, tags)
def delist_dict(dict):
    """Collapse single-element list values in place.

    Given e.g.
        {'some-tag': ['some-text'],
         'other-tag': ['a', 'b'],
         'third-tag': 'text'}
    replace each value that is a one-element list with that element:
        {'some-tag': 'some-text',
         'other-tag': ['a', 'b'],
         'third-tag': 'text'}
    The dict is modified in place and also returned.

    NOTE(review): the parameter shadows the builtin `dict`; the name is
    kept unchanged for interface compatibility with existing callers.
    """
    # Snapshot items() so reassigning values while iterating is
    # unambiguously safe; no keys are added or removed, so the
    # observable behaviour is identical to the original loop.
    for key, value in list(dict.items()):
        if isinstance(value, list) and len(value) == 1:
            dict[key] = value[0]
    return dict
"""
params = urlencode ({
'db': db,
- 'tool' : TOOL,
- 'email' : EMAIL})
+ 'tool': TOOL,
+ 'email': EMAIL})
LOG.info("getting einfo from '%s?%s'" % (einfo_url, params))
f = urllib.urlopen ("%s?%s" % (einfo_url, params))
if page == None and parsed == None:
LOG.info('downloading new einfo page')
page = _query_einfo(db)
- if parsed == None :
+ if parsed == None:
LOG.info('parsing new einfo page')
parsed = dom.parseString(page)
parsed_islocal = True
- else :
+ else:
LOG.info('using old einfo parsing')
parsed_islocal = False
return (parsed, parsed_islocal)
Helper function for various einfo processing functions.
Clean up the parsed xml structure if the calling function created it.
"""
- if parsed_islocal == True :
+ if parsed_islocal == True:
LOG.info('cleaning up einfo parsing')
parsed.unlink() # clean up the DOM
assert len(fieldlists) == 1, "%s\n\n%d FieldLists!" % (parsed.toxml(), len(fieldlists))
fieldlist = fieldlists[0]
for node in fieldlist.childNodes:
- if node.nodeType != node.ELEMENT_NODE :
+ if node.nodeType != node.ELEMENT_NODE:
continue # ignore text, comment, etc. nodes
assert node.tagName == "Field", "Unrecognized tag '%s' in FieldList" % node.tagName
field,new_tags = get_child_dict(node)
assert len(linklists) == 1, "%s\n\n%d LinkLists!" % (parsed.toxml(), len(linklists))
linklist = linklists[0]
for node in linklist.childNodes:
- if node.nodeType != node.ELEMENT_NODE :
+ if node.nodeType != node.ELEMENT_NODE:
continue # ignore text, comment, etc. nodes
assert node.tagName == "Link", "Unrecognized tag '%s' in LinkList" % node.tagName
link,new_tags = get_child_dict(node)
def validate_field(field, fields):
"Ensure that field is a valid field for the database db."
- try :
+ try:
fields.index(field.upper())
except ValueError:
raise Exception, "Field '%s' invalid\nValid fields are\n %s" \
fields = []
infield = False
for i in range(len(term)):
- if term[i] == '[' and infield == False :
+ if term[i] == '[' and infield == False:
infield = True
field_start = i+1
- elif term[i] == ']' and infield == True :
+ elif term[i] == ']' and infield == True:
infield = False
fields.append(term[field_start:i])
return fields
def validate_search_term(term, fields):
    """Check every field referenced in `term` against the list of
    fields that are valid for the target database, raising on the
    first invalid one (via validate_field)."""
    for used_field in strip_fields_from_term(term):
        validate_field(used_field, fields)
def _query_esearch(term, db='pubmed', field=None,
reldate=None, daterange=None, datetype=None,
retmax=None, rettype=None, sort=None,
- validate=False, valid_fields=None, debug=False) :
+ validate=False, valid_fields=None, debug=False):
"""
Search an Entrez database.
http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
PubMed values: author, last+author, journal, pub+date
"""
- if daterange != None :
+ if daterange != None:
assert len(daterange) == 2, "Invalid daterange '%s', should be e.g. ('2001', '2002/01/01')"
reldate == None, "Specifying date with daterange AND reldate!"
mindate = daterange[0]
maxdate = daterange[1]
- else :
+ else:
mindate = None
maxdate = None
- if validate :
+ if validate:
assert len(valid_fields) > 0, "Need a list of valid fields to validate"
- if field != None :
+ if field != None:
validate_field(field)
validate_search_term(term, valid_fields)
params = urlencode ({
- 'tool' : TOOL,
- 'email' : EMAIL,
- 'term' : term,
+ 'tool': TOOL,
+ 'email': EMAIL,
+ 'term': term,
'db': db,
- 'field' : field,
- 'reldate' : reldate,
- 'mindate' : mindate,
- 'maxdate' : maxdate,
- 'datetype' : datetype,
- 'maxdate' : maxdate,
- 'retmax' : retmax,
- 'rettype' : rettype,
- 'sort' : sort})
+ 'field': field,
+ 'reldate': reldate,
+ 'mindate': mindate,
+ 'maxdate': maxdate,
+ 'datetype': datetype,
+ 'maxdate': maxdate,
+ 'retmax': retmax,
+ 'rettype': rettype,
+ 'sort': sort})
LOG.info("getting esearch from '%s?%s'" % (esearch_url, params))
f = urllib.urlopen ("%s?%s" % (esearch_url, params))
"""
idstring = ""
- for d in id :
+ for d in id:
idstring += "%s," % d
idstring = idstring[:-1] # remove trailing comma
params = urlencode ({
- 'tool' : TOOL,
- 'email' : EMAIL,
- 'id' : idstring,
+ 'tool': TOOL,
+ 'email': EMAIL,
+ 'id': idstring,
'db': db,
- 'retmax' : retmax,
- 'retmode' : retmode,
- 'rettype' : rettype})
+ 'retmax': retmax,
+ 'retmode': retmode,
+ 'rettype': rettype})
LOG.info("getting efetch from '%s?%s'" % (efetch_url, params))
f = urllib.urlopen ("%s?%s" % (efetch_url, params))
"""
idstring = ""
- for d in id :
+ for d in id:
idstring += "%s," % d
idstring = idstring[:-1] # remove trailing comma
params = urlencode ({
- 'tool' : TOOL,
- 'email' : EMAIL,
- 'id' : idstring,
+ 'tool': TOOL,
+ 'email': EMAIL,
+ 'id': idstring,
'term': term,
'db': db,
'dbfrom': dbfrom,
'reldate': reldate,
'daterange': daterange,
'datetype': datetype,
- 'retmode' : retmode})
+ 'retmode': retmode})
LOG.info("getting elink from '%s?%s'" % (elink_url, params))
f = urllib.urlopen ("%s?%s" % (elink_url, params))
- if cmd == 'prlinks' and retmode == 'ref' :
+ if cmd == 'prlinks' and retmode == 'ref':
# Just get the link, we don't need the provider's webpage HTML.
url = f.geturl()
f.close()
# we can write up to PIPE_BUF bytes without risk
# blocking. POSIX defines PIPE_BUF >= 512
LOG.debug('write to stdin for process 0')
- chunk = input[input_offset : input_offset + 512]
+ chunk = input[input_offset:input_offset+512]
bytes_written = os.write(
self._procs[0].stdin.fileno(), chunk)
input_offset += bytes_written
## Random
-def hints() :
+def hints():
"Print Entrez search hints and exit"
print """
## Test with a mini-searching application
-if __name__ == "__main__" :
+if __name__ == "__main__":
from optparse import OptionParser
usage_string = """%prog [options] SEARCH_TERM (print medline xml matching search)
def set_mode(option, opt_str, value, parser):
global mode
long_option = option.get_opt_string()
- if long_option == '--list-mode' :
+ if long_option == '--list-mode':
mode = 'list'
- elif long_option == '--explain-mode' :
+ elif long_option == '--explain-mode':
mode = 'explain'
parser.add_option('-L', '--list-mode', callback=set_mode,
def set_output(option, opt_str, value, parser):
global output
long_option = option.get_opt_string()
- if long_option == '--output-link' :
+ if long_option == '--output-link':
output = 'link'
parser.add_option('-W', '--raw', dest="raw", action="store_true",
help="Output raw Entrez xml", default=False)
parser.destroy()
# open the output file if specified
- if options.filename == None :
+ if options.filename == None:
outfile = sys.stdout
- else :
+ else:
outfile = file(options.filename, 'w')
- if options.verbose :
+ if options.verbose:
LOG.setLevel(logging.DEBUG)
LOG.debug('operating in %s mode' % mode)
- if mode == 'list' :
+ if mode == 'list':
print >> outfile, "Available databases:"
databases = database_list()
for db in databases:
elif mode == 'explain':
fields,tags,field_info = field_dict(db=options.database)
- if options.field == None :
+ if options.field == None:
print >> outfile, "Available fields in %s:" % options.database
field_size = [0,0]
- for field in fields :
- if len(field) > field_size[0] :
+ for field in fields:
+ if len(field) > field_size[0]:
field_size[0] = len(field)
- if len(field_info[field]['FullName']) > field_size[1] :
+ if len(field_info[field]['FullName']) > field_size[1]:
field_size[1] = len(field_info[field]['FullName'])
- for field in fields :
+ for field in fields:
print >> outfile, "\t%*.*s\t%-*.*s" \
% (field_size[0], field_size[0], field,
field_size[1], field_size[1], field_info[field]['FullName'])
- else :
+ else:
print >> outfile, "Field %s in %s:" % (options.field,options.database)
field_size = [0,0]
for key in tags:
- if len(key) > field_size[0] :
+ if len(key) > field_size[0]:
field_size[0] = len(key)
- if len(field_info[options.field][key]) > field_size[1] :
+ if len(field_info[options.field][key]) > field_size[1]:
field_size[1] = len(field_info[options.field][key])
for key in tags:
print >> outfile, "\t%*.*s\t%-*.*s" \
search_term = args[0]
LOG.debug('output %s' % output)
- if output == 'bibtex' :
+ if output == 'bibtex':
medline_xml = search_fetch_xml(term=search_term,
db=options.database,
field=options.field,
retmode=options.retmode,
rettype='medline')
if medline_xml:
- if options.raw :
+ if options.raw:
print outfile, medline_xml
else:
bibtex = medline_xml_to_bibtex(medline_xml)
print >> outfile, bibtex
- elif output == 'link' :
+ elif output == 'link':
# Assume that if you're looking for links
# your search is already pretty refined,
# so use the date options for link-limiting.
link_retmode=options.retmode,)
print >> outfile, link_xml
- if options.filename != None :
+ if options.filename != None:
outfile.close()