From: W. Trevor King Date: Sat, 16 Apr 2011 01:21:48 +0000 (-0400) Subject: Strip trailing whitespace from entrez.py. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=e9c61d7ca67ac9e2461084c68bf210fc90b053fc;p=blog.git Strip trailing whitespace from entrez.py. --- diff --git a/posts/entrez/entrez.py b/posts/entrez/entrez.py index 89b9da1..73661b8 100755 --- a/posts/entrez/entrez.py +++ b/posts/entrez/entrez.py @@ -16,7 +16,7 @@ # You should have received a copy of the GNU General Public License # along with This program. If not, see . # -# Code following John Vu's medline query code pybliographer/Pyblio/Query.py, +# Code following John Vu's medline query code pybliographer/Pyblio/Query.py, # # Python interface to the Entrez databases. # See http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html @@ -31,7 +31,7 @@ # NLM does not claim the copyright on the abstracts in PubMed; however, journal publishers or authors may. # NLM provides no legal advice concerning distribution of copyrighted materials, consult your legal counsel. # -# For a good Python-and-XML-DOM intro, see +# For a good Python-and-XML-DOM intro, see # http://www.boddie.org.uk/python/XML_intro.html # for the official docs, see # http://docs.python.org/lib/module-xml.dom.html @@ -50,7 +50,7 @@ import urllib # supports Document Object Model (DOM) Level 1 Specification # http://docs.python.org/lib/module-xml.dom.minidom.html import xml.dom.minidom as dom - + # For calling the bibutils conversion programs from subprocess import Popen, PIPE @@ -106,7 +106,7 @@ def unique(seq, keepstr=True): Return the sequence (list, tuple, etc) without repeating entries by Paul Rubin and Jordan Callicoat. http://groups.google.com/group/comp.lang.python/browse_thread/thread/40c6c455f4fd5154/744a1a338afe1331?lnk=gst&rnum=7#744a1a338afe1331 - + for example [1,2,3,1,2] -> [1,2,3] """ t = type(seq) @@ -293,7 +293,7 @@ def field_dict(db='pubmed', page=None, parsed=None): field_info[field['Name']] = field clean_parsed_einfo(parsed,parsed_islocal) return (fields, tags, field_info) - + def link_dict(db='pubmed', page=None, parsed=None): parsed,parsed_islocal = get_parsed_einfo(db, page, parsed) links = [] @@ -325,7 +325,7 @@ def database_info(db='pubmed', page=None, parsed=None): links,link_tags,link_info = link_dict(db=db, parsed=parsed) clean_parsed_einfo(parsed,parsed_islocal) return (fields, field_tags, field_info, links, link_tags, link_info) - + def validate_field(field, fields): "Ensure that field is a valid field for the database db." try : @@ -355,9 +355,9 @@ def validate_search_term(term, fields): ## Search an Entrez database -def _query_esearch(term, db='pubmed', field=None, +def _query_esearch(term, db='pubmed', field=None, reldate=None, daterange=None, datetype=None, - retmax=None, rettype=None, sort=None, + retmax=None, rettype=None, sort=None, validate=False, valid_fields=None, debug=False) : """ Search an Entrez database. @@ -391,7 +391,7 @@ def _query_esearch(term, db='pubmed', field=None, (implemented as mindate=2001&maxdate=2002/01/01) datetype: Limit dates to a specific date field based on database. - For example: datetype=edat + For example: datetype=edat retmax: Limit the number of items retrieved For example: retmax=100 @@ -453,7 +453,7 @@ def parse_esearch(page): ## Fetch records by Primary ID from an Entrez database -def _query_efetch(id, db='pubmed', +def _query_efetch(id, db='pubmed', retmax=None, retmode='xml', rettype='medline'): """ Fetch records by primary ID from an Entrez database. @@ -602,7 +602,7 @@ def _query_elink(id, term=None, db='all', dbfrom='pubmed', (implemented as mindate=2001&maxdate=2002/01/01) datetype: Limit dates to a specific date field based on database. - For example: datetype=edat + For example: datetype=edat retmode: Select the retrieval output format xml (default) @@ -649,14 +649,14 @@ def _query_elink(id, term=None, db='all', dbfrom='pubmed', def search_fetch_xml(term, db='pubmed', field=None, reldate=None, daterange=None, datetype=None, - retmax=None, sort=None, + retmax=None, sort=None, validate=False, valid_fields=None, retmode='xml', rettype='medline'): if validate and valid_fields == None: valid_fields,field_tags,field_info = field_dict(db) search_page = _query_esearch(term, db, field, reldate, daterange, datetype, - retmax, rettype='uilist', sort=sort, + retmax, rettype='uilist', sort=sort, validate=validate, valid_fields=valid_fields) pid_list = parse_esearch(search_page) if not pid_list: @@ -666,7 +666,7 @@ def search_fetch_xml(term, db='pubmed', field=None, def search_link(term, db='pubmed', field=None, reldate=None, daterange=None, datetype=None, - retmax=None, sort=None, + retmax=None, sort=None, validate=False, valid_fields=None, link_term=None, fromdb=None, cmd=None, linkname=None, link_holding=None, @@ -677,7 +677,7 @@ def search_link(term, db='pubmed', field=None, valid_fields,field_tags,field_info = field_dict(db) search_page = _query_esearch(term, db, field, reldate, daterange, datetype, - retmax, rettype='uilist', sort=sort, + retmax, rettype='uilist', sort=sort, validate=validate, valid_fields=valid_fields) pid_list = parse_esearch(search_page) link_page = _query_elink(pid_list, term=link_term, db=db, dbfrom=fromdb, @@ -963,7 +963,7 @@ free full text [sb] if __name__ == "__main__" : from optparse import OptionParser - + usage_string = """%prog [options] SEARCH_TERM (print medline xml matching search) | %prog -l [options] SEARCH_TERM (print links to entries matching search) | %prog -L [-d DATABASE] [-f FILE] (list databases) @@ -1079,7 +1079,7 @@ for more details. databases = database_list() for db in databases: print >> outfile, "\t%s" % db - + elif mode == 'explain': fields,tags,field_info = field_dict(db=options.database) if options.field == None : @@ -1106,7 +1106,7 @@ for more details. print >> outfile, "\t%*.*s\t%-*.*s" \ % (field_size[0], field_size[0], key, field_size[1], field_size[1], field_info[options.field][key]) - + elif mode == 'search': search_term = args[0] LOG.debug('output %s' % output) @@ -1154,6 +1154,6 @@ for more details. link_datetype=options.datetype, link_retmode=options.retmode,) print >> outfile, link_xml - + if options.filename != None : outfile.close()