From: W. Trevor King Date: Sun, 17 Apr 2011 01:32:48 +0000 (-0400) Subject: Add URL for Entrez help page. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=73a9e661da75eb19f8de6f4c92a59838c225781a;p=mw2txt.git Add URL for Entrez help page. --- diff --git a/posts/entrez/entrez.py b/posts/entrez/entrez.py index 0a64942..81ffdb4 100755 --- a/posts/entrez/entrez.py +++ b/posts/entrez/entrez.py @@ -59,12 +59,15 @@ Service ( eUtilsService ) tns="http://www.ncbi.nlm.nih.gov/soap/eutils/" """ import logging as _logging +from xml.sax.saxutils import unescape as _unescape import subprocess as _subprocess import sys as _sys import time as _time +import urllib as _urllib import suds as _suds from suds.client import Client as _Client +from suds.transport import TransportError as _TransportError # Platform constants _MSWINDOWS = _sys.platform == 'win32' @@ -80,6 +83,7 @@ __version__ = '0.2' EUTILS_WSDL_URL = 'http://eutils.ncbi.nlm.nih.gov/soap/v2.0/eutils.wsdl' EFETCH_WSDL_URL = 'http://eutils.ncbi.nlm.nih.gov/soap/v2.0/efetch_%s.wsdl' +EFETCH_PLAIN_URL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi' EUTILS_CLIENT = _Client(EUTILS_WSDL_URL) @@ -380,6 +384,7 @@ if __name__ == '__main__': '2008-2011, W. 
Trevor King.', '', 'See the docstrings in %prog or', + ' http://www.ncbi.nlm.nih.gov/books/NBK3837/', ' http://www.ncbi.nlm.nih.gov/entrez/query/static/' 'eutils_help.html', ' http://www.ncbi.nlm.nih.gov/entrez/query/static/' @@ -593,18 +598,43 @@ if __name__ == '__main__': if ret > 0: if output in ['medline', 'bibtex']: - LOG.info('run eFetch on %s' % options.database) - efetch_client = _Client(EFETCH_WSDL_URL % options.database) - f = efetch_client.service.run_eFetch( - id=','.join(q.IdList.Id), tool=TOOL, email=EMAIL) - if hasattr(f, 'ERROR'): - raise Exception(f.ERROR) + e = None + try: + efetch_client = _Client(EFETCH_WSDL_URL % options.database) + except _TransportError, e: + if e.httpcode != 404: + raise + LOG.warn(str(e)) + if e: # Fallback to straight URL fetch + params = { + 'id': ','.join(q.IdList.Id), + 'tool': TOOL, + 'email': EMAIL, + 'db': options.database, + 'report': 'xml', + } + url = '%s?%s' % ( + EFETCH_PLAIN_URL, _urllib.urlencode(params)) + LOG.info('fallback to non-SOAP eFetch request: %s' % url) + f = _urllib.urlopen(url) + xml = f.read() + f.close() + # Remove wrapping HTML and unescape XML + xml = xml.split('
<pre>', 1)[-1]
+                    xml = xml.split('</pre>', 1)[0] + xml = _unescape(xml, {'&quot;': '"'}) + else: # Use SOAP eFetch + LOG.info('run eFetch on %s' % options.database) + f = efetch_client.service.run_eFetch( + id=','.join(q.IdList.Id), tool=TOOL, email=EMAIL) + if hasattr(f, 'ERROR'): + raise Exception(f.ERROR) + xml = efetch_client.last_received() if output == 'medline': - outfile.write(str(efetch_client.last_received()).rstrip()+'\n') + outfile.write(str(xml).rstrip()+'\n') elif output == 'bibtex': - outfile.write( - medline_xml_to_bibtex(str(efetch_client.last_received()))) + outfile.write(medline_xml_to_bibtex(str(xml))) elif output == 'link': LOG.info('run eLink on %s' % options.database) f = EUTILS_CLIENT.service.run_eLink(