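Add URL for Entrez help page; fall back to a plain (non-SOAP) eFetch URL request when loading the eFetch WSDL fails with HTTP 404.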
[blog.git] / posts / entrez / entrez.py
index 0a649421e1c87ddc4333a882859d7e9ff061526c..81ffdb4214ee2b25d0f3f5c9fb82a71e6d8a7f4c 100755 (executable)
@@ -59,12 +59,15 @@ Service ( eUtilsService ) tns="http://www.ncbi.nlm.nih.gov/soap/eutils/"
 """
 
 import logging as _logging
+from xml.sax.saxutils import unescape as _unescape
 import subprocess as _subprocess
 import sys as _sys
 import time as _time
+import urllib as _urllib
 
 import suds as _suds
 from suds.client import Client as _Client
+from suds.transport import TransportError as _TransportError
 
 # Platform constants
 _MSWINDOWS = _sys.platform == 'win32'
@@ -80,6 +83,7 @@ __version__ = '0.2'
 
 EUTILS_WSDL_URL = 'http://eutils.ncbi.nlm.nih.gov/soap/v2.0/eutils.wsdl'
 EFETCH_WSDL_URL = 'http://eutils.ncbi.nlm.nih.gov/soap/v2.0/efetch_%s.wsdl'
+EFETCH_PLAIN_URL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
 
 EUTILS_CLIENT = _Client(EUTILS_WSDL_URL)
 
@@ -380,6 +384,7 @@ if __name__ == '__main__':
             '2008-2011, W. Trevor King.',
             '',
             'See the docstrings in %prog or',
+            ' http://www.ncbi.nlm.nih.gov/books/NBK3837/',
             ' http://www.ncbi.nlm.nih.gov/entrez/query/static/'
              'eutils_help.html',
             ' http://www.ncbi.nlm.nih.gov/entrez/query/static/'
@@ -593,18 +598,43 @@ if __name__ == '__main__':
 
         if ret > 0:
             if output in ['medline', 'bibtex']:
-                LOG.info('run eFetch on %s' % options.database)
-                efetch_client = _Client(EFETCH_WSDL_URL % options.database)
-                f = efetch_client.service.run_eFetch(
-                    id=','.join(q.IdList.Id), tool=TOOL, email=EMAIL)
-                if hasattr(f, 'ERROR'):
-                    raise Exception(f.ERROR)
+                e = None
+                try:
+                    efetch_client = _Client(EFETCH_WSDL_URL % options.database)
+                except _TransportError, e:
+                    if e.httpcode != 404:
+                        raise
+                    LOG.warn(str(e))
+                if e:  # Fallback to straight URL fetch
+                    params = {
+                        'id': ','.join(q.IdList.Id),
+                        'tool': TOOL,
+                        'email': EMAIL,
+                        'db': options.database,
+                        'report': 'xml',
+                        }
+                    url = '%s?%s' % (
+                        EFETCH_PLAIN_URL, _urllib.urlencode(params))
+                    LOG.info('fallback to non-SOAP eFetch request: %s' % url)
+                    f = _urllib.urlopen(url)
+                    xml = f.read()
+                    f.close()
+                    # Remove wrapping HTML and unescape XML
+                    xml = xml.split('<pre>', 1)[-1]
+                    xml = xml.split('</pre>', 1)[0]
+                    xml = _unescape(xml, {'&quot;': '"'})
+                else:  # Use SOAP eFetch
+                    LOG.info('run eFetch on %s' % options.database)
+                    f = efetch_client.service.run_eFetch(
+                        id=','.join(q.IdList.Id), tool=TOOL, email=EMAIL)
+                    if hasattr(f, 'ERROR'):
+                        raise Exception(f.ERROR)
+                    xml = efetch_client.last_received()
 
             if output == 'medline':
-                outfile.write(str(efetch_client.last_received()).rstrip()+'\n')
+                outfile.write(str(xml).rstrip()+'\n')
             elif output == 'bibtex':
-                outfile.write(
-                    medline_xml_to_bibtex(str(efetch_client.last_received())))
+                outfile.write(medline_xml_to_bibtex(str(xml)))
             elif output == 'link':
                 LOG.info('run eLink on %s' % options.database)
                 f = EUTILS_CLIENT.service.run_eLink(