From: W. Trevor King Date: Sun, 17 Apr 2011 01:53:11 +0000 (-0400) Subject: Fallback to posting HTTP URLs if XML fetch fails in entrez.py. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=a4e55cd13ae9c396635868d117b4620ba5057a70;p=mw2txt.git Fallback to posting HTTP URLs if XML fetch fails in entrez.py. --- diff --git a/posts/entrez/entrez.py b/posts/entrez/entrez.py index 81ffdb4..11356aa 100755 --- a/posts/entrez/entrez.py +++ b/posts/entrez/entrez.py @@ -84,6 +84,7 @@ __version__ = '0.2' EUTILS_WSDL_URL = 'http://eutils.ncbi.nlm.nih.gov/soap/v2.0/eutils.wsdl' EFETCH_WSDL_URL = 'http://eutils.ncbi.nlm.nih.gov/soap/v2.0/efetch_%s.wsdl' EFETCH_PLAIN_URL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi' +NCBI_PLAIN_URL = 'http://www.ncbi.nlm.nih.gov/%s/%s' EUTILS_CLIENT = _Client(EUTILS_WSDL_URL) @@ -620,9 +621,16 @@ if __name__ == '__main__': xml = f.read() f.close() # Remove wrapping HTML and unescape XML + #LOG.debug('raw data:\n%s' % xml) xml = xml.split('<pre>', 1)[-1]
                     xml = xml.split('</pre>', 1)[0] xml = _unescape(xml, {'&quot;': '"'}) + #LOG.debug('xml data:\n%s' % xml) + if not xml.strip(): # + urls = [NCBI_PLAIN_URL % (options.database, id) + for id in q.IdList.Id] + LOG.warn( + 'no meaningful output; try:\n%s' % '\n'.join(urls)) else: # Use SOAP eFetch LOG.info('run eFetch on %s' % options.database) f = efetch_client.service.run_eFetch( @@ -631,7 +639,9 @@ if __name__ == '__main__': raise Exception(f.ERROR) xml = efetch_client.last_received() - if output == 'medline': + if output is None: + pass # we're bailing + elif output == 'medline': outfile.write(str(xml).rstrip()+'\n') elif output == 'bibtex': outfile.write(medline_xml_to_bibtex(str(xml)))