--- /dev/null
+#!/usr/bin/python
+#
+# Copyright (C) 1998-2004 Frederic Gobry
+# Copyright (C) 2008 W. Trevor King
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Code following John Vu's medline query code in pybliographer/Pyblio/Query.py.
+#
+# Python interface to the Entrez databases.
+# See http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+# Current as of August 1, 2007
+#
+# Rules:
+# * Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time weekdays for any series of more than 100 requests.
+# * Send E-utilities requests to http://eutils.ncbi.nlm.nih.gov, not the standard NCBI Web address.
+# * Make no more than one request every 3 seconds.
+# * Use the URL parameter email, and tool for distributed software, so that we can track your project and contact you if there is a problem.
+# * NCBI's Disclaimer and Copyright notice must be evident to users of your service.
+# NLM does not claim the copyright on the abstracts in PubMed; however, journal publishers or authors may.
+# NLM provides no legal advice concerning distribution of copyrighted materials, consult your legal counsel.
+#
+# For a good Python-and-XML-DOM intro, see
+# http://www.boddie.org.uk/python/XML_intro.html
+# for the official docs, see
+# http://docs.python.org/lib/module-xml.dom.html
+
+"""Python bindings on Entrez database queries.
+"""
+
+import urllib, sys
+
+# DOM module for parsing XML,
+# supports Document Object Model (DOM) Level 1 Specification
+# http://docs.python.org/lib/module-xml.dom.minidom.html
+import xml.dom.minidom as dom
+
+# For calling the bibutils conversion programs
+from popen2 import popen2
+
+# Entrez access points
+einfo_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
+esearch_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
+efetch_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
+elink_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
+
+# Entrez-requested tracking information
+TOOL = 'entrezpy'
+EMAIL = 'wking@drexel.edu'
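+
+# NCBI asks for no more than one request every 3 seconds (see the Rules
+# above).  A minimal throttling sketch for a scripted series of queries
+# (query_fn and terms are hypothetical placeholders):
+#   import time
+#   for term in terms :
+#       page = query_fn(term)
+#       time.sleep(3) # at most one request every 3 seconds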
+
+## XML and list utility functions
+
+def urlencode(param_dict) :
+    """
+    Build a URL query string from a parameter dict,
+    skipping any parameters whose value is None.
+    """
+    params = ""
+    for key,value in param_dict.items() :
+        if value == None :
+            continue # ignore unused parameter
+        params += "%s=%s&" % (key, str(value))
+    if len(params) > 0 :
+        params = params[:-1] # remove trailing &
+    return params
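+
+# For example (key order follows dict iteration order, which is arbitrary):
+#   urlencode({'db':'pubmed', 'retmax':None, 'tool':TOOL})
+#   # -> 'db=pubmed&tool=entrezpy' (None-valued parameters are dropped)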
+
+def unique(seq, keepstr=True):
+ """
+ Return the sequence (list, tuple, etc) without repeating entries
+ by Paul Rubin and Jordan Callicoat.
+ http://groups.google.com/group/comp.lang.python/browse_thread/thread/40c6c455f4fd5154/744a1a338afe1331?lnk=gst&rnum=7#744a1a338afe1331
+
+ for example [1,2,3,1,2] -> [1,2,3]
+ """
+ t = type(seq)
+ if t in (str, unicode):
+ t = (list, ''.join)[bool(keepstr)]
+ seen = []
+ return t(c for c in seq if not (c in seen or seen.append(c)))
+
+def get_text(node) :
+ """
+ Given a node (<node-name> in the following example),
+ extract some-text from '<node-name>some-text</node-name>'
+ returns u'some-text'.
+ However, if the xml is '</node-name>' returns None
+ """
+ if len(node.childNodes) == 1:
+ data = node.childNodes[0].data
+ elif len(node.childNodes) == 0: # empty node
+ data = None
+ else :
+ raise Exception, "Node contains more than text"
+ return data
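+
+# For example (a minimal sketch using xml.dom.minidom directly):
+#   doc = dom.parseString('<DbName>pubmed</DbName>')
+#   get_text(doc.documentElement)  # -> u'pubmed'
+#   doc.unlink()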
+
+def get_child_nodes(node, child_name):
+    """
+    Given a node (<node-name> in the following example),
+    return an array of child nodes matching <child-name>
+    """
+    ret = []
+    for n in node.childNodes:
+        if n.nodeType != n.ELEMENT_NODE:
+            continue # ignore text, comment, etc. nodes
+        if n.tagName == child_name :
+            ret.append(n)
+    return ret
+
+def get_child_node(node, child_name):
+    """
+    Given a node (<node-name> in the following example),
+    return the single child node matching <child-name>
+    """
+    nodes = get_child_nodes(node, child_name)
+    assert len(nodes) == 1, "%d child nodes named %s" % (len(nodes), child_name)
+    return nodes[0]
+
+def get_child_contents(node, child_name):
+ """
+ Given a node (<node-name> in the following example),
+ extract some-text from '<node-name>
+ <some-tag>some-text</some-tag>
+ <other-tag>other-text</other-tag>
+ <some-tag>some-other-text</some-tag>
+ ...
+ </node-name>'
+ Returns ['some-text', 'some-other-text', ...]
+ """
+ nodes = get_child_nodes(node, child_name)
+ ret = []
+ for n in nodes:
+ ret.append(get_text(n))
+ return ret
+
+def get_child_dict(node):
+    """
+    Given a node (<node-name> in the following example),
+    extract the text contents of '<node-name>
+                                    <some-tag>some-text</some-tag>
+                                    <other-tag>other-text</other-tag>
+                                    <some-tag>some-other-text</some-tag>
+                                    ...
+                                  </node-name>'
+    Returns ({'some-tag':['some-text', 'some-other-text', ...],
+              'other-tag':['other-text']},
+             ['some-tag', 'other-tag'])  # tag order is preserved
+    """
+    d = {}
+    tags = [] # to preserve order of tags
+    for n in node.childNodes:
+        if n.nodeType != n.ELEMENT_NODE:
+            continue # ignore text, comment, etc. nodes
+        try: # another entry for an existing tag
+            d[n.tagName].append(get_text(n))
+        except KeyError: # new tag
+            d[n.tagName] = [get_text(n)]
+            tags.append(n.tagName)
+    return (d, tags)
+
+def delist_dict(d) :
+    """
+    Given a dict,
+    e.g. {'some-tag':['some-text', 'some-other-text', ...],
+          'other-tag':['other-text'], ...} ,
+    replace any value that is a list of length 1 with its single element,
+    e.g. {'some-tag':['some-text', 'some-other-text', ...],
+          'other-tag':'other-text', ...}
+    """
+    for key,value in d.items() :
+        if isinstance(value, list) and len(value) == 1 :
+            d[key] = value[0]
+    return d
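+
+# For example (an illustrative sketch):
+#   doc = dom.parseString(
+#       '<Field><Name>AFFL</Name><FullName>Affiliation</FullName></Field>')
+#   d,tags = get_child_dict(doc.documentElement)
+#   # d    -> {u'Name': [u'AFFL'], u'FullName': [u'Affiliation']}
+#   # tags -> [u'Name', u'FullName']
+#   delist_dict(d)  # -> {u'Name': u'AFFL', u'FullName': u'Affiliation'}
+#   doc.unlink()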
+
+## Get information about the Entrez databases themselves
+
+def _query_einfo(db=None, debug=False) :
+ """
+ Get information about the Entrez databases themselves.
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html
+
+    Either list all available databases (db=None), or get specific
+    information on a particular database (e.g. db='pubmed').
+ """
+ params = urlencode ({
+ 'db': db,
+ 'tool' : TOOL,
+ 'email' : EMAIL})
+
+ if debug :
+ print "Getting einfo from '%s?%s'" % (einfo_url, params)
+ f = urllib.urlopen ("%s?%s" % (einfo_url, params))
+ string = f.read()
+ f.close()
+ if debug == True:
+ print string
+ print ""
+ return string
+
+def get_parsed_einfo(db=None, page=None, parsed=None, debug=False):
+ """
+ Helper function for various einfo processing functions.
+ Allow each processor to function
+ independently (page=None, parsed=None),
+ with a shared xml string (page=<xml-string>, parsed=None), or
+ with a shared parsed xml structure (page=*, parsed=<parsed_xml>).
+ Use clean_parsed_einfo() for cleanup
+ """
+ if page == None and parsed == None:
+ if debug == True : print "Downloading new einfo page"
+ page = _query_einfo(db)
+ if parsed == None :
+ if debug == True : print "Parsing new einfo page"
+ parsed = dom.parseString(page)
+ parsed_islocal = True
+ else :
+ if debug == True : print "Using old einfo parsing"
+ parsed_islocal = False
+ return (parsed, parsed_islocal)
+
+def clean_parsed_einfo(parsed, parsed_islocal=True, debug=False):
+ """
+ Helper function for various einfo processing functions.
+ Clean up the parsed xml structure if the calling function created it.
+ """
+ if parsed_islocal == True :
+ if debug == True : print "Cleaning up einfo parsing"
+ parsed.unlink() # clean up the DOM
+
+def database_list(page=None, parsed=None, debug=False):
+ parsed,parsed_islocal = get_parsed_einfo(page=page, parsed=parsed, debug=debug)
+ databases = []
+ for node in parsed.getElementsByTagName("DbName"):
+ # Extract some-text from '<DbName>some-text</DbName>'
+ # by default, xml.dom.minidom uses unicode,
+ # so strings get printed: "u'string contents'"
+ databases.append(get_text(node))
+ clean_parsed_einfo(parsed,parsed_islocal, debug=debug)
+ return databases
+
+def field_dict(db='pubmed', page=None, parsed=None, debug=False):
+ parsed,parsed_islocal = get_parsed_einfo(db, page, parsed, debug)
+ fields = []
+ tags = []
+ field_info = {}
+ fieldlists = parsed.getElementsByTagName("FieldList")
+ assert len(fieldlists) == 1, "%s\n\n%d FieldLists!" % (parsed.toxml(), len(fieldlists))
+ fieldlist = fieldlists[0]
+ for node in fieldlist.childNodes:
+ if node.nodeType != node.ELEMENT_NODE :
+ continue # ignore text, comment, etc. nodes
+ assert node.tagName == "Field", "Unrecognized tag '%s' in FieldList" % node.tagName
+ field,new_tags = get_child_dict(node)
+ assert len(field['Name']) == 1, "Multiple field names %s" % str(field['Name'])
+ field = delist_dict(field)
+ fields.append(field['Name'])
+ new_tags = unique(tags + new_tags)
+ if tags != []:
+ assert new_tags == tags, "Inconsistent tags"
+ tags = new_tags
+ field_info[field['Name']] = field
+ clean_parsed_einfo(parsed,parsed_islocal, debug)
+ return (fields, tags, field_info)
+
+def link_dict(db='pubmed', page=None, parsed=None, debug=False):
+ parsed,parsed_islocal = get_parsed_einfo(db, page, parsed, debug)
+ links = []
+ tags = []
+    link_info = {}
+ linklists = parsed.getElementsByTagName("LinkList")
+ assert len(linklists) == 1, "%s\n\n%d LinkLists!" % (parsed.toxml(), len(linklists))
+ linklist = linklists[0]
+ for node in linklist.childNodes:
+ if node.nodeType != node.ELEMENT_NODE :
+ continue # ignore text, comment, etc. nodes
+ assert node.tagName == "Link", "Unrecognized tag '%s' in LinkList" % node.tagName
+ link,new_tags = get_child_dict(node)
+ assert len(link['Name']) == 1, "Multiple link names %s" % str(link['Name'])
+ link = delist_dict(link)
+ links.append(link['Name'])
+ new_tags = unique(tags + new_tags)
+ if tags != []:
+ assert new_tags == tags, "Inconsistent tags"
+ tags = new_tags
+ link_info[link['Name']] = link
+ clean_parsed_einfo(parsed,parsed_islocal, debug)
+ return (links, tags, link_info)
+
+def database_info(db='pubmed', page=None, parsed=None, debug=False):
+ "Convenience function to call both field_dict and link_dict"
+ parsed,parsed_islocal = get_parsed_einfo(db, page, parsed, debug)
+ fields,field_tags,field_info = field_dict(db=db, parsed=parsed, debug=debug)
+ links,link_tags,link_info = link_dict(db=db, parsed=parsed, debug=debug)
+ clean_parsed_einfo(parsed,parsed_islocal, debug=debug)
+ return (fields, field_tags, field_info, links, link_tags, link_info)
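+
+# For example (an illustrative sketch; performs a network request):
+#   page = _query_einfo('pubmed')
+#   fields,field_tags,field_info,links,link_tags,link_info = \
+#       database_info(db='pubmed', page=page)
+#   # 'AUTH' should appear in fields for pubmed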
+
+def validate_field(field, fields):
+ "Ensure that field is a valid field for the database db."
+ try :
+ fields.index(field.upper())
+ except ValueError:
+ raise Exception, "Field '%s' invalid\nValid fields are\n %s" \
+ % (field, str(fields))
+
+def strip_fields_from_term(term):
+ "HACK: really stupid algorithm"
+ fields = []
+ infield = False
+ for i in range(len(term)):
+ if term[i] == '[' and infield == False :
+ infield = True
+ field_start = i+1
+ elif term[i] == ']' and infield == True :
+ infield = False
+ fields.append(term[field_start:i])
+ return fields
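+
+# For example:
+#   strip_fields_from_term('asthma[MESH]+OR+hay+fever[MESH]')
+#   # -> ['MESH', 'MESH']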
+
+def validate_search_term(term, fields):
+ "Ensure that the fields in term are valid fields for the database db."
+ for field in strip_fields_from_term(term) :
+ validate_field(field, fields)
+
+
+## Search an Entrez database
+
+def _query_esearch(term, db='pubmed', field=None,
+ reldate=None, daterange=None, datetype=None,
+ retmax=None, rettype=None, sort=None,
+ validate=False, valid_fields=None, debug=False) :
+ """
+ Search an Entrez database.
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
+
+ Does not currently support the usehistory, WebEnv, query_key, retstart, or retmode parameters.
+
+ Help with the arguments adapted from esearch_help.html:
+
+ term: This command uses search terms or phrases with or without Boolean operators.
+ You can search in several fields using the [term field] tag.
+ You can search in a single field using the 'field' parameter below.
+       "You may also tag search terms using field=tag." (so says the
+       esearch help; the intended meaning of that line is unclear)
+ For example: term=asthma[MESH]+OR+hay+fever[MESH]
+ 'term=asthma[MESH]' is the same as 'term=asthma&field=MESH'
+ ( http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helpentrez.section.EntrezHelp.Writing_Advanced_Sea )
+
+ db: This command selects the database to be searched
+ For example: db=pubmed
+
+ field: Use this command to specify a specific search field.
+ PubMed fields: affl, auth, ecno, jour, iss, mesh,...
+ Retrieve with field_dict('pubmed')
+ For example: field=auth
+
+    reldate: Limit items to the number of days immediately preceding today's date.
+ For example: reldate=365
+
+ daterange: Limit results bounded by two specific dates.
+ For example: daterange=('2001', '2002/01/01')
+ (implemented as mindate=2001&maxdate=2002/01/01)
+
+ datetype: Limit dates to a specific date field based on database.
+ For example: datetype=edat
+
+ retmax: Limit the number of items retrieved
+ For example: retmax=100
+
+ rettype: Select the retrieval type
+ PubMed values: count, uilist (default)
+
+ sort: Sort the returned uilist
+ PubMed values: author, last+author, journal, pub+date
+
+ """
+    if daterange != None :
+        assert len(daterange) == 2, \
+            "Invalid daterange '%s', should be e.g. ('2001', '2002/01/01')" \
+            % (daterange,)
+        assert reldate == None, "Specifying date with daterange AND reldate!"
+        mindate = daterange[0]
+        maxdate = daterange[1]
+    else :
+        mindate = None
+        maxdate = None
+ if validate :
+ assert len(valid_fields) > 0, "Need a list of valid fields to validate"
+ if field != None :
+            validate_field(field, valid_fields)
+ validate_search_term(term, valid_fields)
+    params = urlencode ({
+        'tool' : TOOL,
+        'email' : EMAIL,
+        'term' : term,
+        'db': db,
+        'field' : field,
+        'reldate' : reldate,
+        'mindate' : mindate,
+        'maxdate' : maxdate,
+        'datetype' : datetype,
+        'retmax' : retmax,
+        'rettype' : rettype,
+        'sort' : sort})
+
+ if debug :
+ print "Getting esearch from '%s?%s'" % (esearch_url, params)
+ f = urllib.urlopen ("%s?%s" % (esearch_url, params))
+ string = f.read()
+ f.close()
+ if debug == True:
+ print string
+ print ""
+ return string
+
+def parse_esearch(page):
+ "Parse the xml returned by _query_esearch()"
+ parsed = dom.parseString(page)
+
+ pid_list = []
+ for node in parsed.getElementsByTagName("Id"):
+ pid_list.append(get_text(node))
+
+ parsed.unlink()
+
+ return pid_list
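+
+# For example (an illustrative sketch; performs a network request, and
+# the returned IDs will vary):
+#   page = _query_esearch('arrhenius[AUTH]', db='pubmed', retmax=3)
+#   pid_list = parse_esearch(page)  # e.g. [u'11877539', ...]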
+
+
+## Fetch records by Primary ID from an Entrez database
+
+def _query_efetch(id, db='pubmed',
+ retmax=None, retmode='xml', rettype='medline',
+ debug=False) :
+ """
+ Fetch records by primary ID from an Entrez database.
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
+
+
+ Does not currently support the usehistory, WebEnv, query_key, or retstart parameters.
+
+ Help with the arguments adapted from efetchlit_help.html:
+
+ id: Primary UIs identifying the documents to fetch
+ For example: 'id=11877539, 11822933,11871444'
+
+ db: This command selects the database to be searched
+ For example: db=pubmed
+
+ retmax: Limit the number of items retrieved (default 20)
+ For example: retmax=100
+
+ retmode: Select the retrieval output format
+ xml (not journals)
+ html
+ text
+ asn.1 (not journals)
+
+ rettype: Select the retrieval type
+ uilist
+ abstract (not omim)
+ citation (not omim)
+ medline (not omim)
+ full (journals and omim)
+
+ Not all retmodes are possible with all rettypes:
+ PubMed Options:
+ uilist abstract citation medline
+ xml x x* x* x*
+ text x x x x
+ html x x x x
+ asn.1 n/a x* x* x
+ x = retrieval mode available
+ * returned retrieval type is the complete record in the retrieval mode
+ n/a - not available
+ OMIM Options: (not case sensitive)
+ uilist docsum synopsis variants detailed ExternalLink
+ (MIM (Clinical (Allelic
+ numbers) synopsis) Variants)
+ xml x x* x* x* x* x*
+ text x x x x x* x*
+ html x x x x x* x*
+ asn.1 x* x* x* x* x* x*
+ x = retrieval mode available
+ * returned retrieval type is the complete record in the retrieval mode
+ n/a - not available
+
+ """
+ idstring = ""
+ for d in id :
+ idstring += "%s," % d
+ idstring = idstring[:-1] # remove trailing comma
+ params = urlencode ({
+ 'tool' : TOOL,
+ 'email' : EMAIL,
+ 'id' : idstring,
+ 'db': db,
+ 'retmax' : retmax,
+ 'retmode' : retmode,
+ 'rettype' : rettype})
+
+ if debug :
+ print "Getting efetch from '%s?%s'" % (efetch_url, params)
+ f = urllib.urlopen ("%s?%s" % (efetch_url, params))
+ string = f.read()
+ f.close()
+ if debug == True:
+ print string
+ print ""
+ return string
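+
+# For example (an illustrative sketch; performs a network request):
+#   xml_page = _query_efetch(['11877539'], db='pubmed',
+#                            retmode='xml', rettype='medline')
+#   # xml_page holds the medline xml for that citation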
+
+
+## Fetch links by Primary ID from an Entrez database
+
+def _query_elink(id, term=None, db='all', dbfrom='pubmed',
+ cmd=None, linkname=None, holding=None,
+ version=1,
+ reldate=None, daterange=None, datetype=None,
+ retmode='xml',
+ debug=False) :
+ """
+ Fetch links from a list of primary IDs in an Entrez database.
+ http://eutils.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html
+ http://www.ncbi.nlm.nih.gov/entrez/query/static/entrezlinks.html
+
+ Does not currently support the WebEnv or query_key parameters.
+
+ Help with the arguments adapted from efetchlit_help.html:
+
+ id: Primary UIs identifying the documents to fetch
+ For example: 'id=11877539, 11822933,11871444'
+
+ term: This command uses search terms or phrases with or without Boolean operators
+ to limit the returned matching links.
+
+ db: This command selects the databases to be searched for link targets.
+ For example: db=all
+
+ dbfrom: This command selects the database containing the ids.
+ For example: dbfrom=pubmed
+
+
+ cmd: Link commands
+ * prlinks - List the hyperlink to the primary LinkOut provider for
+ multiple IDs and database. Each ID is processed separately.
+ * prlinks&retmode=ref - Create a hyperlink to the primary LinkOut provider
+ for a single ID and database. Return the elink
+ command, since fetching it breaks the relative
+ links in the publisher's page.
+ * llinks - List LinkOut URLs and Attributes, except PubMed libraries, for
+ multiple IDs and database. Each ID is processed separately.
+ * llinkslib - List LinkOut URLs and Attributes for multiple IDs and
+ database. Each ID is processed separately.
+ * lcheck - Check for the existence (Y or N) of an external link in for
+ multiple IDs and database.
+ * ncheck - Check for the existence of a neighbor link for each ID within
+ a database, e.g., Related Articles in PubMed.
+ * neighbor - Display neighbors within a database.
+ * neighbor_history - Create history (WebEnv & query_key) for use in other
+ EUtilities.
+ * acheck - Lists Entrez databases links for multiple IDs from a single
+ database.
+
+ linkname: link to a specific neighbor subset
+ For example: linkname=nucleotide_nucleotide_comp
+
+ holding: List LinkOut URLs for the specified holding provider, (library).
+ Used only in conjunction with cmd=llinks or cmd=llinkslib
+ For example: cmd=llinkslib&holding=medlib
+
+ version: Include a version number to refer to the latest DTD.
+ For example: version=1
+ retrieves the latest DTD (eLink_050511.dtd) that includes the additional
+ elements, MenuTag, LinkInfo and IdLinkSet.
+
+    Date commands are only valid for dbfrom=pubmed & cmd=neighbor
+    reldate: Limit items to the number of days immediately preceding today's date.
+ For example: reldate=365
+
+ daterange: Limit results bounded by two specific dates.
+ For example: daterange=('2001', '2002/01/01')
+ (implemented as mindate=2001&maxdate=2002/01/01)
+
+ datetype: Limit dates to a specific date field based on database.
+ For example: datetype=edat
+
+ retmode: Select the retrieval output format
+ xml (default)
+ ref (only used with cmd=prlinks for one ID)
+
+ """
+ idstring = ""
+ for d in id :
+ idstring += "%s," % d
+ idstring = idstring[:-1] # remove trailing comma
+
+ params = urlencode ({
+ 'tool' : TOOL,
+ 'email' : EMAIL,
+ 'id' : idstring,
+ 'term': term,
+ 'db': db,
+ 'dbfrom': dbfrom,
+ 'cmd': cmd,
+ 'linkname': linkname,
+ 'holding': holding,
+ 'version': version,
+ 'reldate': reldate,
+ 'daterange': daterange,
+ 'datetype': datetype,
+ 'retmode' : retmode})
+
+ if debug :
+ print "Getting elink from '%s?%s'" % (elink_url, params)
+ f = urllib.urlopen ("%s?%s" % (elink_url, params))
+
+ if cmd == 'prlinks' and retmode == 'ref' :
+ # Just get the link, we don't need the provider's webpage HTML.
+ url = f.geturl()
+ f.close()
+ return url
+
+ string = f.read()
+ f.close()
+ if debug == True:
+ print string
+ print ""
+ return string
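+
+# For example (an illustrative sketch; performs a network request):
+#   url = _query_elink(['11877539'], dbfrom='pubmed',
+#                      cmd='prlinks', retmode='ref')
+#   # url should point at the primary LinkOut provider for that article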
+
+
+## Combining the searching and parsing (dropping some of the less used features)
+
+def search_fetch_xml(term, db='pubmed', field=None,
+ reldate=None, daterange=None, datetype=None,
+ retmax=None, sort=None,
+ validate=False, valid_fields=None,
+ retmode='xml', rettype='medline',
+ debug=False) :
+ if validate and valid_fields == None:
+ valid_fields,field_tags,field_info = field_dict(db, debug=debug)
+ search_page = _query_esearch(term, db, field,
+ reldate, daterange, datetype,
+ retmax, rettype='uilist', sort=sort,
+ validate=validate, valid_fields=valid_fields,
+ debug=debug)
+ pid_list = parse_esearch(search_page)
+ fetch_page = _query_efetch(pid_list, db, retmax, retmode, rettype, debug)
+ return fetch_page
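+
+# For example (an illustrative sketch; performs two network requests):
+#   medline_xml = search_fetch_xml('arrhenius[AUTH]', retmax=3)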
+
+def search_link(term, db='pubmed', field=None,
+ reldate=None, daterange=None, datetype=None,
+ retmax=None, sort=None,
+ validate=False, valid_fields=None,
+ link_term=None, fromdb=None,
+ cmd=None, linkname=None, link_holding=None,
+ version=1,
+ link_reldate=None, link_daterange=None, link_datetype=None,
+ link_retmode='xml',
+ debug=False) :
+ if validate and valid_fields == None:
+ valid_fields,field_tags,field_info = field_dict(db, debug=debug)
+ search_page = _query_esearch(term, db, field,
+ reldate, daterange, datetype,
+ retmax, rettype='uilist', sort=sort,
+ validate=validate, valid_fields=valid_fields,
+ debug=debug)
+ pid_list = parse_esearch(search_page)
+ link_page = _query_elink(pid_list, term=link_term, db=db, dbfrom=fromdb,
+ cmd=cmd, linkname=linkname, holding=link_holding,
+ version=version,reldate=link_reldate,
+ daterange=link_daterange, datetype=link_datetype,
+ retmode=link_retmode,
+ debug=debug)
+ return link_page
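+
+# For example (an illustrative sketch; performs two network requests):
+#   link_xml = search_link('arrhenius[AUTH]', retmax=3, cmd='llinks')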
+
+## Use the external bibutils package to convert to BibTeX format
+
+def medline_xml_to_bibtex(fetch_page):
+    """
+    Convert medline xml to BibTeX by piping it through the external
+    med2xml, xml2bib (bibutils), and bibclean programs.
+    """
+    child_stdout,child_stdin = popen2("med2xml | xml2bib -fc | bibclean")
+    print >> child_stdin, fetch_page
+    child_stdin.close()
+    bibtex = child_stdout.read()
+    child_stdout.close()
+    return bibtex
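+
+# For example (assumes the bibutils tools med2xml and xml2bib, plus
+# bibclean, are installed and on your PATH):
+#   print medline_xml_to_bibtex(search_fetch_xml('arrhenius[AUTH]', retmax=1))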
+
+## Random
+
+def hints(option=None, opt_str=None, value=None, parser=None) :
+    """
+    Print Entrez search hints and exit.
+    The optional arguments let this double as an optparse callback.
+    """
+    print """
+free full text [sb]
+"""
+    sys.exit(0)
+
+## Test with a mini-searching application
+
+if __name__ == "__main__" :
+ from optparse import OptionParser
+
+ usage_string = """%prog [options] SEARCH_TERM (print medline xml matching search)
+ | %prog -l [options] SEARCH_TERM (print links to entries matching search)
+ | %prog -L [-d DATABASE] [-f FILE] (list databases)
+ | %prog -X [-d DATABASE] [-F FIELD] [-f FILE] (list fields in a database, or details on a single field)
+
+2008, W. Trevor King.
+
+See the docstrings in %prog or
+ http://www.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+for more details.
+"""
+ parser = OptionParser(usage=usage_string, version="%prog 0.1")
+
+    # Explanation by Jerry Stratton, http://www.hoboes.com/Mimsy/?ART=511
+ # "
+ # metavar is the name used in the help for that options required text,
+ # and dest is the name of the property you'll use to access the value of that option.
+ # "
+
+ parser.add_option('-d', '--database', dest="database",
+ help="Search DATABASE (default '%default')",
+ type='string', metavar="DATABASE", default='pubmed')
+ parser.add_option('-f', '--file', dest="filename",
+ help="write output to FILE (default stdout)",
+ type='string', metavar="FILE")
+ parser.add_option('-v', '--verbose', dest="verbose", action="store_true",
+ help="Print lots of debugging information",
+ default=False)
+ parser.add_option('-H', '--hints', callback=hints,
+ help="Print Entrez search hints and exit",
+ action="callback")
+
+
+ # mode control options
+ mode = 'search'
+ def set_mode(option, opt_str, value, parser):
+ global mode
+ long_option = option.get_opt_string()
+ if long_option == '--list-mode' :
+ mode = 'list'
+ elif long_option == '--explain-mode' :
+ mode = 'explain'
+
+ parser.add_option('-L', '--list-mode', callback=set_mode,
+ help="Run in list mode", action="callback")
+ parser.add_option('-X', '--explain-mode', callback=set_mode,
+ help="Run in explain mode", action="callback")
+
+ # search-fetch-xml-to-? options
+ output = 'bibtex'
+ def set_output(option, opt_str, value, parser):
+ global output
+ long_option = option.get_opt_string()
+ if long_option == '--output-link' :
+ output = 'link'
+ parser.add_option('-W', '--raw', dest="raw", action="store_true",
+ help="Output raw Entrez xml", default=False)
+ parser.add_option('-F', '--field', dest="field",
+ help="Limit SEARCH_TERM to FIELD",
+ type='string', metavar="FIELD")
+ parser.add_option('-r', '--reldate', dest="reldate",
+ help="Limit search to dates within DAYS of today",
+ type='string', metavar="DAYS")
+ parser.add_option('-R', '--daterange', dest="daterange",
+ help="Limit search to dates within DATERANGE (e.g. '2001/1/1,2002')",
+ type='string', metavar="DATERANGE")
+ parser.add_option('-t', '--datetype', dest="datetype",
+ help="Select field to apply date limits to (e.g. 'edat' for Entrez date)",
+ type='string', metavar="DATETYPE")
+    parser.add_option('-m', '--retmax', dest="retmax",
+                      help="Return at most RETMAX items from a successful search (default %default)",
+                      type='string', metavar="RETMAX", default=20)
+ parser.add_option('-M', '--retmode', dest="retmode",
+ help="Select fetch/link output format",
+ type='string', metavar="RETMODE", default='xml')
+ parser.add_option('-V', '--validate', dest="validate", action="store_true",
+ help="Check that FIELD and field tags in SEARCH_TERM are valid for DB",
+ default=False)
+
+ # output link options
+ parser.add_option('-l', '--output-link', callback=set_output,
+ help="Output a link (instead of xml citations)",
+ action="callback")
+ parser.add_option('-c', '--link-cmd', dest="link_cmd",
+ help="Select link output",
+ type='string', metavar="LINK_CMD")
+ parser.add_option('-T', '--link-term', dest="link_term",
+ help="Limit links to those matching LINK_TERM",
+ type='string', metavar="LINK_TERM")
+    parser.add_option('-D', '--from-database', dest="fromdb",
+                      help="Limit links to those from FROMDATABASE",
+                      type='string', metavar="FROMDATABASE")
+ parser.add_option('-n', '--link-name', dest="linkname",
+ help="Limit links to a specific neighbor",
+ type='string', metavar="LINKNAME")
+
+ (options, args) = parser.parse_args()
+ parser.destroy()
+
+ # open the output file if specified
+ if options.filename == None :
+ outfile = sys.stdout
+ else :
+ outfile = file(options.filename, 'w')
+
+ if options.verbose :
+ print >> sys.stdout, "Operating in %s mode" % mode
+
+ if mode == 'list' :
+ print >> outfile, "Available databases:"
+ databases = database_list(debug=options.verbose)
+ for db in databases:
+ print >> outfile, "\t%s" % db
+
+ elif mode == 'explain':
+ fields,tags,field_info = field_dict(db=options.database,
+ debug=options.verbose)
+ if options.field == None :
+ print >> outfile, "Available fields in %s:" % options.database
+ field_size = [0,0]
+ for field in fields :
+ if len(field) > field_size[0] :
+ field_size[0] = len(field)
+ if len(field_info[field]['FullName']) > field_size[1] :
+ field_size[1] = len(field_info[field]['FullName'])
+ for field in fields :
+ print >> outfile, "\t%*.*s\t%-*.*s" \
+ % (field_size[0], field_size[0], field,
+ field_size[1], field_size[1], field_info[field]['FullName'])
+ else :
+ print >> outfile, "Field %s in %s:" % (options.field,options.database)
+ field_size = [0,0]
+ for key in tags:
+ if len(key) > field_size[0] :
+ field_size[0] = len(key)
+ if len(field_info[options.field][key]) > field_size[1] :
+ field_size[1] = len(field_info[options.field][key])
+ for key in tags:
+ print >> outfile, "\t%*.*s\t%-*.*s" \
+ % (field_size[0], field_size[0], key,
+ field_size[1], field_size[1], field_info[options.field][key])
+
+    elif mode == 'search':
+        if len(args) != 1 :
+            print >> sys.stderr, "Expected a single SEARCH_TERM argument"
+            sys.exit(1)
+        search_term = args[0]
+ if options.verbose :
+ print >> sys.stdout, "Output %s" % output
+
+ if output == 'bibtex' :
+ medline_xml = search_fetch_xml(term=search_term,
+ db=options.database,
+ field=options.field,
+ reldate=options.reldate,
+ daterange=options.daterange,
+ datetype=options.datetype,
+ retmax=options.retmax,
+ validate=options.validate,
+ retmode=options.retmode,
+ rettype='medline',
+ debug=options.verbose)
+            if options.raw :
+                print >> outfile, medline_xml
+ else :
+ bibtex = medline_xml_to_bibtex(medline_xml)
+ print >> outfile, bibtex
+
+ elif output == 'link' :
+ # Assume that if you're looking for links
+ # your search is already pretty refined,
+ # so use the date options for link-limiting.
+ link_xml = search_link(term=search_term,
+ db=options.database,
+ field=options.field,
+ reldate=None,
+ daterange=None,
+ datetype=None,
+ retmax=None,
+ sort=None,
+ validate=options.validate,
+ valid_fields=None,
+ link_term=options.link_term,
+ fromdb=options.fromdb,
+ cmd=options.link_cmd,
+ linkname=options.linkname,
+ link_holding=None,
+ version=1,
+ link_reldate=options.reldate,
+ link_daterange=options.daterange,
+ link_datetype=options.datetype,
+ link_retmode=options.retmode,
+ debug=options.verbose)
+ print >> outfile, link_xml
+
+ if options.filename != None :
+ outfile.close()