# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-# Code following John Vu's medline query code pybliographer/Pyblio/Query.py,
+# Code following John Vu's medline query code pybliographer/Pyblio/Query.py,
#
# Python interface to the Entrez databases.
# See http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
# NLM does not claim the copyright on the abstracts in PubMed; however, journal publishers or authors may.
# NLM provides no legal advice concerning distribution of copyrighted materials; consult your legal counsel.
#
-# For a good Python-and-XML-DOM intro, see
+# For a good Python-and-XML-DOM intro, see
# http://www.boddie.org.uk/python/XML_intro.html
# for the official docs, see
# http://docs.python.org/lib/module-xml.dom.html
# supports Document Object Model (DOM) Level 1 Specification
# http://docs.python.org/lib/module-xml.dom.minidom.html
import xml.dom.minidom as dom
-
+
# For calling the bibutils conversion programs
from subprocess import Popen, PIPE
Return the sequence (list, tuple, etc) without repeating entries
by Paul Rubin and Jordan Callicoat.
http://groups.google.com/group/comp.lang.python/browse_thread/thread/40c6c455f4fd5154/744a1a338afe1331?lnk=gst&rnum=7#744a1a338afe1331
-
+
for example [1,2,3,1,2] -> [1,2,3]
"""
t = type(seq)
field_info[field['Name']] = field
clean_parsed_einfo(parsed,parsed_islocal)
return (fields, tags, field_info)
-
+
def link_dict(db='pubmed', page=None, parsed=None):
parsed,parsed_islocal = get_parsed_einfo(db, page, parsed)
links = []
links,link_tags,link_info = link_dict(db=db, parsed=parsed)
clean_parsed_einfo(parsed,parsed_islocal)
return (fields, field_tags, field_info, links, link_tags, link_info)
-
+
def validate_field(field, fields):
"Ensure that field is a valid field for the database db."
try :
## Search an Entrez database
-def _query_esearch(term, db='pubmed', field=None,
+def _query_esearch(term, db='pubmed', field=None,
reldate=None, daterange=None, datetype=None,
- retmax=None, rettype=None, sort=None,
+ retmax=None, rettype=None, sort=None,
validate=False, valid_fields=None, debug=False) :
"""
Search an Entrez database.
(implemented as mindate=2001&maxdate=2002/01/01)
datetype: Limit dates to a specific date field based on database.
- For example: datetype=edat
+ For example: datetype=edat
retmax: Limit the number of items retrieved
For example: retmax=100
## Fetch records by Primary ID from an Entrez database
-def _query_efetch(id, db='pubmed',
+def _query_efetch(id, db='pubmed',
retmax=None, retmode='xml', rettype='medline'):
"""
Fetch records by primary ID from an Entrez database.
(implemented as mindate=2001&maxdate=2002/01/01)
datetype: Limit dates to a specific date field based on database.
- For example: datetype=edat
+ For example: datetype=edat
retmode: Select the retrieval output format
xml (default)
def search_fetch_xml(term, db='pubmed', field=None,
reldate=None, daterange=None, datetype=None,
- retmax=None, sort=None,
+ retmax=None, sort=None,
validate=False, valid_fields=None,
retmode='xml', rettype='medline'):
if validate and valid_fields == None:
valid_fields,field_tags,field_info = field_dict(db)
search_page = _query_esearch(term, db, field,
reldate, daterange, datetype,
- retmax, rettype='uilist', sort=sort,
+ retmax, rettype='uilist', sort=sort,
validate=validate, valid_fields=valid_fields)
pid_list = parse_esearch(search_page)
if not pid_list:
def search_link(term, db='pubmed', field=None,
reldate=None, daterange=None, datetype=None,
- retmax=None, sort=None,
+ retmax=None, sort=None,
validate=False, valid_fields=None,
link_term=None, fromdb=None,
cmd=None, linkname=None, link_holding=None,
valid_fields,field_tags,field_info = field_dict(db)
search_page = _query_esearch(term, db, field,
reldate, daterange, datetype,
- retmax, rettype='uilist', sort=sort,
+ retmax, rettype='uilist', sort=sort,
validate=validate, valid_fields=valid_fields)
pid_list = parse_esearch(search_page)
link_page = _query_elink(pid_list, term=link_term, db=db, dbfrom=fromdb,
if __name__ == "__main__" :
from optparse import OptionParser
-
+
usage_string = """%prog [options] SEARCH_TERM (print medline xml matching search)
| %prog -l [options] SEARCH_TERM (print links to entries matching search)
| %prog -L [-d DATABASE] [-f FILE] (list databases)
databases = database_list()
for db in databases:
print >> outfile, "\t%s" % db
-
+
elif mode == 'explain':
fields,tags,field_info = field_dict(db=options.database)
if options.field == None :
print >> outfile, "\t%*.*s\t%-*.*s" \
% (field_size[0], field_size[0], key,
field_size[1], field_size[1], field_info[options.field][key])
-
+
elif mode == 'search':
search_term = args[0]
LOG.debug('output %s' % output)
link_datetype=options.datetype,
link_retmode=options.retmode,)
print >> outfile, link_xml
-
+
if options.filename != None :
outfile.close()