bin/chem_db.py

   1 #!/usr/bin/python
   2
   3 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
   4 #
   5 # This file is part of ChemDB.
   6 #
   7 # ChemDB is free software: you can redistribute it and/or modify it
   8 # under the terms of the GNU General Public License as published by the
   9 # Free Software Foundation, either version 3 of the License, or (at your
  10 # option) any later version.
  11 #
  12 # ChemDB is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with ChemDB.  If not, see <http://www.gnu.org/licenses/>.
  19
  20 from sys import stdout, stdin, stderr
  21
  22 import chemdb.chemdb
  23
  24 from chemdb.db.text import TextDB, DBPrettyPrinter
  25
  26
  27 def open_IOfiles(ifilename=None, ofilename=None, debug=False):
  28     if ifilename:
  29         if debug:  print >> stderr, "open input file '%s'" % ifilename
  30         ifile = file(ifilename, 'r')
  31     else:
  32         ifile = stdin
  33     if ofilename:
  34         if debug:  print >> stderr, "open output file '%s'" % ofilename
  35         ofile = file(ofilename, 'w')
  36     else:
  37         ofile = stdout
  38     return (ifile, ofile)
  39
  40 def close_IOfiles(ifilename=None, ifile=stdin,
  41                   ofilename=None, ofile=stdout,
  42                   debug=False):
  43     if ifilename:
  44         if debug:  print >> stderr, "close input file '%s'" % ifilename
  45         ifile.close()
  46     if ofilename:
  47         if debug:  print >> stderr, "close output file '%s'" % ofilename
  48         ofile.close()
  49
  50 if __name__ == '__main__':
  51     from optparse import OptionParser
  52
  53     parser = OptionParser(usage='usage: %prog [options]', version='%prog 0.1')
  54
  55     parser.add_option('-f', '--input-file', dest='ifilename',
  56                       help='Read input from FILE (default stdin)',
  57                       type='string', metavar='FILE')
  58     parser.add_option('-o', '--output-file', dest='ofilename',
  59                       help='Write output to FILE (default stdout)',
  60                       type='string', metavar='FILE')
  61     parser.add_option('-d', '--delimiter', dest='FS', # field seperator
  62                       help="Set field delimiter (default '%default')",
  63                       type='string', metavar='DELIM', default='\t')
  64     parser.add_option('-p', '--print-fields', dest='print_fields',
  65                       help='Only print certain fields (e.g. 0,3,4,2)',
  66                       type='string', metavar='FIELDS')
  67     parser.add_option('-r', '--print-records', dest='print_records',
  68                       help='Only print certain records (e.g. 0:3)',
  69                       type='string', metavar='RECORDS')
  70     parser.add_option('-w', '--column-width', dest='width',
  71                       help='Set column width for short-format output.',
  72                       type='string', metavar='WIDTH')
  73     parser.add_option('-L', '--long-format', dest='long_format',
  74                       help='Print long format (several lines per record)',
  75                       action='store_true', default=False)
  76     parser.add_option('-l', '--short-format', dest='long_format',
  77                       help='Print short format (default) (one lines per record)',
  78                       action='store_false', default=False)
  79     parser.add_option('--valid-record', dest='valid_record',
  80                       help="Select fields where True == lambda r : eval(EXPRESSION).  default '%default'",
  81                       type='string', metavar='EXPRESSION', default="r['Disposed'] == ''")
  82     parser.add_option('--sort-field', dest='sort_field',
  83                       help="Sort matching records by FIELD (defauly '%default')",
  84                       type='string', metavar='FIELD', default='db_id')
  85     parser.add_option('--pdf-title', dest='pdf_title',
  86                       help='Override the default PDF title',
  87                       type='string', metavar='TITLE')
  88     parser.add_option('--inventory', dest='inventory',
  89                       help='Output a PDF inventory of matching records',
  90                       action='store_true', default=False)
  91     parser.add_option('--door-warning', dest='door_warning',
  92                       help='Output a PDF door warning of matching records',
  93                       action='store_true', default=False)
  94     parser.add_option('-t', '--test', dest='test',
  95                       help='Run docutils tests on db.py',
  96                       action='store_true', default=False)
  97     parser.add_option('--list-locations', dest='locations',
  98                       help='List all currently used locations (no other output)',
  99                       action='store_true', default=False)
 100     parser.add_option('-V', '--validate', dest='validate',
 101                       help='Validate CAS#s (no other output)',
 102                       action='store_true', default=False)
 103     parser.add_option('-A', '--audit', dest='audit',
 104                       help='Search for troublesome entries (no other output)',
 105                       action='store_true', default=False)
 106     parser.add_option('-v', '--verbose', dest='verbose',
 107                       help='Print lots of debugging information',
 108                       action='store_true', default=False)
 109
 110     (options, args) = parser.parse_args()
 111     parser.destroy()
 112
 113     ifile,ofile = open_IOfiles(options.ifilename, options.ofilename,
 114                                options.verbose)
 115
 116     if options.test:
 117         _test()
 118     elif options.locations:
 119         db = TextDB(filename=None)
 120         pp = DBPrettyPrinter(db)
 121
 122         # read in and parse the file
 123         db._parse(ifile.read())
 124
 125         locations = []
 126         for record in db.records():
 127             if len(record['Location']) > 0 and record['Location'] not in locations:
 128                 locations.append(record['Location'])
 129         locations.sort()
 130         print >> ofile, '\n'.join(locations)
 131     elif options.validate:
 132         db = TextDB(filename=None)
 133         pp = DBPrettyPrinter(db)
 134
 135         # read in and parse the file
 136         db._parse(ifile.read())
 137
 138         CAS_DELIM = ',' # seperate CAS entries for chemicals with multiple CAS numbers
 139         PERCENT_DELIM = ':' # seperate CAS number from ingredient percentage
 140         for record in db.records():
 141             valid = True
 142             cas = record['CAS#']
 143             if len(cas.split(CAS_DELIM)) == 0 : # cas = 'N...N-NN-N'
 144                 if not chemdb.chemdb.valid_CASno(cas, options.verbose):
 145                     valid = False
 146                     print >> ofile, "Invalid CAS# in record: '%s'" % cas
 147             else : # cas = 'N...N-NN-N:X%,N...N-NN-N:Y%,...'
 148                 for casterm in cas.split(CAS_DELIM) : # casterm = 'N...N-NN-N:X%'
 149                     c = casterm.split(PERCENT_DELIM)[0]   # c = 'N...N-NN-N'
 150                     if not chemdb.chemdb.valid_CASno(c, options.verbose):
 151                         valid = False
 152                         print >> ofile, "Invalid CAS* in record: '%s'" % c
 153             if not valid:
 154                 print >> ofile, (
 155                     "in record %s: %s" % (record['ID'], record['Name']))
 156                 #pp.full_record_string(record)
 157     elif options.audit:
 158         db = TextDB(filename=None)
 159         pp = DBPrettyPrinter(db)
 160
 161         # read in and parse the file
 162         db._parse(ifile.read())
 163
 164         for record in db.records():
 165             # check for extra spaces
 166             for key,value in record.items():
 167                 if (isinstance(value, types.StringTypes)
 168                     and value.strip() != value):
 169                     print >> ofile, (
 170                         "Extra whitespace for %s - %s field %s : '%s'"
 171                         % (record['ID'], record['Name'], key, value))
 172             # make sure we know the location of all current chemicals
 173             if len(record['Disposed']) == 0 and len(record['Location']) == 0:
 174                 print >> ofile, (
 175                     "Misplaced record: %s - %s"
 176                     % (record['ID'], record['Name']))
 177     elif options.inventory:
 178         db = TextDB(filename=None)
 179         pp = DBPrettyPrinter(db)
 180
 181         # read in and parse the file
 182         db._parse(ifile.read())
 183
 184         dgen = docgen(db)
 185         def valid_record(r):
 186             return eval(options.valid_record,  # expression
 187                         {'__builtins__':None}, # globals
 188                         {'r':r})               # locals
 189         path = dgen.inventory(title=options.pdf_title,
 190                               namewidth=40,
 191                               sort_field=options.sort_field,
 192                               valid_record=valid_record)
 193         print >> ofile, '\n', path
 194     elif options.door_warning:
 195         db = TextDB(filename=None)
 196         pp = DBPrettyPrinter(db)
 197
 198         # read in and parse the file
 199         db._parse(ifile.read())
 200
 201         dgen = docgen(db)
 202         def valid_record(r):
 203             return eval(options.valid_record,  # expression
 204                         {'__builtins__':None}, # globals
 205                         {'r':r})               # locals
 206         path = dgen.door_warning(valid_record=valid_record)
 207         print >> ofile, '\n', path
 208     else:
 209         db = TextDB(filename=None)
 210
 211         # read in and parse the file
 212         db._parse(ifile.read())
 213         pp = DBPrettyPrinter(db)
 214         if options.long_format:
 215             for id in pp._norm_record_ids(options.print_records):
 216                 string = pp.full_record_string_id(id)
 217         else:
 218             # pythonize the width option
 219             if options.width == None or options.width == 'a':
 220                 width = options.width
 221             elif len(options.width.split(':')) == 1:
 222                 width = int(options.width)
 223             elif len(options.width.split(':')) > 1:
 224                 width = {}
 225                 for kv in options.width.split(','):
 226                     spl = kv.split(':')
 227                     assert len(spl) == 2, 'invalid width "%s" in "%s"' % (kv, options.width)
 228                     if spl[1] == 'a':
 229                         width[spl[0]] = spl[1]
 230                     else:
 231                         width[spl[0]] = int(spl[1])
 232
 233             string = pp.multi_record_string(options.print_records,
 234                                             options.print_fields,
 235                                             width,
 236                                             options.FS)
 237             print >> ofile, string,
 238
 239     close_IOfiles(options.ifilename, ifile,
 240                   options.ofilename, ofile, options.verbose)