Filled in Graduation parsing in export_mysql
[sitecorepy.git] / sitecore / prof / export_mysql.py
1 #!/usr/bin/env python
2 # Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
3 #
4 # This file is part of SiteCorePy.
5 #
6 # SiteCorePy is free software; you can redistribute it and/or modify it
7 # under the terms of the GNU General Public License as published by the
8 # Free Software Foundation, either version 2 of the License, or (at your
9 # option) any later version.
10 #
11 # SiteCorePy is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with SiteCorePy.  If not, see <http://www.gnu.org/licenses/>.
18
19 """Move Professors from MySQL -> YAML.
20
21 Not really SiteCore related, but it seves as an example of what to do
22 before calling prof_import.
23 """
24
25 import getpass
26 import logging
27 import re
28
29 import MySQLdb
30 import yaml
31
32 from .. import get_logger
33 from . import Name, Graduation, Contact, Bio, Professor
34
35
36 GRADUATION_REGEXP = re.compile('^(\S) +(\S), (.*) *(\d)?$')
37 """Examples:
38
39 M.S.  Physics, University of Calcutta, Calcutta, India
40 Ph.D. Physics, University of Maryland, Maryland, 1967
41 """
42
43 class SimpleDB (object):
44     def __init__(self, verbose=0):
45         self.db = None
46         self.cursor = None
47         self.table = None
48         self.logger = get_logger(verbose)
49
50     def connect(self, database, host='localhost',
51                 username=None, password=None):
52         if username == None:
53             username = getpass.getpass('username: ')
54         if password == None:
55             password = getpass.getpass('password: ')
56         self.logger.info('logging in to %s:%s as %s'
57                          % (host, database, username))
58         self.db = MySQLdb.connect(host=host, user=username,
59                                   passwd=password, db=database)
60         del(username)
61         del(password)
62         self.cursor = self.db.cursor()
63
64     def disconnect(self):
65         self.logger.info('disconnecting')
66         self.cursor = None
67         self.db.close()
68
69     def execute(self, command):
70         self.logger.info(command)
71         self.cursor.execute(command)
72
73     def set_default_table(self, table):
74         self.table = table
75
76     def add_entry(self, dict_, table=None):
77         if table == None:
78             table = self.table
79         dict_ = self._clean_dict(dict_)
80         keys = dict_.keys()
81         values = ["'%s'" % dict_[key] for key in keys]
82         self.execute("INSERT INTO %s (%s) VALUES (%s)"
83                      % (table, ', '.join(keys), ', '.join(values)))
84         # error checking
85
86     def list_entries(self, dict_=None, table=None):
87         if table == None:
88             table = self.table
89         where = self._where_string(dict_)
90         self.execute('SELECT * FROM %s%s' % (table, where))
91         numrows = int(self.cursor.rowcount)
92         fields = [dtuple[0] for dtuple in self.cursor.description]
93         ret = []
94         for x in range(0,numrows):
95             row = self.cursor.fetchone()
96             ret.append(dict(zip(fields, row)))
97         return ret
98
99     def delete_entries(self, dict_, table=None):
100         if table == None:
101             table = self.table
102         where = self._where_string(dict_)
103         self.execute('DELETE FROM %s%s' % (table, where))
104
105     def _clean_dict(self, dict_=None):
106         if dict_ == None:
107             return None
108         for key in dict_.keys():
109             value = dict_.pop(key)
110             value = MySQLdb.escape_string(value)
111             key = MySQLdb.escape_string(key)
112             assert key not in dict_, '%s in %s!' % (key, dict_)
113             dict_[key] = value
114         return dict_
115
116     def _where_string(self, dict_=None):
117         dict_ = self._clean_dict(dict_)
118         if dict_ == None or len(dict_) == 0:
119             where = ''
120         else:
121             where_args = []
122             for key,value in dict_.items():
123                 where_args.append("%s='%s'" % (key,value))
124             where = ' where %s' % ' and '.join(where_args)
125         return where
126
127
128 def get_profs(db):
129     """
130 mysql> describe people;
131 +--------------+--------------+------+-----+---------+----------------+
132 | Field        | Type         | Null | Key | Default | Extra          |
133 +--------------+--------------+------+-----+---------+----------------+
134 | ID           | int(11)      | NO   | PRI | NULL    | auto_increment | 
135 | category     | varchar(255) | YES  |     | NULL    |                | 
136 | lastname     | varchar(255) | YES  |     | NULL    |                | 
137 | firstname    | varchar(255) | YES  |     | NULL    |                | 
138 | email        | varchar(255) | YES  |     | NULL    |                | 
139 | office       | varchar(255) | YES  |     | NULL    |                | 
140 | phone        | varchar(255) | YES  |     | NULL    |                | 
141 | lab          | varchar(255) | YES  |     | NULL    |                | 
142 | labphone     | varchar(255) | YES  |     | NULL    |                | 
143 | grouppage    | varchar(255) | YES  |     | NULL    |                | 
144 | personalpage | varchar(255) | YES  |     | NULL    |                | 
145 | position     | varchar(255) | YES  |     | NULL    |                | 
146 | position1    | varchar(255) | YES  |     | NULL    |                | 
147 | degrees      | varchar(255) | YES  |     | NULL    |                | 
148 | research     | mediumtext   | YES  |     | NULL    |                | 
149 | profile      | mediumtext   | YES  |     | NULL    |                | 
150 +--------------+--------------+------+-----+---------+----------------+
151 16 rows in set (0.00 sec)
152
153 Where the relevant categories are
154 | Faculty            | 
155 | Research Faculty   | 
156 | Teaching Faculty   | 
157 | Professor Emeritus | 
158 | Adjunct Professor  | 
159     """
160     db.set_default_table('people')
161     for category in ['Faculty',
162                      'Research Faculty',
163                      'Teaching Faculty',
164                      'Professor Emeritus',
165                      'Adjunct Professor']:
166         for prof in db.list_entries({'category':category}):
167             p = Professor()
168             p.name = Name(
169                 first_middle=prof['firstname'],
170                 last=prof['lastname'],
171                 )
172             p.title = prof['position']
173             if prof['position1'] != None:
174                 p.title += ', %s' % prof['position1']
175             p.graduations = []
176             print prof['degrees']
177             for degree in prof['degrees'].splitlines():
178                 m = GRADUATION_REGEXP.match(degree)
179                 assert m != None, 'Misformed graduation: %s' % degree
180                 title,field,college,year = m.groups()
181                 p.graduations.append(
182                     Graduation(
183                         college=college,
184                         field=field,
185                         title=title,
186                         year=year,
187                         )
188                     )
189             p.contact = Contact(
190                 office=prof['office'],
191                 email=prof['email'],
192                 website=prof['personalpage'],
193                 phone=prof['phone'],
194                 lab=prof['lab'],
195                 lab_phone=prof['labphone'],
196                 )
197             p.bio = Bio(
198                 specialization=prof['research'], # prof['grouppage']
199                 publications=None,
200                 profile=prof['profile'],
201                 cv=None,
202                 )
203             self.logger.debug(unicode(p))
204             yield p
205
206
207 def main(argv):
208     import optparse
209     usage = """%prog [options] PROF_FILE
210
211 Where PROF_FILE is the output YAML file receiving professor data.
212
213 Example:
214   ./sc.py prof-export_mysql profs.yaml
215 """
216     p = optparse.OptionParser(usage)
217     p.add_option('-d', '--database', metavar='NAME', dest='database',
218                  help='Name of the MySQL database (%default)',
219                  default='directory')
220     p.add_option('-v', '--verbose', dest='verbose', action='count',
221                  help='increment verbosity (%default)',
222                  default=0)
223
224     options,args = p.parse_args(argv)
225     prof_file = args[0]
226
227     db = SimpleDB(verbose=options.verbose)
228     db.connect(database=options.database)
229     try:
230         profs = list(get_profs(db))
231     finally:
232         db.disconnect()
233
234     yaml.save(db, profs)