Filled in Graduation parsing in export_mysql
author W. Trevor King <wking@drexel.edu>
Sat, 26 Jun 2010 21:54:22 +0000 (17:54 -0400)
committer W. Trevor King <wking@drexel.edu>
Sat, 26 Jun 2010 21:54:22 +0000 (17:54 -0400)
sitecore/prof/__init__.py
sitecore/prof/export_mysql.py

index c78899df2be4bab7b6929e42ed4c51c1ab5e6f59..d6011fb1fafe9a616fd0276ce85be483aa4b0e0f 100644 (file)
@@ -98,7 +98,7 @@ class Name (AttributeHolder):
     fields = ['first_middle', 'last']
 
 class Graduation (AttributeHolder):
     fields = ['first_middle', 'last']
 
 class Graduation (AttributeHolder):
-    fields = ['college', 'title', 'year']
+    fields = ['college', 'field', 'title', 'year']
 
 class Contact (AttributeHolder):
     fields = ['office', 'email', 'website', 'phone', 'lab', 'lab_phone']
 
 class Contact (AttributeHolder):
     fields = ['office', 'email', 'website', 'phone', 'lab', 'lab_phone']
index 45664c9b3d749b5badcedbafc0d837bf23e5e096..d0227169a9537a3793c4a63e3949e2a55b9c263f 100644 (file)
@@ -24,6 +24,7 @@ before calling prof_import.
 
 import getpass
 import logging
 
 import getpass
 import logging
+import re
 
 import MySQLdb
 import yaml
 
 import MySQLdb
 import yaml
@@ -32,6 +33,13 @@ from .. import get_logger
 from . import Name, Graduation, Contact, Bio, Professor
 
 
 from . import Name, Graduation, Contact, Bio, Professor
 
 
+GRADUATION_REGEXP = re.compile(r'^(\S+) +([^,]+), (.*?)(?:,? *(\d{4}))?$')
+"""Groups: title, field, college, optional 4-digit year (else None). Examples:
+
+M.S.  Physics, University of Calcutta, Calcutta, India
+Ph.D. Physics, University of Maryland, Maryland, 1967
+"""
+
 class SimpleDB (object):
     def __init__(self, verbose=0):
         self.db = None
 class SimpleDB (object):
     def __init__(self, verbose=0):
         self.db = None
@@ -166,12 +174,16 @@ Where the relevant categories are
                 p.title += ', %s' % prof['position1']
             p.graduations = []
             print prof['degrees']
                 p.title += ', %s' % prof['position1']
             p.graduations = []
             print prof['degrees']
-            for degree in prof['degrees']:
-                p.greaduations.append(
+            for degree in prof['degrees'].splitlines():
+                m = GRADUATION_REGEXP.match(degree)
+                assert m != None, 'Misformed graduation: %s' % degree
+                title,field,college,year = m.groups()
+                p.graduations.append(
                     Graduation(
                     Graduation(
-                        college=None,
-                        title=None,
-                        year=None,
+                        college=college,
+                        field=field,
+                        title=title,
+                        year=year,
                         )
                     )
             p.contact = Contact(
                         )
                     )
             p.contact = Contact(