3 # Copyright (C) 2011-2012 W. Trevor King
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Clean up the LDIF output from abook_ using `python-ldap`_.

.. _abook: http://abook.sourceforge.net/
.. _python-ldap: http://www.python-ldap.org/
"""
def cleanup(text, basedn):
    """Run every fix-up over abook's LDIF text and return new LDIF text.

    The text is first adjusted with the pre-parser helpers, then parsed
    with ``ldif.ParseLDIF``.  The resulting ``(dn, record)`` pairs are
    passed through each post-parser helper in turn and finally written
    back out with ``ldif.LDIFWriter``.

    text   -- LDIF text to clean.
    basedn -- base distinguished name, forwarded to ``add_base_dn``.

    Returns the serialized records prefixed with a ``version: 1`` line
    and one blank line.
    """
    # pre-parser formatting
    text = remove_trailing_mail(text)
    text = remove_cn_commas(text)

    records = ldif.ParseLDIF(StringIO.StringIO(text))

    # post-parser formatting
    # (order matters: rename_locality must run before set_postaladdress,
    # which reads the renamed 'localityname' key)
    records = remove_empty_mail(records)
    records = remove_top_objectclass(records)
    records = add_inetorgperson_objectclass(records)
    records = add_base_dn(records, basedn)
    records = add_names(records)
    records = standardize_phone_numbers(records)
    records = standardize_country_code(records)
    records = rename_locality(records)
    records = rename_cellphone(records)
    records = rename_xmozillaanyphone(records)
    records = rename_xmozillanickname(records)
    records = rename_homeurl(records)
    records = set_postaladdress(records)

    # convert back to a string
    s = StringIO.StringIO()
    writer = ldif.LDIFWriter(s)
    for dn, record in records:
        writer.unparse(dn, record)
    return 'version: 1\n\n%s' % s.getvalue()
def remove_trailing_mail(text):
    """Append a placeholder address to lines that end in a bare ``,mail=``.

    Every line of ``text`` ending in ``,mail=`` gets ``x@y.com`` appended;
    all other lines are returned untouched.  A carriage return in front of
    the line break is tolerated and preserved.

    >>> print(remove_trailing_mail('\\n'.join([
    ...     'version: 1',
    ...     'dn: cn=John Doe,mail=',
    ...     'cn: John Doe',
    ...     '',
    ...     ])))
    version: 1
    dn: cn=John Doe,mail=x@y.com
    cn: John Doe
    <BLANKLINE>
    """
    # The lookahead lets the match succeed on CRLF-terminated lines without
    # consuming (and therefore without dropping) the carriage return.
    return re.sub(
        r',mail=(?=\r?$)', ',mail=x@y.com', text, flags=re.MULTILINE)
def _sub_cn_commas(match):
    """Return the matched ``cn=...,mail=`` text with cn commas made ``_``."""
    cn = match.group(1).replace(',', '_')
    return 'cn=%s,mail=' % cn


def remove_cn_commas(text):
    """Replace commas inside the ``cn=`` part of ``cn=...,mail=`` by ``_``.

    Only text matched by ``cn=(.*),mail=`` is rewritten, so a plain
    ``cn:`` attribute line keeps its commas.

    >>> print(remove_cn_commas('\\n'.join([
    ...     'version: 1',
    ...     'dn: cn=John, Jane, and Jim Doe,mail=x@y.com',
    ...     'cn: John, Jane, and Jim Doe',
    ...     '',
    ...     ])))
    version: 1
    dn: cn=John_ Jane_ and Jim Doe,mail=x@y.com
    cn: John, Jane, and Jim Doe
    <BLANKLINE>
    """
    return re.sub('cn=(.*),mail=', _sub_cn_commas, text)
def remove_empty_mail(records):
    """Drop the ``mail`` attribute from records where it equals ``['']``.

    records -- list of ``(dn, record)`` pairs; each record is a dict that
               maps attribute names to lists of values.

    Records are modified in place and the same list is returned.

    NOTE(review): the statement under the ``if`` and the ``return`` were
    not present in the source under review.  Removing the key and
    returning ``records`` is reconstructed from the function name and
    from how ``cleanup`` chains these helpers -- confirm against upstream.
    """
    for dn, record in records:
        if record.get('mail') == ['']:
            record.pop('mail')
    return records
def remove_top_objectclass(records):
    """Remove ``'top'`` from each record's ``objectclass`` list.

    records -- list of ``(dn, record)`` pairs; every record must have an
               ``objectclass`` key holding a list.

    Only the first ``'top'`` entry of a list is removed.  Records are
    modified in place and the same list is returned.
    """
    for dn, record in records:
        classes = record['objectclass']
        if 'top' in classes:
            classes.remove('top')
    return records
def add_inetorgperson_objectclass(records):
    """Append the person-related object classes to every record.

    ``organizationalPerson``, ``inetOrgPerson`` and ``extensibleObject``
    are appended, in that order, to each record's existing
    ``objectclass`` list.  Records are modified in place and the same
    list is returned.
    """
    for dn, record in records:
        # extensibleObject required for countryName
        record['objectclass'].extend(
            ['organizationalPerson', 'inetOrgPerson', 'extensibleObject'])
    return records
def add_base_dn(records, basedn):
    """Replace the ``,mail=...`` tail of every DN with ``', ' + basedn``.

    records -- list of ``(dn, record)`` pairs; entries are replaced in
               place by index, so this must be a mutable sequence.
    basedn  -- base distinguished name to append, taken literally.

    A DN without ``,mail=`` is left unchanged.  Returns ``records``.
    """
    regexp = re.compile(',mail=.*')
    subst = ', ' + basedn
    for i, (dn, record) in enumerate(records):
        # Use a callable replacement so backslashes in basedn (DN escapes
        # such as ``\2C``) are not parsed as regex template escapes or
        # group references.
        new_dn = regexp.sub(lambda match: subst, dn)
        records[i] = (new_dn, record)
    return records
def _set_key(record, key, value, override=True):
    """Case-agnostic value setter.

    If ``record`` already holds a key equal to ``key`` when case is
    ignored, that existing spelling is reused; otherwise ``key`` is used
    as given.  With ``override`` false an existing value is kept.

    >>> record = {'aB': 'old'}
    >>> _set_key(record, 'AB', 'new')
    >>> print(record)
    {'aB': 'new'}
    >>> _set_key(record, 'ab', 'newer', override=False)
    >>> print(record)
    {'aB': 'new'}

    NOTE(review): the lines that pick ``k`` were not present in the
    source under review; choosing the first case-insensitive match and
    falling back to ``key`` is reconstructed from the doctest above.
    """
    wanted = key.lower()
    k = next(
        (existing for existing in record if existing.lower() == wanted),
        key)
    if override or k not in record:
        record[k] = value
def add_names(records):
    """Add ``sn`` and ``givenName`` derived from the first ``cn`` value.

    Surname and givenName are defined in `RFC 4519`_.

    The first ``cn`` value is split at its last space: the part before
    becomes ``givenName`` and the part after becomes ``sn``.  A ``cn``
    without a space is used whole as ``sn`` and no ``givenName`` is
    added.  Values already present (under any capitalization) are kept.
    Records are modified in place and the same list is returned.

    NOTE(review): the line binding ``cn`` was not present in the source
    under review; ``record['cn']`` is a reconstruction -- confirm.

    .. _RFC 4519: http://tools.ietf.org/html/rfc4519
    """
    for dn, record in records:
        cn = record['cn']
        parts = cn[0].rsplit(' ', 1)
        if len(parts) == 2:
            gn, sn = parts
        else:
            # Single-word name: unpacking two values would raise
            # ValueError, so treat the whole name as the surname.
            gn, sn = None, parts[0]
        _set_key(record, 'sn', [sn], override=False)
        if gn is not None:
            _set_key(record, 'givenName', [gn], override=False)
    return records
def standardize_phone_numbers(records):
    """Standardize phone numbers to match `E.123`_ international notation

    Assumes numbers not starting with a '+' live in the USA.

    TODO: the conversion is not implemented; records are returned
    unchanged.

    NOTE(review): the body was not present in the source under review.
    A pass-through is reconstructed from the placeholder docstring
    example and from ``cleanup`` using the return value -- confirm.

    >>> import pprint
    >>> records = [
    ...     ('cn=John', {'homephone': ['123-456-7890']}),
    ...     ]
    >>> pprint.pprint(standardize_phone_numbers(records))
    [('cn=John', {'homephone': ['123-456-7890']})]

    .. _E.123: http://en.wikipedia.org/wiki/E.123
    """
    # TODO: implement the E.123 conversion described in the docstring.
    return records
def standardize_country_code(records):
    """Replace known country names in ``countryname`` by two-letter codes.

    Each value of a record's ``countryname`` list is looked up in a small
    table; values without an entry are kept as they are.  Records are
    modified in place and the same list is returned.
    """
    # ISO 3166 two-letter codes
    # http://tools.ietf.org/html/rfc4519
    # http://tools.ietf.org/html/rfc4517
    # NOTE(review): the lookup table was not present in the source under
    # review, leaving ``table`` undefined.  The entries below are a
    # reconstruction, not the original data -- confirm and extend.
    table = {
        'USA': 'US',
        'United States': 'US',
        'United States of America': 'US',
        'UK': 'GB',
        'United Kingdom': 'GB',
        'Canada': 'CA',
        }
    for dn, record in records:
        if 'countryname' in record:
            record['countryname'] = [
                table.get(c, c) for c in record['countryname']]
    return records
def rename_locality(records):
    """Move each record's ``locality`` values to the ``localityname`` key.

    Records without ``locality`` are left alone; an existing
    ``localityname`` value is overwritten.  Records are modified in place
    and the same list is returned.
    """
    # locality -> l (localityName)
    for dn, record in records:
        if 'locality' in record:
            record['localityname'] = record.pop('locality')
    return records
def rename_cellphone(records):
    """Move each record's ``cellphone`` values to the ``mobile`` key.

    Records without ``cellphone`` are left alone; an existing ``mobile``
    value is overwritten.  Records are modified in place and the same
    list is returned.
    """
    # cellphone -> mobile
    for dn, record in records:
        if 'cellphone' in record:
            record['mobile'] = record.pop('cellphone')
    return records
def rename_xmozillaanyphone(records):
    """Move ``xmozillaanyphone`` values to the ``telephonenumber`` key.

    Records without ``xmozillaanyphone`` are left alone; an existing
    ``telephonenumber`` value is overwritten.  Records are modified in
    place and the same list is returned.
    """
    # xmozillaanyphone -> telephonenumber
    for dn, record in records:
        if 'xmozillaanyphone' in record:
            record['telephonenumber'] = record.pop('xmozillaanyphone')
    return records
def rename_xmozillanickname(records):
    """Move ``xmozillanickname`` values to the ``displayname`` key.

    Records without ``xmozillanickname`` are left alone; an existing
    ``displayname`` value is overwritten.  Records are modified in place
    and the same list is returned.
    """
    # xmozillanickname -> displayname
    for dn, record in records:
        if 'xmozillanickname' in record:
            record['displayname'] = record.pop('xmozillanickname')
    return records
def rename_homeurl(records):
    """Turn ``homeurl`` values into labelled ``labeleduri`` values.

    Each ``homeurl`` value gets the label `` Home Page`` appended and the
    resulting list is stored under ``labeleduri``; the ``homeurl`` key is
    removed.  An existing ``labeleduri`` value is overwritten.  Records
    are modified in place and the same list is returned.
    """
    # homeurl -> labeledURI
    for dn, record in records:
        if 'homeurl' in record:
            record['labeleduri'] = [
                '%s Home Page' % x for x in record.pop('homeurl')]
    return records
def set_postaladdress(records):
    """Build ``homepostaladdress`` from the separate address attributes.

    For every record the first values of ``streetaddress``,
    ``streetaddress2``, ``localityname``/``st``, ``postalcode`` and
    ``countryname`` are collected, empty parts are skipped, and the rest
    is joined into one value.  ``streetaddress2`` is removed from the
    record.  An existing ``homepostaladdress`` is kept, and nothing is
    stored when every part is empty.  Records are modified in place and
    the same list is returned.
    """
    # postalAddress defined in rfc4517
    # homePostalAddress defined in ?
    # streetAddress defined in rfc4519
    for dn, record in records:
        street = record.get('streetaddress', [None])[0]
        addr2 = record.get('streetaddress2', [None])[0]
        locality = record.get('localityname', [None])[0]
        state = record.get('st', [None])[0]
        # NOTE(review): the lines computing ``ls`` were not present in the
        # source under review; joining locality and state with ', ' is a
        # reconstruction -- confirm the separator against upstream.
        ls = ', '.join(part for part in [locality, state] if part)
        post = record.get('postalcode', [None])[0]
        country = record.get('countryname', [None])[0]
        record.pop('streetaddress2', None)
        # NOTE(review): the line opening this join was also missing; '$'
        # is assumed to be the line separator -- confirm.
        addr = '$'.join(
            [line for line in [street, addr2, ls, post, country] if line])
        if addr:
            # Skip records without any address part rather than storing
            # an empty attribute value.
            _set_key(record, 'homepostaladdress', [addr], override=False)
    return records
if __name__ == '__main__':
    # Script entry point: read LDIF from stdin, clean it, write to stdout.
    import argparse
    import sys

    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument(
        '-b', '--basedn', dest='basedn', metavar='DNBASE',
        default='ou=people,dc=example,dc=org',
        help="Base distinguished name for the entries (%(default)s)")

    args = p.parse_args()

    text = sys.stdin.read()
    text = cleanup(text, basedn=args.basedn)
    sys.stdout.write(text)