5 Nanoblogger-to-Ikiwiki import tool
8 Copyright (C) 2007 Chris Lamb <lamby@debian.org>
9 Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
11 This program is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 Usage: run --help as an argument with this script.
27 I added some extra bits to include the [[!tag tags/foo]] stuff in the post,
28 as it wasn't before, at all. I'll diff the versions out so you can see
37 from datetime import datetime
38 import codecs, htmlentitydefs
42 def __init__(self, path):
44 lines = [x.strip() for x in open(path, 'r').readlines()]
46 self.files = lines[1:]
def is_tagged(self, path):
    """Return whether the post at `path` is listed under this tag.

    Only the final path component is compared, so the directory part
    of `path` is ignored when looking it up in `self.files`.
    """
    post_name = os.path.basename(path)
    return post_name in self.files
def is_tag_file(filename):
    """Return True if `filename` is named like a tag (category) file.

    A tag file name starts with 'cat_' and ends with '.db'.
    """
    if not filename.startswith('cat_'):
        return False
    return filename.endswith('.db')
56 def parse_file(path, possible_tags):
57 lines = open(path, 'r').readlines()
59 while True: # parse header
61 if line.startswith('-----'):
63 field,value = [x.strip() for x in line.split(':', 1)]
64 post_dict[field.lower()] = value
65 assert lines[0].startswith('BODY:'), lines[0]
67 assert lines[-1].startswith('END-----'), lines[-1]
69 text = '\n'.join([unicode(x.rstrip(), 'utf-8') for x in lines])
70 post_dict['text'] = text
72 post_dict['timestamp'] = time.mktime(time.strptime(
73 post_dict['date'].replace('EST ', '').replace('EDT ', ''), '%c'))
74 if 'EDT' in post_dict['date']:
75 post_dict['timestamp'] += 4*60*60
76 elif 'EST' in post_dict['date']:
77 post_dict['timestamp'] += 5*60*60
79 raise NotImplementedError('unknown time zone in %s'
81 post_dict['tags'] = [t.tag for t in possible_tags if t.is_tagged(path)]
85 def format_commit(post_dict, name, email, subdir, branch):
86 stub = post_dict['title'].replace(' ', '_')
87 if post_dict['format'].lower() == 'markdown':
90 raise NotImplementedError('Unkown extension for %s'
91 % post_dict['format'])
92 commit_msg = '''Importing NanoBlogger post "%s"''' % (post_dict['title'])
95 '[[!meta title="%s"]]' % (post_dict['title'].replace('"', r"'")),
96 '[[!meta date="%s"]]' % datetime.fromtimestamp(post_dict['timestamp']),
99 if len(post_dict['tags']) > 0:
101 for tag in post_dict['tags']:
103 '[[!tag tags/%s]]' % (tag.replace(' ', '-').replace('/', '-').lower()))
105 data = '\n'.join(lines).encode('utf-8', 'html_replace')
107 "commit refs/heads/%s" % branch,
108 "committer %s <%s> %d +0000" % (name, email, post_dict['timestamp']),
109 "data %d" % len(commit_msg),
111 "M 644 inline %s" % os.path.join(subdir, "%s.%s" % (stub, ext)),
112 "data %d" % len(data),
115 return '\n'.join(ret)
118 def main(name, email, data_dir, subdir, branch='master'):
119 files = os.listdir(data_dir)
121 for x in files: # read tag (category) files
122 if Tag.is_tag_file(x):
123 tags.append(Tag(os.path.join(data_dir, x)))
126 if Tag.is_tag_file(x):
128 if x.endswith('.db'):
129 continue # ignore master.db. it just repeats tag info
130 posts.append(parse_file(os.path.join(data_dir, x), tags))
131 posts.sort(key=lambda x:x['timestamp'])
133 print format_commit(x, name, email, subdir, branch)
136 if __name__ == "__main__":
137 if len(sys.argv) not in (5, 6):
138 print >>sys.stderr, "%s: usage: %s name email datadir subdir [branch] | git-fast-import " % (sys.argv[0], sys.argv[0])