--- /dev/null
+#!/usr/bin/env python
+
+"""
+ Purpose:
+ Nanoblogger-to-Ikiwiki import tool
+
+ Copyright:
+ Copyright (C) 2007 Chris Lamb <lamby@debian.org>
+ Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ Usage: run --help as an argument with this script.
+
+ Notes:
+ I added some extra bits to include the [[!tag foo]] stuff in the post,
+ as it wasn't before, at all. I'll diff the versions out so you can see
+ the mess I made :).
+
+"""
+
+import os, sys
+import time
+import re
+
+from datetime import datetime
+import codecs, htmlentitydefs
+
+
+class Tag (object):
+    """One NanoBlogger category, loaded from a ``cat_*.db`` file.
+
+    The first line of the file is used as the tag name; every following
+    line is taken as the basename of a post file carrying that tag.
+
+    Attributes:
+      path  -- path the category file was read from
+      tag   -- tag name (first line of the file, whitespace-stripped)
+      files -- list of the remaining lines, whitespace-stripped
+    """
+
+    def __init__(self, path):
+        """Read the category file at `path`.
+
+        Raises IOError if the file cannot be opened and ValueError if
+        it contains no lines at all (there is then no tag name).
+        """
+        self.path = path
+        # Close the handle deterministically instead of relying on
+        # garbage collection.
+        with open(path, 'r') as f:
+            lines = [x.strip() for x in f]
+        if not lines:
+            raise ValueError('empty tag file: %s' % path)
+        self.tag = lines[0]
+        self.files = lines[1:]
+
+    def is_tagged(self, path):
+        """Return True if the basename of `path` is listed in this tag."""
+        return os.path.basename(path) in self.files
+
+    @staticmethod
+    def is_tag_file(filename):
+        """Return True if `filename` is named like ``cat_*.db``."""
+        return filename.startswith('cat_') and filename.endswith('.db')
+
+
+def parse_file(path, possible_tags):
+    """Parse one NanoBlogger post file into a dictionary.
+
+    The file is expected to hold ``FIELD: value`` header lines, a line
+    starting with ``-----``, a ``BODY:`` line, the body text, and a final
+    line starting with ``END-----``.
+
+    Arguments:
+      path          -- path of the post file
+      possible_tags -- iterable of objects with a `tag` attribute and an
+                       `is_tagged(path)` method
+
+    Returns a dict holding every header field (key lower-cased) plus:
+      'text'      -- body as unicode (decoded from UTF-8)
+      'timestamp' -- seconds since the epoch (UTC), as a float
+      'tags'      -- names of the tags in `possible_tags` covering `path`
+
+    Raises ValueError for a malformed file, KeyError if there is no DATE
+    header, and NotImplementedError if the date names neither EST nor EDT.
+    """
+    import calendar  # local import: only needed for timegm() below
+
+    # Close the handle deterministically instead of relying on
+    # garbage collection.
+    with open(path, 'r') as f:
+        lines = f.readlines()
+
+    post_dict = {}
+    while True:  # parse header
+        if not lines:
+            raise ValueError(
+                '%s: header is not terminated by "-----"' % path)
+        line = lines.pop(0)
+        if line.startswith('-----'):
+            break
+        field, value = [x.strip() for x in line.split(':', 1)]
+        post_dict[field.lower()] = value
+
+    # Explicit checks rather than assert, so they survive "python -O".
+    if not lines or not lines[0].startswith('BODY:'):
+        raise ValueError('%s: expected "BODY:" after the header' % path)
+    lines.pop(0)
+    if not lines or not lines[-1].startswith('END-----'):
+        raise ValueError('%s: expected "END-----" on the last line' % path)
+    lines.pop(-1)
+    post_dict['text'] = '\n'.join(
+        [unicode(x.rstrip(), 'utf-8') for x in lines])
+
+    # Pick the UTC offset first, so an unsupported zone is reported as
+    # such instead of as a strptime() parse failure.
+    date = post_dict['date']
+    if 'EDT' in date:
+        utc_offset = 4*60*60
+    elif 'EST' in date:
+        utc_offset = 5*60*60
+    else:
+        raise NotImplementedError('unknown time zone in %s' % date)
+    naive = time.strptime(
+        date.replace('EST ', '').replace('EDT ', ''), '%c')
+    # timegm() reads the broken-down time as UTC, so the result does not
+    # depend on the time zone of the machine running the import
+    # (mktime() would read it as local time).
+    post_dict['timestamp'] = float(calendar.timegm(naive)) + utc_offset
+
+    post_dict['tags'] = [t.tag for t in possible_tags if t.is_tagged(path)]
+    return post_dict
+
+
+def format_commit(post_dict, name, email, subdir, branch):
+    """Render one post as a ``commit`` command for git fast-import.
+
+    Arguments:
+      post_dict -- dict with 'title', 'format', 'timestamp', 'text' and
+                   'tags' entries
+      name      -- committer name
+      email     -- committer email address
+      subdir    -- directory (inside the repository) for the post file
+      branch    -- branch name; the commit goes to refs/heads/<branch>
+
+    Returns a byte string holding the commit command with the commit
+    message and the page contents inlined.
+
+    Raises NotImplementedError if the post format is not Markdown.
+    """
+    def as_unicode(value):
+        # Byte strings are decoded as UTF-8 (the encoding used for the
+        # body text), so they can be joined with unicode text without an
+        # implicit ASCII decode.
+        if isinstance(value, unicode):
+            return value
+        return unicode(value, 'utf-8')
+
+    title = as_unicode(post_dict['title'])
+    title_bytes = title.encode('utf-8')
+    stub = title_bytes.replace(' ', '_')
+    if post_dict['format'].lower() == 'markdown':
+        ext = 'mdwn'
+    else:
+        raise NotImplementedError('Unknown extension for %s'
+                                  % post_dict['format'])
+    commit_msg = '''Importing NanoBlogger post "%s"''' % title_bytes
+
+    # Build the page as unicode throughout and encode exactly once.
+    lines = [
+        u'[[!meta title="%s"]]' % title.replace(u'"', u"'"),
+        u'[[!meta date="%s"]]' % datetime.fromtimestamp(
+            post_dict['timestamp']),
+        as_unicode(post_dict['text'])]
+
+    if post_dict['tags']:
+        lines.append(u'')
+        for tag in post_dict['tags']:
+            tag = as_unicode(tag)
+            lines.append(
+                u'[[!tag %s]]'
+                % tag.replace(u' ', u'-').replace(u'/', u'-').lower())
+        lines.append(u'')
+    # UTF-8 can represent all of the text, so no error handler is needed.
+    data = u'\n'.join(lines).encode('utf-8')
+    # The "data" counts must be byte lengths, hence len() on the encoded
+    # strings.
+    ret = [
+        "commit refs/heads/%s" % branch,
+        "committer %s <%s> %d +0000" % (name, email, post_dict['timestamp']),
+        "data %d" % len(commit_msg),
+        commit_msg,
+        "M 644 inline %s" % os.path.join(subdir, "%s.%s" % (stub, ext)),
+        "data %d" % len(data),
+        data,
+        ]
+    return '\n'.join(ret)
+
+
+def main(name, email, data_dir, subdir, branch='master'):
+    """Print a git fast-import stream for the posts found in `data_dir`.
+
+    Arguments:
+      name     -- committer name passed on to format_commit()
+      email    -- committer email passed on to format_commit()
+      data_dir -- directory holding the post files and cat_*.db files
+      subdir   -- directory (inside the repository) for the post files
+      branch   -- branch to commit to (default 'master')
+
+    One commit command per post is printed, ordered by post timestamp.
+    """
+    entries = os.listdir(data_dir)
+
+    # Tag (category) files are loaded first; every post is checked
+    # against all of them.
+    tags = [Tag(os.path.join(data_dir, entry))
+            for entry in entries if Tag.is_tag_file(entry)]
+
+    # Anything that is neither a tag file nor another *.db file (such as
+    # master.db, which just repeats tag info) is parsed as a post.
+    posts = [parse_file(os.path.join(data_dir, entry), tags)
+             for entry in entries
+             if not (Tag.is_tag_file(entry) or entry.endswith('.db'))]
+
+    for post in sorted(posts, key=lambda p: p['timestamp']):
+        print format_commit(post, name, email, subdir, branch)
+
+
+if __name__ == "__main__":
+    # Four or five arguments are expected after the script name; the
+    # fifth (branch) is optional.
+    if len(sys.argv) not in (5, 6):
+        print >>sys.stderr, "%s: usage: %s name email datadir subdir [branch] | git-fast-import " % (sys.argv[0], sys.argv[0])
+        # Exit non-zero so the calling shell can tell that nothing was
+        # exported.
+        sys.exit(1)
+    main(*sys.argv[1:])