From 81ac0a90c68f96ba258cd8662d2ab5b772e70f03 Mon Sep 17 00:00:00 2001
From: "W. Trevor King"
Date: Thu, 26 Aug 2010 20:51:32 -0400
Subject: [PATCH] Document NanoBlogger -> ikiwiki migration process

---
 posts/first_post.mdwn                     |   4 -
 posts/ikiwiki-nanoblogger-import.py       | 200 ++++++++++++++++++++++
 posts/migrate_nanoblogger_to_ikiwiki.mdwn |   9 ++
 3 files changed, 209 insertions(+), 4 deletions(-)
 delete mode 100644 posts/first_post.mdwn
 create mode 100644 posts/ikiwiki-nanoblogger-import.py
 create mode 100644 posts/migrate_nanoblogger_to_ikiwiki.mdwn

diff --git a/posts/first_post.mdwn b/posts/first_post.mdwn
deleted file mode 100644
index d494323..0000000
--- a/posts/first_post.mdwn
+++ /dev/null
@@ -1,4 +0,0 @@
-This is the first post to this example blog. To add new posts, just add
-files to the posts/ subdirectory, or use the web form.
-
-[[!tag tags/tech]]
diff --git a/posts/ikiwiki-nanoblogger-import.py b/posts/ikiwiki-nanoblogger-import.py
new file mode 100644
index 0000000..d41e7e2
--- /dev/null
+++ b/posts/ikiwiki-nanoblogger-import.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+
+"""
+    Purpose:
+        Nanoblogger-to-Ikiwiki import tool
+
+    Copyright:
+        Copyright (C) 2007 Chris Lamb
+        Copyright (C) 2010 W. Trevor King
+
+        This program is free software: you can redistribute it and/or modify
+        it under the terms of the GNU General Public License as published by
+        the Free Software Foundation, either version 3 of the License, or
+        (at your option) any later version.
+
+        This program is distributed in the hope that it will be useful,
+        but WITHOUT ANY WARRANTY; without even the implied warranty of
+        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+        GNU General Public License for more details.
+
+        You should have received a copy of the GNU General Public License
+        along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+    Usage: run --help as an argument with this script.
+
+    Notes:
+        I added some extra bits to include the [[!tag foo]] stuff in the post,
+        as it wasn't before, at all. I'll diff the versions out so you can see
+        the mess I made :).
+
+"""
+
+from __future__ import with_statement
+
+import os, sys
+import calendar
+import time
+import re
+
+from datetime import datetime
+import codecs, htmlentitydefs
+
+
+class Tag (object):
+    """A NanoBlogger category ("tag") database file.
+
+    The first line of the file is the tag name; each following line is
+    the file name of a post carrying that tag.
+    """
+    def __init__(self, path):
+        self.path = path
+        with open(path, 'r') as f:  # do not leak the file handle
+            lines = [x.strip() for x in f]
+        if not lines:
+            raise ValueError('empty tag file %s' % path)
+        self.tag = lines[0]
+        self.files = lines[1:]
+
+    def is_tagged(self, path):
+        """Return True if the post at `path` is listed under this tag."""
+        return os.path.basename(path) in self.files
+
+    @staticmethod
+    def is_tag_file(filename):
+        """Return True if `filename` is named like a tag database."""
+        return filename.startswith('cat_') and filename.endswith('.db')
+
+
+def parse_file(path, possible_tags):
+    """Parse the NanoBlogger post at `path` into a dictionary.
+
+    The header is a run of "FIELD: value" lines ended by a line that
+    starts with "-----"; the body lies between "BODY:" and "END-----".
+    Header values are stored under their lower-cased field names, the
+    UTF-8 decoded body under 'text', the UTC epoch time of the 'date'
+    field under 'timestamp', and the names of the `possible_tags` that
+    list this file under 'tags'.
+
+    Raises ValueError for a malformed file and NotImplementedError for
+    a date that names neither EST nor EDT.
+    """
+    with open(path, 'r') as f:  # do not leak the file handle
+        lines = f.readlines()
+    post_dict = {}
+    while True: # parse header
+        if not lines:
+            raise ValueError('no header terminator in %s' % path)
+        line = lines.pop(0)
+        if line.startswith('-----'):
+            break
+        field,value = [x.strip() for x in line.split(':', 1)]
+        post_dict[field.lower()] = value
+    # Explicit checks rather than assert, which "python -O" strips.
+    if not lines or not lines[0].startswith('BODY:'):
+        raise ValueError('missing BODY: marker in %s' % path)
+    lines.pop(0)
+    if not lines or not lines[-1].startswith('END-----'):
+        raise ValueError('missing END----- marker in %s' % path)
+    lines.pop(-1)
+    text = '\n'.join([unicode(x.rstrip(), 'utf-8') for x in lines])
+    post_dict['text'] = text
+
+    date = post_dict['date']
+    if 'EDT' in date:
+        utc_offset = 4*60*60
+    elif 'EST' in date:
+        utc_offset = 5*60*60
+    else:
+        raise NotImplementedError('unknown time zone in %s' % date)
+    # The parsed fields are Eastern wall-clock time.  calendar.timegm
+    # reads them as UTC whatever this machine's time zone is (time.mktime
+    # would apply the local zone), so adding the zone's distance from
+    # UTC gives the true epoch time.
+    post_dict['timestamp'] = utc_offset + calendar.timegm(time.strptime(
+            date.replace('EST ', '').replace('EDT ', ''), '%c'))
+    post_dict['tags'] = [t.tag for t in possible_tags if t.is_tagged(path)]
+    return post_dict
+
+
+def _to_unicode(value):
+    """Return `value` as unicode, decoding byte strings as UTF-8."""
+    if isinstance(value, unicode):
+        return value
+    return unicode(value, 'utf-8')
+
+
+def format_commit(post_dict, name, email, subdir, branch):
+    """Return a git fast-import "commit" command adding one post.
+
+    `post_dict` is a dictionary as returned by parse_file().  The post
+    is written to `subdir` on `branch`, committed by `name` <`email`>
+    at the post's timestamp.
+
+    Raises NotImplementedError if the post's format is not markdown.
+    """
+    # Work in unicode and encode once: joining raw byte strings with the
+    # unicode body would raise UnicodeDecodeError for non-ASCII titles.
+    title = _to_unicode(post_dict['title'])
+    stub = title.replace(u' ', u'_').encode('utf-8')
+    if post_dict['format'].lower() == 'markdown':
+        ext = 'mdwn'
+    else:
+        raise NotImplementedError('Unknown extension for %s'
+                                  % post_dict['format'])
+    commit_msg = (u'Importing NanoBlogger post "%s"' % title).encode('utf-8')
+
+    lines = [
+        u'[[!meta title="%s"]]' % title.replace(u'"', u"'"),
+        u'[[!meta date="%s"]]' % datetime.fromtimestamp(post_dict['timestamp']),
+        _to_unicode(post_dict['text'])]
+
+    if post_dict['tags']:
+        lines.append(u'')
+        for tag in post_dict['tags']:
+            tag = _to_unicode(tag).replace(u' ', u'-').replace(u'/', u'-')
+            lines.append(u'[[!tag %s]]' % tag.lower())
+        lines.append(u'')
+    # The default 'strict' handler is enough here; no 'html_replace'
+    # error handler is registered anywhere in this script.
+    data = u'\n'.join(lines).encode('utf-8')
+    ret = [
+        "commit refs/heads/%s" % branch,
+        "committer %s <%s> %d +0000" % (name, email, post_dict['timestamp']),
+        "data %d" % len(commit_msg), # byte counts, as fast-import expects
+        commit_msg,
+        "M 644 inline %s" % os.path.join(subdir, "%s.%s" % (stub, ext)),
+        "data %d" % len(data),
+        data,
+        ]
+    return '\n'.join(ret)
+
+
+def main(name, email, data_dir, subdir, branch='master'):
+    """Print a fast-import stream for every post in `data_dir`.
+
+    Posts are emitted oldest first, one commit per post.
+    """
+    files = os.listdir(data_dir)
+    tags = []
+    for x in files: # read tag (category) files
+        if Tag.is_tag_file(x):
+            tags.append(Tag(os.path.join(data_dir, x)))
+    posts = []
+    for x in files:
+        if Tag.is_tag_file(x):
+            continue
+        if x.endswith('.db'):
+            continue # ignore master.db.  it just repeats tag info
+        posts.append(parse_file(os.path.join(data_dir, x), tags))
+    posts.sort(key=lambda x:x['timestamp'])
+    for x in posts:
+        print format_commit(x, name, email, subdir, branch)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) not in (5, 6):
+        print >>sys.stderr, "%s: usage: %s name email datadir subdir [branch] | git fast-import " % (sys.argv[0], sys.argv[0])
+        sys.exit(1) # a usage error must not look like success
+    else:
+        main(*sys.argv[1:])
diff --git a/posts/migrate_nanoblogger_to_ikiwiki.mdwn b/posts/migrate_nanoblogger_to_ikiwiki.mdwn
new file mode 100644
index 0000000..7f9583f
--- /dev/null
+++ b/posts/migrate_nanoblogger_to_ikiwiki.mdwn
@@ -0,0 +1,9 @@
+Begin migrating from [nanoblogger](http://nanoblogger.sourceforge.net/).
+
+[Some](http://www.seanius.net/blog/2008/09/moving-to-ikiwiki/)
+[people](http://devnull.li/~jerojasro/blog/posts/blog_migrated_to_ikiwiki/)
+mention doing this, but I couldn't find a canned recipe.  I followed
+seanius in tweaking the [Wordpress import
+script](http://ikiwiki.info/tips/Importing_posts_from_Wordpress/).
+
+[[!tag tags/tech]]
-- 
2.26.2