Document NanoBlogger -> ikiwiki migration process
author W. Trevor King <wking@drexel.edu>
Fri, 27 Aug 2010 00:51:32 +0000 (20:51 -0400)
committer W. Trevor King <wking@drexel.edu>
Fri, 27 Aug 2010 00:51:32 +0000 (20:51 -0400)
posts/first_post.mdwn [deleted file]
posts/ikiwiki-nanoblogger-import.py [new file with mode: 0644]
posts/migrate_nanoblogger_to_ikiwiki.mdwn [new file with mode: 0644]

diff --git a/posts/first_post.mdwn b/posts/first_post.mdwn
deleted file mode 100644 (file)
index d494323..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-This is the first post to this example blog. To add new posts, just add
-files to the posts/ subdirectory, or use the web form.
-
-[[!tag tags/tech]]
diff --git a/posts/ikiwiki-nanoblogger-import.py b/posts/ikiwiki-nanoblogger-import.py
new file mode 100644 (file)
index 0000000..d41e7e2
--- /dev/null
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+
+"""
+    Purpose:
+    Nanoblogger-to-Ikiwiki import tool
+
+    Copyright:
+    Copyright (C) 2007  Chris Lamb <lamby@debian.org>
+    Copyright (C) 2010  W. Trevor King <wking@drexel.edu>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+    Usage: run --help as an argument with this script.
+
+    Notes:
+    I added some extra bits to include the [[!tag  foo]] stuff in the post,
+    as it wasn't before, at all. I'll diff the versions out so you can see
+    the mess I made :).
+
+"""
+
+import os, sys
+import time
+import re
+
+from datetime import datetime
+import codecs, htmlentitydefs
+
+
+class Tag (object):
+    """A NanoBlogger category ("tag") loaded from a ``cat_*.db`` file.
+
+    The first line of the file is used as the tag name and every
+    following line as the file name of a post carrying that tag.
+
+    Attributes:
+        path: path of the category file that was read.
+        tag: the tag name (first line, whitespace stripped).
+        files: list of tagged post file names (remaining lines, stripped).
+    """
+
+    def __init__(self, path):
+        """Read the category file at `path`.
+
+        Raises IndexError if the file contains no lines at all.
+        """
+        self.path = path
+        # Use a context manager so the handle is closed right away
+        # instead of lingering until garbage collection.
+        with open(path, 'r') as f:
+            lines = [x.strip() for x in f]
+        self.tag = lines[0]
+        self.files = lines[1:]
+
+    def is_tagged(self, path):
+        """Return True if the post at `path` is listed under this tag.
+
+        Only the base name of `path` is compared against the listed
+        file names, so the directory part is irrelevant.
+        """
+        return os.path.basename(path) in self.files
+
+    @staticmethod
+    def is_tag_file(filename):
+        """Return True if `filename` is named like a category file."""
+        return filename.startswith('cat_') and filename.endswith('.db')
+
+
+def parse_file(path, possible_tags):
+    """Parse one NanoBlogger post file into a dictionary.
+
+    The file must consist of ``FIELD: value`` header lines, a line
+    starting with ``-----``, a line starting with ``BODY:``, the post
+    text, and a last line starting with ``END-----``.
+
+    Arguments:
+        path: path of the post file.
+        possible_tags: iterable of objects with a `tag` attribute and an
+            `is_tagged(path)` method (e.g. `Tag` instances).
+
+    Returns a dict holding one lower-cased key per header field, plus:
+        'text':      the body as a unicode string (decoded as UTF-8),
+        'timestamp': the post date in seconds since the epoch (UTC),
+        'tags':      names of the `possible_tags` that list this file.
+
+    Raises:
+        ValueError: the file does not follow the layout above, or the
+            date cannot be parsed.
+        KeyError: the header has no ``DATE`` field.
+        NotImplementedError: the date names neither EST nor EDT.
+    """
+    import calendar  # only needed for the UTC conversion below
+
+    # Close the handle deterministically instead of leaking it.
+    with open(path, 'r') as f:
+        lines = f.readlines()
+    post_dict = {}
+    body_start = None
+    for i,line in enumerate(lines):  # parse header
+        if line.startswith('-----'):
+            body_start = i + 1
+            break
+        if ':' not in line:
+            raise ValueError('%s: malformed header line %r' % (path, line))
+        field,value = [x.strip() for x in line.split(':', 1)]
+        post_dict[field.lower()] = value
+    if body_start is None:
+        raise ValueError('%s: header is not terminated by -----' % path)
+    lines = lines[body_start:]
+    # Explicit checks rather than assert, which is stripped under -O.
+    if not lines or not lines[0].startswith('BODY:'):
+        raise ValueError('%s: expected BODY: after the header' % path)
+    if len(lines) < 2 or not lines[-1].startswith('END-----'):
+        raise ValueError('%s: expected END----- as the last line' % path)
+    lines = lines[1:-1]
+    text = '\n'.join([unicode(x.rstrip(), 'utf-8') for x in lines])
+    post_dict['text'] = text
+
+    date = post_dict['date']
+    # Seconds to add to the Eastern wall-clock time to reach UTC.  Check
+    # the zone before parsing so an unsupported zone is reported as such
+    # rather than as a strptime() failure.
+    if 'EDT' in date:
+        utc_offset = 4*60*60
+    elif 'EST' in date:
+        utc_offset = 5*60*60
+    else:
+        raise NotImplementedError('unknown time zone in %s' % date)
+    wall_clock = time.strptime(
+        date.replace('EST ', '').replace('EDT ', ''), '%c')
+    # calendar.timegm() reads the parsed fields as UTC whatever the TZ
+    # of the machine running the import; time.mktime() would apply the
+    # local zone and skew the result on every non-UTC host.
+    post_dict['timestamp'] = float(calendar.timegm(wall_clock) + utc_offset)
+    post_dict['tags'] = [t.tag for t in possible_tags if t.is_tagged(path)]
+    return post_dict
+
+
+def _as_unicode(value):
+    """Return `value` as unicode, decoding byte strings as UTF-8."""
+    if isinstance(value, unicode):
+        return value
+    return value.decode('utf-8')
+
+
+def _as_bytes(value):
+    """Return `value` as a byte string, encoding unicode as UTF-8."""
+    if isinstance(value, unicode):
+        return value.encode('utf-8')
+    return value
+
+
+def format_commit(post_dict, name, email, subdir, branch):
+    """Format one post as a git-fast-import ``commit`` command.
+
+    Arguments:
+        post_dict: dict with the keys 'title', 'format', 'timestamp',
+            'text' and 'tags' (as produced by `parse_file`).
+        name, email: committer identity written to the commit.
+        subdir: directory, relative to the repository root, that
+            receives the page.
+        branch: branch name; the commit goes to ``refs/heads/<branch>``.
+
+    Returns a byte string holding the commit command.  The page it adds
+    is ``<subdir>/<title with spaces replaced by underscores>.mdwn`` and
+    consists of ikiwiki ``meta`` directives for title and date, the post
+    text, and one ``tag`` directive per tag.
+
+    Raises NotImplementedError if the post format is not markdown.
+    """
+    title = post_dict['title']
+    stub = _as_bytes(title).replace(' ', '_')
+    if post_dict['format'].lower() == 'markdown':
+        ext = 'mdwn'
+    else:
+        raise NotImplementedError('Unknown extension for %s'
+                                  % post_dict['format'])
+    # fast-import "data" counts bytes, so measure the encoded message.
+    commit_msg = _as_bytes('''Importing NanoBlogger post "%s"''' % title)
+
+    # Build the page entirely in unicode and encode once at the end.
+    # Mixing byte-string header values with the unicode body would make
+    # join() decode them as ASCII and fail on any non-ASCII title or tag.
+    lines = [
+        u'[[!meta  title="%s"]]' % (_as_unicode(title).replace('"', r"'")),
+        u'[[!meta  date="%s"]]' % datetime.fromtimestamp(post_dict['timestamp']),
+        _as_unicode(post_dict['text'])]
+
+    tags = post_dict['tags']
+    if tags:
+        lines.append(u'')
+    for tag in tags:
+        lines.append(
+            u'[[!tag  %s]]'
+            % (_as_unicode(tag).replace(' ', '-').replace('/', '-').lower()))
+    lines.append(u'')
+    # Plain strict encoding: this script registers no 'html_replace'
+    # error handler, and UTF-8 needs no fallback.
+    data = u'\n'.join(lines).encode('utf-8')
+    ret = [
+        "commit refs/heads/%s" % branch,
+        "committer %s <%s> %d +0000" % (name, email, post_dict['timestamp']),
+        "data %d" % len(commit_msg),
+        commit_msg,
+        "M 644 inline %s" % os.path.join(subdir, "%s.%s" % (stub, ext)),
+        "data %d" % len(data),
+        data,
+    ]
+    return '\n'.join(ret)
+
+
+def main(name, email, data_dir, subdir, branch='master'):
+    """Print a git-fast-import stream for the posts found in `data_dir`.
+
+    Category files in `data_dir` are loaded as tags, every entry not
+    ending in ``.db`` is parsed as a post, and one commit per post is
+    printed to stdout in order of increasing timestamp.
+
+    `name` and `email` identify the committer, `subdir` is the target
+    directory inside the repository and `branch` the branch to commit to.
+    """
+    entries = os.listdir(data_dir)
+    # read tag (category) files
+    tags = [Tag(os.path.join(data_dir, entry))
+            for entry in entries if Tag.is_tag_file(entry)]
+    # Skipping every *.db entry drops the category files as well as
+    # master.db, which just repeats tag info.
+    posts = [parse_file(os.path.join(data_dir, entry), tags)
+             for entry in entries if not entry.endswith('.db')]
+    for post in sorted(posts, key=lambda post: post['timestamp']):
+        print format_commit(post, name, email, subdir, branch)
+
+
+if __name__ == "__main__":
+    # Four required arguments plus an optional branch name.
+    if len(sys.argv) not in (5, 6):
+        print >>sys.stderr, "%s: usage: %s name email datadir subdir [branch] | git-fast-import " % (sys.argv[0], sys.argv[0])
+        # Signal the usage error to the caller instead of exiting with 0.
+        sys.exit(1)
+    main(*sys.argv[1:])
diff --git a/posts/migrate_nanoblogger_to_ikiwiki.mdwn b/posts/migrate_nanoblogger_to_ikiwiki.mdwn
new file mode 100644 (file)
index 0000000..7f9583f
--- /dev/null
@@ -0,0 +1,9 @@
+Begin migrating from [nanoblogger](http://nanoblogger.sourceforge.net/).
+
+[Some](http://www.seanius.net/blog/2008/09/moving-to-ikiwiki/)
+[people](http://devnull.li/~jerojasro/blog/posts/blog_migrated_to_ikiwiki/)
+mention doing this, but I couldn't find a canned recipe.  I followed
+seanius in tweaking the [Wordpress import
+script](http://ikiwiki.info/tips/Importing_posts_from_Wordpress/).
+
+[[!tag tags/tech]]