From: W. Trevor King Date: Thu, 9 Feb 2012 11:54:34 +0000 (-0500) Subject: Remove `[:-len('&#0;')]` from Unicode parsing of pdftk output in pdf-merge.py. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=abdbff4;p=mw2txt.git Remove `[:-len('&#0;')]` from Unicode parsing of pdftk output in pdf-merge.py. Thanks to Larry Cai for pointing this out: On Thu, Feb 09, 2012 at 03:25:09PM +0800, Larry Cai wrote: > … > When I just remove "[:-len('&#0;')]", it seem works!! > … I had thought that pdftk always appended a trailing null byte to Unicode strings, but that appears to be incorrect. --- diff --git a/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py b/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py index 77c32c3..cb6bec0 100755 --- a/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py +++ b/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py @@ -102,7 +102,7 @@ class BookmarkedPDF (object): ... 'BookmarkTitle: Section 1.1.2', ... 'BookmarkLevel: 3', ... 'BookmarkPageNumber: 4', - ... 'BookmarkTitle: αβγ&#0;', + ... 'BookmarkTitle: αβγ', ... 'BookmarkLevel: 4', ... 'BookmarkPageNumber: 4', ... 'BookmarkTitle: Section 1.2', @@ -146,7 +146,7 @@ class BookmarkedPDF (object): value = int(value) elif k == 'title': if self._UNICODE_REGEXP.search(value): - value = self._unicode_replace(value[:-len('&#0;')]) + value = self._unicode_replace(value) else: value = unicode(value) bookmark_info[k] = value