From: W. Trevor King Date: Thu, 9 Feb 2012 15:46:01 +0000 (-0500) Subject: Add `--pdfmarks` option to pdf-merge.py. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c75bc05;p=mw2txt.git Add `--pdfmarks` option to pdf-merge.py. This makes it easy to get a pdfmark file from one PDF and apply it to another (among other things). The workflow suggested by Larry was: On Thu, Feb 09, 2012 at 03:25:09PM +0800, Larry Cai wrote: > 1. pdftk source.pdf dump_data_utf8 > in.info # save rawdata > 2. pdftk A=book-cover.pdf B=source.pdf cat A1-2 B3-end output replaced.pdf > # replace two pages for the book > 3. pdf-convert < in.info > pdfmarks > 4. gs -dBATCH -dNOPAUSE -sDEVICE=pdfwrite -sOutputFile=output.pdf > replaced.pdf pdfmarks # put bookmark back With the new option, you can accomplish the same goal with $ rm -f pdfmarks $ pdf-merge.py --pdfmarks pdfmarks source.pdf $ pdftk A=book-cover.pdf B=source.pdf cat A1-2 B3-end output replaced.pdf $ pdf-merge.py --pdfmarks pdfmarks --output output.pdf replaced.pdf Here the first call to pdf-merge.py creates pdfmarks (using `pdftk`), and the second call applies it (using `gs`). --- diff --git a/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py b/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py index cb6bec0..6266fe4 100755 --- a/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py +++ b/posts/PDF_bookmarks_with_Ghostscript/pdf-merge.py @@ -18,12 +18,14 @@ """Merge PDFs perserving bookmarks. -Thanks to Larry Cai for suggesting that Unicode be supported. +Thanks to Larry Cai for suggesting that Unicode be supported and for +discussion about the `--pdfmarks` option. 
""" import codecs as _codecs import locale as _locale import os as _os +import os.path as _os_path import re as _re import subprocess as _subprocess import sys as _sys @@ -373,6 +375,14 @@ if __name__ == '__main__': help='path to the pdftk executable') parser.add_argument('--gs', dest='gs', default=GS, help='path to the gs (Ghostscript) executable') + parser.add_argument('--pdfmarks', dest='pdfmarks', + help=('path to pdfmarks file. If not given, a ' + 'temporary file is used. If given and the file ' + 'is missing, execution will stop after the file ' + 'is created (before the Ghostscript run). If ' + 'given and the file exists, no attempt will be ' + 'make to use pdftk to generate the mark file (I ' + 'assume your input file is what you want).')) parser.add_argument('--argv-encoding', dest='argv_encoding', help=('Optionally override the locale encoding for ' 'your command line arguments.')) @@ -418,7 +428,13 @@ if __name__ == '__main__': keywords = [unicode(k, argv_encoding) for k in args.keywords] else: keywords = None - pdfmarks = generate_pdfmarks( - inputs, title=title, author=author, keywords=keywords) + if args.pdfmarks and _os_path.isfile(args.pdfmarks): + pdfmarks = open(args.pdfmarks, 'r').read() + else: + pdfmarks = generate_pdfmarks( + inputs, title=title, author=author, keywords=keywords) + if args.pdfmarks: + open(args.pdfmarks, 'w').write(pdfmarks) + _sys.exit(0) merge_pdfs(inputs=inputs, pdfmarks=pdfmarks, output=args.output, pause_for_manual_tweaking=args.pause_for_manual_tweaking)