From: Tomi Ollila Date: Mon, 4 Aug 2014 17:39:32 +0000 (+0300) Subject: devel: make man-to-mdwn.pl to work with generated manual pages X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=f7be8f4ebf4157a80f5ffad75be988c30267c950;p=notmuch.git devel: make man-to-mdwn.pl to work with generated manual pages The new manual pages converted from rst using sphinx or rst2man have somewhat different syntax. man-to-mdwn.pl is now adjusted to produce even better output from this syntax. The changes also include using utf-8 locale (e.g. for tables and generated hyphens) and quite a few bug fixes. This tool still produces better results than just using the html pages generated using sphinx / rst2html. For example those tools don't create inter-page hyperlinks -- and the preformatted pages written by man-to-mdwn.pl just work well with manual page content. --- diff --git a/devel/man-to-mdwn.pl b/devel/man-to-mdwn.pl index 4b59bd66..5024569e 100755 --- a/devel/man-to-mdwn.pl +++ b/devel/man-to-mdwn.pl @@ -6,18 +6,18 @@ # This program is used to generate mdwn-formatted notmuch manual pages # for notmuch wiki. Example run: # -# $ ./devel/man-to-mdwn.pl man ../notmuch-wiki +# $ ./devel/man-to-mdwn.pl doc/_build/man ../notmuch-wiki # # In case taken into more generic use, modify these comments and examples. -use 5.8.1; +use 5.10.1; use strict; use warnings; unless (@ARGV == 2) { warn "\n$0 \n\n"; # Remove/edit this comment if this script is taken into generic use. - warn "Example: ./devel/man-to-mdwn.pl man ../notmuch-wiki\n\n"; + warn "Example: ./devel/man-to-mdwn.pl doc/_build/man ../notmuch-wiki\n\n"; exit 1; } @@ -48,11 +48,6 @@ while (

) } close P; -#undef $ENV{'GROFF_NO_SGR'}; -#delete $ENV{'GROFF_NO_SGR'}; -$ENV{'GROFF_NO_SGR'} = '1'; -$ENV{'TERM'} = 'vt100'; # does this matter ? - my %htmlqh = qw/& & < < > > ' ' " "/; # do html quotation to $_[0] (which is an alias to the given arg) sub htmlquote($) @@ -70,8 +65,11 @@ while (my ($k, $v) = each %fhash) #next if -l $v; # skip symlinks here. -- not... references there may be. my @lines; - #open I, '-|', qw/groff -man -T utf8/, $v; - open I, '-|', qw/groff -man -T latin1/, $v; # this and GROFF_NO_SGR='1' + open I, '-|', qw/env -i/, "PATH=$ENV{PATH}", + qw/TERM=vt100 LANG=en_US.utf8 LC_ALL=en_US.utf8/, + qw/GROFF_NO_SGR=1 MAN_KEEP_FORMATTING=1 MANWIDTH=80/, + qw/man/, $v or die "$!"; + binmode I, ':utf8'; my ($emptyline, $pre, $hl) = (0, 0, 'h1'); while () { @@ -79,13 +77,15 @@ while (my ($k, $v) = each %fhash) $emptyline = 1; next; } - s/(?<=\S)\s{8,}.*//; # $hl = 'h1' if s/(?<=\S)\s{8,}.*//; - htmlquote $_; + # keep only leftmost in lines like 'NOTMUCH(1) notmuch NOTMUCH(1)' + s/\S\K\s{8,}\S.+\s{8,}\S.*//; # $hl = 'h1' if s/(?<=\S)\s{8,}.*//; s/[_&]\010&/&/g; - s/((?:_\010[^_])+)/$1<\/u>/g; + s/((?:_\010[^_])+)/\001u\002$1\001\/u\002/g; s/_\010(.)/$1/g; - s/((?:.\010.)+)/$1<\/b>/g; + s/((?:.\010.)+)/\001b\002$1\001\/b\002/g; s/.\010(.)/$1/g; + htmlquote $_; + s/\001//g; if (/^\S/) { $pre = 0, push @lines, "\n" if $pre; @@ -111,16 +111,18 @@ while (my ($k, $v) = each %fhash) $lines[0] =~ s/^\n//; $k = "$ARGV[1]/manpages/$k.mdwn"; open O, '>', $k or die; + binmode O, ':utf8'; print STDOUT 'Writing ', "'$k'\n"; select O; - my $pe = ''; + my ($pe, $hyphen) = ('', ''); foreach (@lines) { + #print $_; next; if ($pe) { - if (s/^(\s+)([^<]+)<\/b>\((\d+)\)//) { + if (s/^(\s+)([^<]+)\((\d+)\)<\/b>//) { my $link = maymakelink "$pe-$2-$3"; $link = maymakelink "$pe$2-$3" unless $link; if ($link) { - print "$pe-\n"; + print "$pe$hyphen\n"; print "$1$2($3)"; } else { @@ -132,8 +134,8 @@ while (my ($k, $v) = each %fhash) } $pe = ''; } - 
s/([^<]+)<\/b>\((\d+)\)/mayconvert($1, $2)/ge; - $pe = $1 if s/([^<]+)-<\/b>\s*$//; + s/([^<]+)\((\d+)\)<\/b>/mayconvert($1, $2)/ge; + ($pe, $hyphen) = ($1, $2) if s/([^<]+)([-\x{2010}])<\/b>\s*$//; print $_; } } @@ -169,7 +171,7 @@ foreach (sort srt values %fhash) open I, '<', $in or die $!; my $s; while () { - if (/^\s*[.]TH\s+\S+\s+(\S+)/) { + if (/^\s*[.]TH\s+\S+\s+"?(\S+?)"?\s/) { $s = $1; last; }