4a045b4c9bc11646ec5b2cfe2ae1493d12f50cea
[genkernel.git] / maintenance / docmatcher.py
1 #! /usr/bin/python
2 # Copyright (C) 2010 Gentoo Foundation
3 # Written by Sebastian Pipping <sebastian@pipping.org>
4 # Licensed under GPL v2 or later
5
6 from __future__ import print_function
7 import re
8 import sys
9 import os
10
11
# Upper-case words that look like variable names to the regex scans below
# but are not meant to be reported as variables.  main() uses this tuple as
# the basis of its variables blacklist.
NON_VARIABLES = (
        'UTF', 'USE', 'TCP', 'SMP', 'PXE', 'PPC', 'MAC',
        'GTK', 'GNU', 'CPU', 'DOS', 'NIC', 'NFS', 'ISO',
        'TIMEOUT', 'TFTP', 'SYSTEM', 'SPARC', 'RAID', 'LABEL',
        'PROMPT', 'KERNEL', 'GRP', 'DOCTYPE', 'DHCP', 'DEFAULT',
        'ATARAID', 'APPEND',
)
16
# Variable names that main() always counts as known, in addition to the ones
# it extracts from the shell scripts.  The tail of the tuple is the cross
# product <APP>_<PROP>, grouped by application.
EXTRA_VARIABLES = (
        'ARCH_OVERRIDE',
        'BOOTLOADER',
        'CLEAR_CACHE_DIR',
        'DEFAULT_KERNEL_SOURCE',
        'DISTDIR',
        'GK_SHARE',
) + tuple(
        '%s_%s' % (app, prop)
        for app in ('DEVICE_MAPPER', 'UNIONFS_FUSE', 'BUSYBOX', 'DMRAID', 'LVM', 'ISCSI', 'FUSE')
        for prop in ('DIR', 'SRCTAR', 'VER')
)
22
23
def exract_gen_cmdline_sh():
        """Collect long options and variables from ``gen_cmdline.sh``.

        The file is read from the current working directory.  Only the
        bodies of three shell functions are looked at: ``longusage`` and
        ``usage`` (help text) and ``parse_cmdline`` (option parser).  A
        function body starts at a line that is exactly ``NAME() {`` and
        ends at the next line that is exactly ``}``.

        Returns a 3-tuple of sets:

        * long option names (without the leading ``--``) found in
          ``parse_cmdline``,
        * long option names found in ``usage``/``longusage``,
        * names of upper-case variables assigned at the start of a line in
          ``parse_cmdline``, excluding those starting with ``CMD_``.

        Raises IOError/OSError if the file cannot be read.

        The (misspelled) function name is kept as is because callers use it.
        """
        # 'with' guarantees the handle is closed even if read() fails.
        with open('gen_cmdline.sh', 'r') as f:
                gen_cmdline_sh = f.read()

        usage_lines = []
        parsing_lines = []
        dest = None  # list currently being filled, None outside the functions

        for line in gen_cmdline_sh.split('\n'):
                if line in ('longusage() {', 'usage() {'):
                        dest = usage_lines
                elif line == 'parse_cmdline() {':
                        dest = parsing_lines

                if dest is not None:
                        dest.append(line)

                # The closing brace itself is still recorded above.
                if line == '}':
                        dest = None

        parsing_code = '\n'.join(parsing_lines)
        usage_code = '\n'.join(usage_lines)

        # Raw strings keep the backslashes for the regex engine instead of
        # relying on Python passing unknown escapes like '\s' through.
        long_param = re.compile(r'--([a-z][a-z0-9-]+)')
        assignment = re.compile(r'^\s*([A-Z_]+)=', re.MULTILINE)

        gen_cmdline_sh_parsing_long_params = set(long_param.findall(parsing_code))
        gen_cmdline_sh_usage_long_params = set(long_param.findall(usage_code))

        gen_cmdline_sh_variables = set(
                var_name
                for var_name in assignment.findall(parsing_code)
                if not var_name.startswith('CMD_'))

        return gen_cmdline_sh_parsing_long_params, gen_cmdline_sh_usage_long_params, gen_cmdline_sh_variables
73
74
75 def extract_genkernel_8_txt():
76         f = open(os.path.join('doc', 'genkernel.8.txt'), 'r')
77         genkernel_8_txt = f.read()
78         f.close()
79
80         # Preprocess
81         genkernel_8_txt = genkernel_8_txt.replace('*[*no-*]*','[no-]')
82
83         yes_no = re.compile('^\\[(no-)\\]([a-z0-9-]+)$')
84
85         genkernel_8_txt_long_params = set()
86         for match in re.finditer('--((?:[a-z]|\\[no-\\])[a-z0-9-]+)', genkernel_8_txt):
87                 para_name = match.group(1)
88
89                 # Black list
90                 if para_name == 'no-':
91                         continue
92
93                 m = yes_no.match(para_name)
94                 if m:
95                         p_yes = m.group(2)
96                         p_no = m.group(1) + m.group(2)
97                         genkernel_8_txt_long_params.add(p_yes)
98                         genkernel_8_txt_long_params.add(p_no)
99                 else:
100                         genkernel_8_txt_long_params.add(para_name)
101
102         del genkernel_8_txt
103
104         return genkernel_8_txt_long_params
105
106
def extract_genkernel_xml(genkernel_xml_path, variables_blacklist):
        """Collect long options and variable names from the web page XML.

        genkernel_xml_path  -- path of the XML file to scan
        variables_blacklist -- container of upper-case words that must not
                               be reported as variables

        ``<c>``, ``</c>``, ``<b>`` and ``</b>`` tags are removed before
        scanning.  An option of the form ``no-foo`` contributes both ``foo``
        and ``no-foo``, except for ``no-ramdisk-modules`` which is recorded
        only as such.  Variables are runs of at least three characters out
        of ``A-Z`` and ``_``.

        Returns a 2-tuple ``(long_params, variables)`` of sets.
        Raises IOError/OSError if the file cannot be read.
        """
        # 'with' guarantees the handle is closed even if read() fails.
        with open(genkernel_xml_path, 'r') as f:
                genkernel_xml = f.read()

        # Preprocess
        for tag in ('<c>', '</c>', '<b>', '</b>'):
                genkernel_xml = genkernel_xml.replace(tag, '')

        yes_no = re.compile(r'^(no-)([a-z0-9-]+)$')

        genkernel_xml_long_params = set()
        for para_name in re.findall(r'--([a-z][a-z0-9-]+)', genkernel_xml):
                # Fix doc error "--no install"
                if para_name == 'no':
                        para_name = 'no-install'

                m = yes_no.match(para_name)
                if m and para_name != 'no-ramdisk-modules':
                        genkernel_xml_long_params.add(m.group(2))
                        genkernel_xml_long_params.add(m.group(1) + m.group(2))
                else:
                        genkernel_xml_long_params.add(para_name)

        genkernel_xml_variables = set(
                var_name
                for var_name in re.findall(r'[A-Z_]{3,}', genkernel_xml)
                if var_name not in variables_blacklist)

        return genkernel_xml_long_params, genkernel_xml_variables
144
145
def extract_gen_determineargs_sh():
        """Collect variable names handled by ``gen_determineargs.sh``.

        The file is read from the current working directory.  Two patterns
        are recognised:

        * ``set_config_with_override <number> <VAR>``
        * ``<VAR>=`arch_replace "${<VAR>}"``` (or ``cache_replace``), where
          both occurrences of ``<VAR>`` must be the same name.

        Returns a set of variable names.
        Raises IOError/OSError if the file cannot be read.
        """
        # 'with' guarantees the handle is closed even if read() fails.
        with open('gen_determineargs.sh', 'r') as f:
                gen_determineargs_sh = f.read()

        # Raw strings: '\s' in a plain literal is an invalid escape sequence.
        patterns = (
                r'set_config_with_override\s+[0-9]+\s+([A-Z_]+)',
                r'([A-Z_]+)=`(?:arch|cache)_replace "\$\{\1\}"`',
        )

        gen_determineargs_sh_variables = set()
        for pattern in patterns:
                gen_determineargs_sh_variables.update(
                        re.findall(pattern, gen_determineargs_sh))

        return gen_determineargs_sh_variables
163
164
def extract_genkernel_conf(variables_blacklist):
        """Collect variable names used in ``genkernel.conf``.

        variables_blacklist -- container of upper-case words that must not
                               be reported as variables

        The file is read from the current working directory.  A variable is
        a run of at least three characters out of ``A-Z`` and ``_`` at the
        start of a line, optionally preceded by ``#`` characters and
        whitespace, so commented-out settings are picked up as well.

        Returns a set of variable names.
        Raises IOError/OSError if the file cannot be read.
        """
        # 'with' guarantees the handle is closed even if read() fails.
        with open('genkernel.conf', 'r') as f:
                genkernel_conf = f.read()

        candidates = re.findall(r'^#*\s*([A-Z_]{3,})', genkernel_conf, re.MULTILINE)

        return set(
                var_name
                for var_name in candidates
                if var_name not in variables_blacklist)
180
181
def print_set(s):
        """Print the elements of s sorted, one "- item" per line.

        An empty (falsy) s is shown as "  NONE".  A blank line always
        follows the listing.
        """
        if not s:
                print('  NONE')
        else:
                bullets = ['- ' + entry for entry in sorted(s)]
                print('\n'.join(bullets))
        print()
188
189
def usage():
        """Print a one-line usage message naming this script."""
        script_name = os.path.basename(sys.argv[0])
        print('USAGE: %s GENTOO/xml/htdocs/doc/en/genkernel.xml' % script_name)
192
193
def main():
        """Compare genkernel's scripts against its documentation and print a report.

        Expects exactly one command line argument, the path to genkernel.xml;
        otherwise the usage message is printed and the process exits with
        status 1.  The report lists the options and variables found in each
        source, then what is missing from each document, then what each
        document still mentions although the scripts no longer know it.
        """
        if len(sys.argv) != 2:
                usage()
                sys.exit(1)

        parser_opts, help_opts, cmdline_vars = exract_gen_cmdline_sh()
        man_opts = extract_genkernel_8_txt()
        determineargs_vars = extract_gen_determineargs_sh()

        # Words that gen_determineargs.sh really uses must not be blacklisted.
        variables_blacklist = set(NON_VARIABLES) - determineargs_vars
        known_variables = set(EXTRA_VARIABLES) | determineargs_vars | cmdline_vars

        web_opts, web_vars = extract_genkernel_xml(sys.argv[1], variables_blacklist)
        conf_vars = extract_genkernel_conf(variables_blacklist)

        report = (
                # Status quo
                ('Options used by parser in *gen_cmdline.sh*:', parser_opts),
                ('Options mentioned in usage of *gen_cmdline.sh*:', help_opts),
                ('Options mentioned in *man page*:', man_opts),
                ('Options mentioned in *web page*:', web_opts),

                ('Variables set by *gen_cmdline.sh*:', cmdline_vars),
                ('Variables read by *gen_determineargs.sh*:', determineargs_vars),
                ('Variables mentioned in *web page*:', web_vars),
                ('Variables used in *genkernel.conf*:', conf_vars),

                # Future work (due to extensions)
                ('Options missing from the *man page*:', parser_opts - man_opts),
                ('Options missing from *--help*:', parser_opts - help_opts),
                ('Options missing from *web page*:', parser_opts - web_opts),
                ('Variables missing from *web page*:', known_variables - web_vars),
                ('Variables missing from *genkernel.conf*:', known_variables - conf_vars),

                # Future work (due to removal and updates)
                ('Removed options still mentioned in the *man page*:', man_opts - parser_opts),
                ('Removed options still mentioned in *--help*:', help_opts - parser_opts),
                ('Removed options still mentioned in *web page*:', web_opts - parser_opts),
                ('Removed variables still mentioned in *web page*:', web_vars - known_variables),
                ('Removed variables still mentioned in *genkernel.conf*:', conf_vars - known_variables),
        )

        for heading, entries in report:
                print(heading)
                print_set(entries)
269
270
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
        main()