Move the variable name validation regexes (for bug 211949) into
[portage.git] / bin / filter-bash-environment.py
1 #!/usr/bin/env python
2 # Copyright 1999-2007 Gentoo Foundation
3 # Distributed under the terms of the GNU General Public License v2
4 # $Id$
5
6 import os, re, sys
7
# Maps each supported egrep bracket class to the python regular
# expression escape that is substituted for it. Note that \w also
# matches the underscore character.
egrep_compat_map = {
    "[:alnum:]": r'\w',
    "[:digit:]": r'\d',
    "[:space:]": r'\s',
}
13
# A line that ends with whitespace, "<<" (optionally "<<-") and a word;
# group 1 captures that word, the here-document delimiter.
here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')

# A line made of a name (word characters and hyphens) followed by "()".
func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')

# A line consisting solely of a closing brace.
func_end_re = re.compile(r'^\}$')

# A variable assignment at the start of a line.
var_assign_re = re.compile(
    r'(^|^declare\s+-\S+\s+|^export\s+)'  # group 1: optional declare/export prefix
    r'([^=\s]+)'                          # group 2: variable name
    r'=("|\')?.*$'                        # group 3: optional opening quote
)

# A quote character, or a backslash-escaped double quote, followed only
# by optional whitespace up to the end of the line.
close_quote_re = re.compile(r'(\\"|"|\')\s*$')
20
def compile_egrep_pattern(s):
    """
    Compile an egrep-style pattern into a python regular expression.

    Every key of egrep_compat_map that occurs in s is replaced with the
    python equivalent stored as its value, and the resulting text is
    handed to re.compile(). The compiled pattern object is returned;
    re.compile() raises re.error if the translated text is not a valid
    regular expression.
    """
    # items() is available on both python 2 and python 3 dictionaries,
    # whereas iteritems() no longer exists in python 3.
    for k, v in egrep_compat_map.items():
        s = s.replace(k, v)
    return re.compile(s)
25
def have_end_quote(quote, line):
    """
    Tell whether the given line finishes with the closing quote
    character (which is how the end of a multi-line quoted value is
    detected). A double quote preceded by a backslash at the end of
    the line is recognized as escaped, so it does not count as a
    closing double quote. The posix spec does not allow single quotes
    to be escaped inside of single quotes, so no such handling exists
    for them.
    """
    trailing = close_quote_re.search(line)
    if trailing is None:
        # No quote character sits at the end of the line.
        return False
    return trailing.group(1) == quote
36
def filter_bash_environment(pattern, file_in, file_out):
    """
    Copy the lines of file_in to file_out, leaving out the variable
    assignments whose variable name is matched by pattern.

    pattern is a compiled regular expression; its match() method is
    applied to the variable name of each assignment found outside of
    function definitions and here-documents. file_in is iterated line
    by line and file_out only needs a write() method. Quoted values
    that span several lines are kept or dropped as a whole. Function
    definitions, here-documents and all unrecognized lines are passed
    through unchanged. Nothing is returned.
    """
    here_doc_delim = None
    in_func = None
    multi_line_quote = None
    multi_line_quote_filter = None
    for line in file_in:
        if multi_line_quote is not None:
            # Continuation of a quoted value started on an earlier line.
            if not multi_line_quote_filter:
                file_out.write(line)
            if have_end_quote(multi_line_quote, line):
                multi_line_quote = None
                multi_line_quote_filter = None
            continue
        if here_doc_delim is None and in_func is None:
            var_assign_match = var_assign_re.match(line)
            if var_assign_match is not None:
                quote = var_assign_match.group(3)
                filter_this = \
                    pattern.match(var_assign_match.group(2)) is not None
                if quote is not None:
                    # Search for the closing quote only in the text that
                    # follows the opening quote. Otherwise a line such as
                    # FOO=" (value continued on the next lines) would have
                    # its opening quote mistaken for the closing one and
                    # the continuation lines would escape the filter.
                    value_tail = line[var_assign_match.end(3):]
                    if not have_end_quote(quote, value_tail):
                        multi_line_quote = quote
                        multi_line_quote_filter = filter_this
                if not filter_this:
                    file_out.write(line)
                continue
        if here_doc_delim is not None:
            # Inside a here-document: copy lines until the delimiter.
            if here_doc_delim.match(line):
                here_doc_delim = None
            file_out.write(line)
            continue
        here_doc = here_doc_re.match(line)
        if here_doc is not None:
            here_doc_delim = re.compile("^%s$" % here_doc.group(1))
            file_out.write(line)
            continue
        # Note: here-documents are handled before functions since otherwise
        # it would be possible for the content of a here-document to be
        # mistaken as the end of a function.
        if in_func:
            if func_end_re.match(line) is not None:
                in_func = None
            file_out.write(line)
            continue
        in_func = func_start_re.match(line)
        if in_func is not None:
            file_out.write(line)
            continue
        # This line is not recognized as part of a variable assignment,
        # function definition, or here document, so just allow it to
        # pass through.
        file_out.write(line)
88
# Command line entry point: read a bash environment dump from stdin and
# write it to stdout without the variables selected by PATTERN.
if __name__ == "__main__":
    description = (
        "Filter out variable assignments for variable "
        "names matching a given PATTERN "
        "while leaving bash function definitions and here-documents "
        "intact. The PATTERN is a space separated list of variable names"
        " and it supports python regular expression syntax in addition to"
        " [:alnum:], [:digit:], and [:space:] "
        "character classes which will be automatically translated "
        "for compatibility with egrep syntax."
    )
    usage = "usage: %s PATTERN" % os.path.basename(sys.argv[0])
    from optparse import OptionParser
    parser = OptionParser(description=description, usage=usage)
    options, args = parser.parse_args(sys.argv[1:])
    # Exactly one positional argument is accepted; report the two ways
    # of getting that wrong with a message that fits each of them.
    if not args:
        parser.error("Missing required PATTERN argument.")
    if len(args) > 1:
        parser.error("Too many arguments: PATTERN must be " + \
            "given as a single argument.")
    file_in = sys.stdin
    file_out = sys.stdout
    var_pattern = args[0].split()

    # Filter invalid variable names that are not supported by bash.
    var_pattern.append(r'\d.*')
    var_pattern.append(r'.*\W.*')

    # Anchor the alternation so that only whole variable names match.
    var_pattern = "^(%s)$" % "|".join(var_pattern)
    filter_bash_environment(
        compile_egrep_pattern(var_pattern), file_in, file_out)
    file_out.flush()