Enable BytesWarnings.
[portage.git] / bin / filter-bash-environment.py
1 #!/usr/bin/python -bb
2 # Copyright 1999-2014 Gentoo Foundation
3 # Distributed under the terms of the GNU General Public License v2
4
5 import codecs
6 import io
7 import os
8 import re
9 import sys
10
# A redirection that opens a here-document at the end of a line
# ("<<WORD" or "<<-WORD"); group 1 captures the delimiter word.
here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')

# A line consisting only of "name ()" and a line consisting only of "}".
func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')
func_end_re = re.compile(r'^\}$')

# A variable assignment, optionally introduced by "declare [-opts]" or
# "export". Group 2 captures the variable name and group 3 captures the
# opening quote character, if the value starts with one.
var_assign_re = re.compile(
        r'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)'
        r'([^=\s]+)='
        r'("|\')?.*$')

# A quote character at the end of a line, ignoring trailing whitespace.
# A double quote directly preceded by a backslash is captured together
# with that backslash, so it can be told apart from a bare double quote.
close_quote_re = re.compile(r'(\\"|"|\')\s*$')

# A "declare" whose option string contains "r"; groups 1 and 2 capture
# the option letters found before and after that "r".
readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+')

# declare without assignment; group 2 captures the variable name.
var_declare_re = re.compile(r'^declare(\s+-\S+)?\s+([^=\s]+)\s*$')
20
def have_end_quote(quote, line):
        """
        Check if the line ends with a closing quote (useful for handling
        multi-line quotes).

        Trailing whitespace, including the newline, is ignored. For a
        double quote, the run of backslashes directly in front of the
        final quote character is counted: an odd count means the quote
        itself is escaped and does not close the string, while an even
        count means the backslashes escape each other and the quote does
        close the string (this is what a value ending in a literal
        backslash looks like). The posix spec does not allow escaping of
        single quotes inside of single quotes, so a trailing single quote
        always closes a single-quoted string.

        @param quote: the quote character that opened the string
        @param line: the text to inspect (without the opening quote)
        @return: True if line ends with an unescaped closing quote
        """
        stripped = line.rstrip()
        if not quote or not stripped.endswith(quote):
                return False
        if quote != '"':
                return True
        # Count the backslashes immediately preceding the final quote.
        before_quote = stripped[:-1]
        backslash_count = len(before_quote) - len(before_quote.rstrip('\\'))
        return backslash_count % 2 == 0
31
def filter_declare_readonly_opt(line):
        """
        Remove the readonly ("r") flag from the option string of a
        "declare" line.

        Lines that do not match readonly_re are returned unchanged. If
        other option letters remain after dropping "r" they are kept
        ("declare -rx FOO" becomes "declare -x FOO"); otherwise the
        option string is dropped entirely ("declare -r FOO" becomes
        "declare FOO").
        """
        match = readonly_re.match(line)
        if match is None:
                return line
        remainder = line[match.end():]
        # Option letters found on either side of the "r" flag.
        flags = ''.join(part for part in match.groups() if part is not None)
        if not flags:
                return 'declare ' + remainder
        return 'declare -%s %s' % (flags, remainder)
46
def filter_bash_environment(pattern, file_in, file_out):
        """
        Copy bash environment lines from file_in to file_out, leaving out
        variable assignments and declarations whose names match pattern.

        Lines inside function definitions and here-documents are written
        through unchanged. For variable lines that are kept, the readonly
        option is removed from "declare" and \\1 characters are removed
        from assigned values.

        @param pattern: compiled regular expression; a variable is dropped
                when pattern.match() succeeds on its name
        @param file_in: iterable yielding the input one line at a time
        @param file_out: object with a write() method receiving the output
        """
        # Filter out any instances of the \1 character from variable values
        # since this character multiplies each time that the environment
        # is saved (strange bash behavior). This can eventually result in
        # mysterious 'Argument list too long' errors from programs that have
        # huge strings of \1 characters in their environment. See bug #222091.
        here_doc_delim = None
        in_func = None
        multi_line_quote = None
        multi_line_quote_filter = None
        for line in file_in:
                if multi_line_quote is not None:
                        # Continuation of a quoted value that spans lines; it
                        # is kept or dropped together with its first line.
                        if not multi_line_quote_filter:
                                file_out.write(line.replace("\1", ""))
                        if have_end_quote(multi_line_quote, line):
                                multi_line_quote = None
                                multi_line_quote_filter = None
                        continue
                if here_doc_delim is None and in_func is None:
                        var_assign_match = var_assign_re.match(line)
                        if var_assign_match is not None:
                                quote = var_assign_match.group(3)
                                filter_this = pattern.match(
                                        var_assign_match.group(2)) is not None
                                # Exclude the start quote when searching for the end quote,
                                # to ensure that the start quote is not misidentified as the
                                # end quote (happens if there is a newline immediately after
                                # the start quote).
                                if quote is not None and not have_end_quote(
                                        quote, line[var_assign_match.end(2)+2:]):
                                        multi_line_quote = quote
                                        multi_line_quote_filter = filter_this
                                if not filter_this:
                                        line = filter_declare_readonly_opt(line)
                                        file_out.write(line.replace("\1", ""))
                                continue
                        declare_match = var_declare_re.match(line)
                        if declare_match is not None:
                                # declare without assignment
                                filter_this = pattern.match(
                                        declare_match.group(2)) is not None
                                if not filter_this:
                                        line = filter_declare_readonly_opt(line)
                                        file_out.write(line)
                                continue

                if here_doc_delim is not None:
                        if here_doc_delim.match(line):
                                here_doc_delim = None
                        file_out.write(line)
                        continue
                here_doc = here_doc_re.match(line)
                if here_doc is not None:
                        # The character in front of the delimiter word is "-"
                        # for the "<<-" operator and "<" for plain "<<". With
                        # "<<-" bash strips leading tabs from the line holding
                        # the delimiter, so a tab-indented terminator must end
                        # the here-document too; otherwise everything after it
                        # would be passed through as here-document content.
                        if line[here_doc.start(1) - 1] == "-":
                                delim_format = r"^\t*%s$"
                        else:
                                delim_format = "^%s$"
                        here_doc_delim = re.compile(
                                delim_format % here_doc.group(1))
                        file_out.write(line)
                        continue
                # Note: here-documents are handled before functions since otherwise
                # it would be possible for the content of a here-document to be
                # mistaken as the end of a function.
                if in_func:
                        if func_end_re.match(line) is not None:
                                in_func = None
                        file_out.write(line)
                        continue
                in_func = func_start_re.match(line)
                if in_func is not None:
                        file_out.write(line)
                        continue
                # This line is not recognized as part of a variable assignment,
                # function definition, or here document, so just allow it to
                # pass through.
                file_out.write(line)
120
if __name__ == "__main__":
        # Summary of what the tool does. Only the usage line is printed
        # below, so this string serves as in-source documentation.
        description = "Filter out variable assignments for variable " + \
                "names matching a given PATTERN " + \
                "while leaving bash function definitions and here-documents " + \
                "intact. The PATTERN is a space separated list of variable names" + \
                " and it supports python regular expression syntax."
        usage = "usage: %s PATTERN" % os.path.basename(sys.argv[0])
        args = sys.argv[1:]

        if '-h' in args or '--help' in args:
                sys.stdout.write(usage + "\n")
                sys.stdout.flush()
                sys.exit(os.EX_OK)

        if len(args) != 1:
                sys.stderr.write(usage + "\n")
                sys.stderr.write("Exactly one PATTERN argument required.\n")
                sys.stderr.flush()
                sys.exit(2)

        file_in = sys.stdin
        file_out = sys.stdout
        if sys.hexversion >= 0x3000000:
                # The environment may contain bytes that are not valid UTF-8.
                # The surrogateescape handler maps each such byte to a lone
                # surrogate when decoding and back to the same byte when
                # encoding, so content that is passed through is written out
                # byte-for-byte instead of being rewritten as U+FFFD.
                file_in = codecs.iterdecode(sys.stdin.buffer.raw,
                        'utf_8', errors='surrogateescape')
                file_out = io.TextIOWrapper(sys.stdout.buffer,
                        'utf_8', errors='surrogateescape')

        var_pattern = args[0].split()

        # Filter invalid variable names that are not supported by bash.
        var_pattern.append(r'\d.*')
        var_pattern.append(r'.*\W.*')

        # Anchor the alternation so that a name is filtered only when one
        # of the patterns matches it in full.
        var_pattern = "^(%s)$" % "|".join(var_pattern)
        filter_bash_environment(
                re.compile(var_pattern), file_in, file_out)
        file_out.flush()