Merge branch 'master' of git+ssh://git.overlays.gentoo.org/proj/portage
[portage.git] / bin / filter-bash-environment.py
1 #!/usr/bin/python
2 # Copyright 1999-2007 Gentoo Foundation
3 # Distributed under the terms of the GNU General Public License v2
4
5 import codecs
6 import os, re, sys
7
# Matches a line that ends with a here-document redirection ("<<" or "<<-"
# preceded by whitespace); group 1 is the delimiter word.
here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
# Matches a line made up only of a name (word characters and hyphens)
# followed by "()", with optional whitespace after the name and at the end.
func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')
# Matches a line consisting solely of "}".
func_end_re = re.compile(r'^\}$')

# Matches an assignment at the start of a line, optionally preceded by
# "declare", "declare -<options>" or "export". Group 2 is the variable name
# and group 3 is the quote character that directly follows "=", if any.
var_assign_re = re.compile(r'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$')
# Matches a single quote, a double quote or a backslash-escaped double quote
# that is followed only by whitespace up to the end of the line; group 1 is
# the matched quote text.
close_quote_re = re.compile(r'(\\"|"|\')\s*$')
# Matches a leading "declare" whose option cluster contains "r"; groups 1 and
# 2 are the option characters before and after that "r".
readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+')
# declare without assignment: an optional "-<options>" cluster, then a single
# name (group 2) and nothing else on the line.
var_declare_re = re.compile(r'^declare(\s+-\S+)?\s+([^=\s]+)\s*$')
17
def have_end_quote(quote, line):
        """
        Check if the line has an end quote (useful for handling multi-line
        quotes).

        The line closes the quoted value when its last non-whitespace
        character is the given quote character. Inside double quotes a
        backslash escapes the character that follows it, so a trailing
        double quote only counts as the end quote when it is preceded by
        an even number of backslashes (an odd number means the quote
        itself is escaped, an even number means the backslashes escape
        each other). The posix spec does not allow escaping of single
        quotes inside of single quotes, so backslashes are not treated
        specially in that case.

        quote is the quote character that opened the value, line is the
        text to examine. Returns True if line ends with an unescaped
        closing quote, False otherwise (including when quote is None or
        empty).
        """
        if not quote:
                return False
        text = line.rstrip()
        if not text.endswith(quote):
                return False
        if quote != '"':
                return True
        # Count the run of backslashes directly in front of the final
        # double quote; each pair is an escaped backslash, and a single
        # leftover backslash escapes the quote.
        before_quote = text[:-1]
        backslashes = len(before_quote) - len(before_quote.rstrip('\\'))
        return backslashes % 2 == 0
28
def filter_declare_readonly_opt(line):
        """
        Return line with the "r" option removed from a leading declare
        statement. Any other option characters found around the "r" are
        kept; when none remain the option cluster is dropped entirely.
        Lines that readonly_re does not match are returned unchanged.
        """
        found = readonly_re.match(line)
        if found is None:
                return line
        # Option characters captured before and after the "r".
        kept_opts = ''.join(
                part for part in found.group(1, 2) if part is not None)
        remainder = line[found.end():]
        if kept_opts:
                return 'declare -%s %s' % (kept_opts, remainder)
        return 'declare ' + remainder
43
def filter_bash_environment(pattern, file_in, file_out):
        """
        Copy lines from file_in to file_out, leaving out variable
        assignments and assignment-less declare statements whose variable
        name matches pattern. Values that span several lines because of an
        unterminated quote are kept or dropped as a whole. Lines inside
        function definitions and here-documents are written through as-is.
        Variable lines that are kept go through filter_declare_readonly_opt
        before being written.
        """
        # Filter out any instances of the \1 character from variable values
        # since this character multiplies each time that the environment
        # is saved (strange bash behavior). This can eventually result in
        # mysterious 'Argument list too long' errors from programs that have
        # huge strings of \1 characters in their environment. See bug #222091.
        heredoc_end_re = None   # terminator regex while inside a here-document
        func_start = None       # match of the opening line while inside a function
        open_quote = None       # quote character of a value that is still open
        skip_quoted = None      # whether that open value is being filtered out
        for line in file_in:
                if open_quote is not None:
                        # Continuation of a multi-line quoted value.
                        if not skip_quoted:
                                file_out.write(line.replace("\1", ""))
                        if have_end_quote(open_quote, line):
                                open_quote = None
                                skip_quoted = None
                        continue

                if heredoc_end_re is None and func_start is None:
                        assignment = var_assign_re.match(line)
                        if assignment is not None:
                                start_quote = assignment.group(3)
                                is_filtered = \
                                        pattern.match(assignment.group(2)) is not None
                                if start_quote is not None:
                                        # Look for the end quote only after the "=" and
                                        # the start quote, so that the start quote is not
                                        # misidentified as the end quote (happens if there
                                        # is a newline immediately after the start quote).
                                        value_tail = line[assignment.end(2) + 2:]
                                        if not have_end_quote(start_quote, value_tail):
                                                open_quote = start_quote
                                                skip_quoted = is_filtered
                                if not is_filtered:
                                        kept = filter_declare_readonly_opt(line)
                                        file_out.write(kept.replace("\1", ""))
                                continue

                        declaration = var_declare_re.match(line)
                        if declaration is not None:
                                # declare without assignment
                                if pattern.match(declaration.group(2)) is None:
                                        file_out.write(filter_declare_readonly_opt(line))
                                continue

                # Everything from here on is written through unchanged; the
                # branches below only keep track of the parser state.
                # Note: here-documents are handled before functions since
                # otherwise it would be possible for the content of a
                # here-document to be mistaken as the end of a function.
                if heredoc_end_re is not None:
                        if heredoc_end_re.match(line):
                                heredoc_end_re = None
                else:
                        heredoc = here_doc_re.match(line)
                        if heredoc is not None:
                                heredoc_end_re = re.compile("^%s$" % heredoc.group(1))
                        elif func_start is not None:
                                if func_end_re.match(line) is not None:
                                        func_start = None
                        else:
                                # Either the first line of a function, or a line
                                # that is not recognized as part of a variable
                                # assignment, function definition, or here
                                # document.
                                func_start = func_start_re.match(line)
                file_out.write(line)
117
if __name__ == "__main__":
        from optparse import OptionParser

        # Command line: exactly one positional PATTERN argument is expected.
        parser = OptionParser(
                description="".join((
                        "Filter out variable assignments for variable ",
                        "names matching a given PATTERN ",
                        "while leaving bash function definitions and here-documents ",
                        "intact. The PATTERN is a space separated list of variable names",
                        " and it supports python regular expression syntax.",
                )),
                usage="usage: %s PATTERN" % os.path.basename(sys.argv[0]))
        options, args = parser.parse_args(sys.argv[1:])
        if len(args) != 1:
                parser.error("Missing required PATTERN argument.")

        if sys.hexversion >= 0x3000000:
                # Python 3: decode stdin as utf_8 (replacing undecodable
                # bytes) and encode stdout as utf_8 with backslash escapes
                # for anything that cannot be encoded.
                import io
                file_in = codecs.iterdecode(sys.stdin.buffer.raw,
                        'utf_8', errors='replace')
                file_out = io.TextIOWrapper(sys.stdout.buffer,
                        'utf_8', errors='backslashreplace')
        else:
                file_in = sys.stdin
                file_out = sys.stdout

        # The user supplied names, followed by patterns that filter invalid
        # variable names that are not supported by bash.
        alternatives = args[0].split() + [r'\d.*', r'.*\W.*']
        var_pattern = re.compile("^(%s)$" % "|".join(alternatives))

        filter_bash_environment(var_pattern, file_in, file_out)
        file_out.flush()