Bug #222091 - Filter out any instances of the \1 character from variable
author: Zac Medico <zmedico@gentoo.org>
Mon, 6 Oct 2008 17:17:32 +0000 (17:17 -0000)
committer: Zac Medico <zmedico@gentoo.org>
Mon, 6 Oct 2008 17:17:32 +0000 (17:17 -0000)
values since this character multiplies each time that the environment
is saved (strange bash behavior). This can eventually result in
mysterious 'Argument list too long' errors from programs that have
huge strings of \1 characters in their environment. (trunk r11485)

svn path=/main/branches/2.1.4/; revision=11639

bin/filter-bash-environment.py

index d3434ff0e18d25827324e5d91c0b17c34ccf35b3..a078f0418dfc143a4c991ac0b0445f5bc0fbf474 100755 (executable)
@@ -5,62 +5,21 @@
 
 import os, re, sys
 
-here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
-func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')
-func_end_re = re.compile(r'^\}$')
+egrep_compat_map = {
+       "[:alnum:]" : r'\w',
+       "[:space:]" : r'\s',
+}
 
-var_assign_re = re.compile(r'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$')
-close_quote_re = re.compile(r'(\\"|"|\')\s*$')
-readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+')
+here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
 
-def have_end_quote(quote, line):
-       """
-       Check if the line has an end quote (useful for handling multi-line
-       quotes). This handles escaped double quotes that may occur at the
-       end of a line. The posix spec does not allow escaping of single
-       quotes inside of single quotes, so that case is not handled.
-       """
-       close_quote_match = close_quote_re.search(line)
-       return close_quote_match is not None and \
-               close_quote_match.group(1) == quote
+def compile_egrep_pattern(s):
+       for k, v in egrep_compat_map.iteritems():
+               s = s.replace(k, v)
+       return re.compile(s)
 
 def filter_bash_environment(pattern, file_in, file_out):
        here_doc_delim = None
-       in_func = None
-       multi_line_quote = None
-       multi_line_quote_filter = None
        for line in file_in:
-               if multi_line_quote is not None:
-                       if not multi_line_quote_filter:
-                               file_out.write(line)
-                       if have_end_quote(multi_line_quote, line):
-                               multi_line_quote = None
-                               multi_line_quote_filter = None
-                       continue
-               if here_doc_delim is None and in_func is None:
-                       var_assign_match = var_assign_re.match(line)
-                       if var_assign_match is not None:
-                               quote = var_assign_match.group(3)
-                               filter_this = pattern.match(var_assign_match.group(2)) \
-                                       is not None
-                               if quote is not None and not have_end_quote(quote, line):
-                                       multi_line_quote = quote
-                                       multi_line_quote_filter = filter_this
-                               if not filter_this:
-                                       readonly_match = readonly_re.match(line)
-                                       if readonly_match is not None:
-                                               declare_opts = ""
-                                               for i in (1, 2):
-                                                       group = readonly_match.group(i)
-                                                       if group is not None:
-                                                               declare_opts += group
-                                               if declare_opts:
-                                                       line = "declare -%s %s" % \
-                                                               (declare_opts, line[readonly_match.end():])
-                                               else:
-                                                       line = "declare " + line[readonly_match.end():]
-                                       file_out.write(line)
-                               continue
                if here_doc_delim is not None:
                        if here_doc_delim.match(line):
                                here_doc_delim = None
@@ -71,29 +30,15 @@ def filter_bash_environment(pattern, file_in, file_out):
                        here_doc_delim = re.compile("^%s$" % here_doc.group(1))
                        file_out.write(line)
                        continue
-               # Note: here-documents are handled before functions since otherwise
-               # it would be possible for the content of a here-document to be
-               # mistaken as the end of a function.
-               if in_func:
-                       if func_end_re.match(line) is not None:
-                               in_func = None
-                       file_out.write(line)
-                       continue
-               in_func = func_start_re.match(line)
-               if in_func is not None:
-                       file_out.write(line)
-                       continue
-               # This line is not recognized as part of a variable assignment,
-               # function definition, or here document, so just allow it to
-               # pass through.
-               file_out.write(line)
+               if pattern.match(line) is None:
+                       file_out.write(line.replace("\1", ""))
 
 if __name__ == "__main__":
-       description = "Filter out variable assignments for varable " + \
-               "names matching a given PATTERN " + \
-               "while leaving bash function definitions and here-documents " + \
-               "intact. The PATTERN is a space separated list of variable names" + \
-               " and it supports python regular expression syntax."
+       description = "Filter out any lines that match a given PATTERN " + \
+               "while leaving bash here-documents intact. The PATTERN should " + \
+               "use python regular expression syntax but [:space:] and " + \
+               "[:alnum:] character classes will be automatically translated " + \
+               "for compatibility with egrep syntax."
        usage = "usage: %s PATTERN" % os.path.basename(sys.argv[0])
        from optparse import OptionParser
        parser = OptionParser(description=description, usage=usage)
@@ -102,13 +47,6 @@ if __name__ == "__main__":
                parser.error("Missing required PATTERN argument.")
        file_in = sys.stdin
        file_out = sys.stdout
-       var_pattern = args[0].split()
-
-       # Filter invalid variable names that are not supported by bash.
-       var_pattern.append(r'\d.*')
-       var_pattern.append(r'.*\W.*')
-
-       var_pattern = "^(%s)$" % "|".join(var_pattern)
        filter_bash_environment(
-               re.compile(var_pattern), file_in, file_out)
+               compile_egrep_pattern(args[0]), file_in, file_out)
        file_out.flush()