Added fix for text/TEX/multi-line_include_options.py.
[scons.git] / src / engine / SCons / Scanner / LaTeX.py
index c544108892a65cf3bfe15f5139c962c01fb96e2d..f43b19cf4360a86a10139d74e7a7a87b20ba0b1a 100644 (file)
@@ -30,10 +30,73 @@ This module implements the dependency scanner for LaTeX code.
 __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
 
 import os.path
-import string
 import re
 
 import SCons.Scanner
+import SCons.Util
+
+# list of graphics file extensions for TeX and LaTeX
+TexGraphics   = ['.eps', '.ps']
+LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
+
+# Sentinel returned by modify_env_var when env['ENV'][var] was not set.
+class _Null:
+    pass
+_null = _Null  # the class object itself serves as the sentinel
+
+# The user specifies the paths in env[variable], similar to other builders.
+# They may be relative and must be converted to absolute, as expected
+# by LaTeX and Co. The environment may already have some paths in
+# env['ENV'][var]. These paths are honored, but the env[var] paths have
+# higher precedence. All changes are un-done on exit.
+def modify_env_var(env, var, abspath):
+    """Prepend abspath, then the absolute paths from env[var], to env['ENV'][var].
+
+    A missing env[var] is skipped and a trailing os.pathsep is appended.
+    Returns the previous env['ENV'][var], or _null when it was not set.
+    """
+    try:
+        save = env['ENV'][var]
+    except KeyError:
+        save = _null
+    env.PrependENVPath(var, abspath)
+    try:
+        if SCons.Util.is_List(env[var]):
+            env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
+        else:
+            # Split at os.pathsep to convert into absolute path
+            env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
+    except KeyError:
+        pass
+
+    # Convert into a string explicitly to append ":" (without which it won't search system
+    # paths as well). The problem is that env.AppendENVPath(var, ":")
+    # does not work, refuses to append ":" (os.pathsep).
+
+    if SCons.Util.is_List(env['ENV'][var]):
+        env['ENV'][var] = os.pathsep.join(env['ENV'][var])
+    # Append the trailing os.pathsep character here to catch the case with no env[var]
+    env['ENV'][var] = env['ENV'][var] + os.pathsep
+
+    return save
+
+class FindENVPathDirs:
+    """Callable tied to one *PATH variable name; calling it yields the
+    directories found for the path stored in env['ENV'][variable]."""
+    def __init__(self, variable):
+        self.variable = variable
+    def __call__(self, env, dir=None, target=None, source=None, argument=None):
+        import SCons.PathList
+        try:
+            env_path = env['ENV'][self.variable]
+        except KeyError:
+            return ()
+
+        start_dir = dir or env.fs._cwd
+        resolved = SCons.PathList.PathList(env_path).subst_path(env, target, source)
+        return tuple(start_dir.Rfindalldirs(resolved))
+
+
 
 def LaTeXScanner():
     """Return a prototype Scanner instance for scanning LaTeX source files
@@ -42,7 +105,7 @@ def LaTeXScanner():
     ds = LaTeX(name = "LaTeXScanner",
                suffixes =  '$LATEXSUFFIXES',
                # in the search order, see below in LaTeX class docstring
-               graphics_extensions = ['.eps', '.ps'],
+               graphics_extensions = TexGraphics,
                recursive = 0)
     return ds
 
@@ -53,7 +116,7 @@ def PDFLaTeXScanner():
     ds = LaTeX(name = "PDFLaTeXScanner",
                suffixes =  '$LATEXSUFFIXES',
                # in the search order, see below in LaTeX class docstring
-               graphics_extensions = ['.png', '.pdf', '.jpg', '.tif'],
+               graphics_extensions = LatexGraphics,
                recursive = 0)
     return ds
 
@@ -86,7 +149,8 @@ class LaTeX(SCons.Scanner.Base):
     Another difference is that the search path is determined by the type
     of the file being searched:
     env['TEXINPUTS'] for "input" and "include" keywords
-    env['TEXPICTS'] for "includegraphics" keyword
+    env['TEXINPUTS'] for "includegraphics" keyword
+    env['TEXINPUTS'] for "lstinputlisting" keyword
     env['BIBINPUTS'] for "bibliography" keyword
     env['BSTINPUTS'] for "bibliographystyle" keyword
 
@@ -95,23 +159,31 @@ class LaTeX(SCons.Scanner.Base):
     """
     keyword_paths = {'include': 'TEXINPUTS',
                      'input': 'TEXINPUTS',
-                     'includegraphics': 'TEXPICTS',
+                     'includegraphics': 'TEXINPUTS',
                      'bibliography': 'BIBINPUTS',
                      'bibliographystyle': 'BSTINPUTS',
-                     'usepackage': 'TEXINPUTS'}
+                     'usepackage': 'TEXINPUTS',
+                     'lstinputlisting': 'TEXINPUTS'}
     env_variables = SCons.Util.unique(keyword_paths.values())
 
     def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
 
-        regex = '\\\\(include|includegraphics(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
+        # We have to include \n with the % we exclude from the first
+        # part of the regex because the expression is compiled with re.M.
+        # Without the \n,  the ^ could match the beginning of a *previous*
+        # line followed by one or more newline characters (i.e. blank
+        # lines), interfering with a match on the next line.
+        regex = r'^[^%\n]*\\(include|includegraphics(?:\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
         self.cre = re.compile(regex, re.M)
+        self.comment_re = re.compile(r'^([^%\n]*)(.*)$', re.M)
+
         self.graphics_extensions = graphics_extensions
 
         def _scan(node, env, path=(), self=self):
             node = node.rfile()
             if not node.exists():
                 return []
-            return self.scan(node, path)
+            return self.scan_recurse(node, path)
 
         class FindMultiPathDirs:
             """The stock FindPathDirs function has the wrong granularity:
@@ -126,16 +198,20 @@ class LaTeX(SCons.Scanner.Base):
             """
             def __init__(self, dictionary):
                 self.dictionary = {}
-                for k, n  in dictionary.iteritems():
-                    self.dictionary[k] = SCons.Scanner.FindPathDirs(n)
-            def __call__(self, env, dir=None, target=None, source=None, argument=None):
+                for k,n in dictionary.items():
+                    self.dictionary[k] = ( SCons.Scanner.FindPathDirs(n),
+                                           FindENVPathDirs(n) )
+
+            def __call__(self, env, dir=None, target=None, source=None,
+                                    argument=None):
                 di = {}
-                for k, c  in self.dictionary.iteritems():
-                    p = c(env, dir=None, target=None, source=None, argument=None)
-                    di[k] = p
+                for k,(c,cENV)  in self.dictionary.items():
+                    di[k] = ( c(env, dir=None, target=None, source=None,
+                                   argument=None) ,
+                              cENV(env, dir=None, target=None, source=None,
+                                   argument=None) )
                 # To prevent "dict is not hashable error"
-                rv = tuple([(k,v) for k, v in di.iteritems()])
-                return rv
+                return tuple(di.items())
 
         class LaTeXScanCheck:
             """Skip all but LaTeX source files, i.e., do not scan *.eps,
@@ -145,18 +221,18 @@ class LaTeX(SCons.Scanner.Base):
                 self.suffixes = suffixes
             def __call__(self, node, env):
                 current = not node.has_builder() or node.is_up_to_date()
-                scannable = node.get_suffix() in env.subst(self.suffixes)
+                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                 # Returning false means that the file is not scanned.
                 return scannable and current
 
         kw['function'] = _scan
         kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
-        kw['recursive'] = 1
+        kw['recursive'] = 0
         kw['skeys'] = suffixes
         kw['scan_check'] = LaTeXScanCheck(suffixes)
         kw['name'] = name
 
-        apply(SCons.Scanner.Base.__init__, (self,) + args, kw)
+        SCons.Scanner.Base.__init__(self, *args, **kw)
 
     def _latex_names(self, include):
         filename = include[1]
@@ -177,7 +253,11 @@ class LaTeX(SCons.Scanner.Base):
         if include[0] == 'includegraphics':
             base, ext = os.path.splitext( filename )
             if ext == "":
-                return [filename + e for e in self.graphics_extensions]
+                #TODO(1.5) return [filename + e for e in self.graphics_extensions]
+                #return map(lambda e: filename+e, self.graphics_extensions + TexGraphics)
+                # Use the line above to find the dependency for the PDF builder when only an .eps figure is present.
+                # Since it will be found if the user tells scons how to make the pdf figure, leave it out for now.
+                return [filename+e for e in self.graphics_extensions]
         return [filename]
 
     def sort_key(self, include):
@@ -186,27 +266,45 @@ class LaTeX(SCons.Scanner.Base):
     def find_include(self, include, source_dir, path):
         try:
             sub_path = path[include[0]]
-        except:
+        except (IndexError, KeyError):
             sub_path = ()
         try_names = self._latex_names(include)
         for n in try_names:
-            i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path)
+            # see if we find it using the path in env[var]
+            i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[0])
+            if i:
+                return i, include
+            # see if we find it using the path in env['ENV'][var]
+            i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[1])
             if i:
                 return i, include
         return i, include
 
-    def scan(self, node, path=()):
+    def canonical_text(self, text):
+        """Strip %-comments; a line that follows a commented line is appended to it."""
+        kept, after_comment = [], False
+        for raw in text.splitlines():
+            code, remark = self.comment_re.findall(raw)[0]
+            if after_comment:
+                kept[-1] += ' ' + code.lstrip()
+            else:
+                kept.append(code)
+            after_comment = bool(remark)
+        return '\n'.join(kept).rstrip() + '\n'
+
+    def scan(self, node):
         # Modify the default scan function to allow for the regular
         # expression to return a comma separated list of file names
         # as can be the case with the bibliography keyword.
 
         # Cache the includes list in node so we only scan it once:
-        path_dict = dict(list(path))
+        # scan() takes no 'path' argument; path_dict is built in scan_recurse() instead.
         noopt_cre = re.compile('\[.*$')
         if node.includes != None:
             includes = node.includes
         else:
-            includes = self.cre.findall(node.get_contents())
+            text = self.canonical_text(node.get_text_contents())
+            includes = self.cre.findall(text)
             # 1. Split comma-separated lines, e.g.
             #      ('bibliography', 'phys,comp')
             #    should become two entries
@@ -219,13 +317,26 @@ class LaTeX(SCons.Scanner.Base):
             split_includes = []
             for include in includes:
                 inc_type = noopt_cre.sub('', include[0])
-                inc_list = string.split(include[1],',')
+                inc_list = include[1].split(',')
                 for j in range(len(inc_list)):
                     split_includes.append( (inc_type, inc_list[j]) )
             #
             includes = split_includes
             node.includes = includes
 
+        return includes
+
+    def scan_recurse(self, node, path=()):
+        """ do a recursive scan of the top level target file
+        This lets us search for included files based on the
+        directory of the main file just as latex does"""
+
+        path_dict = dict(list(path))
+        
+        queue = [] 
+        queue.extend( self.scan(node) )
+        seen = {}
+
         # This is a hand-coded DSU (decorate-sort-undecorate, or
         # Schwartzian transform) pattern.  The sort key is the raw name
         # of the file as specifed on the \include, \input, etc. line.
@@ -235,7 +346,24 @@ class LaTeX(SCons.Scanner.Base):
         # is actually found in a Repository or locally."""
         nodes = []
         source_dir = node.get_dir()
-        for include in includes:
+        #for include in includes:
+        while queue:
+            
+            include = queue.pop()
+            # TODO(1.5):  more compact:
+            #try:
+            #    if seen[include[1]] == 1:
+            #        continue
+            #except KeyError:
+            #    seen[include[1]] = 1
+            try:
+                already_seen = seen[include[1]]
+            except KeyError:
+                seen[include[1]] = 1
+                already_seen = False
+            if already_seen:
+                continue
+
             #
             # Handle multiple filenames in include[1]
             #
@@ -249,7 +377,13 @@ class LaTeX(SCons.Scanner.Base):
             else:
                 sortkey = self.sort_key(n)
                 nodes.append((sortkey, n))
-        #
-        nodes.sort()
-        nodes = map(lambda pair: pair[1], nodes)
-        return nodes
+                # recurse down 
+                queue.extend( self.scan(n) )
+
+        return [pair[1] for pair in sorted(nodes)]
+
+# Local Variables:
+# tab-width:4
+# indent-tabs-mode:nil
+# End:
+# vim: set expandtab tabstop=4 shiftwidth=4: