src/engine/SCons/Scanner/LaTeX.py

   1 """SCons.Scanner.LaTeX
   2
   3 This module implements the dependency scanner for LaTeX code.
   4
   5 """
   6
   7 #
   8 # __COPYRIGHT__
   9 #
  10 # Permission is hereby granted, free of charge, to any person obtaining
  11 # a copy of this software and associated documentation files (the
  12 # "Software"), to deal in the Software without restriction, including
  13 # without limitation the rights to use, copy, modify, merge, publish,
  14 # distribute, sublicense, and/or sell copies of the Software, and to
  15 # permit persons to whom the Software is furnished to do so, subject to
  16 # the following conditions:
  17 #
  18 # The above copyright notice and this permission notice shall be included
  19 # in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
  22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29
  30 __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
  31
  32 import os.path
  33 import re
  34
  35 import SCons.Scanner
  36 import SCons.Util
  37
  38 # list of graphics file extensions for TeX and LaTeX
  39 TexGraphics   = ['.eps', '.ps']
  40 LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
  41
  42 # Used as a return value of modify_env_var if the variable is not set.
  43 class _Null:
  44     pass
  45 _null = _Null
  46
  47 # The user specifies the paths in env[variable], similar to other builders.
  48 # They may be relative and must be converted to absolute, as expected
  49 # by LaTeX and Co. The environment may already have some paths in
  50 # env['ENV'][var]. These paths are honored, but the env[var] paths have
  51 # higher precedence. All changes are un-done on exit.
  52 def modify_env_var(env, var, abspath):
  53     try:
  54         save = env['ENV'][var]
  55     except KeyError:
  56         save = _null
  57     env.PrependENVPath(var, abspath)
  58     try:
  59         if SCons.Util.is_List(env[var]):
  60             #TODO(1.5)
  61             #env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
  62             env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
  63         else:
  64             # Split at os.pathsep to convert into absolute path
  65             #TODO(1.5) env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
  66             env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
  67     except KeyError:
  68         pass
  69
  70     # Convert into a string explicitly to append ":" (without which it won't search system
  71     # paths as well). The problem is that env.AppendENVPath(var, ":")
  72     # does not work, refuses to append ":" (os.pathsep).
  73
  74     if SCons.Util.is_List(env['ENV'][var]):
  75         # TODO(1.5)
  76         #env['ENV'][var] = os.pathsep.join(env['ENV'][var])
  77         env['ENV'][var] = os.pathsep.join(env['ENV'][var])
  78     # Append the trailing os.pathsep character here to catch the case with no env[var]
  79     env['ENV'][var] = env['ENV'][var] + os.pathsep
  80
  81     return save
  82
  83 class FindENVPathDirs:
  84     """A class to bind a specific *PATH variable name to a function that
  85     will return all of the *path directories."""
  86     def __init__(self, variable):
  87         self.variable = variable
  88     def __call__(self, env, dir=None, target=None, source=None, argument=None):
  89         import SCons.PathList
  90         try:
  91             path = env['ENV'][self.variable]
  92         except KeyError:
  93             return ()
  94
  95         dir = dir or env.fs._cwd
  96         path = SCons.PathList.PathList(path).subst_path(env, target, source)
  97         return tuple(dir.Rfindalldirs(path))
  98
  99
 100
 101 def LaTeXScanner():
 102     """Return a prototype Scanner instance for scanning LaTeX source files
 103     when built with latex.
 104     """
 105     ds = LaTeX(name = "LaTeXScanner",
 106                suffixes =  '$LATEXSUFFIXES',
 107                # in the search order, see below in LaTeX class docstring
 108                graphics_extensions = TexGraphics,
 109                recursive = 0)
 110     return ds
 111
 112 def PDFLaTeXScanner():
 113     """Return a prototype Scanner instance for scanning LaTeX source files
 114     when built with pdflatex.
 115     """
 116     ds = LaTeX(name = "PDFLaTeXScanner",
 117                suffixes =  '$LATEXSUFFIXES',
 118                # in the search order, see below in LaTeX class docstring
 119                graphics_extensions = LatexGraphics,
 120                recursive = 0)
 121     return ds
 122
 123 class LaTeX(SCons.Scanner.Base):
 124     """Class for scanning LaTeX files for included files.
 125
 126     Unlike most scanners, which use regular expressions that just
 127     return the included file name, this returns a tuple consisting
 128     of the keyword for the inclusion ("include", "includegraphics",
 129     "input", or "bibliography"), and then the file name itself.
 130     Based on a quick look at LaTeX documentation, it seems that we
 131     should append .tex suffix for the "include" keywords, append .tex if
 132     there is no extension for the "input" keyword, and need to add .bib
 133     for the "bibliography" keyword that does not accept extensions by itself.
 134
 135     Finally, if there is no extension for an "includegraphics" keyword
 136     latex will append .ps or .eps to find the file, while pdftex may use .pdf,
 137     .jpg, .tif, .mps, or .png.
 138
 139     The actual subset and search order may be altered by
 140     DeclareGraphicsExtensions command. This complication is ignored.
 141     The default order corresponds to experimentation with teTeX
 142         $ latex --version
 143         pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
 144         kpathsea version 3.5.4
 145     The order is:
 146         ['.eps', '.ps'] for latex
 147         ['.png', '.pdf', '.jpg', '.tif'].
 148
 149     Another difference is that the search path is determined by the type
 150     of the file being searched:
 151     env['TEXINPUTS'] for "input" and "include" keywords
 152     env['TEXINPUTS'] for "includegraphics" keyword
 153     env['TEXINPUTS'] for "lstinputlisting" keyword
 154     env['BIBINPUTS'] for "bibliography" keyword
 155     env['BSTINPUTS'] for "bibliographystyle" keyword
 156
 157     FIXME: also look for the class or style in document[class|style]{}
 158     FIXME: also look for the argument of bibliographystyle{}
 159     """
 160     keyword_paths = {'include': 'TEXINPUTS',
 161                      'input': 'TEXINPUTS',
 162                      'includegraphics': 'TEXINPUTS',
 163                      'bibliography': 'BIBINPUTS',
 164                      'bibliographystyle': 'BSTINPUTS',
 165                      'usepackage': 'TEXINPUTS',
 166                      'lstinputlisting': 'TEXINPUTS'}
 167     env_variables = SCons.Util.unique(keyword_paths.values())
 168
 169     def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
 170
 171         # We have to include \n with the % we exclude from the first part
 172         # part of the regex because the expression is compiled with re.M.
 173         # Without the \n,  the ^ could match the beginning of a *previous*
 174         # line followed by one or more newline characters (i.e. blank
 175         # lines), interfering with a match on the next line.
 176         regex = r'^[^%\n]*\\(include|includegraphics(?:\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
 177         self.cre = re.compile(regex, re.M)
 178         self.graphics_extensions = graphics_extensions
 179
 180         def _scan(node, env, path=(), self=self):
 181             node = node.rfile()
 182             if not node.exists():
 183                 return []
 184             return self.scan_recurse(node, path)
 185
 186         class FindMultiPathDirs:
 187             """The stock FindPathDirs function has the wrong granularity:
 188             it is called once per target, while we need the path that depends
 189             on what kind of included files is being searched. This wrapper
 190             hides multiple instances of FindPathDirs, one per the LaTeX path
 191             variable in the environment. When invoked, the function calculates
 192             and returns all the required paths as a dictionary (converted into
 193             a tuple to become hashable). Then the scan function converts it
 194             back and uses a dictionary of tuples rather than a single tuple
 195             of paths.
 196             """
 197             def __init__(self, dictionary):
 198                 self.dictionary = {}
 199                 for k,n in dictionary.items():
 200                     self.dictionary[k] = ( SCons.Scanner.FindPathDirs(n),
 201                                            FindENVPathDirs(n) )
 202
 203             def __call__(self, env, dir=None, target=None, source=None,
 204                                     argument=None):
 205                 di = {}
 206                 for k,(c,cENV)  in self.dictionary.items():
 207                     di[k] = ( c(env, dir=None, target=None, source=None,
 208                                    argument=None) ,
 209                               cENV(env, dir=None, target=None, source=None,
 210                                    argument=None) )
 211                 # To prevent "dict is not hashable error"
 212                 return tuple(di.items())
 213
 214         class LaTeXScanCheck:
 215             """Skip all but LaTeX source files, i.e., do not scan *.eps,
 216             *.pdf, *.jpg, etc.
 217             """
 218             def __init__(self, suffixes):
 219                 self.suffixes = suffixes
 220             def __call__(self, node, env):
 221                 current = not node.has_builder() or node.is_up_to_date()
 222                 scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
 223                 # Returning false means that the file is not scanned.
 224                 return scannable and current
 225
 226         kw['function'] = _scan
 227         kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
 228         kw['recursive'] = 0
 229         kw['skeys'] = suffixes
 230         kw['scan_check'] = LaTeXScanCheck(suffixes)
 231         kw['name'] = name
 232
 233         SCons.Scanner.Base.__init__(self, *args, **kw)
 234
 235     def _latex_names(self, include):
 236         filename = include[1]
 237         if include[0] == 'input':
 238             base, ext = os.path.splitext( filename )
 239             if ext == "":
 240                 return [filename + '.tex']
 241         if (include[0] == 'include'):
 242             return [filename + '.tex']
 243         if include[0] == 'bibliography':
 244             base, ext = os.path.splitext( filename )
 245             if ext == "":
 246                 return [filename + '.bib']
 247         if include[0] == 'usepackage':
 248             base, ext = os.path.splitext( filename )
 249             if ext == "":
 250                 return [filename + '.sty']
 251         if include[0] == 'includegraphics':
 252             base, ext = os.path.splitext( filename )
 253             if ext == "":
 254                 #TODO(1.5) return [filename + e for e in self.graphics_extensions]
 255                 #return map(lambda e: filename+e, self.graphics_extensions + TexGraphics)
 256                 # use the line above to find dependency for PDF builder when only .eps figure is present
 257                 # Since it will be found if the user tell scons how to make the pdf figure leave it out for now.
 258                 return [filename+e for e in self.graphics_extensions]
 259         return [filename]
 260
 261     def sort_key(self, include):
 262         return SCons.Node.FS._my_normcase(str(include))
 263
 264     def find_include(self, include, source_dir, path):
 265         try:
 266             sub_path = path[include[0]]
 267         except (IndexError, KeyError):
 268             sub_path = ()
 269         try_names = self._latex_names(include)
 270         for n in try_names:
 271             # see if we find it using the path in env[var]
 272             i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[0])
 273             if i:
 274                 return i, include
 275             # see if we find it using the path in env['ENV'][var]
 276             i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[1])
 277             if i:
 278                 return i, include
 279         return i, include
 280
 281     def scan(self, node):
 282         # Modify the default scan function to allow for the regular
 283         # expression to return a comma separated list of file names
 284         # as can be the case with the bibliography keyword.
 285
 286         # Cache the includes list in node so we only scan it once:
 287         # path_dict = dict(list(path))
 288         noopt_cre = re.compile('\[.*$')
 289         if node.includes != None:
 290             includes = node.includes
 291         else:
 292             includes = self.cre.findall(node.get_text_contents())
 293             # 1. Split comma-separated lines, e.g.
 294             #      ('bibliography', 'phys,comp')
 295             #    should become two entries
 296             #      ('bibliography', 'phys')
 297             #      ('bibliography', 'comp')
 298             # 2. Remove the options, e.g., such as
 299             #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
 300             #    should become
 301             #      ('includegraphics', 'picture.eps')
 302             split_includes = []
 303             for include in includes:
 304                 inc_type = noopt_cre.sub('', include[0])
 305                 inc_list = include[1].split(',')
 306                 for j in range(len(inc_list)):
 307                     split_includes.append( (inc_type, inc_list[j]) )
 308             #
 309             includes = split_includes
 310             node.includes = includes
 311
 312         return includes
 313
 314     def scan_recurse(self, node, path=()):
 315         """ do a recursive scan of the top level target file
 316         This lets us search for included files based on the
 317         directory of the main file just as latex does"""
 318
 319         path_dict = dict(list(path))
 320
 321         queue = []
 322         queue.extend( self.scan(node) )
 323         seen = {}
 324
 325         # This is a hand-coded DSU (decorate-sort-undecorate, or
 326         # Schwartzian transform) pattern.  The sort key is the raw name
 327         # of the file as specifed on the \include, \input, etc. line.
 328         # TODO: what about the comment in the original Classic scanner:
 329         # """which lets
 330         # us keep the sort order constant regardless of whether the file
 331         # is actually found in a Repository or locally."""
 332         nodes = []
 333         source_dir = node.get_dir()
 334         #for include in includes:
 335         while queue:
 336
 337             include = queue.pop()
 338             # TODO(1.5):  more compact:
 339             #try:
 340             #    if seen[include[1]] == 1:
 341             #        continue
 342             #except KeyError:
 343             #    seen[include[1]] = 1
 344             try:
 345                 already_seen = seen[include[1]]
 346             except KeyError:
 347                 seen[include[1]] = 1
 348                 already_seen = False
 349             if already_seen:
 350                 continue
 351
 352             #
 353             # Handle multiple filenames in include[1]
 354             #
 355             n, i = self.find_include(include, source_dir, path_dict)
 356             if n is None:
 357                 # Do not bother with 'usepackage' warnings, as they most
 358                 # likely refer to system-level files
 359                 if include[0] != 'usepackage':
 360                     SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
 361                                         "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
 362             else:
 363                 sortkey = self.sort_key(n)
 364                 nodes.append((sortkey, n))
 365                 # recurse down
 366                 queue.extend( self.scan(n) )
 367
 368         return [pair[1] for pair in sorted(nodes)]
 369
 370 # Local Variables:
 371 # tab-width:4
 372 # indent-tabs-mode:nil
 373 # End:
 374 # vim: set expandtab tabstop=4 shiftwidth=4: