src/engine/SCons/Scanner/LaTeX.py

   1 """SCons.Scanner.LaTeX
   2
   3 This module implements the dependency scanner for LaTeX code.
   4
   5 """
   6
   7 #
   8 # __COPYRIGHT__
   9 #
  10 # Permission is hereby granted, free of charge, to any person obtaining
  11 # a copy of this software and associated documentation files (the
  12 # "Software"), to deal in the Software without restriction, including
  13 # without limitation the rights to use, copy, modify, merge, publish,
  14 # distribute, sublicense, and/or sell copies of the Software, and to
  15 # permit persons to whom the Software is furnished to do so, subject to
  16 # the following conditions:
  17 #
  18 # The above copyright notice and this permission notice shall be included
  19 # in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
  22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29
  30 __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
  31
  32 import os.path
  33 import re
  34
  35 import SCons.Scanner
  36 import SCons.Util
  37
# Graphics file extensions tried when \includegraphics names a file
# without an extension.  TexGraphics is what LaTeXScanner() hands to the
# LaTeX scanner class and LatexGraphics is what PDFLaTeXScanner() hands
# to it; candidate names are generated in list order, so the list order
# is also the search order.
TexGraphics   = ['.eps', '.ps']
LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
  41
# Sentinel returned by modify_env_var when env['ENV'][var] was not set
# before the call.  Note that _null is bound to the _Null class object
# itself (not to an instance), so callers should test for it by identity.
class _Null:
    pass
_null = _Null
  46
  47 # The user specifies the paths in env[variable], similar to other builders.
  48 # They may be relative and must be converted to absolute, as expected
  49 # by LaTeX and Co. The environment may already have some paths in
  50 # env['ENV'][var]. These paths are honored, but the env[var] paths have
  51 # higher precedence. All changes are un-done on exit.
  52 def modify_env_var(env, var, abspath):
  53     try:
  54         save = env['ENV'][var]
  55     except KeyError:
  56         save = _null
  57     env.PrependENVPath(var, abspath)
  58     try:
  59         if SCons.Util.is_List(env[var]):
  60             #TODO(1.5)
  61             #env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
  62             env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
  63         else:
  64             # Split at os.pathsep to convert into absolute path
  65             #TODO(1.5) env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
  66             env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
  67     except KeyError:
  68         pass
  69
  70     # Convert into a string explicitly to append ":" (without which it won't search system
  71     # paths as well). The problem is that env.AppendENVPath(var, ":")
  72     # does not work, refuses to append ":" (os.pathsep).
  73
  74     if SCons.Util.is_List(env['ENV'][var]):
  75         # TODO(1.5)
  76         #env['ENV'][var] = os.pathsep.join(env['ENV'][var])
  77         env['ENV'][var] = os.pathsep.join(env['ENV'][var])
  78     # Append the trailing os.pathsep character here to catch the case with no env[var]
  79     env['ENV'][var] = env['ENV'][var] + os.pathsep
  80
  81     return save
  82
  83 class FindENVPathDirs:
  84     """A class to bind a specific *PATH variable name to a function that
  85     will return all of the *path directories."""
  86     def __init__(self, variable):
  87         self.variable = variable
  88     def __call__(self, env, dir=None, target=None, source=None, argument=None):
  89         import SCons.PathList
  90         try:
  91             path = env['ENV'][self.variable]
  92         except KeyError:
  93             return ()
  94
  95         dir = dir or env.fs._cwd
  96         path = SCons.PathList.PathList(path).subst_path(env, target, source)
  97         return tuple(dir.Rfindalldirs(path))
  98
  99
 100
 101 def LaTeXScanner():
 102     """Return a prototype Scanner instance for scanning LaTeX source files
 103     when built with latex.
 104     """
 105     ds = LaTeX(name = "LaTeXScanner",
 106                suffixes =  '$LATEXSUFFIXES',
 107                # in the search order, see below in LaTeX class docstring
 108                graphics_extensions = TexGraphics,
 109                recursive = 0)
 110     return ds
 111
 112 def PDFLaTeXScanner():
 113     """Return a prototype Scanner instance for scanning LaTeX source files
 114     when built with pdflatex.
 115     """
 116     ds = LaTeX(name = "PDFLaTeXScanner",
 117                suffixes =  '$LATEXSUFFIXES',
 118                # in the search order, see below in LaTeX class docstring
 119                graphics_extensions = LatexGraphics,
 120                recursive = 0)
 121     return ds
 122
 123 class LaTeX(SCons.Scanner.Base):
 124     """Class for scanning LaTeX files for included files.
 125
 126     Unlike most scanners, which use regular expressions that just
 127     return the included file name, this returns a tuple consisting
 128     of the keyword for the inclusion ("include", "includegraphics",
 129     "input", or "bibliography"), and then the file name itself.
 130     Based on a quick look at LaTeX documentation, it seems that we
 131     should append .tex suffix for the "include" keywords, append .tex if
 132     there is no extension for the "input" keyword, and need to add .bib
 133     for the "bibliography" keyword that does not accept extensions by itself.
 134
 135     Finally, if there is no extension for an "includegraphics" keyword
 136     latex will append .ps or .eps to find the file, while pdftex may use .pdf,
 137     .jpg, .tif, .mps, or .png.
 138
 139     The actual subset and search order may be altered by
 140     DeclareGraphicsExtensions command. This complication is ignored.
 141     The default order corresponds to experimentation with teTeX
 142         $ latex --version
 143         pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
 144         kpathsea version 3.5.4
 145     The order is:
 146         ['.eps', '.ps'] for latex
 147         ['.png', '.pdf', '.jpg', '.tif'].
 148
 149     Another difference is that the search path is determined by the type
 150     of the file being searched:
 151     env['TEXINPUTS'] for "input" and "include" keywords
 152     env['TEXINPUTS'] for "includegraphics" keyword
 153     env['TEXINPUTS'] for "lstinputlisting" keyword
 154     env['BIBINPUTS'] for "bibliography" keyword
 155     env['BSTINPUTS'] for "bibliographystyle" keyword
 156
 157     FIXME: also look for the class or style in document[class|style]{}
 158     FIXME: also look for the argument of bibliographystyle{}
 159     """
 160     keyword_paths = {'include': 'TEXINPUTS',
 161                      'input': 'TEXINPUTS',
 162                      'includegraphics': 'TEXINPUTS',
 163                      'bibliography': 'BIBINPUTS',
 164                      'bibliographystyle': 'BSTINPUTS',
 165                      'usepackage': 'TEXINPUTS',
 166                      'lstinputlisting': 'TEXINPUTS'}
 167     env_variables = SCons.Util.unique(keyword_paths.values())
 168
 169     def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
 170
 171         # We have to include \n with the % we exclude from the first part
 172         # part of the regex because the expression is compiled with re.M.
 173         # Without the \n,  the ^ could match the beginning of a *previous*
 174         # line followed by one or more newline characters (i.e. blank
 175         # lines), interfering with a match on the next line.
 176         regex = r'^[^%\n]*\\(include|includegraphics(?:\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
 177         self.cre = re.compile(regex, re.M)
 178         self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)
 179
 180         self.graphics_extensions = graphics_extensions
 181
 182         def _scan(node, env, path=(), self=self):
 183             node = node.rfile()
 184             if not node.exists():
 185                 return []
 186             return self.scan_recurse(node, path)
 187
 188         class FindMultiPathDirs:
 189             """The stock FindPathDirs function has the wrong granularity:
 190             it is called once per target, while we need the path that depends
 191             on what kind of included files is being searched. This wrapper
 192             hides multiple instances of FindPathDirs, one per the LaTeX path
 193             variable in the environment. When invoked, the function calculates
 194             and returns all the required paths as a dictionary (converted into
 195             a tuple to become hashable). Then the scan function converts it
 196             back and uses a dictionary of tuples rather than a single tuple
 197             of paths.
 198             """
 199             def __init__(self, dictionary):
 200                 self.dictionary = {}
 201                 for k,n in dictionary.items():
 202                     self.dictionary[k] = ( SCons.Scanner.FindPathDirs(n),
 203                                            FindENVPathDirs(n) )
 204
 205             def __call__(self, env, dir=None, target=None, source=None,
 206                                     argument=None):
 207                 di = {}
 208                 for k,(c,cENV)  in self.dictionary.items():
 209                     di[k] = ( c(env, dir=None, target=None, source=None,
 210                                    argument=None) ,
 211                               cENV(env, dir=None, target=None, source=None,
 212                                    argument=None) )
 213                 # To prevent "dict is not hashable error"
 214                 return tuple(di.items())
 215
 216         class LaTeXScanCheck:
 217             """Skip all but LaTeX source files, i.e., do not scan *.eps,
 218             *.pdf, *.jpg, etc.
 219             """
 220             def __init__(self, suffixes):
 221                 self.suffixes = suffixes
 222             def __call__(self, node, env):
 223                 current = not node.has_builder() or node.is_up_to_date()
 224                 scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
 225                 # Returning false means that the file is not scanned.
 226                 return scannable and current
 227
 228         kw['function'] = _scan
 229         kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
 230         kw['recursive'] = 0
 231         kw['skeys'] = suffixes
 232         kw['scan_check'] = LaTeXScanCheck(suffixes)
 233         kw['name'] = name
 234
 235         SCons.Scanner.Base.__init__(self, *args, **kw)
 236
 237     def _latex_names(self, include):
 238         filename = include[1]
 239         if include[0] == 'input':
 240             base, ext = os.path.splitext( filename )
 241             if ext == "":
 242                 return [filename + '.tex']
 243         if (include[0] == 'include'):
 244             return [filename + '.tex']
 245         if include[0] == 'bibliography':
 246             base, ext = os.path.splitext( filename )
 247             if ext == "":
 248                 return [filename + '.bib']
 249         if include[0] == 'usepackage':
 250             base, ext = os.path.splitext( filename )
 251             if ext == "":
 252                 return [filename + '.sty']
 253         if include[0] == 'includegraphics':
 254             base, ext = os.path.splitext( filename )
 255             if ext == "":
 256                 #TODO(1.5) return [filename + e for e in self.graphics_extensions]
 257                 #return map(lambda e: filename+e, self.graphics_extensions + TexGraphics)
 258                 # use the line above to find dependency for PDF builder when only .eps figure is present
 259                 # Since it will be found if the user tell scons how to make the pdf figure leave it out for now.
 260                 return [filename+e for e in self.graphics_extensions]
 261         return [filename]
 262
 263     def sort_key(self, include):
 264         return SCons.Node.FS._my_normcase(str(include))
 265
 266     def find_include(self, include, source_dir, path):
 267         try:
 268             sub_path = path[include[0]]
 269         except (IndexError, KeyError):
 270             sub_path = ()
 271         try_names = self._latex_names(include)
 272         for n in try_names:
 273             # see if we find it using the path in env[var]
 274             i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[0])
 275             if i:
 276                 return i, include
 277             # see if we find it using the path in env['ENV'][var]
 278             i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[1])
 279             if i:
 280                 return i, include
 281         return i, include
 282
 283     def canonical_text(self, text):
 284         """Standardize an input TeX-file contents.
 285
 286         Currently:
 287           * removes comments, unwrapping comment-wrapped lines.
 288         """
 289         out = []
 290         line_continues_a_comment = False
 291         for line in text.splitlines():
 292             line,comment = self.comment_re.findall(line)[0]
 293             if line_continues_a_comment == True:
 294                 out[-1] = out[-1] + ' ' + line.lstrip()
 295             else:
 296                 out.append(line)
 297             line_continues_a_comment = len(comment) > 0
 298         return '\n'.join(out).rstrip()+'\n'
 299
 300     def scan(self, node):
 301         # Modify the default scan function to allow for the regular
 302         # expression to return a comma separated list of file names
 303         # as can be the case with the bibliography keyword.
 304
 305         # Cache the includes list in node so we only scan it once:
 306         # path_dict = dict(list(path))
 307         noopt_cre = re.compile('\[.*$')
 308         if node.includes != None:
 309             includes = node.includes
 310         else:
 311             text = self.canonical_text(node.get_text_contents())
 312             includes = self.cre.findall(text)
 313             # 1. Split comma-separated lines, e.g.
 314             #      ('bibliography', 'phys,comp')
 315             #    should become two entries
 316             #      ('bibliography', 'phys')
 317             #      ('bibliography', 'comp')
 318             # 2. Remove the options, e.g., such as
 319             #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
 320             #    should become
 321             #      ('includegraphics', 'picture.eps')
 322             split_includes = []
 323             for include in includes:
 324                 inc_type = noopt_cre.sub('', include[0])
 325                 inc_list = include[1].split(',')
 326                 for j in range(len(inc_list)):
 327                     split_includes.append( (inc_type, inc_list[j]) )
 328             #
 329             includes = split_includes
 330             node.includes = includes
 331
 332         return includes
 333
 334     def scan_recurse(self, node, path=()):
 335         """ do a recursive scan of the top level target file
 336         This lets us search for included files based on the
 337         directory of the main file just as latex does"""
 338
 339         path_dict = dict(list(path))
 340
 341         queue = []
 342         queue.extend( self.scan(node) )
 343         seen = {}
 344
 345         # This is a hand-coded DSU (decorate-sort-undecorate, or
 346         # Schwartzian transform) pattern.  The sort key is the raw name
 347         # of the file as specifed on the \include, \input, etc. line.
 348         # TODO: what about the comment in the original Classic scanner:
 349         # """which lets
 350         # us keep the sort order constant regardless of whether the file
 351         # is actually found in a Repository or locally."""
 352         nodes = []
 353         source_dir = node.get_dir()
 354         #for include in includes:
 355         while queue:
 356
 357             include = queue.pop()
 358             # TODO(1.5):  more compact:
 359             #try:
 360             #    if seen[include[1]] == 1:
 361             #        continue
 362             #except KeyError:
 363             #    seen[include[1]] = 1
 364             try:
 365                 already_seen = seen[include[1]]
 366             except KeyError:
 367                 seen[include[1]] = 1
 368                 already_seen = False
 369             if already_seen:
 370                 continue
 371
 372             #
 373             # Handle multiple filenames in include[1]
 374             #
 375             n, i = self.find_include(include, source_dir, path_dict)
 376             if n is None:
 377                 # Do not bother with 'usepackage' warnings, as they most
 378                 # likely refer to system-level files
 379                 if include[0] != 'usepackage':
 380                     SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
 381                                         "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
 382             else:
 383                 sortkey = self.sort_key(n)
 384                 nodes.append((sortkey, n))
 385                 # recurse down
 386                 queue.extend( self.scan(n) )
 387
 388         return [pair[1] for pair in sorted(nodes)]
 389
 390 # Local Variables:
 391 # tab-width:4
 392 # indent-tabs-mode:nil
 393 # End:
 394 # vim: set expandtab tabstop=4 shiftwidth=4: