3 This module implements the dependency scanner for LaTeX code.
10 # Permission is hereby granted, free of charge, to any person obtaining
11 # a copy of this software and associated documentation files (the
12 # "Software"), to deal in the Software without restriction, including
13 # without limitation the rights to use, copy, modify, merge, publish,
14 # distribute, sublicense, and/or sell copies of the Software, and to
15 # permit persons to whom the Software is furnished to do so, subject to
16 # the following conditions:
18 # The above copyright notice and this permission notice shall be included
19 # in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
# Graphics file extensions handed to the scanners as ``graphics_extensions``:
# TexGraphics for the latex scanner, LatexGraphics for the pdflatex scanner.
# The scanner appends each extension in turn to an \includegraphics
# argument when building candidate file names, so list order is search order.
TexGraphics = ['.eps', '.ps']
LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
42 # Used as a return value of modify_env_var if the variable is not set.
47 # The user specifies the paths in env[variable], similar to other builders.
48 # They may be relative and must be converted to absolute, as expected
49 # by LaTeX and Co. The environment may already have some paths in
50 # env['ENV'][var]. These paths are honored, but the env[var] paths have
51 # higher precedence. All changes are un-done on exit.
def modify_env_var(env, var, abspath):
    """Put the search paths for ``var`` into the execution environment.

    ``abspath`` is prepended to ``env['ENV'][var]`` first.  If the
    construction variable ``env[var]`` is set, its entries (a list, or a
    string split at ``os.pathsep``) are converted to absolute paths and
    prepended in front of that, so they take precedence.  Whatever was
    already in ``env['ENV'][var]`` is kept behind them.  The result is
    stored as a single string ending in ``os.pathsep``.

    Returns the value ``env['ENV'][var]`` had on entry so the caller can
    restore it, or the module-level "not set" sentinel when it was absent.
    """
    try:
        save = env['ENV'][var]
    except KeyError:
        # NOTE(review): `_null` is the module's "variable was not set"
        # sentinel described in the comment above this function; its
        # definition is not part of this block -- confirm the name.
        save = _null
    env.PrependENVPath(var, abspath)

    try:
        configured = env[var]
    except KeyError:
        # No construction variable of that name: nothing more to prepend.
        pass
    else:
        if SCons.Util.is_List(configured):
            paths = [os.path.abspath(str(p)) for p in configured]
        else:
            # Split at os.pathsep to convert each entry into an absolute path.
            paths = [os.path.abspath(p)
                     for p in str(configured).split(os.pathsep)]
        env.PrependENVPath(var, paths)

    # Convert into a string explicitly to append ":" (without which it
    # won't search system paths as well).  env.AppendENVPath(var, ":")
    # cannot be used: it refuses to append a bare os.pathsep.
    if SCons.Util.is_List(env['ENV'][var]):
        env['ENV'][var] = os.pathsep.join(env['ENV'][var])
    # Append the trailing os.pathsep here so the case with no env[var]
    # is covered as well.
    env['ENV'][var] = env['ENV'][var] + os.pathsep

    return save
class FindENVPathDirs:
    """Bind a specific *PATH variable name to a callable that returns
    all of the directories listed in env['ENV'][variable]."""

    def __init__(self, variable):
        # Name of the key looked up in env['ENV'] on each call.
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        """Return the directories named by env['ENV'][self.variable].

        Returns an empty tuple when the variable is not present in
        env['ENV'].  Otherwise the value is substituted through
        SCons.PathList and resolved relative to ``dir`` (falling back to
        ``env.fs._cwd`` when ``dir`` is not given), and the result of
        ``Rfindalldirs`` is returned as a tuple.  ``argument`` is accepted
        for signature compatibility and not used.
        """
        try:
            path = env['ENV'][self.variable]
        except KeyError:
            return ()

        # Imported lazily: only needed once there is a path to expand.
        import SCons.PathList

        dir = dir or env.fs._cwd
        path = SCons.PathList.PathList(path).subst_path(env, target, source)
        return tuple(dir.Rfindalldirs(path))
def LaTeXScanner():
    """Return a prototype Scanner instance for scanning LaTeX source files
    when built with latex.
    """
    ds = LaTeX(name="LaTeXScanner",
               suffixes='$LATEXSUFFIXES',
               # in the search order, see below in LaTeX class docstring
               graphics_extensions=TexGraphics,
               # NOTE(review): the tail of this call is not visible in the
               # extract; recursive=0 leaves recursion to the scanner's own
               # scan_recurse -- confirm against the project history.
               recursive=0)
    return ds
def PDFLaTeXScanner():
    """Return a prototype Scanner instance for scanning LaTeX source files
    when built with pdflatex.
    """
    ds = LaTeX(name="PDFLaTeXScanner",
               suffixes='$LATEXSUFFIXES',
               # in the search order, see below in LaTeX class docstring
               graphics_extensions=LatexGraphics,
               # NOTE(review): the tail of this call is not visible in the
               # extract; recursive=0 leaves recursion to the scanner's own
               # scan_recurse -- confirm against the project history.
               recursive=0)
    return ds
class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this returns a tuple consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "lstinputlisting", "input", "bibliography" or "usepackage"), and
    then the file name itself.
    Based on a quick look at LaTeX documentation, it seems that we
    should append .tex suffix for the "include" keywords, append .tex if
    there is no extension for the "input" keyword, and need to add .bib
    for the "bibliography" keyword when no extension is given.  A missing
    extension on "usepackage" is completed with .sty.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order corresponds to experimentation with teTeX
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4
    which gave
        ['.eps', '.ps'] for latex
        ['.png', '.pdf', '.jpg', '.tif'] for pdflatex.
    The extensions actually tried are whatever list is passed to the
    constructor as ``graphics_extensions``.

    Another difference is that the search path is determined by the type
    of the file being searched:
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['TEXINPUTS'] for "usepackage" keyword
    env['BIBINPUTS'] for "bibliography" keyword
    env['BSTINPUTS'] for "bibliographystyle" keyword

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    # Inclusion keyword -> name of the path variable searched for it.
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    env_variables = SCons.Util.unique(keyword_paths.values())

    # Extension appended when the named file has none, per keyword.
    # ("include" and "includegraphics" are handled separately.)
    _default_extensions = {'input': '.tex',
                           'bibliography': '.bib',
                           'usepackage': '.sty'}

    # Removes a trailing "[options]" group from a matched keyword, e.g.
    # 'includegraphics[clip,width=0.7\\linewidth]' -> 'includegraphics'.
    _noopt_cre = re.compile(r'\[.*$')

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the regexes and hand the scan hooks to the base Scanner.

        name                -- scanner name, passed on to the base class
        suffixes            -- suffix expression (e.g. '$LATEXSUFFIXES')
                               used both as scanner keys and to decide
                               which nodes are scanned
        graphics_extensions -- extensions tried for \\includegraphics
                               arguments that have no extension
        Remaining positional/keyword arguments go to the base class.
        """
        # We have to include \n with the % we exclude from the first
        # part of the regex because the expression is compiled with re.M.
        # Without the \n, the ^ could match the beginning of a *previous*
        # line followed by one or more newline characters (i.e. blank
        # lines), interfering with a match on the next line.
        regex = r'^[^%\n]*\\(include|includegraphics(?:\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
        self.cre = re.compile(regex, re.M)
        # Splits a line into (text before the first unescaped %, the rest).
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            # NOTE(review): the rfile() call is not visible in the extract
            # this was rebuilt from -- confirm against project history.
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs:
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                # keyword -> (finder over env[var], finder over env['ENV'][var])
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                di = {}
                # NOTE(review): the caller's dir/target/source/argument are
                # not forwarded; every finder is invoked with None for all
                # of them.  Preserved as found.
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck:
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        # NOTE(review): this assignment and kw['name'] below are not
        # visible in the extract this was rebuilt from.  Recursion is
        # driven by scan_recurse, so the base class is told not to recurse.
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include):
        """Return the candidate file names for one (keyword, filename) pair.

        "include" always gets '.tex' appended.  For "input",
        "bibliography" and "usepackage" the default extension is appended
        only when the name has none; for "includegraphics" without an
        extension one candidate per entry of self.graphics_extensions is
        returned.  In every other case the name is returned unchanged.
        """
        keyword, filename = include[0], include[1]
        if keyword == 'include':
            return [filename + '.tex']
        if os.path.splitext(filename)[1] == "":
            if keyword == 'includegraphics':
                # To also find an .eps figure for the PDF builder one
                # could add TexGraphics here; left out because the figure
                # is found once the user tells scons how to make the pdf.
                return [filename + e for e in self.graphics_extensions]
            default_ext = self._default_extensions.get(keyword)
            if default_ext is not None:
                return [filename + default_ext]
        return [filename]

    def sort_key(self, include):
        """Return the case-normalised string form of ``include`` for sorting."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Look up the file named by ``include``.

        ``path`` maps a keyword to a pair of directory tuples: the first
        built from env[var], the second from env['ENV'][var].  Each
        candidate name is searched in ``source_dir`` plus the first
        tuple, then in ``source_dir`` plus the second.

        Returns (node, include); node is None when nothing was found.
        """
        try:
            sub_path = path[include[0]]
        except (IndexError, KeyError):
            # Unknown keyword: search the source directory only.
            sub_path = ((), ())
        for name in self._latex_names(include):
            # see if we find it using the path in env[var]
            found = SCons.Node.FS.find_file(name, (source_dir,) + sub_path[0])
            if found:
                return found, include
            # see if we find it using the path in env['ENV'][var]
            found = SCons.Node.FS.find_file(name, (source_dir,) + sub_path[1])
            if found:
                return found, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.

        A line that follows a line ending in a comment is joined onto
        it (separated by one space, leading whitespace stripped).  The
        result has trailing whitespace removed and ends with one newline.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            # Every alternative in comment_re is optional, so the match
            # cannot fail on a single line.
            line, comment = self.comment_re.match(line).groups()
            if line_continues_a_comment:
                out[-1] = out[-1] + ' ' + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node):
        """Return the list of (keyword, filename) pairs found in ``node``.

        The result is cached on ``node.includes`` so each node's text is
        parsed only once.
        """
        # Modify the default scan function to allow for the regular
        # expression to return a comma separated list of file names
        # as can be the case with the bibliography keyword.

        # Cache the includes list in node so we only scan it once:
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        # 1. Split comma-separated lines, e.g.
        #      ('bibliography', 'phys,comp')
        #    should become two entries
        #      ('bibliography', 'phys')
        #      ('bibliography', 'comp')
        # 2. Remove the options, e.g., such as
        #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
        #    should become
        #      ('includegraphics', 'picture.eps')
        split_includes = []
        for include in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', include[0])
            for inc_name in include[1].split(','):
                split_includes.append((inc_type, inc_name))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does"""

        path_dict = dict(path)

        # Copy: scan() hands back the list cached on the node, and the
        # queue is consumed destructively below.
        queue = list(self.scan(node))
        # File names (include[1]) already handled, to visit each only once.
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specified on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            if include[1] in seen:
                continue
            seen.add(include[1])

            #
            # Handle multiple filenames in include[1]
            #
            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if include[0] != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend(self.scan(n))

        return [pair[1] for pair in sorted(nodes)]
392 # indent-tabs-mode:nil
394 # vim: set expandtab tabstop=4 shiftwidth=4: