"""SCons.Scanner.Fortran
-This module implements the dependency scanner for Fortran code.
+This module implements the dependency scanner for Fortran code.
"""
#
-# Copyright (c) 2001, 2002 Steven Knight
+# __COPYRIGHT__
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
-
-import copy
-import os.path
import re
+import string
import SCons.Node
import SCons.Node.FS
import SCons.Util
import SCons.Warnings
-include_re = re.compile("INCLUDE[ \t]+'([\\w./\\\\]+)'", re.M)
-
-def FortranScan(fs = SCons.Node.FS.default_fs):
- """Return a prototype Scanner instance for scanning source files
- for Fortran INCLUDE statements"""
- scanner = SCons.Scanner.Recursive(scan, "FortranScan", fs,
- [".f", ".F", ".for", ".FOR"])
- return scanner
+class F90Scanner(SCons.Scanner.Classic):
+ """
+ A Classic Scanner subclass for Fortran source files which takes
+ into account both USE and INCLUDE statements. This scanner will
+ work for both F77 and F90 (and beyond) compilers.
-def scan(node, env, target, fs = SCons.Node.FS.default_fs):
+ Currently, this scanner assumes that the include files do not contain
+ USE statements. To enable the ability to deal with USE statements
+ in include files, add logic right after the module names are found
+ to loop over each include file, search for and locate each USE
+ statement, and append each module name to the list of dependencies.
+ Caching the search results in a common dictionary somewhere so that
+ the same include file is not searched multiple times would be a
+ smart thing to do.
"""
- scan(node, Environment) -> [node]
- the Fortran dependency scanner function
+ def __init__(self, name, suffixes, path_variable,
+ use_regex, incl_regex, def_regex, *args, **kw):
- This function is intentionally simple. There are two rules it
- follows:
-
- 1) #include <foo.h> - search for foo.h in F77PATH followed by the
- directory 'filename' is in
- 2) #include \"foo.h\" - search for foo.h in the directory 'filename' is
- in followed by F77PATH
+ self.cre_use = re.compile(use_regex, re.M)
+ self.cre_incl = re.compile(incl_regex, re.M)
+ self.cre_def = re.compile(def_regex, re.M)
- These rules approximate the behaviour of most C/C++ compilers.
+ def _scan(node, env, path, self=self):
+ node = node.rfile()
- This scanner also ignores #ifdef and other preprocessor conditionals, so
- it may find more depencies than there really are, but it never misses
- dependencies.
- """
+ if not node.exists():
+ return []
- # This function caches various information in node and target:
- # target.f77path - env['F77PATH'] converted to nodes
- # node.found_includes - include files found by previous call to scan,
- # keyed on f77path
- # node.includes - the result of include_re.findall()
+ return self.scan(node, env, path)
- if not hasattr(target, 'f77path'):
- try:
- target.f77path = tuple(fs.Rsearchall(SCons.Util.mapPaths(env['F77PATH'], target.cwd, env), clazz=SCons.Node.FS.Dir, must_exist=0))
- except KeyError:
- target.f77path = ()
+ kw['function'] = _scan
+ kw['path_function'] = SCons.Scanner.FindPathDirs(path_variable)
+ kw['recursive'] = 1
+ kw['skeys'] = suffixes
+ kw['name'] = name
- f77path = target.f77path
+ apply(SCons.Scanner.Current.__init__, (self,) + args, kw)
- nodes = []
+ def scan(self, node, env, path=()):
- node = node.rfile()
- try:
- nodes = node.found_includes[f77path]
- except KeyError:
- if node.rexists():
+ # cache the includes list in node so we only scan it once:
+ if node.includes != None:
+ mods_and_includes = node.includes
+ else:
+ # retrieve all included filenames
+ includes = self.cre_incl.findall(node.get_text_contents())
+ # retrieve all USE'd module names
+ modules = self.cre_use.findall(node.get_text_contents())
+ # retrieve all defined module names
+ defmodules = self.cre_def.findall(node.get_text_contents())
- # cache the includes list in node so we only scan it once:
- if node.includes != None:
- includes = node.includes
+ # Remove all USE'd module names that are defined in the same file
+ d = {}
+ for m in defmodules:
+ d[m] = 1
+ modules = filter(lambda m, d=d: not d.has_key(m), modules)
+ #modules = self.undefinedModules(modules, defmodules)
+
+ # Convert module name to a .mod filename
+ suffix = env.subst('$FORTRANMODSUFFIX')
+ modules = map(lambda x, s=suffix: string.lower(x) + s, modules)
+ # Remove duplicate items from the list
+ mods_and_includes = SCons.Util.unique(includes+modules)
+ node.includes = mods_and_includes
+
+ # This is a hand-coded DSU (decorate-sort-undecorate, or
+ # Schwartzian transform) pattern. The sort key is the raw name
+ # of the file as specified on the USE or INCLUDE line, which lets
+ # us keep the sort order constant regardless of whether the file
+ # is actually found in a Repository or locally.
+ nodes = []
+ source_dir = node.get_dir()
+ if callable(path):
+ path = path()
+ for dep in mods_and_includes:
+ n, i = self.find_include(dep, source_dir, path)
+
+ if n is None:
+ SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
+ "No dependency generated for file: %s (referenced by: %s) -- file not found" % (i, node))
else:
- includes = include_re.findall(node.get_contents())
- node.includes = includes
-
- source_dir = node.get_dir()
-
- for include in includes:
- n = SCons.Node.FS.find_file(include,
- (source_dir,) + f77path,
- fs.File)
- if not n is None:
- nodes.append(n)
- else:
- SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
- "No dependency generated for file: %s (included from: %s) -- file not found" % (include, node))
- node.found_includes[f77path] = nodes
-
- # Schwartzian transform from the Python FAQ Wizard
- def st(List, Metric):
- def pairing(element, M = Metric):
- return (M(element), element)
- def stripit(pair):
- return pair[1]
- paired = map(pairing, List)
- paired.sort()
- return map(stripit, paired)
-
- def normalize(node):
- # We don't want the order of includes to be
- # modified by case changes on case insensitive OSes, so
- # normalize the case of the filename here:
- # (see test/win32pathmadness.py for a test of this)
- return SCons.Node.FS._my_normcase(str(node))
-
- return st(nodes, normalize)
+ sortkey = self.sort_key(dep)
+ nodes.append((sortkey, n))
+
+ nodes.sort()
+ nodes = map(lambda pair: pair[1], nodes)
+ return nodes
+
+def FortranScan(path_variable="FORTRANPATH"):
+ """Return a prototype Scanner instance for scanning source files
+ for Fortran USE & INCLUDE statements"""
+
+# The USE statement regex matches the following:
+#
+# USE module_name
+# USE :: module_name
+# USE, INTRINSIC :: module_name
+# USE, NON_INTRINSIC :: module_name
+#
+# Limitations
+#
+# -- While the regex can handle multiple USE statements on one line,
+# it cannot properly handle them if they are commented out.
+# In either of the following cases:
+#
+# ! USE mod_a ; USE mod_b [entire line is commented out]
+# USE mod_a ! ; USE mod_b [in-line comment of second USE statement]
+#
+# the second module name (mod_b) will be picked up as a dependency
+# even though it should be ignored. The only way I can see
+# to rectify this would be to modify the scanner to eliminate
+# the call to re.findall, read in the contents of the file,
+# treating the comment character as an end-of-line character
+# in addition to the normal linefeed, loop over each line,
+# weeding out the comments, and looking for the USE statements.
+# One advantage to this is that the regex passed to the scanner
+# would no longer need to match a semicolon.
+#
+# -- I question whether or not we need to detect dependencies to
+# INTRINSIC modules because these are built-in to the compiler.
+# If we consider them a dependency, will SCons look for them, not
+# find them, and kill the build? Or will there be standard
+# compiler-specific directories we will need to point to so the
+# compiler and SCons can locate the proper object and mod files?
+
+# Here is a breakdown of the regex:
+#
+# (?i) : regex is case insensitive
+# ^ : start of line
+# (?: : group a collection of regex symbols without saving the match as a "group"
+# ^|; : matches either the start of the line or a semicolon
+# ) : end the unsaved grouping
+# \s* : any amount of white space
+# USE : match the string USE, case insensitive
+# (?: : group a collection of regex symbols without saving the match as a "group"
+# \s+| : match one or more whitespace OR .... (the next entire grouped set of regex symbols)
+# (?: : group a collection of regex symbols without saving the match as a "group"
+# (?: : establish another unsaved grouping of regex symbols
+# \s* : any amount of white space
+# , : match a comma
+# \s* : any amount of white space
+# (?:NON_)? : optionally match the prefix NON_, case insensitive
+# INTRINSIC : match the string INTRINSIC, case insensitive
+# )? : optionally match the ", INTRINSIC/NON_INTRINSIC" grouped expression
+# \s* : any amount of white space
+# :: : match a double colon that must appear after the INTRINSIC/NON_INTRINSIC attribute
+# ) : end the unsaved grouping
+# ) : end the unsaved grouping
+# \s* : match any amount of white space
+# (\w+) : match the module name that is being USE'd
+#
+#
+ use_regex = "(?i)(?:^|;)\s*USE(?:\s+|(?:(?:\s*,\s*(?:NON_)?INTRINSIC)?\s*::))\s*(\w+)"
+
+
+# The INCLUDE statement regex matches the following:
+#
+# INCLUDE 'some_Text'
+# INCLUDE "some_Text"
+# INCLUDE "some_Text" ; INCLUDE "some_Text"
+# INCLUDE kind_"some_Text"
+# INCLUDE kind_'some_Text'
+#
+# where some_Text can include any alphanumeric and/or special character
+# as defined by the Fortran 2003 standard.
+#
+# Limitations:
+#
+# -- The Fortran standard dictates that a " or ' in the INCLUDE'd
+# string must be represented as a "" or '', if the quotes that wrap
+# the entire string are either a ' or ", respectively. While the
+# regular expression below can detect the ' or " characters just fine,
+# the scanning logic presently is unable to detect them and reduce
+# them to a single instance. This probably isn't an issue since,
+# in practice, ' or " are not generally used in filenames.
+#
+# -- This regex will not properly deal with multiple INCLUDE statements
+# when the entire line has been commented out, ala
+#
+# ! INCLUDE 'some_file' ; INCLUDE 'some_file'
+#
+# In such cases, it will properly ignore the first INCLUDE file,
+# but will actually still pick up the second. Interestingly enough,
+# the regex will properly deal with these cases:
+#
+# INCLUDE 'some_file'
+# INCLUDE 'some_file' !; INCLUDE 'some_file'
+#
+# To get around the above limitation, the FORTRAN programmer could
+# simply comment each INCLUDE statement separately, like this
+#
+# ! INCLUDE 'some_file' !; INCLUDE 'some_file'
+#
+# The way I see it, the only way to get around this limitation would
+# be to modify the scanning logic to replace the calls to re.findall
+# with a custom loop that processes each line separately, throwing
+# away fully commented out lines before attempting to match against
+# the INCLUDE syntax.
+#
+# Here is a breakdown of the regex:
+#
+# (?i) : regex is case insensitive
+# (?: : begin a non-saving group that matches the following:
+# ^ : either the start of the line
+# | : or
+# ['">]\s*; : a semicolon that follows a single quote,
+# double quote or greater than symbol (with any
+# amount of whitespace in between). This will
+# allow the regex to match multiple INCLUDE
+# statements per line (although it also requires
+# the positive lookahead assertion that is
+# used below). It will even properly deal with
+# (i.e. ignore) cases in which the additional
+# INCLUDES are part of an in-line comment, ala
+# " INCLUDE 'someFile' ! ; INCLUDE 'someFile2' "
+# ) : end of non-saving group
+# \s* : any amount of white space
+# INCLUDE : match the string INCLUDE, case insensitive
+# \s+ : match one or more white space characters
+# (?:\w+_)? : match the optional "kind-param _" prefix allowed by the standard
+# [<"'] : match the include delimiter - an apostrophe, double quote, or less than symbol
+# (.+?) : match one or more characters that make up
+# the included path and file name and save it
+# in a group. The Fortran standard allows for
+# any non-control character to be used. The dot
+# operator will pick up any character, including
+# control codes, but I can't conceive of anyone
+# putting control codes in their file names.
+# The question mark indicates it is non-greedy so
+# that regex will match only up to the next quote,
+# double quote, or greater than symbol
+# (?=["'>]) : positive lookahead assertion to match the include
+# delimiter - an apostrophe, double quote, or
+# greater than symbol. This level of complexity
+# is required so that the include delimiter is
+# not consumed by the match, thus allowing the
+# sub-regex discussed above to uniquely match a
+# set of semicolon-separated INCLUDE statements
+# (as allowed by the F2003 standard)
+
+ include_regex = """(?i)(?:^|['">]\s*;)\s*INCLUDE\s+(?:\w+_)?[<"'](.+?)(?=["'>])"""
+
+# The MODULE statement regex finds module definitions by matching
+# the following:
+#
+# MODULE module_name
+#
+# but *not* the following:
+#
+# MODULE PROCEDURE procedure_name
+#
+# Here is a breakdown of the regex:
+#
+# (?i) : regex is case insensitive
+# ^\s* : start of line followed by any amount of white space
+# MODULE : match the string MODULE, case insensitive
+# \s+ : match one or more white space characters
+# (?!PROCEDURE) : but *don't* match if the next word matches
+# PROCEDURE (negative lookahead assertion),
+# case insensitive
+# (\w+) : match one or more alphanumeric characters
+# that make up the defined module name and
+# save it in a group
+
+ def_regex = """(?i)^\s*MODULE\s+(?!PROCEDURE)(\w+)"""
+
+ scanner = F90Scanner("FortranScan",
+ "$FORTRANSUFFIXES",
+ path_variable,
+ use_regex,
+ include_regex,
+ def_regex)
+ return scanner