src/engine/SCons/Scanner/Fortran.py

   1 """SCons.Scanner.Fortran
   2
   3 This module implements the dependency scanner for Fortran code.
   4
   5 """
   6
   7 #
   8 # __COPYRIGHT__
   9 #
  10 # Permission is hereby granted, free of charge, to any person obtaining
  11 # a copy of this software and associated documentation files (the
  12 # "Software"), to deal in the Software without restriction, including
  13 # without limitation the rights to use, copy, modify, merge, publish,
  14 # distribute, sublicense, and/or sell copies of the Software, and to
  15 # permit persons to whom the Software is furnished to do so, subject to
  16 # the following conditions:
  17 #
  18 # The above copyright notice and this permission notice shall be included
  19 # in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
  22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29 from __future__ import generators  ### KEEP FOR COMPATIBILITY FIXERS
  30
  31 __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
  32
  33 import re
  34
  35 import SCons.Node
  36 import SCons.Node.FS
  37 import SCons.Scanner
  38 import SCons.Util
  39 import SCons.Warnings
  40
  41 class F90Scanner(SCons.Scanner.Classic):
  42     """
  43     A Classic Scanner subclass for Fortran source files which takes
  44     into account both USE and INCLUDE statements.  This scanner will
  45     work for both F77 and F90 (and beyond) compilers.
  46
  47     Currently, this scanner assumes that the include files do not contain
  48     USE statements.  To enable the ability to deal with USE statements
  49     in include files, add logic right after the module names are found
  50     to loop over each include file, search for and locate each USE
  51     statement, and append each module name to the list of dependencies.
  52     Caching the search results in a common dictionary somewhere so that
  53     the same include file is not searched multiple times would be a
  54     smart thing to do.
  55     """
  56
  57     def __init__(self, name, suffixes, path_variable,
  58                  use_regex, incl_regex, def_regex, *args, **kw):
  59
  60         self.cre_use = re.compile(use_regex, re.M)
  61         self.cre_incl = re.compile(incl_regex, re.M)
  62         self.cre_def = re.compile(def_regex, re.M)
  63
  64         def _scan(node, env, path, self=self):
  65             node = node.rfile()
  66
  67             if not node.exists():
  68                 return []
  69
  70             return self.scan(node, env, path)
  71
  72         kw['function'] = _scan
  73         kw['path_function'] = SCons.Scanner.FindPathDirs(path_variable)
  74         kw['recursive'] = 1
  75         kw['skeys'] = suffixes
  76         kw['name'] = name
  77
  78         SCons.Scanner.Current.__init__(self, *args, **kw)
  79
  80     def scan(self, node, env, path=()):
  81
  82         # cache the includes list in node so we only scan it once:
  83         if node.includes != None:
  84             mods_and_includes = node.includes
  85         else:
  86             # retrieve all included filenames
  87             includes = self.cre_incl.findall(node.get_text_contents())
  88             # retrieve all USE'd module names
  89             modules = self.cre_use.findall(node.get_text_contents())
  90             # retrieve all defined module names
  91             defmodules = self.cre_def.findall(node.get_text_contents())
  92
  93             # Remove all USE'd module names that are defined in the same file
  94             d = {}
  95             for m in defmodules:
  96                 d[m] = 1
  97             modules = [m for m in modules if m not in d]
  98             #modules = self.undefinedModules(modules, defmodules)
  99
 100             # Convert module name to a .mod filename
 101             suffix = env.subst('$FORTRANMODSUFFIX')
 102             modules = [x.lower() + suffix for x in modules]
 103             # Remove unique items from the list
 104             mods_and_includes = SCons.Util.unique(includes+modules)
 105             node.includes = mods_and_includes
 106
 107         # This is a hand-coded DSU (decorate-sort-undecorate, or
 108         # Schwartzian transform) pattern.  The sort key is the raw name
 109         # of the file as specifed on the USE or INCLUDE line, which lets
 110         # us keep the sort order constant regardless of whether the file
 111         # is actually found in a Repository or locally.
 112         nodes = []
 113         source_dir = node.get_dir()
 114         if callable(path):
 115             path = path()
 116         for dep in mods_and_includes:
 117             n, i = self.find_include(dep, source_dir, path)
 118
 119             if n is None:
 120                 SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
 121                                     "No dependency generated for file: %s (referenced by: %s) -- file not found" % (i, node))
 122             else:
 123                 sortkey = self.sort_key(dep)
 124                 nodes.append((sortkey, n))
 125
 126         nodes.sort()
 127         nodes = [pair[1] for pair in nodes]
 128         return nodes
 129
 130 def FortranScan(path_variable="FORTRANPATH"):
 131     """Return a prototype Scanner instance for scanning source files
 132     for Fortran USE & INCLUDE statements"""
 133
 134 #   The USE statement regex matches the following:
 135 #
 136 #   USE module_name
 137 #   USE :: module_name
 138 #   USE, INTRINSIC :: module_name
 139 #   USE, NON_INTRINSIC :: module_name
 140 #
 141 #   Limitations
 142 #
 143 #   --  While the regex can handle multiple USE statements on one line,
 144 #       it cannot properly handle them if they are commented out.
 145 #       In either of the following cases:
 146 #
 147 #            !  USE mod_a ; USE mod_b         [entire line is commented out]
 148 #               USE mod_a ! ; USE mod_b       [in-line comment of second USE statement]
 149 #
 150 #       the second module name (mod_b) will be picked up as a dependency
 151 #       even though it should be ignored.  The only way I can see
 152 #       to rectify this would be to modify the scanner to eliminate
 153 #       the call to re.findall, read in the contents of the file,
 154 #       treating the comment character as an end-of-line character
 155 #       in addition to the normal linefeed, loop over each line,
 156 #       weeding out the comments, and looking for the USE statements.
 157 #       One advantage to this is that the regex passed to the scanner
 158 #       would no longer need to match a semicolon.
 159 #
 160 #   --  I question whether or not we need to detect dependencies to
 161 #       INTRINSIC modules because these are built-in to the compiler.
 162 #       If we consider them a dependency, will SCons look for them, not
 163 #       find them, and kill the build?  Or will we there be standard
 164 #       compiler-specific directories we will need to point to so the
 165 #       compiler and SCons can locate the proper object and mod files?
 166
 167 #   Here is a breakdown of the regex:
 168 #
 169 #   (?i)               : regex is case insensitive
 170 #   ^                  : start of line
 171 #   (?:                : group a collection of regex symbols without saving the match as a "group"
 172 #      ^|;             : matches either the start of the line or a semicolon - semicolon
 173 #   )                  : end the unsaved grouping
 174 #   \s*                : any amount of white space
 175 #   USE                : match the string USE, case insensitive
 176 #   (?:                : group a collection of regex symbols without saving the match as a "group"
 177 #      \s+|            : match one or more whitespace OR ....  (the next entire grouped set of regex symbols)
 178 #      (?:             : group a collection of regex symbols without saving the match as a "group"
 179 #         (?:          : establish another unsaved grouping of regex symbols
 180 #            \s*          : any amount of white space
 181 #            ,         : match a comma
 182 #            \s*       : any amount of white space
 183 #            (?:NON_)? : optionally match the prefix NON_, case insensitive
 184 #            INTRINSIC : match the string INTRINSIC, case insensitive
 185 #         )?           : optionally match the ", INTRINSIC/NON_INTRINSIC" grouped expression
 186 #         \s*          : any amount of white space
 187 #         ::           : match a double colon that must appear after the INTRINSIC/NON_INTRINSIC attribute
 188 #      )               : end the unsaved grouping
 189 #   )                  : end the unsaved grouping
 190 #   \s*                : match any amount of white space
 191 #   (\w+)              : match the module name that is being USE'd
 192 #
 193 #
 194     use_regex = "(?i)(?:^|;)\s*USE(?:\s+|(?:(?:\s*,\s*(?:NON_)?INTRINSIC)?\s*::))\s*(\w+)"
 195
 196
 197 #   The INCLUDE statement regex matches the following:
 198 #
 199 #   INCLUDE 'some_Text'
 200 #   INCLUDE "some_Text"
 201 #   INCLUDE "some_Text" ; INCLUDE "some_Text"
 202 #   INCLUDE kind_"some_Text"
 203 #   INCLUDE kind_'some_Text"
 204 #
 205 #   where some_Text can include any alphanumeric and/or special character
 206 #   as defined by the Fortran 2003 standard.
 207 #
 208 #   Limitations:
 209 #
 210 #   --  The Fortran standard dictates that a " or ' in the INCLUDE'd
 211 #       string must be represented as a "" or '', if the quotes that wrap
 212 #       the entire string are either a ' or ", respectively.   While the
 213 #       regular expression below can detect the ' or " characters just fine,
 214 #       the scanning logic, presently is unable to detect them and reduce
 215 #       them to a single instance.  This probably isn't an issue since,
 216 #       in practice, ' or " are not generally used in filenames.
 217 #
 218 #   --  This regex will not properly deal with multiple INCLUDE statements
 219 #       when the entire line has been commented out, ala
 220 #
 221 #           ! INCLUDE 'some_file' ; INCLUDE 'some_file'
 222 #
 223 #       In such cases, it will properly ignore the first INCLUDE file,
 224 #       but will actually still pick up the second.  Interestingly enough,
 225 #       the regex will properly deal with these cases:
 226 #
 227 #             INCLUDE 'some_file'
 228 #             INCLUDE 'some_file' !; INCLUDE 'some_file'
 229 #
 230 #       To get around the above limitation, the FORTRAN programmer could
 231 #       simply comment each INCLUDE statement separately, like this
 232 #
 233 #           ! INCLUDE 'some_file' !; INCLUDE 'some_file'
 234 #
 235 #       The way I see it, the only way to get around this limitation would
 236 #       be to modify the scanning logic to replace the calls to re.findall
 237 #       with a custom loop that processes each line separately, throwing
 238 #       away fully commented out lines before attempting to match against
 239 #       the INCLUDE syntax.
 240 #
 241 #   Here is a breakdown of the regex:
 242 #
 243 #   (?i)               : regex is case insensitive
 244 #   (?:                : begin a non-saving group that matches the following:
 245 #      ^               :    either the start of the line
 246 #      |               :                or
 247 #      ['">]\s*;       :    a semicolon that follows a single quote,
 248 #                           double quote or greater than symbol (with any
 249 #                           amount of whitespace in between).  This will
 250 #                           allow the regex to match multiple INCLUDE
 251 #                           statements per line (although it also requires
 252 #                           the positive lookahead assertion that is
 253 #                           used below).  It will even properly deal with
 254 #                           (i.e. ignore) cases in which the additional
 255 #                           INCLUDES are part of an in-line comment, ala
 256 #                                           "  INCLUDE 'someFile' ! ; INCLUDE 'someFile2' "
 257 #   )                  : end of non-saving group
 258 #   \s*                : any amount of white space
 259 #   INCLUDE            : match the string INCLUDE, case insensitive
 260 #   \s+                : match one or more white space characters
 261 #   (?\w+_)?           : match the optional "kind-param _" prefix allowed by the standard
 262 #   [<"']              : match the include delimiter - an apostrophe, double quote, or less than symbol
 263 #   (.+?)              : match one or more characters that make up
 264 #                        the included path and file name and save it
 265 #                        in a group.  The Fortran standard allows for
 266 #                        any non-control character to be used.  The dot
 267 #                        operator will pick up any character, including
 268 #                        control codes, but I can't conceive of anyone
 269 #                        putting control codes in their file names.
 270 #                        The question mark indicates it is non-greedy so
 271 #                        that regex will match only up to the next quote,
 272 #                        double quote, or greater than symbol
 273 #   (?=["'>])          : positive lookahead assertion to match the include
 274 #                        delimiter - an apostrophe, double quote, or
 275 #                        greater than symbol.  This level of complexity
 276 #                        is required so that the include delimiter is
 277 #                        not consumed by the match, thus allowing the
 278 #                        sub-regex discussed above to uniquely match a
 279 #                        set of semicolon-separated INCLUDE statements
 280 #                        (as allowed by the F2003 standard)
 281
 282     include_regex = """(?i)(?:^|['">]\s*;)\s*INCLUDE\s+(?:\w+_)?[<"'](.+?)(?=["'>])"""
 283
 284 #   The MODULE statement regex finds module definitions by matching
 285 #   the following:
 286 #
 287 #   MODULE module_name
 288 #
 289 #   but *not* the following:
 290 #
 291 #   MODULE PROCEDURE procedure_name
 292 #
 293 #   Here is a breakdown of the regex:
 294 #
 295 #   (?i)               : regex is case insensitive
 296 #   ^\s*               : any amount of white space
 297 #   MODULE             : match the string MODULE, case insensitive
 298 #   \s+                : match one or more white space characters
 299 #   (?!PROCEDURE)      : but *don't* match if the next word matches
 300 #                        PROCEDURE (negative lookahead assertion),
 301 #                        case insensitive
 302 #   (\w+)              : match one or more alphanumeric characters
 303 #                        that make up the defined module name and
 304 #                        save it in a group
 305
 306     def_regex = """(?i)^\s*MODULE\s+(?!PROCEDURE)(\w+)"""
 307
 308     scanner = F90Scanner("FortranScan",
 309                          "$FORTRANSUFFIXES",
 310                          path_variable,
 311                          use_regex,
 312                          include_regex,
 313                          def_regex)
 314     return scanner
 315
 316 # Local Variables:
 317 # tab-width:4
 318 # indent-tabs-mode:nil
 319 # End:
 320 # vim: set expandtab tabstop=4 shiftwidth=4: