Issue 2255: Handle scanning of UTF-8 and UTF-16 files. (Greg Spencer)

[scons.git] / src / engine / SCons / Node / FS.py
diff --git a/src/engine/SCons/Node/FS.py b/src/engine/SCons/Node/FS.py

index e04bacd43414f4e86b2da9ca71a0a0554c1f3dec..bdc1bfd2cc2d1aa40b2a117285b04464976c3683 100644 (file)
--- a/src/engine/SCons/Node/FS.py
+++ b/src/engine/SCons/Node/FS.py
@@ -35,8 +35,9 @@ that can be used by scripts or modules looking for the canonical default.
  
  __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
  
-import fnmatch
  from itertools import izip
+import cStringIO
+import fnmatch
  import os
  import os.path
  import re
@@ -45,7 +46,11 @@ import stat
  import string
  import sys
  import time
-import cStringIO
+
+try:
+    import codecs
+except ImportError:
+    pass
  
  import SCons.Action
  from SCons.Debug import logInstanceCreation
@@ -61,6 +66,23 @@ from SCons.Debug import Trace
  
  do_store_info = True
  
+
+class EntryProxyAttributeError(AttributeError):
+    """
+    An AttributeError subclass that keeps the proxy and attribute name
+    so that str() can report the class and name of the underlying Entry.
+    """
+    def __init__(self, entry_proxy, attribute):
+        AttributeError.__init__(self)
+        self.entry_proxy = entry_proxy
+        self.attribute = attribute
+    def __str__(self):
+        entry = self.entry_proxy.get()
+        fmt = "%s instance %s has no attribute %s"
+        return fmt % (entry.__class__.__name__,
+                      repr(entry.name),
+                      repr(self.attribute))
+
  # The max_drift value:  by default, use a cached signature value for
  # any file that's been untouched for more than two days.
  default_max_drift = 2*24*60*60
@@ -225,8 +247,6 @@ def LinkFunc(target, source, env):
              if func == Link_Funcs[-1]:
                  # exception of the last link method (copy) are fatal
                  raise
-            else:
-                pass
      return 0
  
  Link = SCons.Action.Action(LinkFunc, None)
@@ -485,16 +505,11 @@ class EntryProxy(SCons.Util.Proxy):
          except KeyError:
              try:
                  attr = SCons.Util.Proxy.__getattr__(self, name)
-            except AttributeError:
-                entry = self.get()
-                classname = string.split(str(entry.__class__), '.')[-1]
-                if classname[-2:] == "'>":
-                    # new-style classes report their name as:
-                    #   "<class 'something'>"
-                    # instead of the classic classes:
-                    #   "something"
-                    classname = classname[:-2]
-                raise AttributeError, "%s instance '%s' has no attribute '%s'" % (classname, entry.name, name)
+            except AttributeError, e:
+                # Raise our own AttributeError subclass with an
+                # overridden __str__() method that identifies the
+                # name of the entry that caused the exception.
+                raise EntryProxyAttributeError(self, name)
              return attr
          else:
              return attr_function(self)
@@ -592,7 +607,7 @@ class Base(SCons.Node.Node):
          if self.duplicate or self.is_derived():
              return self.get_path()
          srcnode = self.srcnode()
-        if srcnode.stat() is None and not self.stat() is None:
+        if srcnode.stat() is None and self.stat() is not None:
              result = self.get_path()
          else:
              result = srcnode.get_path()
@@ -607,7 +622,7 @@ class Base(SCons.Node.Node):
              # values that the underlying stat() method saved.
              try: del self._memo['stat']
              except KeyError: pass
-            if not self is srcnode:
+            if self is not srcnode:
                  try: del srcnode._memo['stat']
                  except KeyError: pass
          return result
@@ -625,7 +640,7 @@ class Base(SCons.Node.Node):
          return result
  
      def exists(self):
-        return not self.stat() is None
+        return self.stat() is not None
  
      def rexists(self):
          return self.rfile().exists()
@@ -642,11 +657,11 @@ class Base(SCons.Node.Node):
  
      def isdir(self):
          st = self.stat()
-        return not st is None and stat.S_ISDIR(st[stat.ST_MODE])
+        return st is not None and stat.S_ISDIR(st[stat.ST_MODE])
  
      def isfile(self):
          st = self.stat()
-        return not st is None and stat.S_ISREG(st[stat.ST_MODE])
+        return st is not None and stat.S_ISREG(st[stat.ST_MODE])
  
      if hasattr(os, 'symlink'):
          def islink(self):
@@ -866,11 +881,8 @@ class Entry(Base):
          return self.get_suffix()
  
      def get_contents(self):
-        """Fetch the contents of the entry.
-
-        Since this should return the real contents from the file
-        system, we check to see into what sort of subclass we should
-        morph this Entry."""
+        """Fetch the contents of the entry.  Returns the exact binary
+        contents of the file."""
          try:
              self = self.disambiguate(must_exist=1)
          except SCons.Errors.UserError:
@@ -883,10 +895,28 @@ class Entry(Base):
          else:
              return self.get_contents()
  
+    def get_text_contents(self):
+        """Fetch the decoded text contents of a Unicode encoded Entry.
+
+        The Entry is first disambiguated against what is on disk, and
+        the resulting node's get_text_contents() is returned; if nothing
+        exists on disk to disambiguate with, return an empty string."""
+        try:
+            self = self.disambiguate(must_exist=1)
+        except SCons.Errors.UserError:
+            # There was nothing on disk with which to disambiguate
+            # this entry.  Leave it as an Entry, but return a null
+            # string so calls to get_text_contents() in emitters and
+            # the like (e.g. in qt.py) don't have to disambiguate by
+            # hand or catch the exception.
+            return ''
+        else:
+            return self.get_text_contents()
+
      def must_be_same(self, klass):
          """Called to make sure a Node is a Dir.  Since we're an
          Entry, we can morph into one."""
-        if not self.__class__ is klass:
+        if self.__class__ is not klass:
              self.__class__ = klass
              self._morph()
              self.clear()
@@ -910,7 +940,7 @@ class Entry(Base):
  
      def rel_path(self, other):
          d = self.disambiguate()
-        if d.__class__ == Entry:
+        if d.__class__ is Entry:
              raise "rel_path() could not disambiguate File/Dir"
          return d.rel_path(other)
  
@@ -1065,7 +1095,7 @@ class FS(LocalFS):
          """
          curr=self._cwd
          try:
-            if not dir is None:
+            if dir is not None:
                  self._cwd = dir
                  if change_os_dir:
                      os.chdir(dir.abspath)
@@ -1348,7 +1378,7 @@ class Dir(Base):
                          del node._srcreps
                      except AttributeError:
                          pass
-                    if duplicate != None:
+                    if duplicate is not None:
                          node.duplicate=duplicate
  
      def __resetDuplicate(self, node):
@@ -1450,15 +1480,15 @@ class Dir(Base):
          """Return a path to "other" relative to this directory.
          """
  
-       # This complicated and expensive method, which constructs relative
-       # paths between arbitrary Node.FS objects, is no longer used
-       # by SCons itself.  It was introduced to store dependency paths
-       # in .sconsign files relative to the target, but that ended up
-       # being significantly inefficient.
+        # This complicated and expensive method, which constructs relative
+        # paths between arbitrary Node.FS objects, is no longer used
+        # by SCons itself.  It was introduced to store dependency paths
+        # in .sconsign files relative to the target, but that ended up
+        # being significantly inefficient.
          #
-       # We're continuing to support the method because some SConstruct
-       # files out there started using it when it was available, and
-       # we're all about backwards compatibility..
+        # We're continuing to support the method because some SConstruct
+        # files out there started using it when it was available, and
+        # we're all about backwards compatibility..
  
          try:
              memo_dict = self._memo['rel_path']
@@ -1472,11 +1502,9 @@ class Dir(Base):
                  pass
  
          if self is other:
-
              result = '.'
  
          elif not other in self.path_elements:
-
              try:
                  other_dir = other.get_dir()
              except AttributeError:
@@ -1490,9 +1518,7 @@ class Dir(Base):
                          result = other.name
                      else:
                          result = dir_rel_path + os.sep + other.name
-
          else:
-
              i = self.path_elements.index(other) + 1
  
              path_elems = ['..'] * (len(self.path_elements) - i) \
@@ -1543,7 +1569,7 @@ class Dir(Base):
      def build(self, **kw):
          """A null "builder" for directories."""
          global MkdirBuilder
-        if not self.builder is MkdirBuilder:
+        if self.builder is not MkdirBuilder:
              apply(SCons.Node.Node.build, [self,], kw)
  
      #
@@ -1559,10 +1585,9 @@ class Dir(Base):
              if parent.exists():
                  break
              listDirs.append(parent)
-            p = parent.up()
-            if p is None:
-                raise SCons.Errors.StopError, parent.path
-            parent = p
+            parent = parent.up()
+        else:
+            raise SCons.Errors.StopError, parent.path
          listDirs.reverse()
          for dirnode in listDirs:
              try:
@@ -1582,7 +1607,7 @@ class Dir(Base):
  
      def multiple_side_effect_has_builder(self):
          global MkdirBuilder
-        return not self.builder is MkdirBuilder and self.has_builder()
+        return self.builder is not MkdirBuilder and self.has_builder()
  
      def alter_targets(self):
          """Return any corresponding targets in a variant directory.
@@ -1593,13 +1618,18 @@ class Dir(Base):
          """A directory does not get scanned."""
          return None
  
+    def get_text_contents(self):
+        """Dir.get_contents() already builds a plain text string, so
+        there is nothing to decode; return it unchanged."""
+        return self.get_contents()
+
      def get_contents(self):
          """Return content signatures and names of all our children
          separated by new-lines. Ensure that the nodes are sorted."""
          contents = []
          name_cmp = lambda a, b: cmp(a.name, b.name)
          sorted_children = self.children()[:]
-        sorted_children.sort(name_cmp)        
+        sorted_children.sort(name_cmp)
          for node in sorted_children:
              contents.append('%s %s\n' % (node.get_csig(), node.name))
          return string.join(contents, '')
@@ -1621,7 +1651,7 @@ class Dir(Base):
      def is_up_to_date(self):
          """If any child is not up-to-date, then this directory isn't,
          either."""
-        if not self.builder is MkdirBuilder and not self.exists():
+        if self.builder is not MkdirBuilder and not self.exists():
              return 0
          up_to_date = SCons.Node.up_to_date
          for kid in self.children():
@@ -1794,7 +1824,7 @@ class Dir(Base):
              except TypeError: pass
          node = self.srcdir_duplicate(name)
          if isinstance(node, Dir):
-            node = None
+            return None
          return node
  
      def walk(self, func, arg):
@@ -1887,6 +1917,7 @@ class Dir(Base):
          for srcdir in self.srcdir_list():
              search_dir_list.extend(srcdir.get_all_rdirs())
  
+        selfEntry = self.Entry
          names = []
          for dir in search_dir_list:
              # We use the .name attribute from the Node because the keys of
@@ -1896,34 +1927,39 @@ class Dir(Base):
              entry_names = filter(lambda n: n not in ('.', '..'), dir.entries.keys())
              node_names = map(lambda n, e=dir.entries: e[n].name, entry_names)
              names.extend(node_names)
+            if not strings:
+                # Make sure the working directory (self) actually has
+                # entries for all Nodes in repositories or variant dirs.
+                map(selfEntry, node_names)
              if ondisk:
                  try:
                      disk_names = os.listdir(dir.abspath)
                  except os.error:
-                    pass
-                else:
-                    names.extend(disk_names)
-                    if not strings:
-                        # We're going to return corresponding Nodes in
-                        # the local directory, so we need to make sure
-                        # those Nodes exist.  We only want to create
-                        # Nodes for the entries that will match the
-                        # specified pattern, though, which means we
-                        # need to filter the list here, even though
-                        # the overall list will also be filtered later,
-                        # after we exit this loop.
-                        if pattern[0] != '.':
-                            #disk_names = [ d for d in disk_names if d[0] != '.' ]
-                            disk_names = filter(lambda x: x[0] != '.', disk_names)
-                        disk_names = fnmatch.filter(disk_names, pattern)
-                        rep_nodes = map(dir.Entry, disk_names)
-                        #rep_nodes = [ n.disambiguate() for n in rep_nodes ]
-                        rep_nodes = map(lambda n: n.disambiguate(), rep_nodes)
-                        for node, name in izip(rep_nodes, disk_names):
-                            n = self.Entry(name)
-                            if n.__class__ != node.__class__:
-                                n.__class__ = node.__class__
-                                n._morph()
+                    continue
+                names.extend(disk_names)
+                if not strings:
+                    # We're going to return corresponding Nodes in
+                    # the local directory, so we need to make sure
+                    # those Nodes exist.  We only want to create
+                    # Nodes for the entries that will match the
+                    # specified pattern, though, which means we
+                    # need to filter the list here, even though
+                    # the overall list will also be filtered later,
+                    # after we exit this loop.
+                    if pattern[0] != '.':
+                        #disk_names = [ d for d in disk_names if d[0] != '.' ]
+                        disk_names = filter(lambda x: x[0] != '.', disk_names)
+                    disk_names = fnmatch.filter(disk_names, pattern)
+                    dirEntry = dir.Entry
+                    for name in disk_names:
+                        # Add './' before disk filename so that '#' at
+                        # beginning of filename isn't interpreted.
+                        name = './' + name
+                        node = dirEntry(name).disambiguate()
+                        n = selfEntry(name)
+                        if n.__class__ != node.__class__:
+                            n.__class__ = node.__class__
+                            n._morph()
  
          names = set(names)
          if pattern[0] != '.':
@@ -2132,14 +2168,13 @@ class FileBuildInfo(SCons.Node.BuildInfoBase):
                  strings = getattr(self, nattr)
                  nodeinfos = getattr(self, sattr)
              except AttributeError:
-                pass
-            else:
-                nodes = []
-                for s, ni in izip(strings, nodeinfos):
-                    if not isinstance(s, SCons.Node.Node):
-                        s = ni.str_to_node(s)
-                    nodes.append(s)
-                setattr(self, nattr, nodes)
+                continue
+            nodes = []
+            for s, ni in izip(strings, nodeinfos):
+                if not isinstance(s, SCons.Node.Node):
+                    s = ni.str_to_node(s)
+                nodes.append(s)
+            setattr(self, nattr, nodes)
      def format(self, names=0):
          result = []
          bkids = self.bsources + self.bdepends + self.bimplicit
@@ -2171,23 +2206,25 @@ class File(Base):
          self._morph()
  
      def Entry(self, name):
-        #"""Create an entry node named 'name' relative to
-        #the directory of this file."""
+        """Create an entry node named 'name' relative to
+        the directory of this file."""
          return self.dir.Entry(name)
  
      def Dir(self, name, create=True):
-        #"""Create a directory node named 'name' relative to
-        #the directory of this file."""
+        """Create a directory node named 'name' relative to
+        the directory of this file."""
          return self.dir.Dir(name, create=create)
  
      def Dirs(self, pathlist):
          """Create a list of directories relative to the SConscript
          directory of this file."""
+        # TODO(1.5)
+        # return [self.Dir(p) for p in pathlist]
          return map(lambda p, s=self: s.Dir(p), pathlist)
  
      def File(self, name):
-        #"""Create a file node named 'name' relative to
-        #the directory of this file."""
+        """Create a file node named 'name' relative to
+        the directory of this file."""
          return self.dir.File(name)
  
      #def generate_build_dict(self):
@@ -2224,12 +2261,28 @@ class File(Base):
              return ''
          fname = self.rfile().abspath
          try:
-            r = open(fname, "rb").read()
+            contents = open(fname, "rb").read()
          except EnvironmentError, e:
              if not e.filename:
                  e.filename = fname
              raise
-        return r
+        return contents
+
+    try:
+        import codecs
+    except ImportError:
+        get_text_contents = get_contents
+    else:
+        # Pick the decoder from a leading byte-order mark: UTF-8 (the BOM
+        # is sliced off, since that codec would keep it) or UTF-16 in
+        # either byte order.  Contents without a BOM are returned as-is.
+        def get_text_contents(self):
+            contents = self.get_contents()
+            if contents.startswith(codecs.BOM_UTF8):
+                return contents[len(codecs.BOM_UTF8):].decode('utf-8')
+            if contents[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
+                return contents.decode('utf-16')
+            return contents
  
      def get_content_hash(self):
          """
@@ -2382,26 +2435,24 @@ class File(Base):
              try:
                  value = getattr(old_entry, attr)
              except AttributeError:
-                pass
-            else:
-                setattr(binfo, attr, value)
-                delattr(old_entry, attr)
+                continue
+            setattr(binfo, attr, value)
+            delattr(old_entry, attr)
          for attr in self.convert_sig_attrs:
              try:
                  sig_list = getattr(old_entry, attr)
              except AttributeError:
-                pass
-            else:
-                value = []
-                for sig in sig_list:
-                    ninfo = self.new_ninfo()
-                    if len(sig) == 32:
-                        ninfo.csig = sig
-                    else:
-                        ninfo.timestamp = sig
-                    value.append(ninfo)
-                setattr(binfo, attr, value)
-                delattr(old_entry, attr)
+                continue
+            value = []
+            for sig in sig_list:
+                ninfo = self.new_ninfo()
+                if len(sig) == 32:
+                    ninfo.csig = sig
+                else:
+                    ninfo.timestamp = sig
+                value.append(ninfo)
+            setattr(binfo, attr, value)
+            delattr(old_entry, attr)
          return new_entry
  
      memoizer_counters.append(SCons.Memoize.CountValue('get_stored_info'))
@@ -2465,6 +2516,7 @@ class File(Base):
                  pass
  
          if scanner:
+            # result = [n.disambiguate() for n in scanner(self, env, path)]
              result = scanner(self, env, path)
              result = map(lambda N: N.disambiguate(), result)
          else:
@@ -2568,7 +2620,7 @@ class File(Base):
              scb = self.sbuilder
          except AttributeError:
              scb = self.sbuilder = self.find_src_builder()
-        return not scb is None
+        return scb is not None
  
      def alter_targets(self):
          """Return any corresponding targets in a variant directory.
@@ -2641,7 +2693,7 @@ class File(Base):
          # Duplicate from source path if we are set up to do this.
          if self.duplicate and not self.is_derived() and not self.linked:
              src = self.srcnode()
-            if not src is self:
+            if src is not self:
                  # At this point, src is meant to be copied in a variant directory.
                  src = src.rfile()
                  if src.abspath != self.abspath:
@@ -2675,23 +2727,22 @@ class File(Base):
          old = self.get_stored_info()
          mtime = self.get_timestamp()
  
-        csig = None
          max_drift = self.fs.max_drift
          if max_drift > 0:
              if (time.time() - mtime) > max_drift:
                  try:
                      n = old.ninfo
                      if n.timestamp and n.csig and n.timestamp == mtime:
-                        csig = n.csig
+                        return n.csig
                  except AttributeError:
                      pass
          elif max_drift == 0:
              try:
-                csig = old.ninfo.csig
+                return old.ninfo.csig
              except AttributeError:
                  pass
  
-        return csig
+        return None
  
      def get_csig(self):
          """
@@ -2746,7 +2797,7 @@ class File(Base):
              return 1
  
      def changed_state(self, target, prev_ni):
-        return (self.state != SCons.Node.up_to_date)
+        return self.state != SCons.Node.up_to_date
  
      def changed_timestamp_then_content(self, target, prev_ni):
          if not self.changed_timestamp_match(target, prev_ni):
@@ -2824,6 +2875,19 @@ class File(Base):
                     (isinstance(node, File) or isinstance(node, Entry) \
                      or not node.is_derived()):
                          result = node
+                        # Copy over our local attributes to the repository
+                        # Node so we identify shared object files in the
+                        # repository and don't assume they're static.
+                        #
+                        # This isn't perfect; the attribute would ideally
+                        # be attached to the object in the repository in
+                        # case it was built statically in the repository
+                        # and we changed it to shared locally, but that's
+                        # rarely the case and would only occur if you
+                        # intentionally used the same suffix for both
+                        # shared and static objects anyway.  So this
+                        # should work well in practice.
+                        result.attributes = self.attributes
                          break
          self._memo['rfile'] = result
          return result
@@ -2867,13 +2931,14 @@ class File(Base):
          # Add the path to the cache signature, because multiple
          # targets built by the same action will all have the same
          # build signature, and we have to differentiate them somehow.
-        children =  self.children()
-        sigs = map(lambda n: n.get_cachedir_csig(), children)
+        children = self.children()
          executor = self.get_executor()
+        # sigs = [n.get_cachedir_csig() for n in children]
+        sigs = map(lambda n: n.get_cachedir_csig(), children)
          sigs.append(SCons.Util.MD5signature(executor.get_contents()))
          sigs.append(self.path)
-        self.cachesig = SCons.Util.MD5collect(sigs)
-        return self.cachesig
+        result = self.cachesig = SCons.Util.MD5collect(sigs)
+        return result
  
  
  default_fs = None
@@ -3039,12 +3104,11 @@ def invalidate_node_memos(targets):
      # affected. XXX The way to check if Execute() is in the stacktrace
      # is a very dirty hack and should be replaced by a more sensible
      # solution.
-    must_invalidate = 0
-    tb = extract_stack()
-    for f in tb:
+    for f in extract_stack():
          if f[2] == 'Execute' and f[0][-14:] == 'Environment.py':
-            must_invalidate = 1
-    if not must_invalidate:
+            break
+    else:
+        # Don't have to invalidate, so return
          return
  
      if not SCons.Util.is_List(targets):