Issue 2255: Handle scanning of UTF-8 and UTF-16 files. (Greg Spencer)
[scons.git] / src / engine / SCons / Node / FS.py
index e04bacd43414f4e86b2da9ca71a0a0554c1f3dec..bdc1bfd2cc2d1aa40b2a117285b04464976c3683 100644 (file)
@@ -35,8 +35,9 @@ that can be used by scripts or modules looking for the canonical default.
 
 __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
 
-import fnmatch
 from itertools import izip
+import cStringIO
+import fnmatch
 import os
 import os.path
 import re
@@ -45,7 +46,11 @@ import stat
 import string
 import sys
 import time
-import cStringIO
+
+try:
+    import codecs
+except ImportError:
+    pass
 
 import SCons.Action
 from SCons.Debug import logInstanceCreation
@@ -61,6 +66,23 @@ from SCons.Debug import Trace
 
 do_store_info = True
 
+
+class EntryProxyAttributeError(AttributeError):
+    """
+    An AttributeError subclass for recording and displaying the name
+    of the underlying Entry involved in an AttributeError exception.
+    """
+    def __init__(self, entry_proxy, attribute):
+        AttributeError.__init__(self)
+        self.entry_proxy = entry_proxy
+        self.attribute = attribute
+    def __str__(self):
+        entry = self.entry_proxy.get()
+        fmt = "%s instance %s has no attribute %s"
+        return fmt % (entry.__class__.__name__,
+                      repr(entry.name),
+                      repr(self.attribute))
+
 # The max_drift value:  by default, use a cached signature value for
 # any file that's been untouched for more than two days.
 default_max_drift = 2*24*60*60
@@ -225,8 +247,6 @@ def LinkFunc(target, source, env):
             if func == Link_Funcs[-1]:
                 # exception of the last link method (copy) are fatal
                 raise
-            else:
-                pass
     return 0
 
 Link = SCons.Action.Action(LinkFunc, None)
@@ -485,16 +505,11 @@ class EntryProxy(SCons.Util.Proxy):
         except KeyError:
             try:
                 attr = SCons.Util.Proxy.__getattr__(self, name)
-            except AttributeError:
-                entry = self.get()
-                classname = string.split(str(entry.__class__), '.')[-1]
-                if classname[-2:] == "'>":
-                    # new-style classes report their name as:
-                    #   "<class 'something'>"
-                    # instead of the classic classes:
-                    #   "something"
-                    classname = classname[:-2]
-                raise AttributeError, "%s instance '%s' has no attribute '%s'" % (classname, entry.name, name)
+            except AttributeError, e:
+                # Raise our own AttributeError subclass with an
+                # overridden __str__() method that identifies the
+                # name of the entry that caused the exception.
+                raise EntryProxyAttributeError(self, name)
             return attr
         else:
             return attr_function(self)
@@ -592,7 +607,7 @@ class Base(SCons.Node.Node):
         if self.duplicate or self.is_derived():
             return self.get_path()
         srcnode = self.srcnode()
-        if srcnode.stat() is None and not self.stat() is None:
+        if srcnode.stat() is None and self.stat() is not None:
             result = self.get_path()
         else:
             result = srcnode.get_path()
@@ -607,7 +622,7 @@ class Base(SCons.Node.Node):
             # values that the underlying stat() method saved.
             try: del self._memo['stat']
             except KeyError: pass
-            if not self is srcnode:
+            if self is not srcnode:
                 try: del srcnode._memo['stat']
                 except KeyError: pass
         return result
@@ -625,7 +640,7 @@ class Base(SCons.Node.Node):
         return result
 
     def exists(self):
-        return not self.stat() is None
+        return self.stat() is not None
 
     def rexists(self):
         return self.rfile().exists()
@@ -642,11 +657,11 @@ class Base(SCons.Node.Node):
 
     def isdir(self):
         st = self.stat()
-        return not st is None and stat.S_ISDIR(st[stat.ST_MODE])
+        return st is not None and stat.S_ISDIR(st[stat.ST_MODE])
 
     def isfile(self):
         st = self.stat()
-        return not st is None and stat.S_ISREG(st[stat.ST_MODE])
+        return st is not None and stat.S_ISREG(st[stat.ST_MODE])
 
     if hasattr(os, 'symlink'):
         def islink(self):
@@ -866,11 +881,8 @@ class Entry(Base):
         return self.get_suffix()
 
     def get_contents(self):
-        """Fetch the contents of the entry.
-
-        Since this should return the real contents from the file
-        system, we check to see into what sort of subclass we should
-        morph this Entry."""
+        """Fetch the contents of the entry.  Returns the exact binary
+        contents of the file."""
         try:
             self = self.disambiguate(must_exist=1)
         except SCons.Errors.UserError:
@@ -883,10 +895,28 @@ class Entry(Base):
         else:
             return self.get_contents()
 
+    def get_text_contents(self):
+        """Fetch the decoded text contents of a Unicode encoded Entry.
+
+        Since this should return the text contents from the file
+        system, we check to see into what sort of subclass we should
+        morph this Entry."""
+        try:
+            self = self.disambiguate(must_exist=1)
+        except SCons.Errors.UserError:
+            # There was nothing on disk with which to disambiguate
+            # this entry.  Leave it as an Entry, but return a null
+            # string so calls to get_text_contents() in emitters and
+            # the like (e.g. in qt.py) don't have to disambiguate by
+            # hand or catch the exception.
+            return ''
+        else:
+            return self.get_text_contents()
+
     def must_be_same(self, klass):
         """Called to make sure a Node is a Dir.  Since we're an
         Entry, we can morph into one."""
-        if not self.__class__ is klass:
+        if self.__class__ is not klass:
             self.__class__ = klass
             self._morph()
             self.clear()
@@ -910,7 +940,7 @@ class Entry(Base):
 
     def rel_path(self, other):
         d = self.disambiguate()
-        if d.__class__ == Entry:
+        if d.__class__ is Entry:
             raise "rel_path() could not disambiguate File/Dir"
         return d.rel_path(other)
 
@@ -1065,7 +1095,7 @@ class FS(LocalFS):
         """
         curr=self._cwd
         try:
-            if not dir is None:
+            if dir is not None:
                 self._cwd = dir
                 if change_os_dir:
                     os.chdir(dir.abspath)
@@ -1348,7 +1378,7 @@ class Dir(Base):
                         del node._srcreps
                     except AttributeError:
                         pass
-                    if duplicate != None:
+                    if duplicate is not None:
                         node.duplicate=duplicate
 
     def __resetDuplicate(self, node):
@@ -1450,15 +1480,15 @@ class Dir(Base):
         """Return a path to "other" relative to this directory.
         """
 
-       # This complicated and expensive method, which constructs relative
-       # paths between arbitrary Node.FS objects, is no longer used
-       # by SCons itself.  It was introduced to store dependency paths
-       # in .sconsign files relative to the target, but that ended up
-       # being significantly inefficient.
+        # This complicated and expensive method, which constructs relative
+        # paths between arbitrary Node.FS objects, is no longer used
+        # by SCons itself.  It was introduced to store dependency paths
+        # in .sconsign files relative to the target, but that ended up
+        # being significantly inefficient.
         #
-       # We're continuing to support the method because some SConstruct
-       # files out there started using it when it was available, and
-       # we're all about backwards compatibility..
+        # We're continuing to support the method because some SConstruct
+        # files out there started using it when it was available, and
+        # we're all about backwards compatibility..
 
         try:
             memo_dict = self._memo['rel_path']
@@ -1472,11 +1502,9 @@ class Dir(Base):
                 pass
 
         if self is other:
-
             result = '.'
 
         elif not other in self.path_elements:
-
             try:
                 other_dir = other.get_dir()
             except AttributeError:
@@ -1490,9 +1518,7 @@ class Dir(Base):
                         result = other.name
                     else:
                         result = dir_rel_path + os.sep + other.name
-
         else:
-
             i = self.path_elements.index(other) + 1
 
             path_elems = ['..'] * (len(self.path_elements) - i) \
@@ -1543,7 +1569,7 @@ class Dir(Base):
     def build(self, **kw):
         """A null "builder" for directories."""
         global MkdirBuilder
-        if not self.builder is MkdirBuilder:
+        if self.builder is not MkdirBuilder:
             apply(SCons.Node.Node.build, [self,], kw)
 
     #
@@ -1559,10 +1585,9 @@ class Dir(Base):
             if parent.exists():
                 break
             listDirs.append(parent)
-            p = parent.up()
-            if p is None:
-                raise SCons.Errors.StopError, parent.path
-            parent = p
+            parent = parent.up()
+        else:
+            raise SCons.Errors.StopError, parent.path
         listDirs.reverse()
         for dirnode in listDirs:
             try:
@@ -1582,7 +1607,7 @@ class Dir(Base):
 
     def multiple_side_effect_has_builder(self):
         global MkdirBuilder
-        return not self.builder is MkdirBuilder and self.has_builder()
+        return self.builder is not MkdirBuilder and self.has_builder()
 
     def alter_targets(self):
         """Return any corresponding targets in a variant directory.
@@ -1593,13 +1618,18 @@ class Dir(Base):
         """A directory does not get scanned."""
         return None
 
+    def get_text_contents(self):
+        """We already emit things in text, so just return the binary
+        version."""
+        return self.get_contents()
+
     def get_contents(self):
         """Return content signatures and names of all our children
         separated by new-lines. Ensure that the nodes are sorted."""
         contents = []
         name_cmp = lambda a, b: cmp(a.name, b.name)
         sorted_children = self.children()[:]
-        sorted_children.sort(name_cmp)        
+        sorted_children.sort(name_cmp)
         for node in sorted_children:
             contents.append('%s %s\n' % (node.get_csig(), node.name))
         return string.join(contents, '')
@@ -1621,7 +1651,7 @@ class Dir(Base):
     def is_up_to_date(self):
         """If any child is not up-to-date, then this directory isn't,
         either."""
-        if not self.builder is MkdirBuilder and not self.exists():
+        if self.builder is not MkdirBuilder and not self.exists():
             return 0
         up_to_date = SCons.Node.up_to_date
         for kid in self.children():
@@ -1794,7 +1824,7 @@ class Dir(Base):
             except TypeError: pass
         node = self.srcdir_duplicate(name)
         if isinstance(node, Dir):
-            node = None
+            return None
         return node
 
     def walk(self, func, arg):
@@ -1887,6 +1917,7 @@ class Dir(Base):
         for srcdir in self.srcdir_list():
             search_dir_list.extend(srcdir.get_all_rdirs())
 
+        selfEntry = self.Entry
         names = []
         for dir in search_dir_list:
             # We use the .name attribute from the Node because the keys of
@@ -1896,34 +1927,39 @@ class Dir(Base):
             entry_names = filter(lambda n: n not in ('.', '..'), dir.entries.keys())
             node_names = map(lambda n, e=dir.entries: e[n].name, entry_names)
             names.extend(node_names)
+            if not strings:
+                # Make sure the working directory (self) actually has
+                # entries for all Nodes in repositories or variant dirs.
+                map(selfEntry, node_names)
             if ondisk:
                 try:
                     disk_names = os.listdir(dir.abspath)
                 except os.error:
-                    pass
-                else:
-                    names.extend(disk_names)
-                    if not strings:
-                        # We're going to return corresponding Nodes in
-                        # the local directory, so we need to make sure
-                        # those Nodes exist.  We only want to create
-                        # Nodes for the entries that will match the
-                        # specified pattern, though, which means we
-                        # need to filter the list here, even though
-                        # the overall list will also be filtered later,
-                        # after we exit this loop.
-                        if pattern[0] != '.':
-                            #disk_names = [ d for d in disk_names if d[0] != '.' ]
-                            disk_names = filter(lambda x: x[0] != '.', disk_names)
-                        disk_names = fnmatch.filter(disk_names, pattern)
-                        rep_nodes = map(dir.Entry, disk_names)
-                        #rep_nodes = [ n.disambiguate() for n in rep_nodes ]
-                        rep_nodes = map(lambda n: n.disambiguate(), rep_nodes)
-                        for node, name in izip(rep_nodes, disk_names):
-                            n = self.Entry(name)
-                            if n.__class__ != node.__class__:
-                                n.__class__ = node.__class__
-                                n._morph()
+                    continue
+                names.extend(disk_names)
+                if not strings:
+                    # We're going to return corresponding Nodes in
+                    # the local directory, so we need to make sure
+                    # those Nodes exist.  We only want to create
+                    # Nodes for the entries that will match the
+                    # specified pattern, though, which means we
+                    # need to filter the list here, even though
+                    # the overall list will also be filtered later,
+                    # after we exit this loop.
+                    if pattern[0] != '.':
+                        #disk_names = [ d for d in disk_names if d[0] != '.' ]
+                        disk_names = filter(lambda x: x[0] != '.', disk_names)
+                    disk_names = fnmatch.filter(disk_names, pattern)
+                    dirEntry = dir.Entry
+                    for name in disk_names:
+                        # Add './' before disk filename so that '#' at
+                        # beginning of filename isn't interpreted.
+                        name = './' + name
+                        node = dirEntry(name).disambiguate()
+                        n = selfEntry(name)
+                        if n.__class__ != node.__class__:
+                            n.__class__ = node.__class__
+                            n._morph()
 
         names = set(names)
         if pattern[0] != '.':
@@ -2132,14 +2168,13 @@ class FileBuildInfo(SCons.Node.BuildInfoBase):
                 strings = getattr(self, nattr)
                 nodeinfos = getattr(self, sattr)
             except AttributeError:
-                pass
-            else:
-                nodes = []
-                for s, ni in izip(strings, nodeinfos):
-                    if not isinstance(s, SCons.Node.Node):
-                        s = ni.str_to_node(s)
-                    nodes.append(s)
-                setattr(self, nattr, nodes)
+                continue
+            nodes = []
+            for s, ni in izip(strings, nodeinfos):
+                if not isinstance(s, SCons.Node.Node):
+                    s = ni.str_to_node(s)
+                nodes.append(s)
+            setattr(self, nattr, nodes)
     def format(self, names=0):
         result = []
         bkids = self.bsources + self.bdepends + self.bimplicit
@@ -2171,23 +2206,25 @@ class File(Base):
         self._morph()
 
     def Entry(self, name):
-        #"""Create an entry node named 'name' relative to
-        #the directory of this file."""
+        """Create an entry node named 'name' relative to
+        the directory of this file."""
         return self.dir.Entry(name)
 
     def Dir(self, name, create=True):
-        #"""Create a directory node named 'name' relative to
-        #the directory of this file."""
+        """Create a directory node named 'name' relative to
+        the directory of this file."""
         return self.dir.Dir(name, create=create)
 
     def Dirs(self, pathlist):
         """Create a list of directories relative to the SConscript
         directory of this file."""
+        # TODO(1.5)
+        # return [self.Dir(p) for p in pathlist]
         return map(lambda p, s=self: s.Dir(p), pathlist)
 
     def File(self, name):
-        #"""Create a file node named 'name' relative to
-        #the directory of this file."""
+        """Create a file node named 'name' relative to
+        the directory of this file."""
         return self.dir.File(name)
 
     #def generate_build_dict(self):
@@ -2224,12 +2261,28 @@ class File(Base):
             return ''
         fname = self.rfile().abspath
         try:
-            r = open(fname, "rb").read()
+            contents = open(fname, "rb").read()
         except EnvironmentError, e:
             if not e.filename:
                 e.filename = fname
             raise
-        return r
+        return contents
+
+    try:
+        import codecs
+    except ImportError:
+        get_text_contents = get_contents
+    else:
+        # This attempts to figure out what the encoding of the text is
+        # based upon the BOM bytes, and then decodes the contents so that
+        # it's a valid python string.
+        def get_text_contents(self):
+            contents = self.get_contents()
+            if contents.startswith(codecs.BOM_UTF8):
+                contents = contents.decode('utf-8')
+            elif contents.startswith(codecs.BOM_UTF16):
+                contents = contents.decode('utf-16')
+            return contents
 
     def get_content_hash(self):
         """
@@ -2382,26 +2435,24 @@ class File(Base):
             try:
                 value = getattr(old_entry, attr)
             except AttributeError:
-                pass
-            else:
-                setattr(binfo, attr, value)
-                delattr(old_entry, attr)
+                continue
+            setattr(binfo, attr, value)
+            delattr(old_entry, attr)
         for attr in self.convert_sig_attrs:
             try:
                 sig_list = getattr(old_entry, attr)
             except AttributeError:
-                pass
-            else:
-                value = []
-                for sig in sig_list:
-                    ninfo = self.new_ninfo()
-                    if len(sig) == 32:
-                        ninfo.csig = sig
-                    else:
-                        ninfo.timestamp = sig
-                    value.append(ninfo)
-                setattr(binfo, attr, value)
-                delattr(old_entry, attr)
+                continue
+            value = []
+            for sig in sig_list:
+                ninfo = self.new_ninfo()
+                if len(sig) == 32:
+                    ninfo.csig = sig
+                else:
+                    ninfo.timestamp = sig
+                value.append(ninfo)
+            setattr(binfo, attr, value)
+            delattr(old_entry, attr)
         return new_entry
 
     memoizer_counters.append(SCons.Memoize.CountValue('get_stored_info'))
@@ -2465,6 +2516,7 @@ class File(Base):
                 pass
 
         if scanner:
+            # result = [n.disambiguate() for n in scanner(self, env, path)]
             result = scanner(self, env, path)
             result = map(lambda N: N.disambiguate(), result)
         else:
@@ -2568,7 +2620,7 @@ class File(Base):
             scb = self.sbuilder
         except AttributeError:
             scb = self.sbuilder = self.find_src_builder()
-        return not scb is None
+        return scb is not None
 
     def alter_targets(self):
         """Return any corresponding targets in a variant directory.
@@ -2641,7 +2693,7 @@ class File(Base):
         # Duplicate from source path if we are set up to do this.
         if self.duplicate and not self.is_derived() and not self.linked:
             src = self.srcnode()
-            if not src is self:
+            if src is not self:
                 # At this point, src is meant to be copied in a variant directory.
                 src = src.rfile()
                 if src.abspath != self.abspath:
@@ -2675,23 +2727,22 @@ class File(Base):
         old = self.get_stored_info()
         mtime = self.get_timestamp()
 
-        csig = None
         max_drift = self.fs.max_drift
         if max_drift > 0:
             if (time.time() - mtime) > max_drift:
                 try:
                     n = old.ninfo
                     if n.timestamp and n.csig and n.timestamp == mtime:
-                        csig = n.csig
+                        return n.csig
                 except AttributeError:
                     pass
         elif max_drift == 0:
             try:
-                csig = old.ninfo.csig
+                return old.ninfo.csig
             except AttributeError:
                 pass
 
-        return csig
+        return None
 
     def get_csig(self):
         """
@@ -2746,7 +2797,7 @@ class File(Base):
             return 1
 
     def changed_state(self, target, prev_ni):
-        return (self.state != SCons.Node.up_to_date)
+        return self.state != SCons.Node.up_to_date
 
     def changed_timestamp_then_content(self, target, prev_ni):
         if not self.changed_timestamp_match(target, prev_ni):
@@ -2824,6 +2875,19 @@ class File(Base):
                    (isinstance(node, File) or isinstance(node, Entry) \
                     or not node.is_derived()):
                         result = node
+                        # Copy over our local attributes to the repository
+                        # Node so we identify shared object files in the
+                        # repository and don't assume they're static.
+                        #
+                        # This isn't perfect; the attribute would ideally
+                        # be attached to the object in the repository in
+                        # case it was built statically in the repository
+                        # and we changed it to shared locally, but that's
+                        # rarely the case and would only occur if you
+                        # intentionally used the same suffix for both
+                        # shared and static objects anyway.  So this
+                        # should work well in practice.
+                        result.attributes = self.attributes
                         break
         self._memo['rfile'] = result
         return result
@@ -2867,13 +2931,14 @@ class File(Base):
         # Add the path to the cache signature, because multiple
         # targets built by the same action will all have the same
         # build signature, and we have to differentiate them somehow.
-        children =  self.children()
-        sigs = map(lambda n: n.get_cachedir_csig(), children)
+        children = self.children()
         executor = self.get_executor()
+        # sigs = [n.get_cachedir_csig() for n in children]
+        sigs = map(lambda n: n.get_cachedir_csig(), children)
         sigs.append(SCons.Util.MD5signature(executor.get_contents()))
         sigs.append(self.path)
-        self.cachesig = SCons.Util.MD5collect(sigs)
-        return self.cachesig
+        result = self.cachesig = SCons.Util.MD5collect(sigs)
+        return result
 
 
 default_fs = None
@@ -3039,12 +3104,11 @@ def invalidate_node_memos(targets):
     # affected. XXX The way to check if Execute() is in the stacktrace
     # is a very dirty hack and should be replaced by a more sensible
     # solution.
-    must_invalidate = 0
-    tb = extract_stack()
-    for f in tb:
+    for f in extract_stack():
         if f[2] == 'Execute' and f[0][-14:] == 'Environment.py':
-            must_invalidate = 1
-    if not must_invalidate:
+            break
+    else:
+        # Dont have to invalidate, so return
         return
 
     if not SCons.Util.is_List(targets):