add hashlib support for checksum verification, refactor checksum functions to not...
author: Marius Mauch <genone@gentoo.org>
Sat, 23 Jun 2007 13:12:07 +0000 (13:12 -0000)
committer: Marius Mauch <genone@gentoo.org>
Sat, 23 Jun 2007 13:12:07 +0000 (13:12 -0000)
svn path=/main/trunk/; revision=6972

pym/portage/__init__.py
pym/portage/checksum.py

index 6a4564ba5657c9e7a11487380c2d2e2b483b7382..17dc3089b1d488f0907571b424814040a6ee54d6 100644 (file)
@@ -819,7 +819,7 @@ def autouse(myvartree, use_cache=1, mysettings=None):
 
 def check_config_instance(test):
        if not isinstance(test, config):
-               raise TypeError("Invalid type for config object: %s" % test.__class__)
+               raise TypeError("Invalid type for config object: %s (should be %s)" % (test.__class__, config))
 
 class config(object):
        """
index a29f3c3258d506f9907a8b86d465997d5bbc9e36..1315237e83fbdad82037107a6c2cab20ae609a16 100644 (file)
@@ -3,9 +3,6 @@
 # Distributed under the terms of the GNU General Public License v2
 # $Id$
 
-if not hasattr(__builtins__, "set"):
-       from sets import Set as set
-
 from portage.const import PRIVATE_PATH,PRELINK_BINARY,HASHING_BLOCKSIZE
 import os
 import errno
@@ -16,50 +13,88 @@ import portage.process
 import portage.util
 import portage.locks
 import commands
-import sha
-
-
-# actual hash functions first
+import md5, sha
 
 #dict of all available hash functions
 hashfunc_map = {}
+hashorigin_map = {}
 
-# We _try_ to load this module. If it fails we do the slightly slower fallback.
-try:
-       import fchksum
+def _generate_hash_function(hashtype, hashobject, origin="unknown"):
+       def pyhash(filename):
+               """
+               Run a checksum against a file.
        
-       def md5hash(filename):
-               return fchksum.fmd5t(filename)
+               @param filename: File to run the checksum against
+               @type filename: String
+               @return: The hash and size of the data
+               """
+               f = open(filename, 'rb')
+               blocksize = HASHING_BLOCKSIZE
+               data = f.read(blocksize)
+               size = 0L
+               sum = hashobject()
+               while data:
+                       sum.update(data)
+                       size = size + len(data)
+                       data = f.read(blocksize)
+               f.close()
 
-except ImportError:
-       import md5
-       def md5hash(filename):
-               return pyhash(filename, md5)
-hashfunc_map["MD5"] = md5hash
+               return (sum.hexdigest(), size)
+       hashfunc_map[hashtype] = pyhash
+       hashorigin_map[hashtype] = origin
+       return pyhash
+
+# Define hash functions, try to use the best module available. Later definitions
+# override earlier ones
+
+# Use the internal modules as last fallback
+md5hash = _generate_hash_function("MD5", md5.new, origin="internal")
+sha1hash = _generate_hash_function("SHA1", sha.new, origin="internal")
 
-def sha1hash(filename):
-       return pyhash(filename, sha)
-hashfunc_map["SHA1"] = sha1hash
+# Use pycrypto when available, prefer it over the internal fallbacks
+try:
+       from Crypto.Hash import MD5, SHA, SHA256, RIPEMD
        
-# Keep pycrypto optional for now, there are no internal fallbacks for these
+       md5hash = _generate_hash_function("MD5", MD5.new, origin="pycrypto")
+       sha1hash = _generate_hash_function("SHA1", SHA.new, origin="pycrypto")
+       sha256hash = _generate_hash_function("SHA256", SHA256.new, origin="pycrypto")
+       rmd160hash = _generate_hash_function("RMD160", RIPEMD.new, origin="pycrypto")
+except ImportError, e:
+       pass
+
+# Use hashlib from python-2.5 if available and prefer it over pycrypto and internal fallbacks.
+# Need special handling for RMD160 as it may not always be provided by hashlib.
 try:
-       import Crypto.Hash.SHA256
+       import hashlib
        
-       def sha256hash(filename):
-               return pyhash(filename, Crypto.Hash.SHA256)
-       hashfunc_map["SHA256"] = sha256hash
-except ImportError:
+       md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
+       sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
+       sha256hash = _generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
+       try:
+               hashlib.new('ripemd160')
+       except ValueError:
+               pass
+       else:
+               def rmd160():
+                       return hashlib.new('ripemd160')
+               rmd160hash = _generate_hash_function("RMD160", rmd160, origin="hashlib")
+except ImportError, e:
        pass
+       
 
+# Use python-fchksum if available, prefer it over all other MD5 implementations
 try:
-       import Crypto.Hash.RIPEMD
+       import fchksum
        
-       def rmd160hash(filename):
-               return pyhash(filename, Crypto.Hash.RIPEMD)
-       hashfunc_map["RMD160"] = rmd160hash
+       def md5hash(filename):
+               return fchksum.fmd5t(filename)
+       hashfunc_map["MD5"] = md5hash
+       hashorigin_map["MD5"] = "python-fchksum"
+
 except ImportError:
        pass
 
+# There is only one implementation for size
 def getsize(filename):
        size = os.stat(filename).st_size
        return (size, size)
@@ -86,6 +121,11 @@ def perform_all(x, calc_prelink=0):
 def get_valid_checksum_keys():
        return hashfunc_map.keys()
 
+def get_hash_origin(hashtype):
+       if not hashtype in hashfunc_map.keys():
+               raise KeyError(hashtype)
+       return hashorigin_map.get(hashtype, "unknown")
+
 def verify_all(filename, mydict, calc_prelink=0, strict=0):
        """
        Verify all checksums against a file.
@@ -145,29 +185,6 @@ def verify_all(filename, mydict, calc_prelink=0, strict=0):
                                        break
        return file_is_ok,reason
 
-def pyhash(filename, hashobject):
-       """
-       Run a checksum against a file.
-
-       @param filename: File to run the checksum against
-       @type filename: String
-       @param hashname: The hash object that will execute the checksum on the file
-       @type hashname: Object
-       @return: The hash and size of the data
-       """
-       f = open(filename, 'rb')
-       blocksize = HASHING_BLOCKSIZE
-       data = f.read(blocksize)
-       size = 0L
-       sum = hashobject.new()
-       while data:
-               sum.update(data)
-               size = size + len(data)
-               data = f.read(blocksize)
-       f.close()
-
-       return (sum.hexdigest(), size)
-
 def perform_checksum(filename, hashname="MD5", calc_prelink=0):
        """
        Run a specific checksum against a file.
@@ -237,6 +254,6 @@ def perform_multiple_checksums(filename, hashes=["MD5"], calc_prelink=0):
        rVal = {}
        for x in hashes:
                if x not in hashfunc_map:
-                       raise portage.exception.DigestException, x+" hash function not available (needs dev-python/pycrypto)"
+                       raise portage.exception.DigestException, x+" hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)"
                rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
        return rVal