# checksum.py -- core Portage functionality
# Copyright 1998-2011 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
import errno
import stat
import tempfile

import portage
from portage.const import PRELINK_BINARY,HASHING_BLOCKSIZE
from portage.localization import _
from portage import os
from portage import _encodings
from portage import _unicode_encode
# Dict of all available hash functions, keyed by hash type name
# (e.g. "MD5", "size"). Each value is a callable taking a filename.
hashfunc_map = {}
# Name of the implementation that provides each registered hash type.
hashorigin_map = {}
class _generate_hash_function(object):
    """
    Callable that checksums a file with one hash implementation.

    Creating an instance registers it in hashfunc_map under the given
    hash type and records its origin in hashorigin_map.
    """

    __slots__ = ("_hashobject",)

    def __init__(self, hashtype, hashobject, origin="unknown"):
        """
        @param hashtype: Name under which this function is registered
        @type hashtype: String
        @param hashobject: Zero-argument callable returning a new hash
            object that provides update() and hexdigest()
        @param origin: Name of the implementation providing hashobject
        @type origin: String
        """
        self._hashobject = hashobject
        hashfunc_map[hashtype] = self
        hashorigin_map[hashtype] = origin

    def __call__(self, filename):
        """
        Run a checksum against a file.

        @param filename: File to run the checksum against
        @type filename: String
        @rtype: Tuple
        @return: The hash (hex digest) and size of the data
        @raise portage.exception.OperationNotPermitted: open() failed
            with EPERM
        @raise portage.exception.PermissionDenied: open() failed with
            EACCES
        @raise portage.exception.FileNotFound: open() failed with ENOENT
        """
        try:
            f = open(_unicode_encode(filename,
                encoding=_encodings['fs'], errors='strict'), 'rb')
        except (IOError, OSError) as e:
            func_call = "open('%s')" % filename
            if e.errno == errno.EPERM:
                raise portage.exception.OperationNotPermitted(func_call)
            elif e.errno == errno.EACCES:
                raise portage.exception.PermissionDenied(func_call)
            elif e.errno == errno.ENOENT:
                raise portage.exception.FileNotFound(filename)
            # Any other errno is propagated untranslated.
            raise

        blocksize = HASHING_BLOCKSIZE
        size = 0
        checksum = self._hashobject()
        # Close the file even when read() or update() fails part-way.
        try:
            data = f.read(blocksize)
            while data:
                checksum.update(data)
                size += len(data)
                data = f.read(blocksize)
        finally:
            f.close()

        return (checksum.hexdigest(), size)
# Define hash functions, try to use the best module available. Later
# definitions override earlier ones.

# Use the internal modules as last fallback
try:
    from hashlib import md5 as _new_md5
except ImportError:
    # Fall back to the standalone md5 module when hashlib is missing.
    from md5 import new as _new_md5

md5hash = _generate_hash_function("MD5", _new_md5, origin="internal")

try:
    from hashlib import sha1 as _new_sha1
except ImportError:
    # Fall back to the standalone sha module when hashlib is missing.
    from sha import new as _new_sha1

sha1hash = _generate_hash_function("SHA1", _new_sha1, origin="internal")
# Use pycrypto when available, prefer it over the internal fallbacks
try:
    from Crypto.Hash import SHA256, RIPEMD
    sha256hash = _generate_hash_function("SHA256", SHA256.new, origin="pycrypto")
    rmd160hash = _generate_hash_function("RMD160", RIPEMD.new, origin="pycrypto")
except ImportError:
    # pycrypto is optional; keep whatever has been registered so far.
    pass
# Use hashlib from python-2.5 if available and prefer it over pycrypto and
# internal fallbacks. Need special handling for RMD160 as it may not always
# be provided by hashlib.
try:
    import functools
    import hashlib

    md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
    sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
    sha256hash = _generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
    for local_name, hash_name in (("rmd160", "ripemd160"), ):
        try:
            # Probe: hashlib.new() raises ValueError for unsupported names.
            hashlib.new(hash_name)
        except ValueError:
            pass
        else:
            # Bind e.g. "rmd160hash" at module level, mirroring the
            # explicit assignments above.
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(),
                    functools.partial(hashlib.new, hash_name),
                    origin='hashlib')
except ImportError:
    # No hashlib/functools; keep the earlier registrations.
    pass
# Use python-fchksum if available, prefer it over all other MD5 implementations
try:
    import fchksum

    def md5hash(filename):
        """
        Checksum a file with fchksum.fmd5t.

        @param filename: File to run the checksum against
        @return: Whatever fchksum.fmd5t returns; callers unpack it as
            (hash, size) -- TODO confirm against python-fchksum
        """
        return fchksum.fmd5t(filename)
    hashfunc_map["MD5"] = md5hash
    hashorigin_map["MD5"] = "python-fchksum"

except ImportError:
    # python-fchksum is optional; keep the MD5 function already registered.
    pass
# There is only one implementation for size
def getsize(filename):
    """
    Pseudo hash function that reports the size of a file.

    @param filename: File to stat
    @rtype: Tuple
    @return: (size, size). The size stands in for both elements of the
        (hash, size) pair that callers of hashfunc_map entries unpack.
    """
    size = os.stat(filename).st_size
    return (size, size)
hashfunc_map["size"] = getsize
# end actual hash functions

# True when the prelink binary exists and "--version" exits with status 0.
prelink_capable = False
if os.path.exists(PRELINK_BINARY):
    results = portage.subprocess_getstatusoutput(
        "%s --version > /dev/null 2>&1" % (PRELINK_BINARY,))
    # The exit code is taken from the high byte of the returned status.
    if (results[0] >> 8) == 0:
        prelink_capable = True
    del results
def perform_md5(x, calc_prelink=0):
    """
    Return only the MD5 hash of a file.

    @param x: File to run the checksum against
    @param calc_prelink: Passed through to perform_checksum
    @return: First element of the perform_checksum result
    """
    result = perform_checksum(x, "MD5", calc_prelink)
    return result[0]
def _perform_md5_merge(x, **kwargs):
    """
    Run perform_md5 on a filename encoded with _encodings['merge'].

    @param x: File to run the checksum against
    @param kwargs: Keyword arguments forwarded to perform_md5
    @return: The perform_md5 result
    """
    encoded_name = _unicode_encode(x,
        encoding=_encodings['merge'], errors='strict')
    return perform_md5(encoded_name, **kwargs)
def perform_all(x, calc_prelink=0):
    """
    Run every registered hash function against a file.

    @param x: File to run the checksums against
    @type x: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Dictionary
    @return: Mapping of each hash type in hashfunc_map to its result
    """
    mydict = {}
    for k in hashfunc_map:
        # perform_checksum looks the function up by name, so pass the
        # key rather than the registered callable.
        mydict[k] = perform_checksum(x, k, calc_prelink)[0]
    return mydict
def get_valid_checksum_keys():
    """
    List the hash type names currently registered in hashfunc_map.

    @rtype: List
    @return: A new list of the hashfunc_map keys
    """
    return list(hashfunc_map.keys())
def get_hash_origin(hashtype):
    """
    Look up which implementation provides a hash type.

    @param hashtype: Name of a registered hash type
    @return: The recorded origin, or "unknown" if none was recorded
    @raise KeyError: hashtype is not in hashfunc_map
    """
    if hashtype in hashfunc_map:
        return hashorigin_map.get(hashtype, "unknown")
    raise KeyError(hashtype)
def verify_all(filename, mydict, calc_prelink=0, strict=0):
    """
    Verify all checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param mydict: Recorded values for this single file, keyed by hash
        type; it must contain a "size" entry
    @type mydict: Dictionary
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @param strict: Enable/Disable strict checking (which stops exactly at a checksum failure and throws an exception)
    @type strict: Integer
    @rtype: Tuple
    @return: Result of the checks and possible message:
        1) If size fails, False, and a tuple containing a message, the actual size, and the recorded size
        2) If there is an os error other than a missing file, False, and a tuple containing the system error followed by 2 nulls
        3) If mydict holds no supported hash type, False, and a tuple containing a message, the given types, and the supported types
        4) If a checksum fails, False and a tuple containing a message, the actual hash, and the recorded hash
        5) If all checks succeed, return True and a fake reason
    @raise portage.exception.FileNotFound: the file does not exist
    @raise portage.exception.DigestException: strict is enabled and a
        checksum does not match
    """
    # Dict relates to single file only.
    # returns: (passed,reason)
    try:
        mysize = os.stat(filename)[stat.ST_SIZE]
        if mydict["size"] != mysize:
            return False,(_("Filesize does not match recorded size"), mysize, mydict["size"])
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        return False, (str(e), None, None)

    verifiable_hash_types = set(mydict).intersection(hashfunc_map)
    verifiable_hash_types.discard("size")
    if not verifiable_hash_types:
        # Nothing besides the size can be checked; report which types
        # were supplied and which ones are supported.
        expected = set(hashfunc_map)
        expected.discard("size")
        expected = " ".join(sorted(expected))
        got = set(mydict)
        got.discard("size")
        got = " ".join(sorted(got))
        return False, (_("Insufficient data for checksum verification"), got, expected)

    # Same keys and order as walking sorted(mydict) while skipping "size"
    # and unsupported types.
    for x in sorted(verifiable_hash_types):
        myhash = perform_checksum(filename, x, calc_prelink=calc_prelink)[0]
        if mydict[x] != myhash:
            if strict:
                raise portage.exception.DigestException(
                    ("Failed to verify '%(file)s' on " + \
                    "checksum type '%(type)s'") % \
                    {"file" : filename, "type" : x})
            # Non-strict mode stops at the first mismatch.
            return False, (("Failed on %s verification" % x), myhash,mydict[x])

    return True, "Reason unknown"
def perform_checksum(filename, hashname="MD5", calc_prelink=0):
    """
    Run a specific checksum against a file. The filename can
    be either unicode or an encoded byte string. It is passed
    through _unicode_encode with encoding=_encodings['fs'] and
    errors='strict', so a name that cannot be encoded results in
    an exception from that helper.

    @param filename: File to run the checksum against
    @type filename: String
    @param hashname: The type of hash function to run
    @type hashname: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Tuple
    @return: The hash and size of the data
    @raise portage.exception.DigestException: hashname is not registered
        in hashfunc_map
    @raise portage.exception.FileNotFound: the file to checksum does
        not exist
    """
    global prelink_capable
    # Make sure filename is encoded with the correct encoding before
    # it is passed to spawn (for prelink) and/or the hash function.
    filename = _unicode_encode(filename,
        encoding=_encodings['fs'], errors='strict')
    myfilename = filename
    prelink_tmpfile = None
    try:
        if calc_prelink and prelink_capable:
            # Create non-prelinked temporary file to checksum.
            # Files rejected by prelink are summed in place.
            try:
                tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
                try:
                    retval = portage.process.spawn([PRELINK_BINARY,
                        "--verify", filename], fd_pipes={1:tmpfile_fd})
                finally:
                    # The descriptor is only needed as the child's stdout.
                    os.close(tmpfile_fd)
                if retval == os.EX_OK:
                    myfilename = prelink_tmpfile
            except portage.exception.CommandNotFound:
                # This happens during uninstallation of prelink.
                prelink_capable = False
        try:
            if hashname not in hashfunc_map:
                raise portage.exception.DigestException(hashname + \
                    " hash function not available (needs dev-python/pycrypto)")
            myhash, mysize = hashfunc_map[hashname](myfilename)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                raise portage.exception.FileNotFound(myfilename)
            raise
        return myhash, mysize
    finally:
        # Always remove the temporary file; it may already be gone.
        if prelink_tmpfile:
            try:
                os.unlink(prelink_tmpfile)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
def perform_multiple_checksums(filename, hashes=("MD5",), calc_prelink=0):
    """
    Run a group of checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param hashes: Names of the checksum functions to run against the file
    @type hashes: Iterable of Strings
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Dictionary
    @return: A dictionary in the form:
        return_value[hash_name] = hash_result
        for each given checksum
    @raise portage.exception.DigestException: a requested hash type is
        not registered in hashfunc_map
    """
    rVal = {}
    for x in hashes:
        if x not in hashfunc_map:
            raise portage.exception.DigestException(x+" hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
        # Only the hash is kept; the size element is discarded.
        rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
    return rVal