1 # checksum.py -- core Portage functionality
2 # Copyright 1998-2011 Gentoo Foundation
3 # Distributed under the terms of the GNU General Public License v2
6 from portage.const import PRELINK_BINARY,HASHING_BLOCKSIZE
7 from portage.localization import _
9 from portage import _encodings
10 from portage import _unicode_encode
15 #dict of all available hash functions
class _generate_hash_function(object):
    """
    Callable that checksums a file with a single hash implementation.

    Creating an instance registers it in hashfunc_map under the given
    hash type and records the implementation's origin in hashorigin_map.
    """

    __slots__ = ("_hashobject",)

    def __init__(self, hashtype, hashobject, origin="unknown"):
        """
        @param hashtype: Key under which this function is registered
        @type hashtype: String
        @param hashobject: Zero-argument callable returning a new hash
            object that provides update() and hexdigest()
        @type hashobject: Callable
        @param origin: Label describing where the implementation comes from
        @type origin: String
        """
        self._hashobject = hashobject
        hashfunc_map[hashtype] = self
        hashorigin_map[hashtype] = origin

    def __call__(self, filename):
        """
        Run a checksum against a file.

        @param filename: File to run the checksum against
        @type filename: String
        @rtype: Tuple
        @return: The hash and size of the data
        @raise portage.exception.OperationNotPermitted: open() failed
            with EPERM
        @raise portage.exception.PermissionDenied: open() failed with EACCES
        @raise portage.exception.FileNotFound: open() failed with ENOENT
        """
        try:
            f = open(_unicode_encode(filename,
                encoding=_encodings['fs'], errors='strict'), 'rb')
        except (OSError, IOError) as e:
            func_call = "open('%s')" % filename
            if e.errno == errno.EPERM:
                raise portage.exception.OperationNotPermitted(func_call)
            elif e.errno == errno.EACCES:
                raise portage.exception.PermissionDenied(func_call)
            elif e.errno == errno.ENOENT:
                raise portage.exception.FileNotFound(filename)
            # Any other errno is passed through unchanged.
            raise

        blocksize = HASHING_BLOCKSIZE
        checksum = self._hashobject()
        size = 0
        # The with-statement closes the file even when read() or update()
        # raises, so a failed checksum does not leak the descriptor.
        with f:
            data = f.read(blocksize)
            while data:
                checksum.update(data)
                size += len(data)
                data = f.read(blocksize)

        return (checksum.hexdigest(), size)
# Define hash functions, try to use the best module available. Later definitions
# override earlier ones

# Use the internal modules as last fallback. hashlib is tried first; the
# standalone md5/sha modules are only imported when hashlib is missing,
# so their absence on interpreters that ship hashlib is not an error.
try:
    from hashlib import md5 as _new_md5
except ImportError:
    from md5 import new as _new_md5

md5hash = _generate_hash_function("MD5", _new_md5, origin="internal")

try:
    from hashlib import sha1 as _new_sha1
except ImportError:
    from sha import new as _new_sha1

sha1hash = _generate_hash_function("SHA1", _new_sha1, origin="internal")
# Use pycrypto when available, prefer it over the internal fallbacks
try:
    from Crypto.Hash import SHA256, RIPEMD
except ImportError:
    # pycrypto is optional: without it SHA256 and RMD160 are simply not
    # registered by this section.
    pass
else:
    sha256hash = _generate_hash_function("SHA256", SHA256.new, origin="pycrypto")
    rmd160hash = _generate_hash_function("RMD160", RIPEMD.new, origin="pycrypto")
# Use hashlib from python-2.5 if available and prefer it over pycrypto and internal fallbacks.
# Need special handling for RMD160 as it may not always be provided by hashlib.
try:
    import hashlib
except ImportError:
    # No hashlib: keep whatever the earlier sections registered.
    pass
else:
    md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
    sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
    sha256hash = _generate_hash_function("SHA256", hashlib.sha256, origin="hashlib")
    try:
        # Probe only: the result is discarded. hashlib.new() rejects an
        # algorithm name it cannot provide with ValueError.
        hashlib.new('ripemd160')
    except ValueError:
        pass
    else:
        def rmd160():
            """Return a new hashlib RIPEMD-160 hash object."""
            return hashlib.new('ripemd160')
        rmd160hash = _generate_hash_function("RMD160", rmd160, origin="hashlib")
# Use python-fchksum if available, prefer it over all other MD5 implementations
try:
    import fchksum
except ImportError:
    # python-fchksum is optional; keep the MD5 implementation that is
    # already registered.
    pass
else:
    def md5hash(filename):
        """
        Checksum a file with python-fchksum's MD5 implementation.

        @param filename: File to run the checksum against
        @type filename: String
        @return: Whatever fchksum.fmd5t() returns
        """
        # NOTE(review): callers unpack a (hash, size) pair from every
        # registered function -- confirm fmd5t() returns that shape.
        return fchksum.fmd5t(filename)
    hashfunc_map["MD5"] = md5hash
    hashorigin_map["MD5"] = "python-fchksum"
119 # There is only one implementation for size
def getsize(filename):
    """
    Report the size of a file in the same shape as the hash functions.

    @param filename: File to measure
    @type filename: String
    @rtype: Tuple
    @return: The size twice, as (size, size), so that callers which
        unpack a (hash, size) pair or take element 0 both get the size
    """
    size = os.stat(filename).st_size
    return (size, size)
123 hashfunc_map["size"] = getsize
125 # end actual hash functions
# Whether the prelink binary exists and runs. perform_checksum() reads
# this flag and clears it again if the command later goes missing.
prelink_capable = False
if os.path.exists(PRELINK_BINARY):
    results = portage.subprocess_getstatusoutput(
        "%s --version > /dev/null 2>&1" % (PRELINK_BINARY,))
    # The high byte of the returned status is compared against 0.
    if (results[0] >> 8) == 0:
        prelink_capable = True
    # Do not leave the temporary in the module namespace.
    del results
def perform_md5(x, calc_prelink=0):
    """
    Compute the MD5 checksum of a file.

    @param x: File to run the checksum against
    @type x: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @return: The first element of the perform_checksum() result
    """
    md5_result = perform_checksum(x, "MD5", calc_prelink)
    return md5_result[0]
def _perform_md5_merge(x, **kwargs):
    """
    Run perform_md5() on a name encoded with the 'merge' encoding.

    @param x: File to run the checksum against
    @type x: String
    @param kwargs: Keyword arguments passed through to perform_md5()
    @return: The result of perform_md5()
    """
    merge_encoded = _unicode_encode(
        x, encoding=_encodings['merge'], errors='strict')
    return perform_md5(merge_encoded, **kwargs)
def perform_all(x, calc_prelink=0):
    """
    Run every registered checksum function against a file.

    @param x: File to run the checksums against
    @type x: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Dictionary
    @return: A dictionary mapping each key of hashfunc_map to the first
        element of the corresponding perform_checksum() result
    """
    mydict = {}
    for k in hashfunc_map:
        # perform_checksum() looks the function up by name, so it must be
        # given the key, not the function object stored under it.
        mydict[k] = perform_checksum(x, k, calc_prelink)[0]
    return mydict
def get_valid_checksum_keys():
    """
    List the names under which checksum functions are registered.

    @rtype: List
    @return: A new list holding the keys of hashfunc_map
    """
    return [hash_name for hash_name in hashfunc_map]
def get_hash_origin(hashtype):
    """
    Look up the recorded origin of a registered hash function.

    @param hashtype: Name of the hash function
    @type hashtype: String
    @return: The origin recorded in hashorigin_map, or "unknown" when
        none was recorded
    @raise KeyError: hashtype is not a key of hashfunc_map
    """
    if hashtype in hashfunc_map:
        return hashorigin_map.get(hashtype, "unknown")
    raise KeyError(hashtype)
def verify_all(filename, mydict, calc_prelink=0, strict=0):
    """
    Verify all checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param mydict: Recorded values for this single file, keyed by hash
        name, including a "size" entry
    @type mydict: Dictionary
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @param strict: Enable/Disable strict checking (which stops exactly at a checksum failure and throws an exception)
    @type strict: Integer
    @rtype: Tuple
    @return: Result of the checks and possible message:
        1) If size fails, False, and a tuple containing a message, the actual size, and the recorded size
        2) If there is an os error, False, and a tuple containing the system error followed by 2 nulls
        3) If no usable hash is recorded, False, and a tuple containing a message, the recorded hash types, and the available hash types
        4) If a checksum fails, False and a tuple containing a message, the actual hash, and the recorded hash
        5) If all checks succeed, return True and a fake reason
    @raise portage.exception.FileNotFound: the file does not exist
    @raise portage.exception.DigestException: a checksum differs and
        strict is enabled
    """
    # Dict relates to single file only.
    # returns: (passed,reason)
    file_is_ok = True
    reason = "Reason unknown"
    try:
        mysize = os.stat(filename)[stat.ST_SIZE]
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        return False, (str(e), None, None)
    if mydict["size"] != mysize:
        return False, (_("Filesize does not match recorded size"), mysize, mydict["size"])

    verifiable_hash_types = set(mydict).intersection(hashfunc_map)
    verifiable_hash_types.discard("size")
    if not verifiable_hash_types:
        # Report both sides sorted so the message is deterministic.
        expected = set(hashfunc_map)
        expected.discard("size")
        expected = " ".join(sorted(expected))
        got = set(mydict)
        got.discard("size")
        got = " ".join(sorted(got))
        return False, (_("Insufficient data for checksum verification"), got, expected)

    # Only recorded types that are also registered (and are not "size")
    # can be checked; that is exactly verifiable_hash_types.
    for x in sorted(verifiable_hash_types):
        myhash = perform_checksum(filename, x, calc_prelink=calc_prelink)[0]
        if mydict[x] != myhash:
            if strict:
                raise portage.exception.DigestException(
                    ("Failed to verify '%(file)s' on " + \
                    "checksum type '%(type)s'") % \
                    {"file" : filename, "type" : x})
            file_is_ok = False
            reason = (("Failed on %s verification" % x), myhash, mydict[x])
            # One mismatch settles the result; skip the remaining hashes.
            break

    return file_is_ok, reason
def perform_checksum(filename, hashname="MD5", calc_prelink=0):
    """
    Run a specific checksum against a file. The filename can
    be either unicode or an encoded byte string. It is passed through
    _unicode_encode() with the 'fs' encoding and errors='strict' before
    it is used.

    @param filename: File to run the checksum against
    @type filename: String
    @param hashname: The type of hash function to run
    @type hashname: String
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Tuple
    @return: The hash and size of the data
    @raise portage.exception.DigestException: hashname is not registered
    @raise portage.exception.FileNotFound: the file to checksum does not
        exist
    """
    global prelink_capable
    # Make sure filename is encoded with the correct encoding before
    # it is passed to spawn (for prelink) and/or the hash function.
    filename = _unicode_encode(filename,
        encoding=_encodings['fs'], errors='strict')

    # Reject an unknown hash before any temporary file is created or
    # prelink is spawned. %-formatting keeps the message identical for
    # string names and avoids a TypeError for anything else.
    if hashname not in hashfunc_map:
        raise portage.exception.DigestException(
            "%s hash function not available (needs dev-python/pycrypto)"
            % (hashname,))

    myfilename = filename
    prelink_tmpfile = None
    try:
        if calc_prelink and prelink_capable:
            # Create non-prelinked temporary file to checksum.
            # Files rejected by prelink are summed in place.
            try:
                tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
                try:
                    retval = portage.process.spawn([PRELINK_BINARY,
                        "--verify", filename], fd_pipes={1:tmpfile_fd})
                finally:
                    # mkstemp() hands back an open descriptor that the
                    # caller owns; close it whether or not spawn worked.
                    os.close(tmpfile_fd)
                if retval == os.EX_OK:
                    myfilename = prelink_tmpfile
            except portage.exception.CommandNotFound:
                # This happens during uninstallation of prelink.
                prelink_capable = False

        try:
            myhash, mysize = hashfunc_map[hashname](myfilename)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                raise portage.exception.FileNotFound(myfilename)
            raise
        return myhash, mysize
    finally:
        # Remove the temporary file on every exit path; a file that is
        # already gone is not an error.
        if prelink_tmpfile:
            try:
                os.unlink(prelink_tmpfile)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
def perform_multiple_checksums(filename, hashes=["MD5"], calc_prelink=0):
    """
    Run a group of checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param hashes: A list of checksum functions to run against the file
        (only iterated, never modified)
    @type hashes: List
    @param calc_prelink: Whether or not to reverse prelink before running the checksum
    @type calc_prelink: Integer
    @rtype: Dictionary
    @return: A dictionary in the form:
        return_value[hash_name] = hash_result
        for each given checksum
    @raise portage.exception.DigestException: one of the requested hash
        names is not registered
    """
    rVal = {}
    for x in hashes:
        if x not in hashfunc_map:
            raise portage.exception.DigestException(x+" hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
        # Element 0 of the perform_checksum() result is kept; the size is
        # dropped.
        rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
    return rVal