From 8048e247bddd0acc67b8c3f2ac9a627bddb6021c Mon Sep 17 00:00:00 2001 From: Eric Edgar Date: Mon, 19 Dec 2005 20:53:13 +0000 Subject: [PATCH] Change the internal hash checking to be quicker and more memory efficient. Add additional hash digests options. git-svn-id: svn+ssh://svn.gentoo.org/var/svnroot/catalyst/trunk@982 d1e1f19c-881f-0410-ab34-b69fee027534 --- ChangeLog | 8 +++- catalyst | 25 ++++++----- files/catalyst.conf | 19 ++++++--- modules/catalyst_support.py | 73 ++++++++++++++++++++------------- modules/generic_stage_target.py | 71 ++++++++++++++------------------ modules/livecd_stage2_target.py | 14 +++---- modules/stage2_target.py | 4 +- 7 files changed, 121 insertions(+), 93 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6857d014..c9d113ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ # Copyright 2002-2005 Gentoo Foundation; Distributed under the GPL v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/ChangeLog,v 1.469 2005/12/19 19:10:04 wolf31o2 Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/ChangeLog,v 1.470 2005/12/19 20:53:13 rocket Exp $ + + 19 Dec 2005; Eric Edgar catalyst, files/catalyst.conf, + modules/catalyst_support.py, modules/generic_stage_target.py, + modules/livecd_stage2_target.py, modules/stage2_target.py: + Change the internal hash checking to be quicker and more memory efficient. + Add additional hash digests options. 
19 Dec 2005; Chris Gianelloni targets/tinderbox/tinderbox-chroot.sh, diff --git a/catalyst b/catalyst index 8f7081df..f83c8649 100755 --- a/catalyst +++ b/catalyst @@ -1,7 +1,7 @@ #!/usr/bin/python -OO # Copyright 1999-2005 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/catalyst,v 1.115 2005/12/19 15:48:00 rocket Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/catalyst,v 1.116 2005/12/19 20:53:13 rocket Exp $ # Maintained in full by: # Eric Edgar @@ -9,6 +9,7 @@ import os,sys,imp,string,getopt import pdb + __maintainer__="Chris Gianelloni " __version__="2.0_rc9" @@ -53,7 +54,8 @@ def parse_config(myconfig): confdefaults={ "storedir":"/var/tmp/catalyst",\ "sharedir":"/usr/share/catalyst","distdir":"/usr/portage/distfiles",\ "portdir":"/usr/portage","options":"",\ - "snapshot_cache":"/var/tmp/catalyst/snapshot_cache" } + "snapshot_cache":"/var/tmp/catalyst/snapshot_cache",\ + "hash_function":"crc32"} # first, try the one passed (presumably from the cmdline) if myconfig: @@ -134,13 +136,8 @@ def parse_config(myconfig): print "Envscript support enabled." conf_values["ENVSCRIPT"]=myconf["envscript"] - if "md5" in string.split(conf_values["options"]): - print "MD5 .digests file creation support enabled." - conf_values["MD5"]="1" - - if "sha" in string.split(conf_values["options"]): - print "SHA .digests file creation support enabled." - conf_values["SHA"]="1" + if myconf.has_key("digests"): + conf_values["digests"]=myconf["digests"] def import_modules(): # import catalyst's own modules (i.e. 
catalyst_support and the arch modules) @@ -326,7 +323,15 @@ if __name__ == "__main__": parse_config(myconfig) sys.path.append(conf_values["sharedir"]+"/modules") from catalyst_support import * - + + # Start checking that digests are valid now that the hash_map was imported from catalyst_support + if conf_values.has_key("digests"): + for i in conf_values["digests"].split(): + if not hash_map.has_key(i): + print "Valid digest entries:" + print hash_map.keys() + raise CatalystError, i+" is not a valid digest entry" + # import the rest of the catalyst modules targetmap=import_modules() diff --git a/files/catalyst.conf b/files/catalyst.conf index f7fb10f7..14fa783f 100644 --- a/files/catalyst.conf +++ b/files/catalyst.conf @@ -1,6 +1,6 @@ # Copyright 1999-2005 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/files/catalyst.conf,v 1.19 2005/12/09 17:22:57 wolf31o2 Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/files/catalyst.conf,v 1.20 2005/12/19 20:53:13 rocket Exp $ # Simple desriptions of catalyst settings. Please refer to the online # documentation for more information. @@ -27,11 +27,8 @@ distdir="/usr/portage/distfiles" # WARNING: moving parts of the portage tree from within fsscript *will* break # your cache. The cache is unlinked before any empty or rm processing, though. # -# DIGESTS CREATION -# md5 = Create a .digests file containing the md5 of the output object -# sha = Create a .digests file containing the sha1 of the output object # (These options can be used together) -options="autoresume kerncache md5 pkgcache seedcache sha snapcache" +options="autoresume kerncache pkgcache seedcache snapcache" # sharedir specifies where all of the catalyst runtime executables are. Most # users do not need to change this. @@ -48,6 +45,18 @@ storedir="/var/tmp/catalyst" # enabled in the options. 
# snapshot_cache="" +# Internal hash function catalyst should use for things like autoresume, seedcache, etc. +# crc32 is the default and fastest. You shouldn't ever need to change this unless your OS +# doesn't support it. +# Supported options: sha1, sha224, ripemd128, ripemd320, sha384, crc32, ripemd256, sha256, sha512, ripemd160, md5 +# hash_function="crc32" + +# DIGESTS CREATION +# Creates a .digests file containing the hash output from any of the supported options below. Adding them all +# may take a long time. +# Supported options: sha1, sha224, ripemd128, ripemd320, sha384, crc32, ripemd256, sha256, sha512, ripemd160, md5 +# digests="sha1 sha224 ripemd128 ripemd320 sha384 crc32 ripemd256 sha256 sha512 ripemd160 md5" + # envscript allows users to set options such as http proxies, MAKEOPTS, # GENTOO_MIRRORS, or any other environment variables needed for building. # The envscript file sets environment variables like so: diff --git a/modules/catalyst_support.py b/modules/catalyst_support.py index dbdfabd2..7262d306 100644 --- a/modules/catalyst_support.py +++ b/modules/catalyst_support.py @@ -1,8 +1,9 @@ # Copyright 1999-2005 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/modules/catalyst_support.py,v 1.63 2005/12/05 18:13:12 rocket Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/modules/catalyst_support.py,v 1.64 2005/12/19 20:53:13 rocket Exp $ -import sys,string,os,types,re,signal,traceback,md5,sha,time +import sys,string,os,types,re,signal,traceback,time +#import md5,sha selinux_capable = False #userpriv_capable = (os.getuid() == 0) #fakeroot_capable = False @@ -64,30 +65,46 @@ def hexify(str): return r # hexify() -# A function to calculate the md5 sum of a file -def calc_md5(file,verbose=False): - m = md5.new() - f = open(file, 'r') - for line in f.readlines(): - m.update(line) - f.close() - md5sum = hexify(m.digest()) - if verbose: - print "MD5 (%s) = %s" % (file, md5sum) - 
return md5sum -# calc_md5 - -def calc_sha(file,verbose=False): - m = sha.new() - f = open(file, 'r') - for line in f.readlines(): - m.update(line) - f.close() - shaval = hexify(m.digest()) - if verbose: - print "SHA (%s) = %s" % (file, shaval) - return shaval - +def generate_hash(file,hash_function="crc32",verbose=False): + try: + return hash_map[hash_function][0](file,hash_map[hash_function][1],hash_map[hash_function][2],verbose) + except: + raise CatalystError,"Error generating hash, is appropriate utility installed on your system?" + +def calc_hash(file,cmd,id_string="MD5",verbose=False): + a=os.popen(cmd+" "+file) + mylines=a.readlines() + a.close() + mylines=mylines[0].split() + result=mylines[0] + if verbose: + print id_string+" (%s) = %s" % (file, result) + return result + +def calc_hash2(file,cmd,id_string="MD5",verbose=False): + a=os.popen(cmd+" "+file) + a.readline() + mylines=a.readline().split() + a.close() + result=mylines[0] + if verbose: + print id_string+" (%s) = %s" % (file, result) + return result + +#This hash map must be defined after the functions calc_hash and calc_hash2 +#It is possible to call different functions from this but they must be defined before hash_map +hash_map={"md5":[calc_hash,"md5sum","MD5"],\ + "crc32":[calc_hash,"crc32","CRC32"],\ + "sha1":[calc_hash,"sha1sum","SHA1"],\ + "sha224":[calc_hash2,"shash -a SHA224","SHA224"],\ + "sha256":[calc_hash2,"shash -a SHA256","SHA256"],\ + "sha384":[calc_hash2,"shash -a SHA384","SHA384"],\ + "sha512":[calc_hash2,"shash -a SHA512","SHA512"],\ + "ripemd128":[calc_hash2,"shash -a RIPEMD128","RIPEMD128"],\ + "ripemd160":[calc_hash2,"shash -a RIPEMD160","RIPEMD160"],\ + "ripemd256":[calc_hash2,"shash -a RIPEMD256","RIPEMD256"],\ + "ripemd320":[calc_hash2,"shash -a RIPEMD320","RIPEMD320"]} + def read_from_clst(file): line = '' myline = '' @@ -119,8 +136,6 @@ valid_config_file_values.append("CCACHE") valid_config_file_values.append("DISTCC") valid_config_file_values.append("ENVSCRIPT") 
valid_config_file_values.append("AUTORESUME") -valid_config_file_values.append("SHA") -valid_config_file_values.append("MD5") valid_config_file_values.append("FETCH") valid_config_file_values.append("CLEAR_AUTORESUME") valid_config_file_values.append("options") @@ -129,6 +144,8 @@ valid_config_file_values.append("VERBOSE") valid_config_file_values.append("PURGE") valid_config_file_values.append("SNAPCACHE") valid_config_file_values.append("snapshot_cache") +valid_config_file_values.append("hash_function") +valid_config_file_values.append("digests") valid_config_file_values.append("SEEDCACHE") verbosity=1 diff --git a/modules/generic_stage_target.py b/modules/generic_stage_target.py index 27023462..3cf7fe4f 100644 --- a/modules/generic_stage_target.py +++ b/modules/generic_stage_target.py @@ -1,6 +1,6 @@ # Copyright 1999-2005 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/modules/generic_stage_target.py,v 1.100 2005/12/16 14:53:29 rocket Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/modules/generic_stage_target.py,v 1.101 2005/12/19 20:53:13 rocket Exp $ """ This class does all of the chroot setup, copying of files, etc. It is @@ -149,7 +149,6 @@ class generic_stage_target(generic_target): # this next line checks to make sure that the specified variables exist on disk. 
#pdb.set_trace() file_locate(self.settings,["source_path","snapshot_path","distdir"],expand=0) - # if we are using portage_confdir, check that as well if self.settings.has_key("portage_confdir"): file_locate(self.settings,["portage_confdir"],expand=0) @@ -334,8 +333,8 @@ class generic_stage_target(generic_target): if os.path.isfile(self.settings["source_path"]): if os.path.exists(self.settings["source_path"]): - self.settings["source_path_md5sum"]=calc_md5(self.settings["source_path"],True) - self.settings["source_path_sha"]=calc_sha(self.settings["source_path"],True) + self.settings["source_path_hash"]=generate_hash(self.settings["source_path"],\ + hash_function=self.settings["hash_function"],verbose=False) if os.path.isdir(self.settings["source_path"]): print "Source path set to "+self.settings["source_path"] print "\tIf this is not desired, remove this directory or turn of seedcache in the options of catalyst.conf" @@ -354,8 +353,8 @@ class generic_stage_target(generic_target): def set_snapshot_path(self): self.settings["snapshot_path"]=normpath(self.settings["storedir"]+"/snapshots/portage-"+self.settings["snapshot"]+".tar.bz2") if os.path.exists(self.settings["snapshot_path"]): - self.settings["snapshot_path_md5sum"]=calc_md5(self.settings["snapshot_path"],True) - self.settings["snapshot_path_sha"]=calc_sha(self.settings["snapshot_path"],True) + self.settings["snapshot_path_hash"]=generate_hash(self.settings["snapshot_path"],\ + hash_function=self.settings["hash_function"],verbose=False) def set_snapcache_path(self): if self.settings.has_key("SNAPCACHE"): @@ -510,7 +509,7 @@ class generic_stage_target(generic_target): def unpack(self): unpack=True - clst_unpack_md5sum=read_from_clst(self.settings["autoresume_path"]+"unpack") + clst_unpack_hash=read_from_clst(self.settings["autoresume_path"]+"unpack") if self.settings.has_key("SEEDCACHE") and os.path.isdir(self.settings["source_path"]): unpack_cmd="rsync -a --delete "+self.settings["source_path"]+" 
"+self.settings["chroot_path"] @@ -531,7 +530,7 @@ class generic_stage_target(generic_target): invalid_snapshot=False # Autoresume is Valid, Tarball is Valid - elif os.path.isfile(self.settings["source_path"]) and self.settings["source_path_md5sum"] == clst_unpack_md5sum: + elif os.path.isfile(self.settings["source_path"]) and self.settings["source_path_hash"] == clst_unpack_hash: unpack=False invalid_snapshot=True @@ -541,7 +540,7 @@ class generic_stage_target(generic_target): invalid_snapshot=False # Autoresume is InValid, Tarball - elif os.path.isfile(self.settings["source_path"]) and self.settings["source_path_md5sum"] != clst_unpack_md5sum: + elif os.path.isfile(self.settings["source_path"]) and self.settings["source_path_hash"] != clst_unpack_hash: unpack=True invalid_snapshot=True else: @@ -592,9 +591,9 @@ class generic_stage_target(generic_target): print display_msg cmd(unpack_cmd,error_msg,env=self.env) - if self.settings.has_key("source_path_md5sum"): + if self.settings.has_key("source_path_hash"): myf=open(self.settings["autoresume_path"]+"unpack","w") - myf.write(self.settings["source_path_md5sum"]) + myf.write(self.settings["source_path_hash"]) myf.close() else: touch(self.settings["autoresume_path"]+"unpack") @@ -604,10 +603,10 @@ class generic_stage_target(generic_target): def unpack_snapshot(self): unpack=True - snapshot_md5sum=read_from_clst(self.settings["autoresume_path"]+"unpack_portage") + snapshot_hash=read_from_clst(self.settings["autoresume_path"]+"unpack_portage") if self.settings.has_key("SNAPCACHE"): - snapshot_cache_md5sum=read_from_clst(self.settings["snapshot_cache_path"]+"catalyst-md5sum") + snapshot_cache_hash=read_from_clst(self.settings["snapshot_cache_path"]+"catalyst-hash") destdir=self.settings["snapshot_cache_path"] unpack_cmd="tar xjpf "+self.settings["snapshot_path"]+" -C "+destdir unpack_errmsg="Error unpacking snapshot" @@ -615,7 +614,7 @@ class generic_stage_target(generic_target): cleanup_errmsg="Error removing existing 
snapshot cache directory." self.snapshot_lock_object=self.snapcache_lock - if self.settings["snapshot_path_md5sum"] == snapshot_cache_md5sum: + if self.settings["snapshot_path_hash"] == snapshot_cache_hash: print "Valid snapshot cache, skipping unpack of portage tree ..." unpack=False @@ -629,7 +628,7 @@ class generic_stage_target(generic_target): if self.settings.has_key("AUTORESUME") \ and os.path.exists(self.settings["chroot_path"]+"/usr/portage/") \ and os.path.exists(self.settings["autoresume_path"]+"unpack_portage") \ - and self.settings["snapshot_path_md5sum"] == snapshot_md5sum: + and self.settings["snapshot_path_hash"] == snapshot_hash: print "Valid Resume point detected, skipping unpack of portage tree..." unpack=False @@ -649,14 +648,14 @@ class generic_stage_target(generic_target): cmd(unpack_cmd,unpack_errmsg,env=self.env) if self.settings.has_key("SNAPCACHE"): - myf=open(self.settings["snapshot_cache_path"]+"catalyst-md5sum","w") - myf.write(self.settings["snapshot_path_md5sum"]) + myf=open(self.settings["snapshot_cache_path"]+"catalyst-hash","w") + myf.write(self.settings["snapshot_path_hash"]) myf.close() else: print "Setting snapshot autoresume point" myf=open(self.settings["autoresume_path"]+"unpack_portage","w") - myf.write(self.settings["snapshot_path_md5sum"]) + myf.write(self.settings["snapshot_path_hash"]) myf.close() if self.settings.has_key("SNAPCACHE"): @@ -952,7 +951,7 @@ class generic_stage_target(generic_target): cmd("tar cjf "+self.settings["target_path"]+" -C "+self.settings["stage_path"]+\ " .","Couldn't create stage tarball",env=self.env) - self.gen_digest_file(self.settings["target_path"]+".digests") + self.gen_digest_file(self.settings["target_path"]) touch(self.settings["autoresume_path"]+"capture") @@ -1259,27 +1258,19 @@ class generic_stage_target(generic_target): os.chmod(myemp,mystat[ST_MODE]) def gen_digest_file(self,file): - if os.path.exists(file+".digests"): - os.remove(file+".digests") - if self.settings.has_key("SHA") 
or self.settings.has_key("MD5"): - if os.path.exists(file): - myf=open(file+".digests","w") - - if self.settings.has_key("MD5"): - if self.settings.has_key("VERBOSE"): - md5=calc_md5(file,True) - else: - md5=calc_md5(file) - myf.write("MD5: "+md5+"\n") - - if self.settings.has_key("SHA"): - if self.settings.has_key("VERBOSE"): - sha=calc_sha(file,True) - else: - sha=calc_sha(file) - myf.write("SHA: "+sha+"\n") - - myf.close() + if os.path.exists(file+".digests"): + os.remove(file+".digests") + if self.settings.has_key("digests"): + if os.path.exists(file): + myf=open(file+".digests","w") + for i in self.settings["digests"].split(): + + if self.settings.has_key("VERBOSE"): + hash=generate_hash(file,hash_function=i,verbose=True) + else: + hash=generate_hash(file,hash_function=i) + myf.write(hash_map[i][2]+": "+hash+"\n") + myf.close() def purge(self): countdown(10,"Purging Caches ...") diff --git a/modules/livecd_stage2_target.py b/modules/livecd_stage2_target.py index 04785c2f..ef21e475 100644 --- a/modules/livecd_stage2_target.py +++ b/modules/livecd_stage2_target.py @@ -1,6 +1,6 @@ # Copyright 1999-2005 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/modules/livecd_stage2_target.py,v 1.57 2005/12/16 14:53:29 rocket Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/modules/livecd_stage2_target.py,v 1.58 2005/12/19 20:53:13 rocket Exp $ """ Builder class for a LiveCD stage2 build. 
@@ -36,7 +36,7 @@ class livecd_stage2_target(generic_stage_target): def set_source_path(self): self.settings["source_path"]=normpath(self.settings["storedir"]+"/builds/"+self.settings["source_subpath"]+".tar.bz2") if os.path.isfile(self.settings["source_path"]): - self.settings["source_path_md5sum"]=calc_md5(self.settings["source_path"]) + self.settings["source_path_hash"]=generate_hash(self.settings["source_path"]) else: self.settings["source_path"]=normpath(self.settings["storedir"]+"/tmp/"+self.settings["source_subpath"]+"/") if not os.path.exists(self.settings["source_path"]): @@ -76,7 +76,7 @@ class livecd_stage2_target(generic_stage_target): def unpack(self): unpack=True - clst_unpack_md5sum=read_from_clst(self.settings["autoresume_path"]+"unpack") + clst_unpack_hash=read_from_clst(self.settings["autoresume_path"]+"unpack") if os.path.isdir(self.settings["source_path"]): unpack_cmd="rsync -a --delete "+self.settings["source_path"]+" "+self.settings["chroot_path"] @@ -90,8 +90,8 @@ class livecd_stage2_target(generic_stage_target): os.path.exists(self.settings["autoresume_path"]+"unpack"): print "Resume point detected, skipping unpack operation..." 
unpack=False - elif self.settings.has_key("source_path_md5sum"): - if self.settings["source_path_md5sum"] != clst_unpack_md5sum: + elif self.settings.has_key("source_path_hash"): + if self.settings["source_path_hash"] != clst_unpack_hash: invalid_snapshot=True if unpack: @@ -116,9 +116,9 @@ class livecd_stage2_target(generic_stage_target): print display_msg cmd(unpack_cmd,error_msg,env=self.env) - if self.settings.has_key("source_path_md5sum"): + if self.settings.has_key("source_path_hash"): myf=open(self.settings["autoresume_path"]+"unpack","w") - myf.write(self.settings["source_path_md5sum"]) + myf.write(self.settings["source_path_hash"]) myf.close() else: touch(self.settings["autoresume_path"]+"unpack") diff --git a/modules/stage2_target.py b/modules/stage2_target.py index 4f262818..e6cf5e6d 100644 --- a/modules/stage2_target.py +++ b/modules/stage2_target.py @@ -1,6 +1,6 @@ # Copyright 1999-2005 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -# $Header: /var/cvsroot/gentoo/src/catalyst/modules/stage2_target.py,v 1.10 2005/12/16 14:42:07 rocket Exp $ +# $Header: /var/cvsroot/gentoo/src/catalyst/modules/stage2_target.py,v 1.11 2005/12/19 20:53:13 rocket Exp $ """ Builder class for a stage2 installation tarball build. @@ -31,7 +31,7 @@ class stage2_target(generic_stage_target): self.settings["source_path"]=normpath(self.settings["storedir"]+"/builds/"+self.settings["source_subpath"]+".tar.bz2") if os.path.isfile(self.settings["source_path"]): if os.path.exists(self.settings["source_path"]): - self.settings["source_path_md5sum"]=calc_md5(self.settings["source_path"]) + self.settings["source_path_hash"]=generate_hash(self.settings["source_path"]) def override_chost(self): if os.environ.has_key("CHOST"): self.settings["CHOST"] = os.environ["CHOST"] -- 2.26.2