From e1b167ef9a6c66d068c5dae4ae5b8b7a0cd89add Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Thu, 11 Nov 2010 23:25:04 -0800 Subject: [PATCH] Merge package files in a subprocess. This allows the Scheduler to run in the main thread while files are moved or copied asynchronously. --- pym/portage/dbapi/_MergeProcess.py | 41 ++++++++ pym/portage/dbapi/vartree.py | 147 ++++++++++++++++------------- 2 files changed, 121 insertions(+), 67 deletions(-) create mode 100644 pym/portage/dbapi/_MergeProcess.py diff --git a/pym/portage/dbapi/_MergeProcess.py b/pym/portage/dbapi/_MergeProcess.py new file mode 100644 index 000000000..b5af7142c --- /dev/null +++ b/pym/portage/dbapi/_MergeProcess.py @@ -0,0 +1,41 @@ +# Copyright 2010 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +import portage +from portage import os +from _emerge.SpawnProcess import SpawnProcess + +class MergeProcess(SpawnProcess): + """ + Merge package files in a subprocess, so the Scheduler can run in the + main thread while files are moved or copied asynchronously. + """ + + __slots__ = ('cfgfiledict', 'conf_mem_file', \ + 'destroot', 'dblink', 'srcroot',) + + def _spawn(self, args, fd_pipes=None, **kwargs): + """ + Fork a subprocess, apply local settings, and call + dblink._merge_process(). + """ + + pid = os.fork() + if pid != 0: + portage.process.spawned_pids.append(pid) + return [pid] + + portage.process._setup_pipes(fd_pipes) + + portage.output.havecolor = self.dblink.settings.get('NOCOLOR') \ + not in ('yes', 'true') + + # In this subprocess we want dblink._display_merge() to use + # stdout/stderr directly since they are pipes. This behavior + # is triggered when dblink._scheduler is None. + self.dblink._scheduler = None + + rval = self.dblink._merge_process(self.srcroot, self.destroot, + self.cfgfiledict, self.conf_mem_file) + + os._exit(rval) diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index 4b0b47ba0..5e00f7fd7 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -10,6 +10,7 @@ portage.proxy.lazyimport.lazyimport(globals(), 'portage.checksum:_perform_md5_merge@perform_md5', 'portage.data:portage_gid,portage_uid,secpass', 'portage.dbapi.dep_expand:dep_expand', + 'portage.dbapi._MergeProcess:MergeProcess', 'portage.dep:dep_getkey,isjustname,match_from_list,' + \ 'use_reduce,_slot_re', 'portage.elog:elog_process,_preload_elog_modules', @@ -3183,19 +3184,11 @@ class dblink(object): 'w', encoding=_encodings['repo.content'], errors='backslashreplace' ).write(str(counter)) - # open CONTENTS file (possibly overwriting old one) for recording - outfile = codecs.open(_unicode_encode( - os.path.join(self.dbtmpdir, 'CONTENTS'), - encoding=_encodings['fs'], errors='strict'), - mode='w', encoding=_encodings['repo.content'], - errors='backslashreplace') - self.updateprotect() #if we have a file containing previously-merged config file md5sums, grab it. conf_mem_file = os.path.join(self._eroot, CONFIG_MEMORY_FILE) cfgfiledict = grabdict(conf_mem_file) - cfgfiledict_orig = cfgfiledict.copy() if "NOCONFMEM" in self.settings: cfgfiledict["IGNORE"]=1 else: @@ -3210,61 +3203,18 @@ class dblink(object): cfgfiledict["IGNORE"] = 1 break - # Don't bump mtimes on merge since some application require - # preservation of timestamps. This means that the unmerge phase must - # check to see if file belongs to an installed instance in the same - # slot. - mymtime = None - - # set umask to 0 for merging; back up umask, save old one in prevmask (since this is a global change) - prevmask = os.umask(0) - secondhand = [] - - # we do a first merge; this will recurse through all files in our srcroot but also build up a - # "second hand" of symlinks to merge later - if self.mergeme(srcroot, destroot, outfile, secondhand, "", cfgfiledict, mymtime): - return 1 - - # now, it's time for dealing our second hand; we'll loop until we can't merge anymore. The rest are - # broken symlinks. We'll merge them too. - lastlen = 0 - while len(secondhand) and len(secondhand)!=lastlen: - # clear the thirdhand. Anything from our second hand that - # couldn't get merged will be added to thirdhand. - - thirdhand = [] - if self.mergeme(srcroot, destroot, outfile, thirdhand, - secondhand, cfgfiledict, mymtime): - return 1 - - #swap hands - lastlen = len(secondhand) - - # our thirdhand now becomes our secondhand. It's ok to throw - # away secondhand since thirdhand contains all the stuff that - # couldn't be merged. - secondhand = thirdhand + merge_task = MergeProcess( + background=(self.settings.get('PORTAGE_BACKGROUND') == '1'), + cfgfiledict=cfgfiledict, conf_mem_file=conf_mem_file, dblink=self, + destroot=destroot, + logfile=self.settings.get('PORTAGE_LOG_FILE'), + scheduler=(scheduler or PollScheduler().sched_iface), + srcroot=srcroot) - if len(secondhand): - # force merge of remaining symlinks (broken or circular; oh well) - if self.mergeme(srcroot, destroot, outfile, None, - secondhand, cfgfiledict, mymtime): - return 1 - self._md5_merge_map.clear() - - #restore umask - os.umask(prevmask) - - #if we opened it, close it - outfile.flush() - outfile.close() - - # write out our collection of md5sums - cfgfiledict.pop("IGNORE", None) - if cfgfiledict != cfgfiledict_orig: - ensure_dirs(os.path.dirname(conf_mem_file), - gid=portage_gid, mode=0o2750, mask=0o2) - writedict(cfgfiledict, conf_mem_file) + merge_task.start() + rval = merge_task.wait() + if rval != os.EX_OK: + return rval # These caches are populated during collision-protect and the data # they contain is now invalid. It's very important to invalidate @@ -3449,6 +3399,74 @@ class dblink(object): return backup_p + def _merge_process(self, srcroot, destroot, cfgfiledict, conf_mem_file): + + cfgfiledict_orig = cfgfiledict.copy() + + # open CONTENTS file (possibly overwriting old one) for recording + outfile = codecs.open(_unicode_encode( + os.path.join(self.dbtmpdir, 'CONTENTS'), + encoding=_encodings['fs'], errors='strict'), + mode='w', encoding=_encodings['repo.content'], + errors='backslashreplace') + + # Don't bump mtimes on merge since some application require + # preservation of timestamps. This means that the unmerge phase must + # check to see if file belongs to an installed instance in the same + # slot. + mymtime = None + + # set umask to 0 for merging; back up umask, save old one in prevmask (since this is a global change) + prevmask = os.umask(0) + secondhand = [] + + # we do a first merge; this will recurse through all files in our srcroot but also build up a + # "second hand" of symlinks to merge later + if self.mergeme(srcroot, destroot, outfile, secondhand, "", cfgfiledict, mymtime): + return 1 + + # now, it's time for dealing our second hand; we'll loop until we can't merge anymore. The rest are + # broken symlinks. We'll merge them too. + lastlen = 0 + while len(secondhand) and len(secondhand)!=lastlen: + # clear the thirdhand. Anything from our second hand that + # couldn't get merged will be added to thirdhand. + + thirdhand = [] + if self.mergeme(srcroot, destroot, outfile, thirdhand, + secondhand, cfgfiledict, mymtime): + return 1 + + #swap hands + lastlen = len(secondhand) + + # our thirdhand now becomes our secondhand. It's ok to throw + # away secondhand since thirdhand contains all the stuff that + # couldn't be merged. + secondhand = thirdhand + + if len(secondhand): + # force merge of remaining symlinks (broken or circular; oh well) + if self.mergeme(srcroot, destroot, outfile, None, + secondhand, cfgfiledict, mymtime): + return 1 + + #restore umask + os.umask(prevmask) + + #if we opened it, close it + outfile.flush() + outfile.close() + + # write out our collection of md5sums + if cfgfiledict != cfgfiledict_orig: + cfgfiledict.pop("IGNORE", None) + ensure_dirs(os.path.dirname(conf_mem_file), + gid=portage_gid, mode=0o2750, mask=0o2) + writedict(cfgfiledict, conf_mem_file) + + return os.EX_OK + def mergeme(self, srcroot, destroot, outfile, secondhand, stufftomerge, cfgfiledict, thismtime): """ @@ -3479,7 +3497,6 @@ class dblink(object): showMessage = self._display_merge writemsg = self._display_merge - scheduler = self._scheduler os = _os_merge sep = os.sep @@ -3499,10 +3516,6 @@ class dblink(object): for i, x in enumerate(mergelist): - if scheduler is not None and \ - 0 == i % self._file_merge_yield_interval: - scheduler.scheduleYield() - mysrc = join(srcroot, offset, x) mydest = join(destroot, offset, x) # myrealdest is mydest without the $ROOT prefix (makes a difference if ROOT!="/") -- 2.26.2