3 # Copyright 2003-2010 Gentoo Foundation
4 # Distributed under the terms of the GNU General Public License v2
7 from __future__ import print_function
15 from portage import os
18 import gentoolkit.pprinter as pp
19 from gentoolkit.eclean.exclude import (exclDictMatchCP, exclDictExpand,
20 exclDictExpandPkgname, exclMatchFilename)
21 #from gentoolkit.package import Package
22 from gentoolkit.helpers import walk
# Misc. shortcuts to some portage stuff:
port_settings = portage.settings
pkgdir = port_settings["PKGDIR"]

# Warning template for an installed package that is no longer in the tree
# or an installed overlay; the single %s is filled in with the package's
# cpv by the callers.
# (Fixed: the literal used to open with four quote characters, which put a
# stray, unbalanced '"' in front of the emitted message.)
deprecated_message = """Deprecation Warning: Installed package: %s
Is no longer in the tree or an installed overlay"""
# Passed through pp.warn() once here so callers only do DEPRECATED % cpv.
DEPRECATED = pp.warn(deprecated_message)
37 def dprint(module, message):
38 if module in debug_modules:
43 """Returns DISTDIR if sane, else barfs."""
45 d = portage.settings["DISTDIR"]
46 if not os.path.isdir(d):
47 e = pp.error("%s does not appear to be a directory.\n" % d)
48 e += pp.error("Please set DISTDIR to a sane value.\n")
49 e += pp.error("(Check your /etc/make.conf and environment).")
50 print( e, file=sys.stderr)
54 distdir = get_distdir()
57 class DistfilesSearch(object):
60 @param output: verbose output method or (lambda x: None) to turn off
61 @param vardb: defaults to portage.db[portage.root]["vartree"].dbapi
62 is overridden for testing.
63 @param portdb: defaults to portage.portdb and is overridden for testing.
68 portdb=portage.portdb,
69 vardb=portage.db[portage.root]["vartree"].dbapi,
75 def findDistfiles(self,
78 fetch_restricted=False,
85 """Find all obsolete distfiles.
87 XXX: what about cvs ebuilds?
88 I should install some to see where it goes...
90 @param exclude: an exclusion dict as defined in
91 exclude.parseExcludeFile class.
92 @param destructive: boolean, defaults to False
93 @param fetch_restricted: boolean, defaults to False
94 @param package_names: boolean, defaults to False.
95 @param time_limit: integer time value as returned by parseTime()
96 @param size_limit: integer value of max. file size to keep or 0 to ignore.
97 @param _distdir: path to the distfiles dir being checked, defaults to portage.
98 @param deprecate: bool to control checking the clean dict. files for exclusion
101 @return dict. of package files to clean i.e. {'cat/pkg-ver.tbz2': [filename],}
107 installed_included = False
108 # create a big CPV->SRC_URI dict of packages
109 # whose distfiles should be kept
110 if (not destructive) or fetch_restricted:
111 self.output("...non-destructive type search")
112 # TODO fix fetch_restricted to save the installed packages' filenames while processing
113 pkgs, _deprecated = self._non_destructive(destructive, fetch_restricted, exclude=exclude)
114 deprecated.update(_deprecated)
115 installed_included = True
117 self.output("...destructive type search: %d packages already found" %len(pkgs))
118 pkgs, _deprecated = self._destructive(package_names,
119 exclude, pkgs, installed_included)
120 deprecated.update(_deprecated)
121 # gather the files to be cleaned
122 self.output("...checking limits for %d ebuild sources"
124 clean_me = self._check_limits(_distdir,
125 size_limit, time_limit, exclude)
126 # remove any protected files from the list
127 self.output("...removing protected sources from %s candidates to clean"
129 clean_me = self._remove_protected(pkgs, clean_me)
130 if not deprecate and len(exclude) and len(clean_me):
131 self.output("...checking final for exclusion from " +\
132 "%s remaining candidates to clean" %len(clean_me))
133 clean_me, saved = self._check_excludes(exclude, clean_me)
134 return clean_me, saved, deprecated
137 ####################### begin _check_limits code block
139 def _check_limits(self,
146 """Checks files if they exceed size and/or time_limits, etc.
148 checks = [self._isreg_limit_]
150 checks.append(self._size_limit_)
151 self.size_limit = size_limit
153 self.output(" - skipping size limit check")
155 checks.append(self._time_limit_)
156 self.time_limit = time_limit
158 self.output(" - skipping time limit check")
159 if 'filenames' in exclude:
160 checks.append(self._filenames_limit_)
161 self.exclude = exclude
163 self.output(" - skipping exclude filenames check")
164 max_index = len(checks)
165 for file in os.listdir(_distdir):
166 filepath = os.path.join(_distdir, file)
168 file_stat = os.stat(filepath)
174 while _index<max_index and next:
175 next, skip_file = checks[_index](file_stat, file)
179 # this is a candidate for cleaning
180 #print( "Adding file to clean_list:", file)
181 clean_me[file]=[filepath]
184 def _isreg_limit_(self, file_stat, file):
185 """check if file is a regular file."""
186 is_reg_file = stat.S_ISREG(file_stat[stat.ST_MODE])
187 return is_reg_file, not is_reg_file
189 def _size_limit_(self, file_stat, file):
190 """checks if the file size exceeds the size_limit"""
191 if (file_stat[stat.ST_SIZE] >= self.size_limit):
192 #print( "size match ", file, file_stat[stat.ST_SIZE])
196 def _time_limit_(self, file_stat, file):
197 """checks if the file exceeds the time_limit"""
198 if (file_stat[stat.ST_MTIME] >= self.time_limit):
199 #print( "time match ", file, file_stat[stat.ST_MTIME])
203 def _filenames_limit_(self, file_stat, file):
204 """checks if the file matches an exclusion file listing"""
205 # Try to match file name directly
206 if file in self.exclude['filenames']:
208 # See if file matches via regular expression matching
211 for file_entry in self.exclude['filenames']:
212 if self.exclude['filenames'][file_entry].match(file):
219 ####################### end _check_limits code block
221 def _remove_protected(self,
225 """Remove files owned by some protected packages.
227 @returns packages to clean
230 # this regexp extracts files names from SRC_URI. It is not very precise,
231 # but we don't care (may return empty strings, etc.), since it is fast.
232 file_regexp = re.compile(r'([a-zA-Z0-9_,\.\-\+\~]*)[\s\)]')
234 for file in file_regexp.findall(pkgs[cpv]+"\n"):
237 # no need to waste IO time if there is nothing left to clean
238 if not len(clean_me):
242 def _non_destructive(self,
248 """performs the non-destructive checks
250 @param destructive: boolean
251 @param pkgs_: starting dictionary to add to
254 @returns packages and their SRC_URI's: {cpv: src_uri,}
259 # the following code block was split to optimize for speed
260 # list all CPV from porttree (yeah, that takes time...)
261 self.output(" - getting complete ebuild list")
262 cpvs = set(self.portdb.cpv_all())
263 # now add any installed cpv's that are not in the tree or overlays
264 installed_cpvs = self.vardb.cpv_all()
265 cpvs.update(installed_cpvs)
266 if fetch_restricted and destructive:
267 self.output(" - getting source file names " +
268 "for %d installed ebuilds" %len(installed_cpvs))
269 pkgs, _deprecated = self._unrestricted(pkgs, installed_cpvs)
270 deprecated.update(_deprecated)
271 # remove the installed cpvs then check the remaining for fetch restriction
272 cpvs.difference_update(installed_cpvs)
273 self.output(" - getting fetch-restricted source file names " +
274 "for %d remaining ebuilds" %len(cpvs))
275 pkgs, _deprecated = self._fetch_restricted(destructive, pkgs, cpvs)
276 deprecated.update(_deprecated)
278 self.output(" - getting source file names " +
279 "for %d ebuilds" %len(cpvs))
280 pkgs, _deprecated = self._unrestricted(pkgs, cpvs)
281 deprecated.update(_deprecated)
282 return pkgs, deprecated
284 def _fetch_restricted(self, destructive, pkgs_, cpvs):
285 """perform fetch restricted non-destructive source
288 @param destructive: boolean
289 @param pkgs_: starting dictionary to add to
290 @param cpvs: set of (cat/pkg-ver, ...) identifiers
292 @return a new pkg dictionary
298 # get SRC_URI and RESTRICT from aux_get
300 (src_uri,restrict) = \
301 self.portdb.aux_get(cpv,["SRC_URI","RESTRICT"])
302 # keep fetch-restricted check
303 # inside try so it is bypassed on KeyError
304 if 'fetch' in restrict:
307 try: # installed vardb
308 (src_uri,restrict) = \
309 self.vardb.aux_get(cpv,["SRC_URI","RESTRICT"])
310 deprecated[cpv] = src_uri
311 self.output(DEPRECATED %cpv)
312 # keep fetch-restricted check
313 # inside try so it is bypassed on KeyError
314 if 'fetch' in restrict:
317 self.output(" - Key Error looking up: " + cpv)
318 return pkgs, deprecated
320 def _unrestricted(self, pkgs_, cpvs):
321 """Perform unrestricted source filenames lookups
323 @param pkgs_: starting packages dictionary
324 @param cpvs: set of (cat/pkg-ver, ...) identifiers
326 @return a new pkg dictionary
332 # get SRC_URI from aux_get
334 pkgs[cpv] = self.portdb.aux_get(cpv,["SRC_URI"])[0]
336 try: # installed vardb
337 pkgs[cpv] = self.vardb.aux_get(cpv,["SRC_URI"])[0]
338 deprecated[cpv] = pkgs[cpv]
339 self.output(DEPRECATED %cpv)
341 self.output(" - Key Error looking up: " + cpv)
342 return pkgs, deprecated
344 def _destructive(self,
348 installed_included=False
350 """Builds on pkgs according to input options
352 @param package_names: boolean
353 @param exclude: an exclusion dict as defined in
354 exclude.parseExcludeFile class.
355 @param pkgs: starting dictionary to add to
357 @param installed_included: bool. pkgs already
358 has the installed cpv's added.
360 @returns pkgs: {cpv: src_uri,}
365 if not installed_included:
366 if not package_names:
367 # list all installed CPV's from vartree
368 #print( "_destructive: getting vardb.cpv_all")
369 pkgset.update(self.vardb.cpv_all())
370 self.output(" - processing %s installed ebuilds" % len(pkgset))
372 # list all CPV's from porttree for CP's in vartree
373 #print( "_destructive: getting vardb.cp_all")
374 cps = self.vardb.cp_all()
375 self.output(" - processing %s installed packages" % len(cps))
377 pkgset.update(self.portdb.cp_list(package))
378 self.output(" - processing excluded")
379 excludes = self._get_excludes(exclude)
380 excludes_length = len(excludes)
381 pkgset.update(excludes)
382 pkgs_done = set(list(pkgs))
383 pkgset.difference_update(pkgs_done)
385 " - (%d of %d total) additional excluded packages to get source filenames for"
386 %(len(pkgset), excludes_length))
387 #self.output(" - processing %d ebuilds for filenames" %len(pkgset))
388 pkgs, _deprecated = self._unrestricted(pkgs, pkgset)
389 deprecated.update(_deprecated)
390 #self.output(" - done...")
391 return pkgs, deprecated
393 def _get_excludes(self, exclude):
394 """Expands the exclude dictionary into a set of
397 @param exclude: dictionary of exclusion categories,
398 packages to exclude from the cleaning
401 @return set of package cpv's
404 for cp in exclDictExpand(exclude):
405 # add packages from the exclude file
406 pkgset.update(self.portdb.cp_list(cp))
409 def _check_excludes(self, exclude, clean_me):
410 """Performs a last minute check on remaining filenames
411 to see if they should be protected. Since if the pkg-version
412 was deprecated it would not have been matched to a
413 source filename and removed.
415 @param exclude: an exclusion dictionary
416 @param clean_me: the list of filenames for cleaning
418 @rtype: dict of packages to clean
421 pn_excludes = exclDictExpandPkgname(exclude)
422 dprint("excludes", "_check_excludes: made it here ;)")
424 return clean_me, saved
425 dprint("excludes", pn_excludes)
426 for key in list(clean_me):
427 if exclMatchFilename(pn_excludes, key):
428 saved[key] = clean_me[key]
430 self.output(" ...Saved excluded package filename: " + key)
431 return clean_me, saved
441 port_dbapi=portage.db[portage.root]["porttree"].dbapi,
442 var_dbapi=portage.db[portage.root]["vartree"].dbapi
444 """Find all obsolete binary packages.
446 XXX: packages are found only by symlinks.
447 Maybe i should also return .tbz2 files from All/ that have
448 no corresponding symlinks.
450 @param options: dict of options determined at runtime
451 @param exclude: an exclusion dict as defined in
452 exclude.parseExcludeFile class.
453 @param destructive: boolean, defaults to False
454 @param time_limit: integer time value as returned by parseTime()
455 @param package_names: boolean, defaults to False.
456 used only if destructive=True
457 @param pkgdir: path to the binary package dir being checked
458 @param port_dbapi: defaults to portage.db[portage.root]["porttree"].dbapi
459 can be overridden for tests.
460 @param var_dbapi: defaults to portage.db[portage.root]["vartree"].dbapi
461 can be overridden for tests.
464 @return clean_me i.e. {'cat/pkg-ver.tbz2': [filepath],}
467 # create a full package dictionary
469 # now do an access test, os.walk does not error for "no read permission"
471 test = os.listdir(pkgdir)
473 except EnvironmentError as er:
474 print( pp.error("Error accessing PKGDIR." ), file=sys.stderr)
475 print( pp.error("(Check your /etc/make.conf and environment)."), file=sys.stderr)
476 print( pp.error("Error: %s" %str(er)), file=sys.stderr)
478 for root, dirs, files in walk(pkgdir):
479 if root[-3:] == 'All':
482 if not file[-5:] == ".tbz2":
483 # ignore non-tbz2 files
485 path = os.path.join(root, file)
486 category = os.path.split(root)[-1]
487 cpv = category+"/"+file[:-5]
489 if time_limit and (st[stat.ST_MTIME] >= time_limit):
490 # time-limit exclusion
492 # dict is cpv->[files] (2 files in general, because of symlink)
493 clean_me[cpv] = [path]
494 #if os.path.islink(path):
495 if stat.S_ISLNK(st[stat.ST_MODE]):
496 clean_me[cpv].append(os.path.realpath(path))
497 # keep only obsolete ones
501 cp_all = dict.fromkeys(dbapi.cp_all())
507 for cpv in list(clean_me):
508 if exclDictMatchCP(exclude,portage.cpv_getkey(cpv)):
509 # exclusion because of the exclude file
512 if dbapi.cpv_exists(cpv):
513 # exclusion because pkg still exists (in porttree or vartree)
516 if portage.cpv_getkey(cpv) in cp_all:
517 # exclusion because of --package-names