Update to genscripts rev 382. This has more fixes for py3k and the modular rewrite...
[gentoolkit.git] / pym / gentoolkit / eclean / search.py
1 #!/usr/bin/python
2
3 # Copyright 2003-2010 Gentoo Foundation
4 # Distributed under the terms of the GNU General Public License v2
5
6
7 from __future__ import print_function
8
9
10 import re
11 import stat
12 import sys
13
14 import portage
15 from portage import os
16
17 import gentoolkit
18 import gentoolkit.pprinter as pp
19 from gentoolkit.eclean.exclude import (exclDictMatchCP, exclDictExpand,
20         exclDictExpandPkgname, exclMatchFilename)
21 #from gentoolkit.package import Package
22 from gentoolkit.helpers import walk
23
24
# Misc. shortcuts to some portage stuff:
port_settings = portage.settings
pkgdir = port_settings["PKGDIR"]

err = sys.stderr
# Warning template for installed packages that can only be found in the
# installed-package database; it is %-formatted with the cpv before output.
# (The literal previously opened with four quotes, which put a stray,
# unbalanced '"' at the start of every emitted warning.)
deprecated_message = """Deprecation Warning: Installed package: %s
        Is no longer in the tree or an installed overlay"""
DEPRECATED = pp.warn(deprecated_message)

# Names for which dprint() prints its message; empty means no debug output.
debug_modules = []
35
36
def dprint(module, message):
    """Print message only when module is listed in debug_modules.

    @param module: name looked up in the module-level debug_modules list
    @param message: object handed to print() when the name is listed
    """
    if module not in debug_modules:
        return
    print(message)
40
41
def get_distdir():
    """Return portage's DISTDIR setting if it is an existing directory.

    When DISTDIR is not a directory an error message is written to
    stderr and the process is terminated with exit status 1.

    @rtype: string
    @return the DISTDIR path
    @raise SystemExit: if DISTDIR is not a directory
    """
    d = portage.settings["DISTDIR"]
    if not os.path.isdir(d):
        e = pp.error("%s does not appear to be a directory.\n" % d)
        e += pp.error("Please set DISTDIR to a sane value.\n")
        e += pp.error("(Check your /etc/make.conf and environment).")
        print(e, file=sys.stderr)
        # sys.exit() rather than the site-provided exit() helper, which is
        # not guaranteed to exist (e.g. when python is started with -S).
        sys.exit(1)
    return d

# Resolved once at import time; importing this module therefore exits the
# process when DISTDIR is not a directory.
distdir = get_distdir()
55
56
class DistfilesSearch(object):
    """Finds the files in a distfiles directory that may be cleaned.

    @param output: verbose output method or (lambda x: None) to turn off
    @param portdb: defaults to portage.portdb and is overridden for testing.
    @param vardb: defaults to portage.db[portage.root]["vartree"].dbapi
            is overridden for testing.
    """

    def __init__(self,
            output,
            portdb=portage.portdb,
            vardb=portage.db[portage.root]["vartree"].dbapi,
            ):
        self.vardb = vardb
        self.portdb = portdb
        self.output = output

    def findDistfiles(self,
            exclude=None,
            destructive=False,
            fetch_restricted=False,
            package_names=False,
            time_limit=0,
            size_limit=0,
            _distdir=distdir,
            deprecate=False
            ):
        """Find all obsolete distfiles.

        XXX: what about cvs ebuilds?
        I should install some to see where it goes...

        @param exclude: an exclusion dict as defined in
                exclude.parseExcludeFile class. None (the default) is
                treated as an empty dict.
        @param destructive: boolean, defaults to False
        @param fetch_restricted: boolean, defaults to False
        @param package_names: boolean, defaults to False.
        @param time_limit: integer time value as returned by parseTime();
                files whose mtime is >= this value are not cleaned.
                0 disables the check.
        @param size_limit: integer file size; files of this size or larger
                are not cleaned. 0 disables the check.
        @param _distdir: path to the distfiles dir being checked, defaults to portage.
        @param deprecate: bool; when true the final exclusion check of the
                remaining clean candidates is skipped.

        @rtype: tuple
        @return (clean_me, saved, deprecated):
                clean_me: {filename: [filepath],} of files to clean,
                saved: same layout, files kept by the final exclusion check,
                deprecated: {cpv: src_uri,} of installed packages that were
                        only found in the installed-package database.
        """
        if exclude is None:
            exclude = {}
        pkgs = {}
        saved = {}
        deprecated = {}
        installed_included = False
        # create a big CPV->SRC_URI dict of packages
        # whose distfiles should be kept
        if (not destructive) or fetch_restricted:
            self.output("...non-destructive type search")
            # TODO fix fetch_restricted to save the installed packges filenames while processing
            pkgs, _deprecated = self._non_destructive(
                destructive, fetch_restricted, exclude=exclude)
            deprecated.update(_deprecated)
            installed_included = True
        if destructive:
            self.output("...destructive type search: %d packages already found" % len(pkgs))
            pkgs, _deprecated = self._destructive(package_names,
                exclude, pkgs, installed_included)
            deprecated.update(_deprecated)
        # gather the files to be cleaned
        self.output("...checking limits for %d ebuild sources"
            % len(pkgs))
        clean_me = self._check_limits(_distdir,
            size_limit, time_limit, exclude)
        # remove any protected files from the list
        self.output("...removing protected sources from %s candidates to clean"
            % len(clean_me))
        clean_me = self._remove_protected(pkgs, clean_me)
        if not deprecate and exclude and clean_me:
            self.output("...checking final for exclusion from " +
                "%s remaining candidates to clean" % len(clean_me))
            clean_me, saved = self._check_excludes(exclude, clean_me)
        return clean_me, saved, deprecated

    ####################### begin _check_limits code block

    def _check_limits(self,
            _distdir,
            size_limit,
            time_limit,
            exclude,
            clean_me=None
            ):
        """Collect the files in _distdir that pass every enabled limit check.

        Each check is called as check(file_stat, filename) and returns a
        (keep_checking, skip_file) pair; checking of a file stops at the
        first check that returns keep_checking False.

        @param _distdir: directory to list
        @param size_limit: enables _size_limit_ when non-zero
        @param time_limit: enables _time_limit_ when non-zero
        @param exclude: exclusion dict; enables _filenames_limit_ when it
                has a 'filenames' key
        @param clean_me: optional dict to add the candidates to. A new dict
                is created when omitted, so results never leak between
                calls through a shared default.

        @rtype: dict
        @return {filename: [filepath],} of cleaning candidates
        """
        if clean_me is None:
            clean_me = {}
        checks = [self._isreg_limit_]
        if size_limit:
            checks.append(self._size_limit_)
            self.size_limit = size_limit
        else:
            self.output("   - skipping size limit check")
        if time_limit:
            checks.append(self._time_limit_)
            self.time_limit = time_limit
        else:
            self.output("   - skipping time limit check")
        if 'filenames' in exclude:
            checks.append(self._filenames_limit_)
            self.exclude = exclude
        else:
            self.output("   - skipping exclude filenames check")
        for filename in os.listdir(_distdir):
            filepath = os.path.join(_distdir, filename)
            try:
                file_stat = os.stat(filepath)
            except Exception:
                # best effort: silently ignore entries that cannot be
                # stat'ed, but let KeyboardInterrupt/SystemExit through
                continue
            skip_file = False
            for check in checks:
                keep_checking, skip_file = check(file_stat, filename)
                if not keep_checking:
                    break
            if skip_file:
                continue
            # this is a candidate for cleaning
            #print( "Adding file to clean_list:", filename)
            clean_me[filename] = [filepath]
        return clean_me

    def _isreg_limit_(self, file_stat, file):
        """Skip anything that is not a regular file.

        @return (keep_checking, skip_file)
        """
        is_reg_file = stat.S_ISREG(file_stat[stat.ST_MODE])
        return is_reg_file, not is_reg_file

    def _size_limit_(self, file_stat, file):
        """Skip files whose size is at or above self.size_limit.

        @return (keep_checking, skip_file)
        """
        if file_stat[stat.ST_SIZE] >= self.size_limit:
            #print( "size match ", file, file_stat[stat.ST_SIZE])
            return False, True
        return True, False

    def _time_limit_(self, file_stat, file):
        """Skip files whose mtime is at or after self.time_limit.

        @return (keep_checking, skip_file)
        """
        if file_stat[stat.ST_MTIME] >= self.time_limit:
            #print( "time match ", file, file_stat[stat.ST_MTIME])
            return False, True
        return True, False

    def _filenames_limit_(self, file_stat, file):
        """Skip files matched by the 'filenames' section of self.exclude.

        A file matches when its name is a key of that mapping or when the
        .match() method of one of the mapping's values accepts the name.

        @return (keep_checking, skip_file)
        """
        filenames = self.exclude['filenames']
        # Try to match file name directly
        if file in filenames:
            return False, True
        # See if file matches via regular expression matching
        if any(filenames[entry].match(file) for entry in filenames):
            return False, True
        return True, False

    ####################### end _check_limits code block

    def _remove_protected(self,
            pkgs,
            clean_me
            ):
        """Remove files owned by some protected packages.

        @param pkgs: {cpv: src_uri,} of the protected packages
        @param clean_me: {filename: [filepath],} candidates; modified in place

        @returns packages to clean
        @rtype: dictionary
        """
        # this regexp extracts files names from SRC_URI. It is not very precise,
        # but we don't care (may return empty strings, etc.), since it is fast.
        file_regexp = re.compile(r'([a-zA-Z0-9_,\.\-\+\~]*)[\s\)]')
        for cpv in pkgs:
            for filename in file_regexp.findall(pkgs[cpv] + "\n"):
                clean_me.pop(filename, None)
            # no need to waste IO time if there is nothing left to clean
            if not clean_me:
                break
        return clean_me

    def _non_destructive(self,
            destructive,
            fetch_restricted,
            pkgs_=None,
            exclude=None
            ):
        """performs the non-destructive checks

        @param destructive: boolean
        @param fetch_restricted: boolean
        @param pkgs_: starting dictionary to add to
                defaults to an empty dict; it is copied, not modified.
        @param exclude: accepted for interface compatibility; not used by
                this method.

        @returns packages and thier SRC_URI's: {cpv: src_uri,}, and the
                deprecated packages in the same layout
        @rtype: tuple of two dictionaries
        """
        pkgs = {} if pkgs_ is None else pkgs_.copy()
        deprecated = {}
        # the following code block was split to optimize for speed
        # list all CPV from portree (yeah, that takes time...)
        self.output("   - getting complete ebuild list")
        cpvs = set(self.portdb.cpv_all())
        # now add any installed cpv's that are not in the tree or overlays
        installed_cpvs = self.vardb.cpv_all()
        cpvs.update(installed_cpvs)
        if fetch_restricted and destructive:
            self.output("   - getting source file names " +
                "for %d installed ebuilds" % len(installed_cpvs))
            pkgs, _deprecated = self._unrestricted(pkgs, installed_cpvs)
            deprecated.update(_deprecated)
            # remove the installed cpvs then check the remaining for fetch restiction
            cpvs.difference_update(installed_cpvs)
            self.output("   - getting fetch-restricted source file names " +
                "for %d remaining ebuilds" % len(cpvs))
            pkgs, _deprecated = self._fetch_restricted(destructive, pkgs, cpvs)
            deprecated.update(_deprecated)
        else:
            self.output("   - getting source file names " +
                "for %d ebuilds" % len(cpvs))
            pkgs, _deprecated = self._unrestricted(pkgs, cpvs)
            deprecated.update(_deprecated)
        return pkgs, deprecated

    def _fetch_restricted(self, destructive, pkgs_, cpvs):
        """perform fetch restricted non-destructive source
        filename lookups

        Only packages whose RESTRICT contains 'fetch' are added. A cpv
        that raises KeyError in portdb is looked up in vardb and, when
        found there, recorded as deprecated whether or not it is fetch
        restricted.

        @param destructive: boolean (not used by this method)
        @param pkgs_: starting dictionary to add to
        @param cpvs: set of (cat/pkg-ver, ...) identifiers

        @return a new pkg dictionary and the deprecated dictionary
        @rtype: tuple of two dictionaries
        """
        pkgs = pkgs_.copy()
        deprecated = {}
        for cpv in cpvs:
            # get SRC_URI and RESTRICT from aux_get
            try: # main portdb
                (src_uri, restrict) = \
                    self.portdb.aux_get(cpv, ["SRC_URI", "RESTRICT"])
                # keep fetch-restricted check
                # inside try so it is bypassed on KeyError
                if 'fetch' in restrict:
                    pkgs[cpv] = src_uri
            except KeyError:
                try: # installed vardb
                    (src_uri, restrict) = \
                        self.vardb.aux_get(cpv, ["SRC_URI", "RESTRICT"])
                    deprecated[cpv] = src_uri
                    self.output(DEPRECATED % cpv)
                    # keep fetch-restricted check
                    # inside try so it is bypassed on KeyError
                    if 'fetch' in restrict:
                        pkgs[cpv] = src_uri
                except KeyError:
                    self.output("   - Key Error looking up: " + cpv)
        return pkgs, deprecated

    def _unrestricted(self, pkgs_, cpvs):
        """Perform unrestricted source filenames lookups

        A cpv that raises KeyError in portdb is looked up in vardb and,
        when found there, also recorded as deprecated.

        @param pkgs_: starting packages dictionary
        @param cpvs: set of (cat/pkg-ver, ...) identifiers

        @return a new pkg dictionary and the deprecated dictionary
        @rtype: tuple of two dictionaries
        """
        pkgs = pkgs_.copy()
        deprecated = {}
        for cpv in cpvs:
            # get SRC_URI from aux_get
            try:
                pkgs[cpv] = self.portdb.aux_get(cpv, ["SRC_URI"])[0]
            except KeyError:
                try: # installed vardb
                    pkgs[cpv] = self.vardb.aux_get(cpv, ["SRC_URI"])[0]
                    deprecated[cpv] = pkgs[cpv]
                    self.output(DEPRECATED % cpv)
                except KeyError:
                    self.output("   - Key Error looking up: " + cpv)
        return pkgs, deprecated

    def _destructive(self,
            package_names,
            exclude,
            pkgs_=None,
            installed_included=False
            ):
        """Builds on pkgs according to input options

        @param package_names: boolean
        @param exclude: an exclusion dict as defined in
                exclude.parseExcludeFile class.
        @param pkgs_: starting dictionary to add to
                defaults to an empty dict; it is copied, not modified.
        @param installed_included: bool. pkgs already
                has the installed cpv's added.

        @returns pkgs: {cpv: src_uri,}, and the deprecated packages in
                the same layout
        """
        pkgs = {} if pkgs_ is None else pkgs_.copy()
        deprecated = {}
        pkgset = set()
        if not installed_included:
            if not package_names:
                # list all installed CPV's from vartree
                #print( "_destructive: getting vardb.cpv_all")
                pkgset.update(self.vardb.cpv_all())
                self.output("   - processing %s installed ebuilds" % len(pkgset))
            else:
                # list all CPV's from portree for CP's in vartree
                #print( "_destructive: getting vardb.cp_all")
                cps = self.vardb.cp_all()
                self.output("   - processing %s installed packages" % len(cps))
                for package in cps:
                    pkgset.update(self.portdb.cp_list(package))
        self.output("   - processing excluded")
        excludes = self._get_excludes(exclude)
        excludes_length = len(excludes)
        pkgset.update(excludes)
        # do not look up again what the caller already resolved
        pkgset.difference_update(set(pkgs))
        self.output(
            "   - (%d of %d total) additional excluded packages to get source filenames for"
            % (len(pkgset), excludes_length))
        #self.output("   - processing %d ebuilds for filenames" %len(pkgset))
        pkgs, _deprecated = self._unrestricted(pkgs, pkgset)
        deprecated.update(_deprecated)
        #self.output("   - done...")
        return pkgs, deprecated

    def _get_excludes(self, exclude):
        """Expands the exclude dictionary into a set of
        CPV's

        @param exclude: dictionary of exclusion categories,
            packages to exclude from the cleaning

        @rtype: set
        @return set of package cpv's
        """
        pkgset = set()
        for cp in exclDictExpand(exclude):
            # add packages from the exclude file
            pkgset.update(self.portdb.cp_list(cp))
        return pkgset

    def _check_excludes(self, exclude, clean_me):
        """Performs a last minute check on remaining filenames
        to see if they should be protected.  Since if the pkg-version
        was deprecated it would not have been matched to a
        source filename and removed.

        @param exclude: an exclusion dictionary
        @param clean_me: the dict of filenames for cleaning; matching
                entries are moved out of it in place

        @rtype: tuple
        @return (clean_me, saved): the remaining files to clean and the
                files that were saved by the exclusion
        """
        saved = {}
        pn_excludes = exclDictExpandPkgname(exclude)
        dprint("excludes", "_check_excludes: made it here ;)")
        if not pn_excludes:
            return clean_me, saved
        dprint("excludes", pn_excludes)
        # iterate over a snapshot of the keys, entries are removed below
        for key in list(clean_me):
            if exclMatchFilename(pn_excludes, key):
                saved[key] = clean_me.pop(key)
                self.output("   ...Saved excluded package filename: " + key)
        return clean_me, saved
432
433
def findPackages(
        options,
        exclude=None,
        destructive=False,
        time_limit=0,
        package_names=False,
        pkgdir=None,
        port_dbapi=portage.db[portage.root]["porttree"].dbapi,
        var_dbapi=portage.db[portage.root]["vartree"].dbapi
):
    """Find all obsolete binary packages.

    XXX: packages are found only by symlinks.
    Maybe i should also return .tbz2 files from All/ that have
    no corresponding symlinks.

    @param options: dict of options determined at runtime
            (not used by this function)
    @param exclude: an exclusion dict as defined in
            exclude.parseExcludeFile class. None (the default) is
            treated as an empty dict.
    @param destructive: boolean, defaults to False
    @param time_limit: integer time value as returned by parseTime();
            packages whose mtime is >= this value are not cleaned.
    @param package_names: boolean, defaults to False.
            used only if destructive=True
    @param pkgdir: path to the binary package dir being checked;
            None (the default) selects portage's PKGDIR setting.
    @param port_dbapi: defaults to portage.db[portage.root]["porttree"].dbapi
                    can be overridden for tests.
    @param var_dbapi: defaults to portage.db[portage.root]["vartree"].dbapi
                    can be overridden for tests.

    @rtype: dict
    @return clean_me i.e. {'cat/pkg-ver': [filepath],}
    @raise SystemExit: if pkgdir cannot be listed
    """
    if exclude is None:
        exclude = {}
    if pkgdir is None:
        # os.listdir(None) would silently scan the current directory on
        # python 3, so fall back to the configured package directory
        pkgdir = portage.settings["PKGDIR"]
    clean_me = {}
    # create a full package dictionary

    # now do an access test, os.walk does not error for "no read permission"
    try:
        os.listdir(pkgdir)
    except EnvironmentError as er:
        print(pp.error("Error accessing PKGDIR."), file=sys.stderr)
        print(pp.error("(Check your /etc/make.conf and environment)."), file=sys.stderr)
        print(pp.error("Error: %s" % str(er)), file=sys.stderr)
        # sys.exit() rather than the site-provided exit() helper
        sys.exit(1)
    for root, dirs, files in walk(pkgdir):
        if root[-3:] == 'All':
            continue
        category = os.path.split(root)[-1]
        for filename in files:
            if not filename.endswith(".tbz2"):
                # ignore non-tbz2 files
                continue
            path = os.path.join(root, filename)
            cpv = category + "/" + filename[:-5]
            st = os.lstat(path)
            if time_limit and (st[stat.ST_MTIME] >= time_limit):
                # time-limit exclusion
                continue
            # dict is cpv->[files] (2 files in general, because of symlink)
            clean_me[cpv] = [path]
            #if os.path.islink(path):
            if stat.S_ISLNK(st[stat.ST_MODE]):
                clean_me[cpv].append(os.path.realpath(path))
    # keep only obsolete ones
    if destructive:
        dbapi = var_dbapi
        # with --package-names every installed cat/pkg protects its packages
        cp_all = set(dbapi.cp_all()) if package_names else set()
    else:
        dbapi = port_dbapi
        cp_all = set()
    # iterate over a snapshot of the keys, entries are removed below
    for cpv in list(clean_me):
        cp = portage.cpv_getkey(cpv)
        if (exclDictMatchCP(exclude, cp)    # excluded by the exclude file
                or dbapi.cpv_exists(cpv)    # pkg still exists (in porttree or vartree)
                or cp in cp_all):           # excluded because of --package-names
            del clean_me[cpv]

    return clean_me