update copyright years
[portage.git] / bin / md5check.py
1 #!/usr/bin/python -O
2 # Copyright 1999-2006 Gentoo Foundation
3 # Distributed under the terms of the GNU General Public License v2
4 # $Id: /var/cvsroot/gentoo-src/portage/bin/md5check.py,v 1.4 2004/10/10 10:07:20 carpaski Exp $
5
6 import os,sys,string
7 os.environ["PORTAGE_CALLER"]="mirror"
8 os.environ["FEATURES"]="mirror cvs"
9 sys.path = ["/usr/lib/portage/pym"]+sys.path
10
11 import portage
12 from threading import *
13 from output import red,green,blue,bold
14 from random import shuffle
15 from time import sleep
16
17
def cstrip(mystr,mychars):
    """Return a copy of mystr with every character found in mychars removed.

    mystr   -- the string to filter.
    mychars -- any container supporting "in" (typically a string of the
               characters to drop).

    The relative order of the remaining characters is preserved.
    """
    # Build the result in one join instead of repeated "+=" concatenation,
    # which copies the partial string on every kept character.
    return "".join([ch for ch in mystr if ch not in mychars])
24
def _digest_fields(entry):
    """Return (md5, size) for one entry of a parsed digest file.

    NOTE(review): the exact shape returned by portage.digestParseFile() is
    not visible from this script.  Earlier revisions of this code indexed
    entries both as mappings ("MD5"/"size" keys) and as [md5, size]
    sequences, so both shapes are accepted here -- confirm against the
    installed portage and drop the unused branch.
    """
    try:
        return entry["MD5"], entry["size"]
    except TypeError:
        # Sequences reject string subscripts; fall back to positional form.
        return entry[0], entry[1]


def _src_uri_basenames(tokens):
    """Return the distinct distfile basenames named by SRC_URI tokens.

    tokens -- the whitespace-split SRC_URI string.

    Grouping/operator tokens ("(", ")", ":", "||") and tokens ending in
    "?" are skipped, the characters "()|?" are stripped from everything
    else, and each basename is reported once, in first-seen order.
    """
    names = []
    for tok in tokens:
        if not tok:
            continue
        if tok in (")", "(", ":", "||") or tok[-1] == "?":
            # Syntax token, not a URI.
            continue
        tok = cstrip(tok, "()|?")
        if not tok:
            continue
        name = os.path.basename(tok)
        if name not in names:
            names.append(name)
    return names


# State accumulated over the whole tree scan.
md5_list = {}   # basename -> [md5, size, "cpv1,cpv2,..."] of the first digest seen
bn_list  = []   # every distinct distfile basename referenced by any SRC_URI
col_list = []   # basenames whose digest differs between packages

seen_bn = {}    # membership index for bn_list (basename -> True)

# Look the tree dbapi up once instead of on every call.
portdb = portage.db["/"]["porttree"].dbapi

hugelist = []
for mycp in portdb.cp_all():
    hugelist += portdb.cp_list(mycp)
hugelist.sort()

for mycpv in hugelist:
    pv = mycpv.split("/")[-1]

    newuri = portdb.aux_get(mycpv, ["SRC_URI"])[0].split()

    digestpath = portdb.findname(mycpv)
    digestpath = os.path.dirname(digestpath)+"/files/digest-"+pv
    md5sums    = portage.digestParseFile(digestpath)

    if md5sums is None:
        portage.writemsg("Missing digest: %s\n" % mycpv)
        md5sums = {}

    # Drop entries keyed by an absolute path.  Iterate over a copy of the
    # keys because entries are deleted during the loop.
    for x in list(md5sums.keys()):
        if x[:1] == '/':
            del md5sums[x]

    # Basenames are de-duplicated per package only: a file shared by two
    # packages must still be compared against the digest recorded earlier,
    # otherwise collisions between packages could never be detected.
    pkg_bn_list = _src_uri_basenames(newuri)

    for mybn in pkg_bn_list:
        # Keep the tree-wide list of unique names for the final summary.
        # (Previously bn_list was reset for every package, so the summary
        # only counted the names of the last package scanned.)
        if mybn not in seen_bn:
            seen_bn[mybn] = True
            bn_list.append(mybn)

        if mybn not in md5sums:
            # Was portage_util.writemsg, but portage_util is never imported.
            portage.writemsg("Missing md5sum: %s in %s\n" % (mybn, mycpv))
            continue

        mymd5, mysize = _digest_fields(md5sums[mybn])

        if mybn in md5_list:
            oldmd5, oldsize, oldcpvs = md5_list[mybn]
            if (oldmd5 != mymd5) or (oldsize != mysize):
                # This associates the md5 with each file. [md5/size]
                md5joins = ",".join(["%s [%s/%s]" % (cpv, oldmd5, oldsize)
                                     for cpv in oldcpvs.split(",")])

                portage.writemsg("Colliding md5: %s of %s [%s/%s] and %s\n" % (mybn,mycpv,mymd5,mysize,md5joins))
                col_list.append(mybn)
            else:
                # Same digest: remember this package as another user.
                md5_list[mybn][2] += ","+mycpv
        else:
            md5_list[mybn] = [mymd5, mysize, mycpv]

        # Accounted for; whatever is left afterwards is reported as extra.
        del md5sums[mybn]

    for x in md5sums:
        if x not in pkg_bn_list:
            portage.writemsg("Extra md5sum: %s in %s\n" % (x, mycpv))


# Single-argument print(...) behaves the same as the print statement on
# Python 2 and stays valid syntax on Python 3.
print(col_list)
print("")
print(str(len(md5_list))+" unique distfile md5s.")
print(str(len(bn_list))+" unique distfile names.")