Update date headers for modified files.
[portage.git] / pym / portage / getbinpkg.py
1 # getbinpkg.py -- Portage binary-package helper functions
2 # Copyright 2003-2011 Gentoo Foundation
3 # Distributed under the terms of the GNU General Public License v2
4
5 from portage.output import colorize
6 from portage.cache.mappings import slot_dict_class
7 from portage.localization import _
8 import portage
9 from portage import os
10 from portage import _encodings
11 from portage import _unicode_encode
12
13 import sys
14 import socket
15 import time
16 import tempfile
17 import base64
18
19 _all_errors = [NotImplementedError, ValueError, socket.error]
20
21 try:
22         from html.parser import HTMLParser as html_parser_HTMLParser
23 except ImportError:
24         from HTMLParser import HTMLParser as html_parser_HTMLParser
25
26 try:
27         from urllib.parse import unquote as urllib_parse_unquote
28 except ImportError:
29         from urllib2 import unquote as urllib_parse_unquote
30
31 try:
32         import cPickle as pickle
33 except ImportError:
34         import pickle
35
36 try:
37         import ftplib
38 except ImportError as e:
39         sys.stderr.write(colorize("BAD","!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
40 else:
41         _all_errors.extend(ftplib.all_errors)
42
43 try:
44         try:
45                 from http.client import HTTPConnection as http_client_HTTPConnection
46                 from http.client import BadStatusLine as http_client_BadStatusLine
47                 from http.client import ResponseNotReady as http_client_ResponseNotReady
48                 from http.client import error as http_client_error
49         except ImportError:
50                 from httplib import HTTPConnection as http_client_HTTPConnection
51                 from httplib import BadStatusLine as http_client_BadStatusLine
52                 from httplib import ResponseNotReady as http_client_ResponseNotReady
53                 from httplib import error as http_client_error
54 except ImportError as e:
55         sys.stderr.write(colorize("BAD","!!! CANNOT IMPORT HTTP.CLIENT: ")+str(e)+"\n")
56 else:
57         _all_errors.append(http_client_error)
58
59 _all_errors = tuple(_all_errors)
60
61 if sys.hexversion >= 0x3000000:
62         long = int
63
def make_metadata_dict(data):
	"""Convert an xpak (index, data) tuple into a {key: value} dict.

	Every key listed in the xpak index is looked up via
	portage.xpak.getitem() and stored under the same name.
	"""
	xpak_index, _xpak_glob = data

	return dict(
		(key, portage.xpak.getitem(data, key))
		for key in portage.xpak.getindex_mem(xpak_index))
72
class ParseLinks(html_parser_HTMLParser):
	"""HTMLParser subclass that records the (unquoted) href target of
	every anchor tag on a page, with helpers to filter the collected
	links by prefix or suffix."""

	def __init__(self):
		self.PL_anchors = []
		html_parser_HTMLParser.__init__(self)

	def get_anchors(self):
		return self.PL_anchors

	def get_anchors_by_prefix(self, prefix):
		matched = []
		for anchor in self.PL_anchors:
			if anchor.startswith(prefix) and anchor not in matched:
				matched.append(anchor)
		return matched

	def get_anchors_by_suffix(self, suffix):
		matched = []
		for anchor in self.PL_anchors:
			if anchor.endswith(suffix) and anchor not in matched:
				matched.append(anchor)
		return matched

	def handle_endtag(self, tag):
		pass

	def handle_starttag(self, tag, attrs):
		if tag != "a":
			return
		for name, value in attrs:
			# NOTE: historical behavior -- membership is tested against
			# the raw attribute value, while the stored form is unquoted.
			if name == 'href' and value not in self.PL_anchors:
				self.PL_anchors.append(urllib_parse_unquote(value))
108
109
def create_conn(baseurl,conn=None):
	"""(baseurl,conn) --- Takes a protocol://site:port/address url, and an
	optional connection. If connection is already active, it is passed on.
	baseurl is reduced to address and is returned in tuple (conn,address)

	Supported protocols: http, https, ftp (append '*' to the host to
	disable passive mode) and sftp (requires paramiko).  An optional
	user[:password]@ prefix on the host supplies credentials; without
	one, the user defaults to "anonymous".

	@return: (conn, protocol, address, http_params, http_headers)
	@raise ValueError: malformed URI or credentials
	@raise NotImplementedError: unsupported protocol or missing ssl/paramiko
	"""

	parts = baseurl.split("://",1)
	if len(parts) != 2:
		raise ValueError(_("Provided URI does not "
			"contain protocol identifier. '%s'") % baseurl)
	protocol,url_parts = parts
	del parts

	url_parts = url_parts.split("/")
	host = url_parts[0]
	if len(url_parts) < 2:
		address = "/"
	else:
		address = "/"+"/".join(url_parts[1:])
	del url_parts

	userpass_host = host.split("@",1)
	if len(userpass_host) == 1:
		host = userpass_host[0]
		userpass = ["anonymous"]
	else:
		host = userpass_host[1]
		userpass = userpass_host[0].split(":")
	del userpass_host

	if len(userpass) > 2:
		raise ValueError(_("Unable to interpret username/password provided."))
	elif len(userpass) == 2:
		username = userpass[0]
		password = userpass[1]
	elif len(userpass) == 1:
		username = userpass[0]
		password = None
	del userpass

	http_headers = {}
	http_params = {}
	if username and password:
		# base64.encodestring() required bytes on Python 3 and was
		# removed in 3.9; b64encode() works on both 2 and 3 and never
		# inserts newlines, so no stripping is needed.
		auth = base64.b64encode(
			("%s:%s" % (username, password)).encode()).decode()
		http_headers = {
			"Authorization": "Basic %s" % auth,
		}

	if not conn:
		if protocol == "https":
			# Use local import since https typically isn't needed, and
			# this way we can usually avoid triggering the global scope
			# http.client ImportError handler (like during stage1 -> stage2
			# builds where USE=ssl is disabled for python).
			try:
				try:
					from http.client import HTTPSConnection as http_client_HTTPSConnection
				except ImportError:
					from httplib import HTTPSConnection as http_client_HTTPSConnection
			except ImportError:
				raise NotImplementedError(
					_("python must have ssl enabled for https support"))
			conn = http_client_HTTPSConnection(host)
		elif protocol == "http":
			conn = http_client_HTTPConnection(host)
		elif protocol == "ftp":
			# A trailing "*" on the host disables passive mode.
			passive = 1
			if(host[-1] == "*"):
				passive = 0
				host = host[:-1]
			conn = ftplib.FTP(host)
			if password:
				conn.login(username,password)
			else:
				sys.stderr.write(colorize("WARN",
					_(" * No password provided for username"))+" '%s'" % \
					(username,) + "\n\n")
				conn.login(username)
			conn.set_pasv(passive)
			conn.set_debuglevel(0)
		elif protocol == "sftp":
			try:
				import paramiko
			except ImportError:
				raise NotImplementedError(
					_("paramiko must be installed for sftp support"))
			t = paramiko.Transport(host)
			t.connect(username=username, password=password)
			conn = paramiko.SFTPClient.from_transport(t)
		else:
			raise NotImplementedError(_("%s is not a supported protocol.") % protocol)

	return (conn,protocol,address, http_params, http_headers)
205
def make_ftp_request(conn, address, rest=None, dest=None):
	"""(conn,address,rest) --- uses the conn object to request the data
	from address and issuing a rest if it is passed.

	@param conn: ftplib.FTP-like connection object
	@param address: remote path passed to RETR
	@param rest: optional resume offset; a negative value means "the
		last -rest bytes of the file"
	@param dest: optional file-like object; when given, the payload is
		written to it and the returned data string is empty
	@return: (data, incomplete_flag, "") on success, where the flag is
		True when the byte count received differs from the server-reported
		size; or (None, ftp_error_code, error_message) on failure
	"""
	try:

		if dest:
			fstart_pos = dest.tell()

		conn.voidcmd("TYPE I")
		fsize = conn.size(address)

		# Negative rest: resume this many bytes before EOF, clamped to 0.
		if (rest != None) and (rest < 0):
			rest = fsize+int(rest)
			# Original compared None < 0 here, a TypeError on Python 3;
			# only clamp when a rest value was actually supplied.
			if rest < 0:
				rest = 0

		if rest != None:
			mysocket = conn.transfercmd("RETR "+str(address), rest)
		else:
			mysocket = conn.transfercmd("RETR "+str(address))

		mydata = ""
		while 1:
			somedata = mysocket.recv(8192)
			if somedata:
				if dest:
					dest.write(somedata)
				else:
					mydata = mydata + somedata
			else:
				break

		if dest:
			# Bytes written = current position minus starting position.
			# (The original had the operands reversed, which made this
			# negative and the completeness check below always fail.)
			data_size = dest.tell() - fstart_pos
		else:
			data_size = len(mydata)

		mysocket.close()
		conn.voidresp()
		conn.voidcmd("TYPE A")

		return mydata,not (fsize==data_size),""

	except ValueError as e:
		# NOTE(review): presumably the caught error's str() begins with
		# the numeric FTP status code -- confirm which path raises a
		# ValueError here. Tuple shape preserved for callers.
		return None,int(str(e)[:4]),str(e)
251         
252
def make_http_request(conn, address, params={}, headers={}, dest=None):
	"""(conn,address,params,headers) --- uses the conn object to request
	the data from address, performing Location forwarding and using the
	optional params and headers.

	@return: (data, 0, "") on success -- data is "" when dest is given,
		with the payload written to dest instead -- or
		(None, status_or_None, error_message) on failure.
	"""

	rc = 0
	response = None
	# Cap redirect hops so a redirect cycle (or a 301/302 with no
	# Location header) cannot loop forever; 10 mirrors common clients.
	remaining_redirects = 10
	while (rc == 0) or (rc == 301) or (rc == 302):
		try:
			if (rc != 0):
				# Follow the redirect with a fresh connection.
				conn = create_conn(address)[0]
			conn.request("GET", address, params, headers)
		except SystemExit as e:
			raise
		except Exception as e:
			return None,None,"Server request failed: "+str(e)
		response = conn.getresponse()
		rc = response.status

		# 301/302 mean the resource lives at the address given in the
		# Location header; drain the body and follow it.
		if ((rc == 301) or (rc == 302)):
			if remaining_redirects <= 0:
				return None,rc,"Redirect limit exceeded"
			remaining_redirects -= 1
			ignored_data = response.read()
			del ignored_data
			for x in str(response.msg).split("\n"):
				parts = x.split(": ",1)
				if parts[0] == "Location":
					if (rc == 301):
						sys.stderr.write(colorize("BAD",
							_("Location has moved: ")) + str(parts[1]) + "\n")
					if (rc == 302):
						sys.stderr.write(colorize("BAD",
							_("Location has temporarily moved: ")) + \
							str(parts[1]) + "\n")
					address = parts[1]
					break

	if (rc != 200) and (rc != 206):
		return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"

	if dest:
		dest.write(response.read())
		return "",0,""

	return response.read(),0,""
297
298
def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
	"""Return the items of array that match the given prefix and/or
	suffix.

	With both a prefix and a suffix and match_both set, an item must
	carry both; unless allow_overlap is set, it must also be long
	enough that the prefix and suffix do not share characters.  When
	only one of prefix/suffix is given, matching either is enough.
	"""
	matches = []

	# Requiring both only makes sense when both were supplied.
	if not (prefix and suffix):
		match_both = 0

	for item in array:
		has_prefix = bool(prefix) and item.startswith(prefix)

		if match_both:
			if prefix and not has_prefix:
				continue  # Need both; the first is already missing.
		elif has_prefix:
			matches.append(item)  # Only one needed, and we have it.
			continue

		# Without overlap, the item must be long enough to hold the
		# prefix and suffix in disjoint positions.
		if not allow_overlap and len(item) < len(prefix) + len(suffix):
			continue

		if suffix and item.endswith(suffix):
			matches.append(item)

	return matches
332                         
333
334
def dir_get_list(baseurl,conn=None):
	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://<site>[:port]<path>
	Connection is used for persistent connection instances."""

	# Remember whether the caller owns the connection; if not, close the
	# one create_conn() makes for us before returning.
	keepconnection = 1 if conn else 0

	conn, protocol, address, params, headers = create_conn(baseurl, conn)

	listing = None
	if protocol in ["http", "https"]:
		if not address.endswith("/"):
			# http servers can return a 400 error here
			# if the address doesn't end with a slash.
			address += "/"
		page, rc, msg = make_http_request(conn, address, params, headers)

		if not page:
			import portage.exception
			raise portage.exception.PortageException(
				_("Unable to get listing: %s %s") % (rc, msg))
		parser = ParseLinks()
		parser.feed(page)
		del page
		listing = parser.get_anchors()
	elif protocol in ["ftp"]:
		if address[-1] == '/':
			# Directory listing: cd in, list, and cd back out.
			previous_dir = conn.pwd()
			conn.cwd(address)
			listing = conn.nlst()
			conn.cwd(previous_dir)
			del previous_dir
		else:
			listing = conn.nlst(address)
	elif protocol == "sftp":
		listing = conn.listdir(address)
	else:
		raise TypeError(_("Unknown protocol. '%s'") % protocol)

	if not keepconnection:
		conn.close()

	return listing
def file_get_metadata(baseurl,conn=None, chunk_size=3000):
	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://<site>[:port]<path>
	Connection is used for persistent connection instances.

	Fetches the trailing chunk_size bytes of a remote binary package
	and extracts the xpak metadata segment from them.  Returns the
	result of portage.xpak.xsplit_mem() (a 2-tuple), or (None, None)
	when nothing could be retrieved or the segment could not be split.
	"""

	if not conn:
		keepconnection = 0
	else:
		keepconnection = 1

	conn,protocol,address,params,headers = create_conn(baseurl, conn)

	if protocol in ["http","https"]:
		# A suffix Range request asks for the last chunk_size bytes.
		headers["Range"] = "bytes=-"+str(chunk_size)
		data,rc,msg = make_http_request(conn, address, params, headers)
	elif protocol in ["ftp"]:
		# make_ftp_request() treats a negative rest as an offset from
		# the end of the file.
		data,rc,msg = make_ftp_request(conn, address, -chunk_size)
	elif protocol == "sftp":
		# NOTE(review): seek(-chunk_size, 2) presumably fails on files
		# shorter than chunk_size -- confirm against callers.
		f = conn.open(address)
		try:
			f.seek(-chunk_size, 2)
			data = f.read()
		finally:
			f.close()
	else:
		raise TypeError(_("Unknown protocol. '%s'") % protocol)
	
	if data:
		# Bytes [-8:-4] of a tbz2 encode the size of the trailing xpak
		# segment (see portage.xpak).
		xpaksize = portage.xpak.decodeint(data[-8:-4])
		if (xpaksize+8) > chunk_size:
			# The chunk fetched is too small to contain the whole xpak
			# segment; recurse with a chunk exactly large enough.
			myid = file_get_metadata(baseurl, conn, (xpaksize+8))
			if not keepconnection:
				conn.close()
			return myid
		else:
			# Strip the 8-byte trailer, keeping only the xpak payload.
			xpak_data = data[len(data)-(xpaksize+8):-8]
		del data

		myid = portage.xpak.xsplit_mem(xpak_data)
		if not myid:
			myid = None,None
		del xpak_data
	else:
		myid = None,None

	if not keepconnection:
		conn.close()

	return myid
432
433
def file_get(baseurl,dest,conn=None,fcmd=None,filename=None):
	"""(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://[user[:pass]@]<site>[:port]<path>"""

	# No external fetch command: fall back to the built-in protocol
	# handlers.
	if not fcmd:
		return file_get_lib(baseurl,dest,conn)
	if not filename:
		filename = os.path.basename(baseurl)

	variables = {
		"DISTDIR": dest,
		"URI":     baseurl,
		"FILE":    filename
	}

	from portage.util import varexpand
	from portage.process import spawn
	# Expand ${DISTDIR}/${URI}/${FILE} placeholders in every word of
	# the configured fetch command.
	argv = [varexpand(word, mydict=variables)
		for word in portage.util.shlex_split(fcmd)]
	fd_pipes = {
		0: sys.stdin.fileno(),
		1: sys.stdout.fileno(),
		2: sys.stdout.fileno()
	}
	if spawn(argv, env=os.environ.copy(), fd_pipes=fd_pipes) != os.EX_OK:
		sys.stderr.write(_("Fetcher exited with a failure condition.\n"))
		return 0
	return 1
463
def file_get_lib(baseurl,dest,conn=None):
	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://<site>[:port]<path>
	Connection is used for persistent connection instances.

	Writes the fetched file into the file-like object dest and returns
	a protocol-dependent status: for http/ftp the rc value from the
	underlying request helper, for sftp 0 on success and 1 on failure.
	"""

	if not conn:
		keepconnection = 0
	else:
		keepconnection = 1

	conn,protocol,address,params,headers = create_conn(baseurl, conn)

	sys.stderr.write("Fetching '"+str(os.path.basename(address)+"'\n"))
	if protocol in ["http","https"]:
		data,rc,msg = make_http_request(conn, address, params, headers, dest=dest)
	elif protocol in ["ftp"]:
		data,rc,msg = make_ftp_request(conn, address, dest=dest)
	elif protocol == "sftp":
		rc = 0
		try:
			f = conn.open(address)
		except SystemExit:
			raise
		except Exception:
			# Any open failure is reported as a generic failure code.
			rc = 1
		else:
			try:
				if dest:
					bufsize = 8192
					# Stream in fixed-size chunks so large files are
					# never held in memory all at once.
					while True:
						data = f.read(bufsize)
						if not data:
							break
						dest.write(data)
			finally:
				f.close()
	else:
		raise TypeError(_("Unknown protocol. '%s'") % protocol)
	
	if not keepconnection:
		conn.close()

	return rc
507
508
509 def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
510         """(baseurl,conn,chunk_size,verbose) -- 
511         """
512         if not conn:
513                 keepconnection = 0
514         else:
515                 keepconnection = 1
516
517         cache_path = "/var/cache/edb"
518         metadatafilename = os.path.join(cache_path, 'remote_metadata.pickle')
519
520         if makepickle is None:
521                 makepickle = "/var/cache/edb/metadata.idx.most_recent"
522
523         try:
524                 conn, protocol, address, params, headers = create_conn(baseurl, conn)
525         except _all_errors as e:
526                 # ftplib.FTP(host) can raise errors like this:
527                 #   socket.error: (111, 'Connection refused')
528                 sys.stderr.write("!!! %s\n" % (e,))
529                 return {}
530
531         out = sys.stdout
532         try:
533                 metadatafile = open(_unicode_encode(metadatafilename,
534                         encoding=_encodings['fs'], errors='strict'), 'rb')
535                 mypickle = pickle.Unpickler(metadatafile)
536                 try:
537                         mypickle.find_global = None
538                 except AttributeError:
539                         # TODO: If py3k, override Unpickler.find_class().
540                         pass
541                 metadata = mypickle.load()
542                 out.write(_("Loaded metadata pickle.\n"))
543                 out.flush()
544                 metadatafile.close()
545         except (IOError, OSError, EOFError, ValueError, pickle.UnpicklingError):
546                 metadata = {}
547         if baseurl not in metadata:
548                 metadata[baseurl]={}
549         if "indexname" not in metadata[baseurl]:
550                 metadata[baseurl]["indexname"]=""
551         if "timestamp" not in metadata[baseurl]:
552                 metadata[baseurl]["timestamp"]=0
553         if "unmodified" not in metadata[baseurl]:
554                 metadata[baseurl]["unmodified"]=0
555         if "data" not in metadata[baseurl]:
556                 metadata[baseurl]["data"]={}
557
558         if not os.access(cache_path, os.W_OK):
559                 sys.stderr.write(_("!!! Unable to write binary metadata to disk!\n"))
560                 sys.stderr.write(_("!!! Permission denied: '%s'\n") % cache_path)
561                 return metadata[baseurl]["data"]
562
563         import portage.exception
564         try:
565                 filelist = dir_get_list(baseurl, conn)
566         except portage.exception.PortageException as e:
567                 sys.stderr.write(_("!!! Error connecting to '%s'.\n") % baseurl)
568                 sys.stderr.write("!!! %s\n" % str(e))
569                 del e
570                 return metadata[baseurl]["data"]
571         tbz2list = match_in_array(filelist, suffix=".tbz2")
572         metalist = match_in_array(filelist, prefix="metadata.idx")
573         del filelist
574         
575         # Determine if our metadata file is current.
576         metalist.sort()
577         metalist.reverse() # makes the order new-to-old.
578         for mfile in metalist:
579                 if usingcache and \
580                    ((metadata[baseurl]["indexname"] != mfile) or \
581                           (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
582                         # Try to download new cache until we succeed on one.
583                         data=""
584                         for trynum in [1,2,3]:
585                                 mytempfile = tempfile.TemporaryFile()
586                                 try:
587                                         file_get(baseurl+"/"+mfile, mytempfile, conn)
588                                         if mytempfile.tell() > len(data):
589                                                 mytempfile.seek(0)
590                                                 data = mytempfile.read()
591                                 except ValueError as e:
592                                         sys.stderr.write("--- "+str(e)+"\n")
593                                         if trynum < 3:
594                                                 sys.stderr.write(_("Retrying...\n"))
595                                         sys.stderr.flush()
596                                         mytempfile.close()
597                                         continue
598                                 if match_in_array([mfile],suffix=".gz"):
599                                         out.write("gzip'd\n")
600                                         out.flush()
601                                         try:
602                                                 import gzip
603                                                 mytempfile.seek(0)
604                                                 gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
605                                                 data = gzindex.read()
606                                         except SystemExit as e:
607                                                 raise
608                                         except Exception as e:
609                                                 mytempfile.close()
610                                                 sys.stderr.write(_("!!! Failed to use gzip: ")+str(e)+"\n")
611                                                 sys.stderr.flush()
612                                         mytempfile.close()
613                                 try:
614                                         metadata[baseurl]["data"] = pickle.loads(data)
615                                         del data
616                                         metadata[baseurl]["indexname"] = mfile
617                                         metadata[baseurl]["timestamp"] = int(time.time())
618                                         metadata[baseurl]["modified"]  = 0 # It's not, right after download.
619                                         out.write(_("Pickle loaded.\n"))
620                                         out.flush()
621                                         break
622                                 except SystemExit as e:
623                                         raise
624                                 except Exception as e:
625                                         sys.stderr.write(_("!!! Failed to read data from index: ")+str(mfile)+"\n")
626                                         sys.stderr.write("!!! "+str(e)+"\n")
627                                         sys.stderr.flush()
628                         try:
629                                 metadatafile = open(_unicode_encode(metadatafilename,
630                                         encoding=_encodings['fs'], errors='strict'), 'wb')
631                                 pickle.dump(metadata, metadatafile, protocol=2)
632                                 metadatafile.close()
633                         except SystemExit as e:
634                                 raise
635                         except Exception as e:
636                                 sys.stderr.write(_("!!! Failed to write binary metadata to disk!\n"))
637                                 sys.stderr.write("!!! "+str(e)+"\n")
638                                 sys.stderr.flush()
639                         break
640         # We may have metadata... now we run through the tbz2 list and check.
641
642         class CacheStats(object):
643                 from time import time
644                 def __init__(self, out):
645                         self.misses = 0
646                         self.hits = 0
647                         self.last_update = 0
648                         self.out = out
649                         self.min_display_latency = 0.2
650                 def update(self):
651                         cur_time = self.time()
652                         if cur_time - self.last_update >= self.min_display_latency:
653                                 self.last_update = cur_time
654                                 self.display()
655                 def display(self):
656                         self.out.write("\r"+colorize("WARN",
657                                 _("cache miss: '")+str(self.misses)+"'") + \
658                                 " --- "+colorize("GOOD", _("cache hit: '")+str(self.hits)+"'"))
659                         self.out.flush()
660
661         cache_stats = CacheStats(out)
662         have_tty = os.environ.get('TERM') != 'dumb' and out.isatty()
663         if have_tty:
664                 cache_stats.display()
665         binpkg_filenames = set()
666         for x in tbz2list:
667                 x = os.path.basename(x)
668                 binpkg_filenames.add(x)
669                 if x not in metadata[baseurl]["data"]:
670                         cache_stats.misses += 1
671                         if have_tty:
672                                 cache_stats.update()
673                         metadata[baseurl]["modified"] = 1
674                         myid = None
675                         for retry in range(3):
676                                 try:
677                                         myid = file_get_metadata(
678                                                 "/".join((baseurl.rstrip("/"), x.lstrip("/"))),
679                                                 conn, chunk_size)
680                                         break
681                                 except http_client_BadStatusLine:
682                                         # Sometimes this error is thrown from conn.getresponse() in
683                                         # make_http_request().  The docstring for this error in
684                                         # httplib.py says "Presumably, the server closed the
685                                         # connection before sending a valid response".
686                                         conn, protocol, address, params, headers = create_conn(
687                                                 baseurl)
688                                 except http_client_ResponseNotReady:
689                                         # With some http servers this error is known to be thrown
690                                         # from conn.getresponse() in make_http_request() when the
691                                         # remote file does not have appropriate read permissions.
692                                         # Maybe it's possible to recover from this exception in
693                                         # cases though, so retry.
694                                         conn, protocol, address, params, headers = create_conn(
695                                                 baseurl)
696
697                         if myid and myid[0]:
698                                 metadata[baseurl]["data"][x] = make_metadata_dict(myid)
699                         elif verbose:
700                                 sys.stderr.write(colorize("BAD",
701                                         _("!!! Failed to retrieve metadata on: "))+str(x)+"\n")
702                                 sys.stderr.flush()
703                 else:
704                         cache_stats.hits += 1
705                         if have_tty:
706                                 cache_stats.update()
707         cache_stats.display()
708         # Cleanse stale cache for files that don't exist on the server anymore.
709         stale_cache = set(metadata[baseurl]["data"]).difference(binpkg_filenames)
710         if stale_cache:
711                 for x in stale_cache:
712                         del metadata[baseurl]["data"][x]
713                 metadata[baseurl]["modified"] = 1
714         del stale_cache
715         del binpkg_filenames
716         out.write("\n")
717         out.flush()
718
719         try:
720                 if "modified" in metadata[baseurl] and metadata[baseurl]["modified"]:
721                         metadata[baseurl]["timestamp"] = int(time.time())
722                         metadatafile = open(_unicode_encode(metadatafilename,
723                                 encoding=_encodings['fs'], errors='strict'), 'wb')
724                         pickle.dump(metadata, metadatafile, protocol=2)
725                         metadatafile.close()
726                 if makepickle:
727                         metadatafile = open(_unicode_encode(makepickle,
728                                 encoding=_encodings['fs'], errors='strict'), 'wb')
729                         pickle.dump(metadata[baseurl]["data"], metadatafile, protocol=2)
730                         metadatafile.close()
731         except SystemExit as e:
732                 raise
733         except Exception as e:
734                 sys.stderr.write(_("!!! Failed to write binary metadata to disk!\n"))
735                 sys.stderr.write("!!! "+str(e)+"\n")
736                 sys.stderr.flush()
737
738         if not keepconnection:
739                 conn.close()
740         
741         return metadata[baseurl]["data"]
742
743 def _cmp_cpv(d1, d2):
744         cpv1 = d1["CPV"]
745         cpv2 = d2["CPV"]
746         if cpv1 > cpv2:
747                 return 1
748         elif cpv1 == cpv2:
749                 return 0
750         else:
751                 return -1
752
753 class PackageIndex(object):
754
755         def __init__(self,
756                 allowed_pkg_keys=None,
757                 default_header_data=None,
758                 default_pkg_data=None,
759                 inherited_keys=None,
760                 translated_keys=None):
761
762                 self._pkg_slot_dict = None
763                 if allowed_pkg_keys is not None:
764                         self._pkg_slot_dict = slot_dict_class(allowed_pkg_keys)
765
766                 self._default_header_data = default_header_data
767                 self._default_pkg_data = default_pkg_data
768                 self._inherited_keys = inherited_keys
769                 self._write_translation_map = {}
770                 self._read_translation_map = {}
771                 if translated_keys:
772                         self._write_translation_map.update(translated_keys)
773                         self._read_translation_map.update(((y, x) for (x, y) in translated_keys))
774                 self.header = {}
775                 if self._default_header_data:
776                         self.header.update(self._default_header_data)
777                 self.packages = []
778                 self.modified = True
779
780         def _readpkgindex(self, pkgfile, pkg_entry=True):
781
782                 allowed_keys = None
783                 if self._pkg_slot_dict is None or not pkg_entry:
784                         d = {}
785                 else:
786                         d = self._pkg_slot_dict()
787                         allowed_keys = d.allowed_keys
788
789                 for line in pkgfile:
790                         line = line.rstrip("\n")
791                         if not line:
792                                 break
793                         line = line.split(":", 1)
794                         if not len(line) == 2:
795                                 continue
796                         k, v = line
797                         if v:
798                                 v = v[1:]
799                         k = self._read_translation_map.get(k, k)
800                         if allowed_keys is not None and \
801                                 k not in allowed_keys:
802                                 continue
803                         d[k] = v
804                 return d
805
806         def _writepkgindex(self, pkgfile, items):
807                 for k, v in items:
808                         pkgfile.write("%s: %s\n" % \
809                                 (self._write_translation_map.get(k, k), v))
810                 pkgfile.write("\n")
811
812         def read(self, pkgfile):
813                 self.readHeader(pkgfile)
814                 self.readBody(pkgfile)
815
816         def readHeader(self, pkgfile):
817                 self.header.update(self._readpkgindex(pkgfile, pkg_entry=False))
818
819         def readBody(self, pkgfile):
820                 while True:
821                         d = self._readpkgindex(pkgfile)
822                         if not d:
823                                 break
824                         mycpv = d.get("CPV")
825                         if not mycpv:
826                                 continue
827                         if self._default_pkg_data:
828                                 for k, v in self._default_pkg_data.items():
829                                         d.setdefault(k, v)
830                         if self._inherited_keys:
831                                 for k in self._inherited_keys:
832                                         v = self.header.get(k)
833                                         if v is not None:
834                                                 d.setdefault(k, v)
835                         self.packages.append(d)
836
837         def write(self, pkgfile):
838                 if self.modified:
839                         self.header["TIMESTAMP"] = str(long(time.time()))
840                         self.header["PACKAGES"] = str(len(self.packages))
841                 keys = list(self.header)
842                 keys.sort()
843                 self._writepkgindex(pkgfile, [(k, self.header[k]) \
844                         for k in keys if self.header[k]])
845                 for metadata in sorted(self.packages,
846                         key=portage.util.cmp_sort_key(_cmp_cpv)):
847                         metadata = metadata.copy()
848                         cpv = metadata["CPV"]
849                         if self._inherited_keys:
850                                 for k in self._inherited_keys:
851                                         v = self.header.get(k)
852                                         if v is not None and v == metadata.get(k):
853                                                 del metadata[k]
854                         if self._default_pkg_data:
855                                 for k, v in self._default_pkg_data.items():
856                                         if metadata.get(k) == v:
857                                                 metadata.pop(k, None)
858                         keys = list(metadata)
859                         keys.sort()
860                         self._writepkgindex(pkgfile,
861                                 [(k, metadata[k]) for k in keys if metadata[k]])