# getbinpkg.py -- Portage binary-package helper functions
# Copyright 2003-2011 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

from portage.output import colorize
from portage.cache.mappings import slot_dict_class
from portage.localization import _
import portage
from portage import os
from portage import _encodings
from portage import _unicode_encode

import base64
import socket
import sys
import tempfile
import time

_all_errors = [NotImplementedError, ValueError, socket.error]

try:
	from html.parser import HTMLParser as html_parser_HTMLParser
except ImportError:
	from HTMLParser import HTMLParser as html_parser_HTMLParser

try:
	from urllib.parse import unquote as urllib_parse_unquote
except ImportError:
	from urllib2 import unquote as urllib_parse_unquote

try:
	import cPickle as pickle
except ImportError:
	import pickle

try:
	import ftplib
except ImportError as e:
	sys.stderr.write(colorize("BAD","!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
else:
	_all_errors.extend(ftplib.all_errors)

try:
	try:
		from http.client import HTTPConnection as http_client_HTTPConnection
		from http.client import BadStatusLine as http_client_BadStatusLine
		from http.client import ResponseNotReady as http_client_ResponseNotReady
		from http.client import error as http_client_error
	except ImportError:
		from httplib import HTTPConnection as http_client_HTTPConnection
		from httplib import BadStatusLine as http_client_BadStatusLine
		from httplib import ResponseNotReady as http_client_ResponseNotReady
		from httplib import error as http_client_error
except ImportError as e:
	sys.stderr.write(colorize("BAD","!!! CANNOT IMPORT HTTP.CLIENT: ")+str(e)+"\n")
else:
	_all_errors.append(http_client_error)

_all_errors = tuple(_all_errors)

if sys.hexversion >= 0x3000000:
	long = int

def make_metadata_dict(data):
	myid, myglob = data
	mydict = {}
	for x in portage.xpak.getindex_mem(myid):
		mydict[x] = portage.xpak.getitem(data,x)
	return mydict

class ParseLinks(html_parser_HTMLParser):
	"""Parser class that overrides HTMLParser to grab all anchors from an html
	page and provide suffix and prefix limiters"""
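	# For example, feeding the markup '<a href="foo-1.0.tbz2">foo</a>' to an
	# instance makes get_anchors() return ["foo-1.0.tbz2"]; href values are
	# URL-unquoted before they are stored.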
	def __init__(self):
		self.PL_anchors = []
		html_parser_HTMLParser.__init__(self)

	def get_anchors(self):
		return self.PL_anchors

	def get_anchors_by_prefix(self,prefix):
		newlist = []
		for x in self.PL_anchors:
			if x.startswith(prefix):
				if x not in newlist:
					newlist.append(x[:])
		return newlist

	def get_anchors_by_suffix(self,suffix):
		newlist = []
		for x in self.PL_anchors:
			if x.endswith(suffix):
				if x not in newlist:
					newlist.append(x[:])
		return newlist

	def handle_endtag(self,tag):
		pass

	def handle_starttag(self,tag,attrs):
		if tag == "a":
			for x in attrs:
				if x[0] == 'href':
					if x[1] not in self.PL_anchors:
						self.PL_anchors.append(urllib_parse_unquote(x[1]))

def create_conn(baseurl,conn=None):
	"""(baseurl,conn) --- Takes a protocol://site:port/address URL, and an
	optional connection. If the connection is already active, it is passed on.
	baseurl is reduced to an address and the result is returned as the tuple
	(conn, protocol, address, http_params, http_headers)."""
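	# For example (hypothetical mirror URL),
	#   create_conn("http://packages.example.org/pub/gentoo")
	# returns (HTTPConnection instance, "http", "/pub/gentoo", {}, {}),
	# logging in anonymously since no user:pass was embedded in the URL.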
	parts = baseurl.split("://",1)
	if len(parts) != 2:
		raise ValueError(_("Provided URI does not "
			"contain protocol identifier. '%s'") % baseurl)
	protocol,url_parts = parts
	del parts

	url_parts = url_parts.split("/")
	host = url_parts[0]
	if len(url_parts) < 2:
		address = "/"
	else:
		address = "/"+"/".join(url_parts[1:])
	del url_parts

	userpass_host = host.split("@",1)
	if len(userpass_host) == 1:
		host = userpass_host[0]
		userpass = ["anonymous"]
	else:
		host = userpass_host[1]
		userpass = userpass_host[0].split(":")
	del userpass_host

	if len(userpass) > 2:
		raise ValueError(_("Unable to interpret username/password provided."))
	elif len(userpass) == 2:
		username = userpass[0]
		password = userpass[1]
	elif len(userpass) == 1:
		username = userpass[0]
		password = None
	del userpass

	http_headers = {}
	http_params = {}
	if username and password:
		http_headers = {
			"Authorization": "Basic %s" %
			base64.encodestring("%s:%s" % (username, password)).replace(
				"\012", ""),
		}
	if not conn:
		if protocol == "https":
			# Use a local import since https typically isn't needed, and
			# this way we can usually avoid triggering the global scope
			# http.client ImportError handler (like during stage1 -> stage2
			# builds where USE=ssl is disabled for python).
			try:
				try:
					from http.client import HTTPSConnection as http_client_HTTPSConnection
				except ImportError:
					from httplib import HTTPSConnection as http_client_HTTPSConnection
			except ImportError:
				raise NotImplementedError(
					_("python must have ssl enabled for https support"))
			conn = http_client_HTTPSConnection(host)
		elif protocol == "http":
			conn = http_client_HTTPConnection(host)
		elif protocol == "ftp":
			passive = 1
			if host[-1] == "*":
				passive = 0
				host = host[:-1]
			conn = ftplib.FTP(host)
			if password:
				conn.login(username,password)
			else:
				sys.stderr.write(colorize("WARN",
					_(" * No password provided for username"))+" '%s'" % \
					(username,) + "\n\n")
				conn.login(username)
			conn.set_pasv(passive)
			conn.set_debuglevel(0)
		elif protocol == "sftp":
			try:
				import paramiko
			except ImportError:
				raise NotImplementedError(
					_("paramiko must be installed for sftp support"))
			t = paramiko.Transport(host)
			t.connect(username=username, password=password)
			conn = paramiko.SFTPClient.from_transport(t)
		else:
			raise NotImplementedError(_("%s is not a supported protocol.") % protocol)

	return (conn, protocol, address, http_params, http_headers)

def make_ftp_request(conn, address, rest=None, dest=None):
	"""(conn,address,rest) --- uses the conn object to request the data
	from address, issuing a REST command if rest is passed."""
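	# A negative rest is interpreted relative to the end of the file: for
	# example, rest=-3000 requests roughly the final 3000 bytes, which is how
	# file_get_metadata() fetches the trailing xpak segment of a tbz2.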
	try:
		if dest:
			fstart_pos = dest.tell()

		conn.voidcmd("TYPE I")
		fsize = conn.size(address)

		if (rest != None) and (rest < 0):
			rest = fsize+int(rest)
		if rest < 0:
			rest = 0

		if rest != None:
			mysocket = conn.transfercmd("RETR "+str(address), rest)
		else:
			mysocket = conn.transfercmd("RETR "+str(address))

		mydata = ""
		while 1:
			somedata = mysocket.recv(8192)
			if somedata:
				if dest:
					dest.write(somedata)
				else:
					mydata = mydata + somedata
			else:
				break

		if dest:
			# Number of bytes written to dest during this transfer.
			data_size = dest.tell() - fstart_pos
		else:
			data_size = len(mydata)

		mysocket.close()
		conn.voidcmd("TYPE A")

		return mydata,not (fsize==data_size),""

	except ValueError as e:
		# The first characters of the error text carry the FTP reply code.
		return None,int(str(e)[:4]),str(e)

def make_http_request(conn, address, params={}, headers={}, dest=None):
	"""(conn,address,params,headers) --- uses the conn object to request
	the data from address, performing Location forwarding and using the
	optional params and headers."""
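	# Returns a (data, rc, message) tuple: rc is 0 on success; on failure it
	# is the HTTP status code, or None when the request itself could not be
	# sent. When dest is given the body is written there instead of returned.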
	rc = 0
	response = None
	while (rc == 0) or (rc == 301) or (rc == 302):
		try:
			if rc != 0:
				conn,ignore,ignore,ignore,ignore = create_conn(address)
			conn.request("GET", address, params, headers)
		except SystemExit as e:
			raise
		except Exception as e:
			return None,None,"Server request failed: "+str(e)
		response = conn.getresponse()
		rc = response.status

		# 301 is a permanent redirect and 302 a temporary one; in both cases
		# follow the Location header and retry.
		if (rc == 301) or (rc == 302):
			ignored_data = response.read()
			del ignored_data
			for x in str(response.msg).split("\n"):
				parts = x.split(": ",1)
				if parts[0] == "Location":
					if rc == 301:
						sys.stderr.write(colorize("BAD",
							_("Location has moved: ")) + str(parts[1]) + "\n")
					if rc == 302:
						sys.stderr.write(colorize("BAD",
							_("Location has temporarily moved: ")) + \
							str(parts[1]) + "\n")
					address = parts[1]
					break

	if (rc != 200) and (rc != 206):
		return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"

	if dest:
		dest.write(response.read())
		return "",0,""

	return response.read(),0,""
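
# match_in_array() filters array by prefix and/or suffix. When both are given
# (match_both), an entry has to match the prefix as well as the suffix; unless
# allow_overlap is set, entries shorter than len(prefix)+len(suffix) are
# skipped so the two matches cannot share characters.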
def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
	myarray = []
	if not (prefix and suffix):
		match_both = 0
	for x in array:
		add_p = 0
		if prefix and (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
			add_p = 1
		if match_both:
			if prefix and not add_p: # Require both, but don't have the first one.
				continue
		else:
			if add_p: # Only need one, and we have it.
				myarray.append(x[:])
				continue
		if not allow_overlap: # Prefix and suffix may not overlap.
			if len(x) < (len(prefix)+len(suffix)):
				continue # Too short to match.
		if suffix and (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix):
			myarray.append(x) # It matches.
		else:
			continue # Doesn't match.
	return myarray

def dir_get_list(baseurl,conn=None):
	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://<site>[:port]<path>
	Connection is used for persistent connection instances."""
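	# For http/https the listing is scraped from the server's index page via
	# ParseLinks; for ftp it uses NLST and for sftp listdir(). Returns a list
	# of file names.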
	if not conn:
		keepconnection = 0
	else:
		keepconnection = 1

	conn,protocol,address,params,headers = create_conn(baseurl, conn)

	listing = None
	if protocol in ["http","https"]:
		if not address.endswith("/"):
			# http servers can return a 400 error here
			# if the address doesn't end with a slash.
			address += "/"
		page,rc,msg = make_http_request(conn,address,params,headers)
		if page:
			parser = ParseLinks()
			parser.feed(page)
			del page
			listing = parser.get_anchors()
		else:
			import portage.exception
			raise portage.exception.PortageException(
				_("Unable to get listing: %s %s") % (rc,msg))
	elif protocol in ["ftp"]:
		if address[-1] == '/':
			olddir = conn.pwd()
			conn.cwd(address)
			listing = conn.nlst()
			conn.cwd(olddir)
			del olddir
		else:
			listing = conn.nlst(address)
	elif protocol == "sftp":
		listing = conn.listdir(address)
	else:
		raise TypeError(_("Unknown protocol. '%s'") % protocol)

	if not keepconnection:
		conn.close()

	return listing

def file_get_metadata(baseurl,conn=None, chunk_size=3000):
	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://<site>[:port]<path>
	Connection is used for persistent connection instances."""
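	# Only the tail of the file is fetched: a tbz2 ends with an xpak segment
	# followed by its 4-byte size and a trailing "STOP" marker, so chunk_size
	# bytes from the end are usually enough. If the xpak turns out to be
	# larger, the call recurses with a bigger chunk_size.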
	if not conn:
		keepconnection = 0
	else:
		keepconnection = 1

	conn,protocol,address,params,headers = create_conn(baseurl, conn)

	if protocol in ["http","https"]:
		headers["Range"] = "bytes=-"+str(chunk_size)
		data,rc,msg = make_http_request(conn, address, params, headers)
	elif protocol in ["ftp"]:
		data,rc,msg = make_ftp_request(conn, address, -chunk_size)
	elif protocol == "sftp":
		f = conn.open(address)
		try:
			f.seek(-chunk_size, 2)
			data = f.read()
		finally:
			f.close()
	else:
		raise TypeError(_("Unknown protocol. '%s'") % protocol)

	if data:
		xpaksize = portage.xpak.decodeint(data[-8:-4])
		if (xpaksize+8) > chunk_size:
			myid = file_get_metadata(baseurl, conn, (xpaksize+8))
			if not keepconnection:
				conn.close()
			return myid
		else:
			xpak_data = data[len(data)-(xpaksize+8):-8]
		del data
		myid = portage.xpak.xsplit_mem(xpak_data)
		if not myid:
			myid = None,None
		del xpak_data
	else:
		myid = None,None

	if not keepconnection:
		conn.close()

	return myid

def file_get(baseurl,dest,conn=None,fcmd=None,filename=None):
	"""(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://[user[:pass]@]<site>[:port]<path>"""
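	# fcmd is a FETCHCOMMAND-style command string; ${DISTDIR}, ${URI} and
	# ${FILE} are expanded before it is spawned. Without fcmd, the transfer
	# falls back to file_get_lib() and the protocols implemented above.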
	if not fcmd:
		return file_get_lib(baseurl,dest,conn)
	if not filename:
		filename = os.path.basename(baseurl)

	variables = {
		"DISTDIR": dest,
		"URI":     baseurl,
		"FILE":    filename
	}

	from portage.util import varexpand
	from portage.process import spawn
	myfetch = portage.util.shlex_split(fcmd)
	myfetch = [varexpand(x, mydict=variables) for x in myfetch]
	fd_pipes = {
		0:sys.stdin.fileno(),
		1:sys.stdout.fileno(),
		2:sys.stdout.fileno()
	}
	retval = spawn(myfetch, env=os.environ.copy(), fd_pipes=fd_pipes)
	if retval != os.EX_OK:
		sys.stderr.write(_("Fetcher exited with a failure condition.\n"))
		return 0
	return 1

def file_get_lib(baseurl,dest,conn=None):
	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
	URI should be in the form <proto>://<site>[:port]<path>
	Connection is used for persistent connection instances."""
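	# Returns 0 on success; on failure it passes through the rc produced by
	# make_http_request()/make_ftp_request(), or 1 if an sftp open fails.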
	if not conn:
		keepconnection = 0
	else:
		keepconnection = 1

	conn,protocol,address,params,headers = create_conn(baseurl, conn)

	sys.stderr.write("Fetching '"+str(os.path.basename(address))+"'\n")
	if protocol in ["http","https"]:
		data,rc,msg = make_http_request(conn, address, params, headers, dest=dest)
	elif protocol in ["ftp"]:
		data,rc,msg = make_ftp_request(conn, address, dest=dest)
	elif protocol == "sftp":
		rc = 0
		try:
			f = conn.open(address)
		except SystemExit:
			raise
		except Exception:
			rc = 1
		else:
			try:
				if dest:
					bufsize = 8192
					while True:
						data = f.read(bufsize)
						if not data:
							break
						dest.write(data)
			finally:
				f.close()
	else:
		raise TypeError(_("Unknown protocol. '%s'") % protocol)

	if not keepconnection:
		conn.close()

	return rc

def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
	"""(baseurl,conn,chunk_size,verbose) -- Fetches remote binary-package
	metadata for baseurl, refreshing the local pickle cache as needed, and
	returns the {filename: metadata} dict for that base URL."""
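	# The on-disk cache is a pickle keyed by baseurl; each entry carries
	# "indexname", "timestamp", "unmodified" and a "data" dict that maps
	# binary-package file names to their metadata.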
	if not conn:
		keepconnection = 0
	else:
		keepconnection = 1

	cache_path = "/var/cache/edb"
	metadatafilename = os.path.join(cache_path, 'remote_metadata.pickle')

	if makepickle is None:
		makepickle = "/var/cache/edb/metadata.idx.most_recent"

	try:
		conn, protocol, address, params, headers = create_conn(baseurl, conn)
	except _all_errors as e:
		# ftplib.FTP(host) can raise errors like this:
		#   socket.error: (111, 'Connection refused')
		sys.stderr.write("!!! %s\n" % (e,))
		return {}
	out = sys.stdout
	try:
		metadatafile = open(_unicode_encode(metadatafilename,
			encoding=_encodings['fs'], errors='strict'), 'rb')
		mypickle = pickle.Unpickler(metadatafile)
		try:
			mypickle.find_global = None
		except AttributeError:
			# TODO: If py3k, override Unpickler.find_class().
			pass
		metadata = mypickle.load()
		out.write(_("Loaded metadata pickle.\n"))
		out.flush()
		metadatafile.close()
	except (IOError, OSError, EOFError, ValueError, pickle.UnpicklingError):
		metadata = {}
	if baseurl not in metadata:
		metadata[baseurl] = {}
	if "indexname" not in metadata[baseurl]:
		metadata[baseurl]["indexname"] = ""
	if "timestamp" not in metadata[baseurl]:
		metadata[baseurl]["timestamp"] = 0
	if "unmodified" not in metadata[baseurl]:
		metadata[baseurl]["unmodified"] = 0
	if "data" not in metadata[baseurl]:
		metadata[baseurl]["data"] = {}

	if not os.access(cache_path, os.W_OK):
		sys.stderr.write(_("!!! Unable to write binary metadata to disk!\n"))
		sys.stderr.write(_("!!! Permission denied: '%s'\n") % cache_path)
		return metadata[baseurl]["data"]
	import portage.exception
	try:
		filelist = dir_get_list(baseurl, conn)
	except portage.exception.PortageException as e:
		sys.stderr.write(_("!!! Error connecting to '%s'.\n") % baseurl)
		sys.stderr.write("!!! %s\n" % str(e))
		del e
		return metadata[baseurl]["data"]
	tbz2list = match_in_array(filelist, suffix=".tbz2")
	metalist = match_in_array(filelist, prefix="metadata.idx")
	del filelist
	# Determine if our metadata file is current.
	metalist.sort()
	metalist.reverse() # makes the order new-to-old.
	for mfile in metalist:
		if usingcache and \
		   ((metadata[baseurl]["indexname"] != mfile) or \
			(metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
			# Try to download a new cache until we succeed on one.
			data = ""
			for trynum in [1,2,3]:
				mytempfile = tempfile.TemporaryFile()
				try:
					file_get(baseurl+"/"+mfile, mytempfile, conn)
					if mytempfile.tell() > len(data):
						mytempfile.seek(0)
						data = mytempfile.read()
				except ValueError as e:
					sys.stderr.write("--- "+str(e)+"\n")
					if trynum < 3:
						sys.stderr.write(_("Retrying...\n"))
					sys.stderr.flush()
					mytempfile.close()
					continue

				if match_in_array([mfile],suffix=".gz"):
					out.write("gzip'd\n")
					out.flush()
					try:
						import gzip
						mytempfile.seek(0)
						gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
						data = gzindex.read()
					except SystemExit as e:
						raise
					except Exception as e:
						sys.stderr.write(_("!!! Failed to use gzip: ")+str(e)+"\n")
						sys.stderr.flush()
				mytempfile.close()

				try:
					metadata[baseurl]["data"] = pickle.loads(data)
					del data
					metadata[baseurl]["indexname"] = mfile
					metadata[baseurl]["timestamp"] = int(time.time())
					metadata[baseurl]["modified"] = 0 # It's not, right after download.
					out.write(_("Pickle loaded.\n"))
					out.flush()
					break
				except SystemExit as e:
					raise
				except Exception as e:
					sys.stderr.write(_("!!! Failed to read data from index: ")+str(mfile)+"\n")
					sys.stderr.write("!!! "+str(e)+"\n")
					sys.stderr.flush()

			try:
				metadatafile = open(_unicode_encode(metadatafilename,
					encoding=_encodings['fs'], errors='strict'), 'wb')
				pickle.dump(metadata, metadatafile, protocol=2)
				metadatafile.close()
			except SystemExit as e:
				raise
			except Exception as e:
				sys.stderr.write(_("!!! Failed to write binary metadata to disk!\n"))
				sys.stderr.write("!!! "+str(e)+"\n")
				sys.stderr.flush()
			break
	# We may have metadata... now we run through the tbz2 list and check.

	class CacheStats(object):
		from time import time
		def __init__(self, out):
			self.misses = 0
			self.hits = 0
			self.last_update = 0
			self.out = out
			self.min_display_latency = 0.2
		def update(self):
			cur_time = self.time()
			if cur_time - self.last_update >= self.min_display_latency:
				self.last_update = cur_time
				self.display()
		def display(self):
			self.out.write("\r"+colorize("WARN",
				_("cache miss: '")+str(self.misses)+"'") + \
				" --- "+colorize("GOOD", _("cache hit: '")+str(self.hits)+"'"))
			self.out.flush()
	cache_stats = CacheStats(out)
	have_tty = os.environ.get('TERM') != 'dumb' and out.isatty()
	if have_tty:
		cache_stats.display()
	binpkg_filenames = set()
	for x in tbz2list:
		x = os.path.basename(x)
		binpkg_filenames.add(x)
		if x not in metadata[baseurl]["data"]:
			cache_stats.misses += 1
			if have_tty:
				cache_stats.update()
			metadata[baseurl]["modified"] = 1
			myid = None
			for retry in range(3):
				try:
					myid = file_get_metadata(
						"/".join((baseurl.rstrip("/"), x.lstrip("/"))),
						conn, chunk_size)
					break
				except http_client_BadStatusLine:
					# Sometimes this error is thrown from conn.getresponse() in
					# make_http_request(). The docstring for this error in
					# httplib.py says "Presumably, the server closed the
					# connection before sending a valid response".
					conn, protocol, address, params, headers = create_conn(
						baseurl)
				except http_client_ResponseNotReady:
					# With some http servers this error is known to be thrown
					# from conn.getresponse() in make_http_request() when the
					# remote file does not have appropriate read permissions.
					# Maybe it's possible to recover from this exception in
					# some cases though, so retry.
					conn, protocol, address, params, headers = create_conn(
						baseurl)

			if myid and myid[0]:
				metadata[baseurl]["data"][x] = make_metadata_dict(myid)
			elif verbose:
				sys.stderr.write(colorize("BAD",
					_("!!! Failed to retrieve metadata on: "))+str(x)+"\n")
				sys.stderr.flush()
		else:
			cache_stats.hits += 1
			if have_tty:
				cache_stats.update()
	cache_stats.display()
	# Cleanse stale cache for files that don't exist on the server anymore.
	stale_cache = set(metadata[baseurl]["data"]).difference(binpkg_filenames)
	if stale_cache:
		for x in stale_cache:
			del metadata[baseurl]["data"][x]
		metadata[baseurl]["modified"] = 1
	del stale_cache
	del binpkg_filenames
	out.write("\n")
	out.flush()

	try:
		if "modified" in metadata[baseurl] and metadata[baseurl]["modified"]:
			metadata[baseurl]["timestamp"] = int(time.time())
			metadatafile = open(_unicode_encode(metadatafilename,
				encoding=_encodings['fs'], errors='strict'), 'wb')
			pickle.dump(metadata, metadatafile, protocol=2)
			metadatafile.close()
		if makepickle:
			metadatafile = open(_unicode_encode(makepickle,
				encoding=_encodings['fs'], errors='strict'), 'wb')
			pickle.dump(metadata[baseurl]["data"], metadatafile, protocol=2)
			metadatafile.close()
	except SystemExit as e:
		raise
	except Exception as e:
		sys.stderr.write(_("!!! Failed to write binary metadata to disk!\n"))
		sys.stderr.write("!!! "+str(e)+"\n")
		sys.stderr.flush()

	if not keepconnection:
		conn.close()

	return metadata[baseurl]["data"]

def _cmp_cpv(d1, d2):
	# cmp-style comparator used to sort package entries by their CPV string.
	cpv1 = d1["CPV"]
	cpv2 = d2["CPV"]
	if cpv1 > cpv2:
		return 1
	elif cpv1 == cpv2:
		return 0
	else:
		return -1

class PackageIndex(object):

	def __init__(self,
		allowed_pkg_keys=None,
		default_header_data=None,
		default_pkg_data=None,
		inherited_keys=None,
		translated_keys=None):

		self._pkg_slot_dict = None
		if allowed_pkg_keys is not None:
			self._pkg_slot_dict = slot_dict_class(allowed_pkg_keys)

		self._default_header_data = default_header_data
		self._default_pkg_data = default_pkg_data
		self._inherited_keys = inherited_keys
		self._write_translation_map = {}
		self._read_translation_map = {}
		if translated_keys:
			self._write_translation_map.update(translated_keys)
			self._read_translation_map.update(((y, x) for (x, y) in translated_keys))
		self.header = {}
		if self._default_header_data:
			self.header.update(self._default_header_data)
		self.packages = []
		self.modified = True
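
	# The index handled by the methods below is a sequence of "KEY: value"
	# stanzas separated by blank lines: a header stanza first, then one
	# stanza per package (the layout used by $PKGDIR/Packages).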
	def _readpkgindex(self, pkgfile, pkg_entry=True):
		allowed_keys = None
		if self._pkg_slot_dict is None or not pkg_entry:
			d = {}
		else:
			d = self._pkg_slot_dict()
			allowed_keys = d.allowed_keys

		for line in pkgfile:
			line = line.rstrip("\n")
			if not line:
				break
			line = line.split(":", 1)
			if not len(line) == 2:
				continue
			k, v = line
			if v:
				v = v[1:]
			k = self._read_translation_map.get(k, k)
			if allowed_keys is not None and \
				k not in allowed_keys:
				continue
			d[k] = v
		return d

	def _writepkgindex(self, pkgfile, items):
		for k, v in items:
			pkgfile.write("%s: %s\n" % \
				(self._write_translation_map.get(k, k), v))
		pkgfile.write("\n")

	def read(self, pkgfile):
		self.readHeader(pkgfile)
		self.readBody(pkgfile)

	def readHeader(self, pkgfile):
		self.header.update(self._readpkgindex(pkgfile, pkg_entry=False))

	def readBody(self, pkgfile):
		while True:
			d = self._readpkgindex(pkgfile)
			if not d:
				break
			mycpv = d.get("CPV")
			if not mycpv:
				continue
			if self._default_pkg_data:
				for k, v in self._default_pkg_data.items():
					d.setdefault(k, v)
			if self._inherited_keys:
				for k in self._inherited_keys:
					v = self.header.get(k)
					if v is not None:
						d.setdefault(k, v)
			self.packages.append(d)

	def write(self, pkgfile):
		if self.modified:
			self.header["TIMESTAMP"] = str(long(time.time()))
			self.header["PACKAGES"] = str(len(self.packages))
		keys = list(self.header)
		keys.sort()
		self._writepkgindex(pkgfile, [(k, self.header[k]) \
			for k in keys if self.header[k]])
		for metadata in sorted(self.packages,
			key=portage.util.cmp_sort_key(_cmp_cpv)):
			metadata = metadata.copy()
			cpv = metadata["CPV"]
			if self._inherited_keys:
				for k in self._inherited_keys:
					v = self.header.get(k)
					if v is not None and v == metadata.get(k):
						del metadata[k]
			if self._default_pkg_data:
				for k, v in self._default_pkg_data.items():
					if metadata.get(k) == v:
						metadata.pop(k, None)
			keys = list(metadata)
			keys.sort()
			self._writepkgindex(pkgfile,
				[(k, metadata[k]) for k in keys if metadata[k]])