From 7c6598c12e2c34794b17be093ee0a80bceb3fce0 Mon Sep 17 00:00:00 2001 From: Mark Florisson Date: Thu, 25 Nov 2010 21:54:36 +0100 Subject: [PATCH] Merged with libpython.py from Python3.2 + prevented continuation with negative pointer values gdb was giving to avoid segfaulting the inferior when using 'cy exec' --- Cython/Debugger/Cygdb.py | 4 +- Cython/Debugger/libcython.py | 6 +- Cython/Debugger/libpython.py | 274 +++++++++++++++++++++++++++++++---- 3 files changed, 251 insertions(+), 33 deletions(-) diff --git a/Cython/Debugger/Cygdb.py b/Cython/Debugger/Cygdb.py index 6288fe64..a5683a30 100644 --- a/Cython/Debugger/Cygdb.py +++ b/Cython/Debugger/Cygdb.py @@ -42,7 +42,9 @@ def make_command_file(path_to_debug_info, prefix_code='', no_import=False): f.write('python from Cython.Debugger import libcython\n') if no_import: - f.write("file %s\n" % sys.executable) + # don't do this, this overrides file command in .gdbinit + # f.write("file %s\n" % sys.executable) + pass else: path = os.path.join(path_to_debug_info, "cython_debug", "interpreter") interpreter = open(path).read() diff --git a/Cython/Debugger/libcython.py b/Cython/Debugger/libcython.py index fe5f463a..d5e17c93 100644 --- a/Cython/Debugger/libcython.py +++ b/Cython/Debugger/libcython.py @@ -1152,9 +1152,9 @@ class CyExec(CythonCommand, libpython.PyExec): pystringp = executor.alloc_pystring(name) code = ''' - PyDict_SetItem( - (PyObject *) %d, - (PyObject *) %d, + (PyObject *) PyDict_SetItem( + (PyObject *) %d, + (PyObject *) %d, (PyObject *) %s) ''' % (local_dict_pointer, pystringp, cyvar.cname) diff --git a/Cython/Debugger/libpython.py b/Cython/Debugger/libpython.py index 5e5fb9a4..653ee219 100644 --- a/Cython/Debugger/libpython.py +++ b/Cython/Debugger/libpython.py @@ -48,6 +48,7 @@ from __future__ import with_statement import os import re import sys +import locale import atexit import warnings import tempfile @@ -70,6 +71,7 @@ Py_TPFLAGS_LONG_SUBCLASS = (1L << 24) Py_TPFLAGS_LIST_SUBCLASS = (1L << 25) 
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26) Py_TPFLAGS_STRING_SUBCLASS = (1L << 27) +Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27) Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28) Py_TPFLAGS_DICT_SUBCLASS = (1L << 29) Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30) @@ -78,6 +80,10 @@ Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31) MAX_OUTPUT_LEN=1024 +hexdigits = "0123456789abcdef" + +ENCODING = locale.getpreferredencoding() + class NullPyObjectPtr(RuntimeError): pass @@ -94,6 +100,30 @@ def safe_range(val): # threshold in case the data was corrupted return xrange(safety_limit(val)) +def write_unicode(file, text): + # Write a byte or unicode string to file. Unicode strings are encoded to + # ENCODING encoding with 'backslashreplace' error handler to avoid + # UnicodeEncodeError. + if isinstance(text, unicode): + text = text.encode(ENCODING, 'backslashreplace') + file.write(text) + +def os_fsencode(filename): + if not isinstance(filename, unicode): + return filename + encoding = sys.getfilesystemencoding() + if encoding == 'mbcs': + # mbcs doesn't support surrogateescape + return filename.encode(encoding) + encoded = [] + for char in filename: + # surrogateescape error handler + if 0xDC80 <= ord(char) <= 0xDCFF: + byte = chr(ord(char) - 0xDC00) + else: + byte = char.encode(encoding) + encoded.append(byte) + return ''.join(encoded) class StringTruncated(RuntimeError): pass @@ -160,16 +190,9 @@ class PyObjectPtr(object): return pyo_ptr.dereference()[name] if name == 'ob_size': - try: - # Python 2: - return self._gdbval.dereference()[name] - except RuntimeError: - # Python 3: - try: - return self._gdbval.dereference()['ob_base'][name] - except RuntimeError: - return 0 - + pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) + return pyo_ptr.dereference()[name] + # General case: look it up inside the object: return self._gdbval.dereference()[name] @@ -370,6 +393,8 @@ class PyObjectPtr(object): def as_address(self): return long(self._gdbval) +class PyVarObjectPtr(PyObjectPtr): + _typename = 
'PyVarObject' class ProxyAlreadyVisited(object): ''' @@ -785,6 +810,22 @@ class PyLongObjectPtr(PyObjectPtr): result = -result return result + def write_repr(self, out, visited): + # Write this out as a Python 3 int literal, i.e. without the "L" suffix + proxy = self.proxyval(visited) + out.write("%s" % proxy) + + +class PyBoolObjectPtr(PyLongObjectPtr): + """ + Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two + instances (Py_True/Py_False) within the process being debugged. + """ + def proxyval(self, visited): + if PyLongObjectPtr.proxyval(self, visited): + return True + else: + return False class PyNoneStructPtr(PyObjectPtr): """ @@ -898,7 +939,8 @@ class PyFrameObjectPtr(PyObjectPtr): newline character''' if self.is_optimized_out(): return '(frame information optimized out)' - with open(self.filename(), 'r') as f: + filename = self.filename() + with open(os_fsencode(filename), 'r') as f: all_lines = f.readlines() # Convert from 1-based current_line_num to 0-based list offset: return all_lines[self.current_line_num()-1] @@ -909,9 +951,9 @@ class PyFrameObjectPtr(PyObjectPtr): return out.write('Frame 0x%x, for file %s, line %i, in %s (' % (self.as_address(), - self.co_filename, + self.co_filename.proxyval(visited), self.current_line_num(), - self.co_name)) + self.co_name.proxyval(visited))) first = True for pyop_name, pyop_value in self.iter_locals(): if not first: @@ -948,7 +990,8 @@ class PySetObjectPtr(PyObjectPtr): return set(members) def write_repr(self, out, visited): - out.write(self.safe_tp_name()) + # Emulate Python 3's set_repr + tp_name = self.safe_tp_name() # Guard against infinite loops: if self.as_address() in visited: @@ -956,7 +999,18 @@ class PySetObjectPtr(PyObjectPtr): return visited.add(self.as_address()) - out.write('([') + # Python 3's set_repr special-cases the empty set: + if not self.field('used'): + out.write(tp_name) + out.write('()') + return + + # Python 3 uses {} for set literals: + if tp_name != 'set': + 
out.write(tp_name) + out.write('(') + + out.write('{') first = True table = self.field('table') for i in safe_range(self.field('mask')+1): @@ -970,23 +1024,55 @@ class PySetObjectPtr(PyObjectPtr): out.write(', ') first = False pyop_key.write_repr(out, visited) - out.write('])') + out.write('}') + if tp_name != 'set': + out.write(')') -class PyStringObjectPtr(PyObjectPtr): - _typename = 'PyStringObject' + +class PyBytesObjectPtr(PyObjectPtr): + _typename = 'PyBytesObject' def __str__(self): field_ob_size = self.field('ob_size') field_ob_sval = self.field('ob_sval') - char_ptr = field_ob_sval.address.cast( - gdb.lookup_type('char').pointer()) - + char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr) return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) def proxyval(self, visited): return str(self) + def write_repr(self, out, visited): + # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix + + # Get a PyStringObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr + # to Python 2 code: + quote = "'" + if "'" in proxy and not '"' in proxy: + quote = '"' + out.write('b') + out.write(quote) + for byte in proxy: + if byte == quote or byte == '\\': + out.write('\\') + out.write(byte) + elif byte == '\t': + out.write('\\t') + elif byte == '\n': + out.write('\\n') + elif byte == '\r': + out.write('\\r') + elif byte < ' ' or ord(byte) >= 0x7f: + out.write('\\x') + out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) + out.write(hexdigits[ord(byte) & 0xf]) + else: + out.write(byte) + out.write(quote) + class PyTupleObjectPtr(PyObjectPtr): _typename = 'PyTupleObject' @@ -1027,6 +1113,13 @@ class PyTypeObjectPtr(PyObjectPtr): _typename = 'PyTypeObject' +def _unichr_is_printable(char): + # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py + if char == u" ": + return True + import unicodedata + return unicodedata.category(char)[0] not in ("C", "Z") +
if sys.maxunicode >= 0x10000: _unichr = unichr else: @@ -1085,6 +1178,116 @@ class PyUnicodeObjectPtr(PyObjectPtr): result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs]) return result + def write_repr(self, out, visited): + # Write this out as a Python 3 str literal, i.e. without a "u" prefix + + # Get a PyUnicodeObject* within the Python 2 gdb process: + proxy = self.proxyval(visited) + + # Transliteration of Python 3's Objects/unicodeobject.c:unicode_repr + # to Python 2: + if "'" in proxy and '"' not in proxy: + quote = '"' + else: + quote = "'" + out.write(quote) + + i = 0 + while i < len(proxy): + ch = proxy[i] + i += 1 + + # Escape quotes and backslashes + if ch == quote or ch == '\\': + out.write('\\') + out.write(ch) + + # Map special whitespace to '\t', '\n', '\r' + elif ch == '\t': + out.write('\\t') + elif ch == '\n': + out.write('\\n') + elif ch == '\r': + out.write('\\r') + + # Map non-printable US ASCII to '\xhh' + elif ch < ' ' or ord(ch) == 0x7F: + out.write('\\x') + out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) + out.write(hexdigits[ord(ch) & 0x000F]) + + # Copy ASCII characters as-is + elif ord(ch) < 0x7F: + out.write(ch) + + # Non-ASCII characters + else: + ucs = ch + ch2 = None + if sys.maxunicode < 0x10000: + # If sizeof(Py_UNICODE) is 2 here (in gdb), join + # surrogate pairs before calling _unichr_is_printable. + if (i < len(proxy) + and 0xD800 <= ord(ch) < 0xDC00 \ + and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): + ch2 = proxy[i] + ucs = ch + ch2 + i += 1 + + # Unfortunately, Python 2's unicode type doesn't seem + # to expose the "isprintable" method + printable = _unichr_is_printable(ucs) + if printable: + try: + ucs.encode(ENCODING) + except UnicodeEncodeError: + printable = False + + # Map Unicode whitespace and control characters + # (categories Z* and C* except ASCII space) + if not printable: + if ch2 is not None: + # Match Python 3's representation of non-printable + # wide characters.
+ code = (ord(ch) & 0x03FF) << 10 + code |= ord(ch2) & 0x03FF + code += 0x00010000 + else: + code = ord(ucs) + + # Map 8-bit characters to '\\xhh' + if code <= 0xff: + out.write('\\x') + out.write(hexdigits[(code >> 4) & 0x000F]) + out.write(hexdigits[code & 0x000F]) + # Map 21-bit characters to '\U00xxxxxx' + elif code >= 0x10000: + out.write('\\U') + out.write(hexdigits[(code >> 28) & 0x0000000F]) + out.write(hexdigits[(code >> 24) & 0x0000000F]) + out.write(hexdigits[(code >> 20) & 0x0000000F]) + out.write(hexdigits[(code >> 16) & 0x0000000F]) + out.write(hexdigits[(code >> 12) & 0x0000000F]) + out.write(hexdigits[(code >> 8) & 0x0000000F]) + out.write(hexdigits[(code >> 4) & 0x0000000F]) + out.write(hexdigits[code & 0x0000000F]) + # Map 16-bit characters to '\uxxxx' + else: + out.write('\\u') + out.write(hexdigits[(code >> 12) & 0x000F]) + out.write(hexdigits[(code >> 8) & 0x000F]) + out.write(hexdigits[(code >> 4) & 0x000F]) + out.write(hexdigits[code & 0x000F]) + else: + # Copy characters as-is + out.write(ch) + if ch2 is not None: + out.write(ch2) + + out.write(quote) + + + def int_from_int(gdbval): return int(str(gdbval)) @@ -1254,7 +1457,8 @@ class Frame(object): if self.is_evalframeex(): pyop = self.get_pyop() if pyop: - sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN))) + line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) + write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) sys.stdout.write(pyop.current_line()) else: sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) @@ -1315,7 +1519,7 @@ class PyList(gdb.Command): if start<1: start = 1 - with open(filename, 'r') as f: + with open(os_fsencode(filename), 'r') as f: all_lines = f.readlines() # start and end are 1-based, all_lines is 0-based; # so [start-1:end] as a python slice gives us [start, end] as a @@ -1487,8 +1691,7 @@ class PyNameEquals(gdb.Function): pyframe = frame.get_pyop() if pyframe is None: return None - - 
return str(getattr(pyframe, attr)) + return str(getattr(pyframe, attr)) return None @@ -1922,7 +2125,7 @@ Py_single_input = 256 Py_file_input = 257 Py_eval_input = 258 -def pointervalue(gdbval): +def _pointervalue(gdbval): """ Return the value of the pionter as a Python int. @@ -1936,11 +2139,23 @@ def pointervalue(gdbval): # still convert the pointer to an int return long(gdbval) +def pointervalue(gdbval): + pointer = _pointervalue(gdbval) + try: + if pointer < 0: + raise gdb.GdbError("Negative pointer value, presumably a bug " + "in gdb, aborting.") + except RuntimeError: + # work around yet another bug in gdb where you get random behaviour + # and tracebacks + pass + + return pointer class PythonCodeExecutor(object): def malloc(self, size): - chunk = (gdb.parse_and_eval("(void *) malloc(%d)" % size)) + chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size)) pointer = pointervalue(chunk) if pointer == 0: @@ -1958,8 +2173,9 @@ class PythonCodeExecutor(object): stringp = self.alloc_string(string) try: result = gdb.parse_and_eval( - 'PyString_FromStringAndSize((char *) %d, (size_t) %d)' % - (stringp, len(string))) + '(PyObject *) PyString_FromStringAndSize(' + '(char *) %d,' + '(size_t) %d)' % (stringp, len(string))) finally: self.free(stringp) -- 2.26.2