import os
import re
import sys
+import locale
import atexit
import warnings
import tempfile
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Py_TPFLAGS_STRING_SUBCLASS = (1L << 27)
+Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
MAX_OUTPUT_LEN=1024
+hexdigits = "0123456789abcdef"
+
+ENCODING = locale.getpreferredencoding()
+
class NullPyObjectPtr(RuntimeError):
pass
# threshold in case the data was corrupted
return xrange(safety_limit(val))
+def write_unicode(file, text):
+ # Write a byte or unicode string to file. Unicode strings are encoded to
+ # ENCODING encoding with 'backslashreplace' error handler to avoid
+ # UnicodeEncodeError.
+ if isinstance(text, unicode):
+ text = text.encode(ENCODING, 'backslashreplace')
+ file.write(text)
+
+def os_fsencode(filename):
+ if not isinstance(filename, unicode):
+ return filename
+ encoding = sys.getfilesystemencoding()
+ if encoding == 'mbcs':
+ # mbcs doesn't support surrogateescape
+ return filename.encode(encoding)
+ encoded = []
+ for char in filename:
+ # surrogateescape error handler
+ if 0xDC80 <= ord(char) <= 0xDCFF:
+ byte = chr(ord(char) - 0xDC00)
+ else:
+ byte = char.encode(encoding)
+ encoded.append(byte)
+ return ''.join(encoded)
class StringTruncated(RuntimeError):
pass
return pyo_ptr.dereference()[name]
if name == 'ob_size':
- try:
- # Python 2:
- return self._gdbval.dereference()[name]
- except RuntimeError:
- # Python 3:
- try:
- return self._gdbval.dereference()['ob_base'][name]
- except RuntimeError:
- return 0
-
+ pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+ return pyo_ptr.dereference()[name]
+
# General case: look it up inside the object:
return self._gdbval.dereference()[name]
def as_address(self):
return long(self._gdbval)
+class PyVarObjectPtr(PyObjectPtr):
+ _typename = 'PyVarObject'
class ProxyAlreadyVisited(object):
'''
result = -result
return result
+ def write_repr(self, out, visited):
+ # Write this out as a Python 3 int literal, i.e. without the "L" suffix
+ proxy = self.proxyval(visited)
+ out.write("%s" % proxy)
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+ """
+ Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+ <bool> instances (Py_True/Py_False) within the process being debugged.
+ """
+ def proxyval(self, visited):
+ if PyLongObjectPtr.proxyval(self, visited):
+ return True
+ else:
+ return False
class PyNoneStructPtr(PyObjectPtr):
"""
newline character'''
if self.is_optimized_out():
return '(frame information optimized out)'
- with open(self.filename(), 'r') as f:
+ filename = self.filename()
+ with open(os_fsencode(filename), 'r') as f:
all_lines = f.readlines()
# Convert from 1-based current_line_num to 0-based list offset:
return all_lines[self.current_line_num()-1]
return
out.write('Frame 0x%x, for file %s, line %i, in %s ('
% (self.as_address(),
- self.co_filename,
+ self.co_filename.proxyval(visited),
self.current_line_num(),
- self.co_name))
+ self.co_name.proxyval(visited)))
first = True
for pyop_name, pyop_value in self.iter_locals():
if not first:
return set(members)
def write_repr(self, out, visited):
- out.write(self.safe_tp_name())
+ # Emulate Python 3's set_repr
+ tp_name = self.safe_tp_name()
# Guard against infinite loops:
if self.as_address() in visited:
return
visited.add(self.as_address())
- out.write('([')
+ # Python 3's set_repr special-cases the empty set:
+ if not self.field('used'):
+ out.write(tp_name)
+ out.write('()')
+ return
+
+ # Python 3 uses {} for set literals:
+ if tp_name != 'set':
+ out.write(tp_name)
+ out.write('(')
+
+ out.write('{')
first = True
table = self.field('table')
for i in safe_range(self.field('mask')+1):
out.write(', ')
first = False
pyop_key.write_repr(out, visited)
- out.write('])')
+ out.write('}')
+ if tp_name != 'set':
+ out.write(')')
-class PyStringObjectPtr(PyObjectPtr):
- _typename = 'PyStringObject'
+
+class PyBytesObjectPtr(PyObjectPtr):
+ _typename = 'PyBytesObject'
def __str__(self):
field_ob_size = self.field('ob_size')
field_ob_sval = self.field('ob_sval')
- char_ptr = field_ob_sval.address.cast(
- gdb.lookup_type('char').pointer())
-
+ char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr)
return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])
def proxyval(self, visited):
return str(self)
+ def write_repr(self, out, visited):
+ # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
+
+ # Get a PyStringObject* within the Python 2 gdb process:
+ proxy = self.proxyval(visited)
+
+ # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
+ # to Python 2 code:
+ quote = "'"
+ if "'" in proxy and not '"' in proxy:
+ quote = '"'
+ out.write('b')
+ out.write(quote)
+ for byte in proxy:
+ if byte == quote or byte == '\\':
+ out.write('\\')
+ out.write(byte)
+ elif byte == '\t':
+ out.write('\\t')
+ elif byte == '\n':
+ out.write('\\n')
+ elif byte == '\r':
+ out.write('\\r')
+ elif byte < ' ' or ord(byte) >= 0x7f:
+ out.write('\\x')
+ out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
+ out.write(hexdigits[ord(byte) & 0xf])
+ else:
+ out.write(byte)
+ out.write(quote)
+
class PyTupleObjectPtr(PyObjectPtr):
_typename = 'PyTupleObject'
_typename = 'PyTypeObject'
+def _unichr_is_printable(char):
+ # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
+ if char == u" ":
+ return True
+ import unicodedata
+ return unicodedata.category(char) not in ("C", "Z")
+
if sys.maxunicode >= 0x10000:
_unichr = unichr
else:
result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
+ def write_repr(self, out, visited):
+ # Write this out as a Python 3 str literal, i.e. without a "u" prefix
+
+ # Get a PyUnicodeObject* within the Python 2 gdb process:
+ proxy = self.proxyval(visited)
+
+ # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
+ # to Python 2:
+ if "'" in proxy and '"' not in proxy:
+ quote = '"'
+ else:
+ quote = "'"
+ out.write(quote)
+
+ i = 0
+ while i < len(proxy):
+ ch = proxy[i]
+ i += 1
+
+ # Escape quotes and backslashes
+ if ch == quote or ch == '\\':
+ out.write('\\')
+ out.write(ch)
+
+ # Map special whitespace to '\t', \n', '\r'
+ elif ch == '\t':
+ out.write('\\t')
+ elif ch == '\n':
+ out.write('\\n')
+ elif ch == '\r':
+ out.write('\\r')
+
+ # Map non-printable US ASCII to '\xhh' */
+ elif ch < ' ' or ch == 0x7F:
+ out.write('\\x')
+ out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
+ out.write(hexdigits[ord(ch) & 0x000F])
+
+ # Copy ASCII characters as-is
+ elif ord(ch) < 0x7F:
+ out.write(ch)
+
+ # Non-ASCII characters
+ else:
+ ucs = ch
+ ch2 = None
+ if sys.maxunicode < 0x10000:
+ # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+ # surrogate pairs before calling _unichr_is_printable.
+ if (i < len(proxy)
+ and 0xD800 <= ord(ch) < 0xDC00 \
+ and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+ ch2 = proxy[i]
+ ucs = ch + ch2
+ i += 1
+
+ # Unfortuately, Python 2's unicode type doesn't seem
+ # to expose the "isprintable" method
+ printable = _unichr_is_printable(ucs)
+ if printable:
+ try:
+ ucs.encode(ENCODING)
+ except UnicodeEncodeError:
+ printable = False
+
+ # Map Unicode whitespace and control characters
+ # (categories Z* and C* except ASCII space)
+ if not printable:
+ if ch2 is not None:
+ # Match Python 3's representation of non-printable
+ # wide characters.
+ code = (ord(ch) & 0x03FF) << 10
+ code |= ord(ch2) & 0x03FF
+ code += 0x00010000
+ else:
+ code = ord(ucs)
+
+ # Map 8-bit characters to '\\xhh'
+ if code <= 0xff:
+ out.write('\\x')
+ out.write(hexdigits[(code >> 4) & 0x000F])
+ out.write(hexdigits[code & 0x000F])
+ # Map 21-bit characters to '\U00xxxxxx'
+ elif code >= 0x10000:
+ out.write('\\U')
+ out.write(hexdigits[(code >> 28) & 0x0000000F])
+ out.write(hexdigits[(code >> 24) & 0x0000000F])
+ out.write(hexdigits[(code >> 20) & 0x0000000F])
+ out.write(hexdigits[(code >> 16) & 0x0000000F])
+ out.write(hexdigits[(code >> 12) & 0x0000000F])
+ out.write(hexdigits[(code >> 8) & 0x0000000F])
+ out.write(hexdigits[(code >> 4) & 0x0000000F])
+ out.write(hexdigits[code & 0x0000000F])
+ # Map 16-bit characters to '\uxxxx'
+ else:
+ out.write('\\u')
+ out.write(hexdigits[(code >> 12) & 0x000F])
+ out.write(hexdigits[(code >> 8) & 0x000F])
+ out.write(hexdigits[(code >> 4) & 0x000F])
+ out.write(hexdigits[code & 0x000F])
+ else:
+ # Copy characters as-is
+ out.write(ch)
+ if ch2 is not None:
+ out.write(ch2)
+
+ out.write(quote)
+
+
+
def int_from_int(gdbval):
return int(str(gdbval))
if self.is_evalframeex():
pyop = self.get_pyop()
if pyop:
- sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
+ line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+ write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
sys.stdout.write(pyop.current_line())
else:
sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
if start<1:
start = 1
- with open(filename, 'r') as f:
+ with open(os_fsencode(filename), 'r') as f:
all_lines = f.readlines()
# start and end are 1-based, all_lines is 0-based;
# so [start-1:end] as a python slice gives us [start, end] as a
pyframe = frame.get_pyop()
if pyframe is None:
return None
-
- return str(getattr(pyframe, attr))
+ val = str(getattr(pyframe, attr)); print val, val.proxyval(set()); return val
return None
Py_file_input = 257
Py_eval_input = 258
-def pointervalue(gdbval):
+def _pointervalue(gdbval):
"""
Return the value of the pionter as a Python int.
# still convert the pointer to an int
return long(gdbval)
+def pointervalue(gdbval):
+ pointer = _pointervalue(gdbval)
+ try:
+ if pointer < 0:
+ raise gdb.GdbError("Negative pointer value, presumably a bug "
+ "in gdb, aborting.")
+ except RuntimeError:
+ # work around yet another bug in gdb where you get random behaviour
+ # and tracebacks
+ pass
+
+ return pointer
class PythonCodeExecutor(object):
def malloc(self, size):
- chunk = (gdb.parse_and_eval("(void *) malloc(%d)" % size))
+ chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
pointer = pointervalue(chunk)
if pointer == 0:
stringp = self.alloc_string(string)
try:
result = gdb.parse_and_eval(
- 'PyString_FromStringAndSize((char *) %d, (size_t) %d)' %
- (stringp, len(string)))
+ '(PyObject *) PyString_FromStringAndSize('
+ '(char *) %d,'
+ '(size_t) %d)' % (stringp, len(string)))
finally:
self.free(stringp)