Merged with libpython.py from Python3.2 + prevented continuation with negative pointe...

author Mark Florisson <markflorisson88@gmail.com>

Thu, 25 Nov 2010 20:54:36 +0000 (21:54 +0100)

committer Mark Florisson <markflorisson88@gmail.com>

Thu, 25 Nov 2010 20:54:36 +0000 (21:54 +0100)
author Mark Florisson <markflorisson88@gmail.com>
Thu, 25 Nov 2010 20:54:36 +0000 (21:54 +0100)
committer Mark Florisson <markflorisson88@gmail.com>
Thu, 25 Nov 2010 20:54:36 +0000 (21:54 +0100)
diff --git a/Cython/Debugger/Cygdb.py b/Cython/Debugger/Cygdb.py

index 6288fe64190be6b742ffe3ae13c1cfa68d2222bf..a5683a3068dc7aa7d0a33c6f145389661263ea2d 100644 (file)
--- a/Cython/Debugger/Cygdb.py
+++ b/Cython/Debugger/Cygdb.py
@@ -42,7 +42,9 @@ def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
      f.write('python from Cython.Debugger import libcython\n')
      
      if no_import:
-        f.write("file %s\n" % sys.executable)
+        # don't do this, this overrides file command in .gdbinit
+        # f.write("file %s\n" % sys.executable)
+        pass
      else:
          path = os.path.join(path_to_debug_info, "cython_debug", "interpreter")
          interpreter = open(path).read()
diff --git a/Cython/Debugger/libcython.py b/Cython/Debugger/libcython.py

index fe5f463a9376db9fe87c2691b9a4ad1ae52fa77a..d5e17c93b43b4224a8a89781d2721bf1d1fe094d 100644 (file)
--- a/Cython/Debugger/libcython.py
+++ b/Cython/Debugger/libcython.py
@@ -1152,9 +1152,9 @@ class CyExec(CythonCommand, libpython.PyExec):
                  
                  pystringp = executor.alloc_pystring(name)
                  code = '''
-                    PyDict_SetItem(
-                        (PyObject *) %d, 
-                        (PyObject *) %d, 
+                    (PyObject *) PyDict_SetItem(
+                        (PyObject *) %d,
+                        (PyObject *) %d,
                          (PyObject *) %s)
                  ''' % (local_dict_pointer, pystringp, cyvar.cname)
  
diff --git a/Cython/Debugger/libpython.py b/Cython/Debugger/libpython.py

index 5e5fb9a4091b0a058159ba4b2c59021db7e762b6..653ee21993c9bdfdac40c3be2caaf4b79314ea0b 100644 (file)
--- a/Cython/Debugger/libpython.py
+++ b/Cython/Debugger/libpython.py
@@ -48,6 +48,7 @@ from __future__ import with_statement
  import os
  import re
  import sys
+import locale
  import atexit
  import warnings
  import tempfile
@@ -70,6 +71,7 @@ Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
  Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
  Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
  Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
+Py_TPFLAGS_BYTES_SUBCLASS    = (1L << 27)
  Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
  Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
  Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
@@ -78,6 +80,10 @@ Py_TPFLAGS_TYPE_SUBCLASS     = (1L << 31)
  
  MAX_OUTPUT_LEN=1024
  
+hexdigits = "0123456789abcdef"
+
+ENCODING = locale.getpreferredencoding()
+
  class NullPyObjectPtr(RuntimeError):
      pass
  
@@ -94,6 +100,30 @@ def safe_range(val):
      # threshold in case the data was corrupted
      return xrange(safety_limit(val))
  
+def write_unicode(file, text):
+    # Write a byte or unicode string to file. Unicode strings are encoded to
+    # ENCODING encoding with 'backslashreplace' error handler to avoid
+    # UnicodeEncodeError.
+    if isinstance(text, unicode):
+        text = text.encode(ENCODING, 'backslashreplace')
+    file.write(text)
+
+def os_fsencode(filename):
+    if not isinstance(filename, unicode):
+        return filename
+    encoding = sys.getfilesystemencoding()
+    if encoding == 'mbcs':
+        # mbcs doesn't support surrogateescape
+        return filename.encode(encoding)
+    encoded = []
+    for char in filename:
+        # surrogateescape error handler
+        if 0xDC80 <= ord(char) <= 0xDCFF:
+            byte = chr(ord(char) - 0xDC00)
+        else:
+            byte = char.encode(encoding)
+        encoded.append(byte)
+    return ''.join(encoded)
  
  class StringTruncated(RuntimeError):
      pass
@@ -160,16 +190,9 @@ class PyObjectPtr(object):
              return pyo_ptr.dereference()[name]
  
          if name == 'ob_size':
-            try:
-            # Python 2:
-                return self._gdbval.dereference()[name]
-            except RuntimeError:
-                # Python 3:
-                try:
-                    return self._gdbval.dereference()['ob_base'][name]
-                except RuntimeError:
-                    return 0
-                
+            pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
+            return pyo_ptr.dereference()[name]
+
          # General case: look it up inside the object:
          return self._gdbval.dereference()[name]
  
@@ -370,6 +393,8 @@ class PyObjectPtr(object):
      def as_address(self):
          return long(self._gdbval)
  
+class PyVarObjectPtr(PyObjectPtr):
+    _typename = 'PyVarObject'
  
  class ProxyAlreadyVisited(object):
      '''
@@ -785,6 +810,22 @@ class PyLongObjectPtr(PyObjectPtr):
              result = -result
          return result
  
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
+        proxy = self.proxyval(visited)
+        out.write("%s" % proxy)
+
+
+class PyBoolObjectPtr(PyLongObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
+    <bool> instances (Py_True/Py_False) within the process being debugged.
+    """
+    def proxyval(self, visited):
+        if PyLongObjectPtr.proxyval(self, visited):
+            return True
+        else:
+            return False
  
  class PyNoneStructPtr(PyObjectPtr):
      """
@@ -898,7 +939,8 @@ class PyFrameObjectPtr(PyObjectPtr):
          newline character'''
          if self.is_optimized_out():
              return '(frame information optimized out)'
-        with open(self.filename(), 'r') as f:
+        filename = self.filename()
+        with open(os_fsencode(filename), 'r') as f:
              all_lines = f.readlines()
              # Convert from 1-based current_line_num to 0-based list offset:
              return all_lines[self.current_line_num()-1]
@@ -909,9 +951,9 @@ class PyFrameObjectPtr(PyObjectPtr):
              return
          out.write('Frame 0x%x, for file %s, line %i, in %s ('
                    % (self.as_address(),
-                     self.co_filename,
+                     self.co_filename.proxyval(visited),
                       self.current_line_num(),
-                     self.co_name))
+                     self.co_name.proxyval(visited)))
          first = True
          for pyop_name, pyop_value in self.iter_locals():
              if not first:
@@ -948,7 +990,8 @@ class PySetObjectPtr(PyObjectPtr):
              return set(members)
  
      def write_repr(self, out, visited):
-        out.write(self.safe_tp_name())
+        # Emulate Python 3's set_repr
+        tp_name = self.safe_tp_name()
  
          # Guard against infinite loops:
          if self.as_address() in visited:
@@ -956,7 +999,18 @@ class PySetObjectPtr(PyObjectPtr):
              return
          visited.add(self.as_address())
  
-        out.write('([')
+        # Python 3's set_repr special-cases the empty set:
+        if not self.field('used'):
+            out.write(tp_name)
+            out.write('()')
+            return
+
+        # Python 3 uses {} for set literals:
+        if tp_name != 'set':
+            out.write(tp_name)
+            out.write('(')
+
+        out.write('{')
          first = True
          table = self.field('table')
          for i in safe_range(self.field('mask')+1):
@@ -970,23 +1024,55 @@ class PySetObjectPtr(PyObjectPtr):
                          out.write(', ')
                      first = False
                      pyop_key.write_repr(out, visited)
-        out.write('])')
+        out.write('}')
  
+        if tp_name != 'set':
+            out.write(')')
  
-class PyStringObjectPtr(PyObjectPtr):
-    _typename = 'PyStringObject'
+
+class PyBytesObjectPtr(PyObjectPtr):
+    _typename = 'PyBytesObject'
  
      def __str__(self):
          field_ob_size = self.field('ob_size')
          field_ob_sval = self.field('ob_sval')
-        char_ptr = field_ob_sval.address.cast(
-            gdb.lookup_type('char').pointer())
-        
+        char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr)
          return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])
  
      def proxyval(self, visited):
          return str(self)
  
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
+
+        # Get a PyStringObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
+        # to Python 2 code:
+        quote = "'"
+        if "'" in proxy and not '"' in proxy:
+            quote = '"'
+        out.write('b')
+        out.write(quote)
+        for byte in proxy:
+            if byte == quote or byte == '\\':
+                out.write('\\')
+                out.write(byte)
+            elif byte == '\t':
+                out.write('\\t')
+            elif byte == '\n':
+                out.write('\\n')
+            elif byte == '\r':
+                out.write('\\r')
+            elif byte < ' ' or ord(byte) >= 0x7f:
+                out.write('\\x')
+                out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
+                out.write(hexdigits[ord(byte) & 0xf])
+            else:
+                out.write(byte)
+        out.write(quote)
+
  class PyTupleObjectPtr(PyObjectPtr):
      _typename = 'PyTupleObject'
  
@@ -1027,6 +1113,13 @@ class PyTypeObjectPtr(PyObjectPtr):
      _typename = 'PyTypeObject'
  
  
+def _unichr_is_printable(char):
+    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py: a char
+    # is printable if it is " " or its category is outside the C* / Z* classes.
+    import unicodedata
+    # category() returns two letters (e.g. "Cc", "Zs"); test the major class.
+    return char == u" " or unicodedata.category(char)[0] not in ("C", "Z")
+
  if sys.maxunicode >= 0x10000:
      _unichr = unichr
  else:
@@ -1085,6 +1178,116 @@ class PyUnicodeObjectPtr(PyObjectPtr):
          result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
          return result
  
+    def write_repr(self, out, visited):
+        # Write this out as a Python 3 str literal, i.e. without a "u" prefix
+
+        # Get a PyUnicodeObject* within the Python 2 gdb process:
+        proxy = self.proxyval(visited)
+
+        # Transliteration of Python 3's Objects/unicodeobject.c:unicode_repr
+        # to Python 2:
+        if "'" in proxy and '"' not in proxy:
+            quote = '"'
+        else:
+            quote = "'"
+        out.write(quote)
+
+        i = 0
+        while i < len(proxy):
+            ch = proxy[i]
+            i += 1
+
+            # Escape quotes and backslashes
+            if ch == quote or ch == '\\':
+                out.write('\\')
+                out.write(ch)
+
+            # Map special whitespace to '\t', '\n', '\r'
+            elif ch == '\t':
+                out.write('\\t')
+            elif ch == '\n':
+                out.write('\\n')
+            elif ch == '\r':
+                out.write('\\r')
+
+            # Map non-printable US ASCII (control chars and DEL) to '\xhh'
+            elif ch < ' ' or ord(ch) == 0x7F:
+                out.write('\\x')
+                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
+                out.write(hexdigits[ord(ch) & 0x000F])
+
+            # Copy ASCII characters as-is
+            elif ord(ch) < 0x7F:
+                out.write(ch)
+
+            # Non-ASCII characters
+            else:
+                ucs = ch
+                ch2 = None
+                if sys.maxunicode < 0x10000:
+                    # If sizeof(Py_UNICODE) is 2 here (in gdb), join
+                    # surrogate pairs before calling _unichr_is_printable.
+                    if (i < len(proxy)
+                    and 0xD800 <= ord(ch) < 0xDC00 \
+                    and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+                        ch2 = proxy[i]
+                        ucs = ch + ch2
+                        i += 1
+
+                # Unfortunately, Python 2's unicode type doesn't seem
+                # to expose the "isprintable" method
+                printable = _unichr_is_printable(ucs)
+                if printable:
+                    try:
+                        ucs.encode(ENCODING)
+                    except UnicodeEncodeError:
+                        printable = False
+
+                # Map Unicode whitespace and control characters
+                # (categories Z* and C* except ASCII space)
+                if not printable:
+                    if ch2 is not None:
+                        # Match Python 3's representation of non-printable
+                        # wide characters.
+                        code = (ord(ch) & 0x03FF) << 10
+                        code |= ord(ch2) & 0x03FF
+                        code += 0x00010000
+                    else:
+                        code = ord(ucs)
+
+                    # Map 8-bit characters to '\\xhh'
+                    if code <= 0xff:
+                        out.write('\\x')
+                        out.write(hexdigits[(code >> 4) & 0x000F])
+                        out.write(hexdigits[code & 0x000F])
+                    # Map 21-bit characters to '\U00xxxxxx'
+                    elif code >= 0x10000:
+                        out.write('\\U')
+                        out.write(hexdigits[(code >> 28) & 0x0000000F])
+                        out.write(hexdigits[(code >> 24) & 0x0000000F])
+                        out.write(hexdigits[(code >> 20) & 0x0000000F])
+                        out.write(hexdigits[(code >> 16) & 0x0000000F])
+                        out.write(hexdigits[(code >> 12) & 0x0000000F])
+                        out.write(hexdigits[(code >> 8) & 0x0000000F])
+                        out.write(hexdigits[(code >> 4) & 0x0000000F])
+                        out.write(hexdigits[code & 0x0000000F])
+                    # Map 16-bit characters to '\uxxxx'
+                    else:
+                        out.write('\\u')
+                        out.write(hexdigits[(code >> 12) & 0x000F])
+                        out.write(hexdigits[(code >> 8) & 0x000F])
+                        out.write(hexdigits[(code >> 4) & 0x000F])
+                        out.write(hexdigits[code & 0x000F])
+                else:
+                    # Copy characters as-is
+                    out.write(ch)
+                    if ch2 is not None:
+                        out.write(ch2)
+
+        out.write(quote)
+
+
+
  
  def int_from_int(gdbval):
      return int(str(gdbval))
@@ -1254,7 +1457,8 @@ class Frame(object):
          if self.is_evalframeex():
              pyop = self.get_pyop()
              if pyop:
-                sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
+                line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+                write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
                  sys.stdout.write(pyop.current_line())
              else:
                  sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
@@ -1315,7 +1519,7 @@ class PyList(gdb.Command):
          if start<1:
              start = 1
  
-        with open(filename, 'r') as f:
+        with open(os_fsencode(filename), 'r') as f:
              all_lines = f.readlines()
              # start and end are 1-based, all_lines is 0-based;
              # so [start-1:end] as a python slice gives us [start, end] as a
@@ -1487,8 +1691,7 @@ class PyNameEquals(gdb.Function):
              pyframe = frame.get_pyop()
              if pyframe is None:
                  return None
-            
-            return str(getattr(pyframe, attr))
+            val = str(getattr(pyframe, attr)); print val, val.proxyval(set()); return val
          
          return None
      
@@ -1922,7 +2125,7 @@ Py_single_input = 256
  Py_file_input = 257
  Py_eval_input = 258
  
-def pointervalue(gdbval):
+def _pointervalue(gdbval):
      """
      Return the value of the pionter as a Python int. 
      
@@ -1936,11 +2139,23 @@ def pointervalue(gdbval):
          # still convert the pointer to an int
          return long(gdbval)
  
+def pointervalue(gdbval):
+    pointer = _pointervalue(gdbval)
+    try:
+        if pointer < 0:
+            raise gdb.GdbError("Negative pointer value, presumably a bug "
+                               "in gdb, aborting.")
+    except RuntimeError:
+        # work around yet another bug in gdb where you get random behaviour
+        # and tracebacks
+        pass
+        
+    return pointer
  
  class PythonCodeExecutor(object):
          
      def malloc(self, size):
-        chunk = (gdb.parse_and_eval("(void *) malloc(%d)" % size))
+        chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
          
          pointer = pointervalue(chunk)
          if pointer == 0:
@@ -1958,8 +2173,9 @@ class PythonCodeExecutor(object):
          stringp = self.alloc_string(string)
          try:
              result = gdb.parse_and_eval(
-                'PyString_FromStringAndSize((char *) %d, (size_t) %d)' % 
-                                                 (stringp, len(string)))
+                '(PyObject *) PyString_FromStringAndSize('
+                    '(char *) %d,'
+                    '(size_t) %d)' % (stringp, len(string)))
          finally:
              self.free(stringp)
author	Mark Florisson <markflorisson88@gmail.com>
	Thu, 25 Nov 2010 20:54:36 +0000 (21:54 +0100)
committer	Mark Florisson <markflorisson88@gmail.com>
	Thu, 25 Nov 2010 20:54:36 +0000 (21:54 +0100)
Cython/Debugger/Cygdb.py		patch \| blob \| history
Cython/Debugger/libcython.py		patch \| blob \| history
Cython/Debugger/libpython.py		patch \| blob \| history