Preliminary debug support for Cython
author Mark Florisson <markflorisson88@gmail.com>
Sat, 18 Sep 2010 22:44:06 +0000 (00:44 +0200)
committer Mark Florisson <markflorisson88@gmail.com>
Sat, 18 Sep 2010 22:44:06 +0000 (00:44 +0200)
added the --pyrex-debug flag to Cython's build_ext
added the pyrex_debug boolean to Cython's Cython.Distutils.extension.Extension
    (for per-module debugging information)
debug output is written to the cython_debug directory
bin/cygdb is included (start this from the build directory)
    working commands: cy import, cy locals, cy break
when debugging is active, export all functions as extern

Cython/Compiler/CmdLine.py
Cython/Compiler/Errors.py
Cython/Compiler/Main.py
Cython/Compiler/ParseTreeTransforms.py
Cython/Debugger/__init__.py [new file with mode: 0644]
Cython/Debugger/libcython.py [new file with mode: 0644]
Cython/Debugger/libpython.py [new file with mode: 0644]
Cython/Distutils/build_ext.py
Cython/Distutils/extension.py
bin/cygdb [new file with mode: 0755]

index 320690b0f2c00dc3932e63175fd53ce7ba3fd55a..bc0925d26faa02a40432cbff63f1aad0926f0804 100644 (file)
@@ -113,6 +113,8 @@ def parse_command_line(args):
                 Options.convert_range = True
             elif option == "--line-directives":
                 options.emit_linenums = True
+            elif option == "--debug":
+                options.debug = True
             elif option == '-2':
                 options.language_level = 2
             elif option == '-3':
index a492e5bb2cf61c24553273063e60c51998c9bf84..87132f2c9f8f46509871229efef4ec8d1c550009 100644 (file)
@@ -91,6 +91,10 @@ class CompilerCrash(CompileError):
             message += u'%s: %s' % (cause.__class__.__name__, cause)
         CompileError.__init__(self, pos, message)
 
+class NoElementTreeInstalledException(PyrexError):
+    """raised when the user enabled options.debug but no ElementTree 
+    implementation was found
+    """
 
 listing_file = None
 num_errors = 0
index 700f43f20c7eec58ff833626f8d0725a02875531..761f60fefc9dfee06f9642737bf29a8e1f758d7f 100644 (file)
@@ -13,7 +13,9 @@ except NameError:
     # Python 2.3
     from sets import Set as set
 
+import itertools
 from time import time
+
 import Code
 import Errors
 import Parsing
@@ -176,13 +178,19 @@ class Context(object):
             from Cython.TestUtils import TreeAssertVisitor
             test_support.append(TreeAssertVisitor())
 
-        return ([
-                create_parse(self),
-            ] + self.create_pipeline(pxd=False, py=py) + test_support + [
-                inject_pxd_code,
-                abort_on_errors,
-                generate_pyx_code,
-            ])
+        if options.debug:
+            import ParseTreeTransforms
+            debug_transform = [ParseTreeTransforms.DebuggerTransform(self)]
+        else:
+            debug_transform = []
+            
+        return list(itertools.chain(
+            [create_parse(self)],
+            self.create_pipeline(pxd=False, py=py),
+            test_support,
+            [inject_pxd_code, abort_on_errors],
+            debug_transform,
+            [generate_pyx_code]))
 
     def create_pxd_pipeline(self, scope, module_name):
         def parse_pxd(source_desc):
@@ -798,4 +806,5 @@ default_options = dict(
     evaluate_tree_assertions = False,
     emit_linenums = False,
     language_level = 2,
+    debug = False,
 )
index fccba1775450fb8fe4d4fa5a9a5b70ebc95f5b40..0279112c48fa600086c64a3b359b4a549554613e 100644 (file)
@@ -7,11 +7,39 @@ from Cython.Compiler.UtilNodes import *
 from Cython.Compiler.TreeFragment import TreeFragment, TemplateTransform
 from Cython.Compiler.StringEncoding import EncodedString
 from Cython.Compiler.Errors import error, CompileError
+from Cython.Compiler import Errors
+
 try:
     set
 except NameError:
     from sets import Set as set
+
 import copy
+import os
+import errno
+
+try:
+  from lxml import etree
+  have_lxml = True
+except ImportError:
+    have_lxml = False
+    try:
+        # Python 2.5
+        from xml.etree import cElementTree as etree
+    except ImportError:
+        try:
+            # Python 2.5
+            from xml.etree import ElementTree as etree
+        except ImportError:
+            try:
+                # normal cElementTree install
+                import cElementTree as etree
+            except ImportError:
+                try:
+                    # normal ElementTree install
+                    import elementtree.ElementTree as etree
+                except ImportError:
+                    etree = None
 
 
 class NameNodeCollector(TreeVisitor):
@@ -1431,3 +1459,122 @@ class TransformBuiltinMethods(EnvTransform):
         
         self.visitchildren(node)
         return node
+
+
+def _create_xmlnode(tb, name, attrs=None):
+    "create a xml node with name name and attrs attrs given TreeBuilder tb"
+    tb.start(name, attrs or {})
+    tb.end(name)
+
+
+class DebuggerTransform(CythonTransform):
+    """
+    Class to output debugging information for cygdb
+    
+    It writes debug information to cython_debug/cython_debug_info_<modulename>
+    in the build directory. Also sets all functions' visibility to extern to 
+    enable debugging
+    """
+    
+    def __init__(self, context):
+        super(DebuggerTransform, self).__init__(context)
+        if etree is None:
+            raise Errors.NoElementTreeInstalledException()
+        else:
+            self.tb = etree.TreeBuilder()
+        self.visited = set()
+        
+    def visit_ModuleNode(self, node):
+        self.module_name = node.full_module_name
+        attrs = dict(
+            module_name=self.module_name,
+            filename=node.pos[0].filename)
+        
+        self.tb.start('Module', attrs)
+        
+        # serialize functions
+        self.tb.start('Functions', {})
+        self.visitchildren(node)
+        self.tb.end('Functions')
+        
+        # 2.3 compatibility. Serialize global variables
+        self.tb.start('Globals', {})
+        entries = {}
+        for k, v in node.scope.entries.iteritems():
+            if (v.qualified_name not in self.visited and 
+                not v.name.startswith('__pyx_')):
+                # if v.qualified_name == 'testcython.G': import pdb; pdb.set_trace()
+                entries[k]= v
+        
+        self.serialize_local_variables(entries)
+        self.tb.end('Globals')
+        self.tb.end('Module')
+        return node
+    
+    def visit_FuncDefNode(self, node):
+        self.visited.add(node.local_scope.qualified_name)
+        node.entry.visibility = 'extern'
+        if node.py_func is None:
+            pf_cname = ''
+        else:
+            pf_cname = node.py_func.entry.func_cname
+            
+        attrs = dict(
+            name=node.entry.name,
+            cname=node.entry.func_cname,
+            pf_cname=pf_cname,
+            qualified_name=node.local_scope.qualified_name,
+            lineno=str(node.pos[1]))
+        
+        self.tb.start('Function', attrs=attrs)
+        
+        self.tb.start('Locals', {})
+        self.serialize_local_variables(node.local_scope.entries)
+        self.tb.end('Locals')
+        self.tb.start('Arguments', {})
+        for arg in node.local_scope.arg_entries:
+            _create_xmlnode(self.tb, arg.name)
+        self.tb.end('Arguments')
+        self.tb.end('Function')
+        return node
+    
+    def serialize_local_variables(self, entries):
+        for entry in entries.values():
+            if entry.type.is_pyobject:
+                vartype = 'PyObject'
+            else:
+                vartype = 'CObject'
+            
+            cname = entry.cname
+            if entry.type.is_extension_type:
+                cname = entry.type.typeptr_cname
+
+            attrs = dict(
+                name=entry.name,
+                cname=cname,
+                qualified_name=entry.qualified_name,
+                type=vartype)
+                
+            _create_xmlnode(self.tb, 'LocalVar', attrs)
+    
+    def __call__(self, root):
+        self.tb.start('cython_debug', attrs=dict(version='1.0'))
+        super(DebuggerTransform, self).__call__(root)
+        self.tb.end('cython_debug')
+        xml_root_element = self.tb.close()
+
+        try:
+            os.mkdir('cython_debug')
+        except OSError, e:
+            if e.errno != errno.EEXIST:
+                raise
+
+        et = etree.ElementTree(xml_root_element)
+        kw = {}
+        if have_lxml:
+            kw['pretty_print'] = True
+        et.write("cython_debug/cython_debug_info_" + self.module_name, 
+                 encoding="UTF-8", 
+                 **kw)
+            
+        return root
\ No newline at end of file
diff --git a/Cython/Debugger/__init__.py b/Cython/Debugger/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/Cython/Debugger/libcython.py b/Cython/Debugger/libcython.py
new file mode 100644 (file)
index 0000000..7462230
--- /dev/null
@@ -0,0 +1,354 @@
+"""
+GDB extension that adds Cython support.
+"""
+
+import sys
+import traceback
+import functools
+import itertools
+import collections
+
+import gdb
+
+try:
+  from lxml import etree
+  have_lxml = True
+except ImportError:
+    have_lxml = False
+    try:
+        # Python 2.5
+        from xml.etree import cElementTree as etree
+    except ImportError:
+        try:
+            # Python 2.5
+            from xml.etree import ElementTree as etree
+        except ImportError:
+            try:
+                # normal cElementTree install
+                import cElementTree as etree
+            except ImportError:
+                # normal ElementTree install
+                import elementtree.ElementTree as etree
+
+if hasattr(gdb, 'string_to_argv'):
+    from gdb import string_to_argv
+else:
+    from shlex import split as string_to_argv
+
+from Cython.Debugger import libpython
+
+
+# Cython module namespace
+cython_namespace = {}
+
+# C or Python type
+CObject = object()
+PythonObject = object()
+
+# maps (unique) qualified function names (e.g. 
+# cythonmodule.ClassName.method_name) to the CythonFunction object
+functions_by_qualified_name = {}
+
+# unique cnames of Cython functions
+functions_by_cname = {}
+
+# map function names like method_name to a list of all such CythonFunction
+# objects
+functions_by_name = collections.defaultdict(list)
+
+_filesystemencoding = sys.getfilesystemencoding() or 'UTF-8'
+
+def dont_suppress_errors(function):
+    @functools.wraps(function)
+    def wrapper(*args, **kwargs):
+        try:
+            return function(*args, **kwargs)
+        except Exception:
+            traceback.print_exc()
+            raise
+    
+    return wrapper
+
+class CythonModule(object):
+    def __init__(self, module_name, filename):
+        self.name = module_name
+        self.filename = filename
+        self.functions = {}
+        self.globals = {}
+
+class CythonVariable(object):
+    def __init__(self, name, cname, qualified_name, type):
+        self.name = name
+        self.cname = cname
+        self.qualified_name = qualified_name
+        self.type = type
+
+class CythonFunction(CythonVariable):
+    def __init__(self, 
+                 module, 
+                 name, 
+                 cname, 
+                 pf_cname,
+                 qualified_name, 
+                 lineno, 
+                 type=CObject):
+        super(CythonFunction, self).__init__(name, cname, qualified_name, type)
+        self.module = module
+        self.pf_cname = pf_cname
+        self.lineno = lineno
+        self.locals = {}
+        self.arguments = []
+
+
+class CythonCommand(gdb.Command):
+    """
+    Invoke a Cython command. Available commands are:
+        
+        cy import
+        cy break
+        cy condition
+        cy step
+        cy enable
+        cy disable
+        cy print
+        cy list
+        cy locals
+        cy globals
+        cy tb
+        cy cname
+    """
+
+CythonCommand('cy', gdb.COMMAND_NONE, gdb.COMPLETE_COMMAND, prefix=True)
+
+
+class CyImport(gdb.Command):
+    """
+    Import debug information outputted by the Cython compiler
+    Example: cy import FILE...
+    """
+
+    def invoke(self, args, from_tty):
+        args = args.encode(_filesystemencoding)
+        for arg in string_to_argv(args):
+            try:
+                f = open(arg)
+            except OSError, e:
+                print('Unable to open file %r: %s' % (args, e.args[1]))
+                return
+            
+            t = etree.parse(f)
+            
+            for module in t.getroot():
+                cython_module = CythonModule(**module.attrib)
+                cython_namespace[cython_module.name] = cython_module
+                
+                for variable in module.find('Globals'):
+                    d = variable.attrib
+                    cython_module.globals[d['name']] = CythonVariable(**d)
+                
+                for function in module.find('Functions'):
+                    cython_function = CythonFunction(module=cython_module, 
+                                                     **function.attrib)
+                    cython_module.functions[cython_function.name] = \
+                        cython_function
+                    
+                    # update the global function mappings
+                    functions_by_name[cython_function.name].append(
+                        cython_function)
+                    functions_by_qualified_name[
+                        cython_function.qualified_name] = cython_function
+                    functions_by_cname[cython_function.cname] = cython_function
+                    
+                    for local in function.find('Locals'):
+                        d = local.attrib
+                        cython_function.locals[d['name']] = CythonVariable(**d)
+                    
+                    cython_function.arguments.extend(
+                        funcarg.tag for funcarg in function.find('Arguments'))
+        
+CyImport('cy import', gdb.COMMAND_STATUS, gdb.COMPLETE_FILENAME)
+
+
+class CyBreak(gdb.Command):
+    """
+    Set a breakpoint for Cython code using Cython qualified name notation, e.g.:
+        
+        cy break cython_modulename.ClassName.method_name...
+    
+    or normal notation:
+        
+        cy break function_or_method_name...
+    """
+    
+    def invoke(self, function_names, from_tty):
+        for funcname in string_to_argv(function_names.encode('UTF-8')):
+            func = functions_by_qualified_name.get(funcname)
+            break_funcs = [func]
+            
+            if not func:
+                funcs = functions_by_name.get(funcname)
+                if not funcs:
+                    gdb.execute('break ' + funcname)
+                    return
+                    
+                if len(funcs) > 1:
+                    # multiple functions, let the user pick one
+                    print 'There are multiple such functions:'
+                    for idx, func in enumerate(funcs):
+                        print '%3d) %s' % (idx, func.qualified_name)
+                    
+                    while True:
+                        try:
+                            result = raw_input(
+                                "Select a function, press 'a' for all "
+                                "functions or press 'q' or '^D' to quit: ")
+                        except EOFError:
+                            return
+                        else:
+                            if result.lower() == 'q':
+                                return
+                            elif result.lower() == 'a':
+                                break_funcs = funcs
+                                break
+                            elif (result.isdigit() and 
+                                0 <= int(result) < len(funcs)):
+                                break_funcs = [funcs[int(result)]]
+                                break
+                            else:
+                                print 'Not understood...'
+                else:
+                    break_funcs = [funcs[0]]
+            
+            for func in break_funcs:
+                gdb.execute('break %s' % func.cname)
+                if func.pf_cname:
+                    gdb.execute('break %s' % func.pf_cname)
+    
+    @dont_suppress_errors
+    def complete(self, text, word):
+        names = itertools.chain(functions_by_qualified_name, functions_by_name)
+        lastword = text.strip().split()[-1]
+        if '.' in lastword:
+            compl = [n for n in functions_by_qualified_name 
+                           if n.startswith(lastword)]
+        else:
+            seen = set(text[:-len(word)].split())
+            return [n for n in names if n.startswith(word) and n not in seen]
+        
+        if len(lastword) > len(word):
+            # readline sees something (e.g. a '.') as a word boundary, so don't
+            # "recomplete" this prefix
+            strip_prefix_length = len(lastword) - len(word)
+            compl = [n[strip_prefix_length:] for n in compl]
+            
+        return compl
+
+CyBreak('cy break', gdb.COMMAND_BREAKPOINTS)
+
+# This needs GDB 7.2 or the Archer branch
+# class CompleteUnqualifiedFunctionNames(gdb.Parameter):
+    # """
+    # Indicates whether 'cy break' should complete unqualified function or 
+    # method names. e.g. whether only 'modulename.functionname' should be
+    # completed, or also just 'functionname'
+    # """
+# 
+# cy_complete_unqualified = CompleteUnqualifiedFunctionNames(
+    # 'cy_complete_unqualified', 
+    # gdb.COMMAND_BREAKPOINTS, 
+    # gdb.PARAM_BOOLEAN)
+
+
+class NoCythonFunctionNameInFrameError(Exception):
+    """
+    raised when the name of the C function could not be determined 
+    in the current C stack frame
+    """
+
+class CyPrint(gdb.Command):
+    """
+    Print a Cython variable using 'cy print x' or 'cy print module.function.x'
+    """
+    def _get_current_cython_function(self):
+        func_name = gdb.selected_frame().name()
+        if func_name is None:
+            raise NoCythonFunctionNameInFrameError()
+        
+        return functions_by_cname.get(func_name)
+    
+    def _get_locals_globals(self):
+        try:
+            cython_function = self._get_current_cython_function()
+        except NoCythonFunctionNameInFrameError:
+            return (None, None)
+        else:
+            if cython_function is None:
+                return (None, None)
+                
+            return cython_function.locals, cython_function.module.globals
+    
+    def invoke(self, name, from_tty):
+        try:
+            cython_function = self._get_current_cython_function()
+        except NoCythonFunctionNameInFrameError:
+            print 'Unable to determine the name of the function in the current frame.'
+        except RuntimeError, e:
+            print e.args[0]
+        else:
+            # a cython_function of None means we don't know about such a Cython
+            # function and we fall back to GDB's print
+            cname = name
+            if cython_function is not None:
+                if name in cython_function.locals:
+                    cname = cython_function.locals[name].cname
+                elif name in cython_function.module.globals:
+                    cname = cython_function.module.globals[name].cname
+            
+            gdb.execute('print ' + cname)
+    
+    def complete(self):
+        locals_, globals_ = self._get_locals_globals()
+        if locals_ is None:
+            return []
+        return list(itertools.chain(locals_, globals_))
+    
+CyPrint('cy print', gdb.COMMAND_DATA)
+
+class CyLocals(CyPrint):
+    def ns(self):
+        locals_, _ = self._get_locals_globals()
+        return locals_
+        
+    def invoke(self, name, from_tty):
+        try:
+            ns = self.ns()
+        except RuntimeError, e:
+            print e.args[0]
+            return
+        
+        if ns is None:
+            print ('Information of Cython locals could not be obtained. '
+                   'Is this an actual Cython function and did you '
+                   "'cy import' the debug information?")
+        
+        for var in ns.itervalues():
+            val = gdb.parse_and_eval(var.cname)
+            if var.type == PythonObject:
+                result = libpython.PyObjectPtr.from_pyobject_ptr(val)
+            else:
+                result = CObject
+                
+            print '%s = %s' % (var.name, result)
+
+class CyGlobals(CyLocals):
+    def ns(self):
+        _, globals_ = self._get_locals_globals()
+        return globals_
+    
+    def invoke(self, name, from_tty):
+        m = gdb.parse_and_eval('PyModule_GetDict(__pyx_m)')
+        m = m.cast(gdb.lookup_type('PyModuleObject').pointer())
+        print PyObjectPtrPrinter(libpython.PyObjectPtr.from_pyobject_ptr(m['md_dict'])).to_string()
+
+CyLocals('cy locals', gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
+CyGlobals('cy globals', gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
diff --git a/Cython/Debugger/libpython.py b/Cython/Debugger/libpython.py
new file mode 100644 (file)
index 0000000..06d26ed
--- /dev/null
@@ -0,0 +1,1434 @@
+#!/usr/bin/python
+'''
+From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
+to be extended with Python code e.g. for library-specific data visualizations,
+such as for the C++ STL types.  Documentation on this API can be seen at:
+http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
+
+
+This python module deals with the case when the process being debugged (the
+"inferior process" in gdb parlance) is itself python, or more specifically,
+linked against libpython.  In this situation, almost every item of data is a
+(PyObject*), and having the debugger merely print their addresses is not very
+enlightening.
+
+This module embeds knowledge about the implementation details of libpython so
+that we can emit useful visualizations e.g. a string, a list, a dict, a frame
+giving file/line information and the state of local variables
+
+In particular, given a gdb.Value corresponding to a PyObject* in the inferior
+process, we can generate a "proxy value" within the gdb process.  For example,
+given a PyObject* in the inferior process that is in fact a PyListObject*
+holding three PyObject* that turn out to be PyStringObject* instances, we can
+generate a proxy value within the gdb process that is a list of strings:
+  ["foo", "bar", "baz"]
+
+Doing so can be expensive for complicated graphs of objects, and could take
+some time, so we also have a "write_repr" method that writes a representation
+of the data to a file-like object.  This allows us to stop the traversal by
+having the file-like object raise an exception if it gets too much data.
+
+With both "proxyval" and "write_repr" we keep track of the set of all addresses
+visited so far in the traversal, to avoid infinite recursion due to cycles in
+the graph of object references.
+
+We try to defer gdb.lookup_type() invocations for python types until as late as
+possible: for a dynamically linked python binary, when the process starts in
+the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
+the type names are known to the debugger
+
+The module also extends gdb with some python-specific commands.
+'''
+from __future__ import with_statement
+import gdb
+
+# Look up the gdb.Type for some standard types:
+_type_char_ptr = gdb.lookup_type('char').pointer() # char*
+_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
+_type_void_ptr = gdb.lookup_type('void').pointer() # void*
+
+SIZEOF_VOID_P = _type_void_ptr.sizeof
+
+
+Py_TPFLAGS_HEAPTYPE = (1L << 9)
+
+Py_TPFLAGS_INT_SUBCLASS      = (1L << 23)
+Py_TPFLAGS_LONG_SUBCLASS     = (1L << 24)
+Py_TPFLAGS_LIST_SUBCLASS     = (1L << 25)
+Py_TPFLAGS_TUPLE_SUBCLASS    = (1L << 26)
+Py_TPFLAGS_STRING_SUBCLASS   = (1L << 27)
+Py_TPFLAGS_UNICODE_SUBCLASS  = (1L << 28)
+Py_TPFLAGS_DICT_SUBCLASS     = (1L << 29)
+Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
+Py_TPFLAGS_TYPE_SUBCLASS     = (1L << 31)
+
+
+MAX_OUTPUT_LEN=1024
+
+class NullPyObjectPtr(RuntimeError):
+    pass
+
+
+def safety_limit(val):
+    # Given an integer value from the process being debugged, limit it to some
+    # safety threshold so that arbitrary breakage within said process doesn't
+    # break the gdb process too much (e.g. sizes of iterations, sizes of lists)
+    return min(val, 1000)
+
+
+def safe_range(val):
+    # As per range, but don't trust the value too much: cap it to a safety
+    # threshold in case the data was corrupted
+    return xrange(safety_limit(val))
+
+
+class StringTruncated(RuntimeError):
+    pass
+
+class TruncatedStringIO(object):
+    '''Similar to cStringIO, but can truncate the output by raising a
+    StringTruncated exception'''
+    def __init__(self, maxlen=None):
+        self._val = ''
+        self.maxlen = maxlen
+
+    def write(self, data):
+        if self.maxlen:
+            if len(data) + len(self._val) > self.maxlen:
+                # Truncation:
+                self._val += data[0:self.maxlen - len(self._val)]
+                raise StringTruncated()
+
+        self._val += data
+
+    def getvalue(self):
+        return self._val
+
+class PyObjectPtr(object):
+    """
+    Class wrapping a gdb.Value that's either a (PyObject*) within the
+    inferior process, or some subclass pointer e.g. (PyStringObject*)
+
+    There will be a subclass for every refined PyObject type that we care
+    about.
+
+    Note that at every stage the underlying pointer could be NULL, point
+    to corrupt data, etc; this is the debugger, after all.
+    """
+    _typename = 'PyObject'
+
+    def __init__(self, gdbval, cast_to=None):
+        if cast_to:
+            self._gdbval = gdbval.cast(cast_to)
+        else:
+            self._gdbval = gdbval
+
+    def field(self, name):
+        '''
+        Get the gdb.Value for the given field within the PyObject, coping with
+        some python 2 versus python 3 differences.
+
+        Various libpython types are defined using the "PyObject_HEAD" and
+        "PyObject_VAR_HEAD" macros.
+
+        In Python 2, these are defined so that "ob_type" and (for a var
+        object) "ob_size" are fields of the type in question.
+
+        In Python 3, this is defined as an embedded PyVarObject type thus:
+           PyVarObject ob_base;
+        so that the "ob_size" field is located inside the "ob_base" field, and
+        the "ob_type" is most easily accessed by casting back to a (PyObject*).
+        '''
+        if self.is_null():
+            raise NullPyObjectPtr(self)
+
+        if name == 'ob_type':
+            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
+            return pyo_ptr.dereference()[name]
+
+        if name == 'ob_size':
+            try:
+            # Python 2:
+                return self._gdbval.dereference()[name]
+            except RuntimeError:
+                # Python 3:
+                return self._gdbval.dereference()['ob_base'][name]
+
+        # General case: look it up inside the object:
+        return self._gdbval.dereference()[name]
+
+    def pyop_field(self, name):
+        '''
+        Get a PyObjectPtr for the given PyObject* field within this PyObject,
+        coping with some python 2 versus python 3 differences.
+        '''
+        return PyObjectPtr.from_pyobject_ptr(self.field(name))
+
+    def write_field_repr(self, name, out, visited):
+        '''
+        Extract the PyObject* field named "name", and write its representation
+        to file-like object "out"
+        '''
+        field_obj = self.pyop_field(name)
+        field_obj.write_repr(out, visited)
+
+    def get_truncated_repr(self, maxlen):
+        '''
+        Get a repr-like string for the data, but truncate it at "maxlen" bytes
+        (ending the object graph traversal as soon as you do)
+        '''
+        out = TruncatedStringIO(maxlen)
+        try:
+            self.write_repr(out, set())
+        except StringTruncated:
+            # Truncation occurred:
+            return out.getvalue() + '...(truncated)'
+
+        # No truncation occurred:
+        return out.getvalue()
+
+    def type(self):
+        return PyTypeObjectPtr(self.field('ob_type'))
+
+    def is_null(self):
+        return 0 == long(self._gdbval)
+
+    def is_optimized_out(self):
+        '''
+        Is the value of the underlying PyObject* visible to the debugger?
+
+        This can vary with the precise version of the compiler used to build
+        Python, and the precise version of gdb.
+
+        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
+        PyEval_EvalFrameEx's "f"
+        '''
+        return self._gdbval.is_optimized_out
+
+    def safe_tp_name(self):
+        try:
+            return self.type().field('tp_name').string()
+        except NullPyObjectPtr:
+            # NULL tp_name?
+            return 'unknown'
+        except RuntimeError:
+            # Can't even read the object at all?
+            return 'unknown'
+
+    def proxyval(self, visited):
+        '''
+        Scrape a value from the inferior process, and try to represent it
+        within the gdb process, whilst (hopefully) avoiding crashes when
+        the remote data is corrupt.
+
+        Derived classes will override this.
+
+        For example, a PyIntObject* with ob_ival 42 in the inferior process
+        should result in an int(42) in this process.
+
+        visited: a set of all gdb.Value pyobject pointers already visited
+        whilst generating this value (to guard against infinite recursion when
+        visiting object graphs with loops).  Analogous to Py_ReprEnter and
+        Py_ReprLeave
+        '''
+
+        class FakeRepr(object):
+            """
+            Class representing a non-descript PyObject* value in the inferior
+            process for when we don't have a custom scraper, intended to have
+            a sane repr().
+            """
+
+            def __init__(self, tp_name, address):
+                self.tp_name = tp_name
+                self.address = address
+
+            def __repr__(self):
+                # For the NULL pointer, we have no way of knowing a type, so
+                # special-case it as per
+                # http://bugs.python.org/issue8032#msg100882
+                if self.address == 0:
+                    return '0x0'
+                return '<%s at remote 0x%x>' % (self.tp_name, self.address)
+
+        return FakeRepr(self.safe_tp_name(),
+                        long(self._gdbval))
+
+    def write_repr(self, out, visited):
+        '''
+        Write a string representation of the value scraped from the inferior
+        process to "out", a file-like object.
+        '''
+        # Default implementation: generate a proxy value and write its repr
+        # However, this could involve a lot of work for complicated objects,
+        # so for derived classes we specialize this
+        return out.write(repr(self.proxyval(visited)))
+
+    @classmethod
+    def subclass_from_type(cls, t):
+        '''
+        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
+        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
+        to use
+
+        Ideally, we would look up the symbols for the global types, but that
+        isn't working yet:
+          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
+          Traceback (most recent call last):
+            File "<string>", line 1, in <module>
+          NotImplementedError: Symbol type not yet supported in Python scripts.
+          Error while executing Python code.
+
+        For now, we use tp_flags, after doing some string comparisons on the
+        tp_name for some special-cases that don't seem to be visible through
+        flags
+
+        Returns a class (not an instance); "cls" itself is returned when the
+        type fields cannot be read or when nothing more specific matches.
+        '''
+        try:
+            tp_name = t.field('tp_name').string()
+            tp_flags = int(t.field('tp_flags'))
+        except RuntimeError:
+            # Handle any kind of error e.g. NULL ptrs by simply using the base
+            # class
+            return cls
+
+        #print 'tp_flags = 0x%08x' % tp_flags
+        #print 'tp_name = %r' % tp_name
+
+        # Exact tp_name matches take precedence over every flag test below
+        name_map = {'bool': PyBoolObjectPtr,
+                    'classobj': PyClassObjectPtr,
+                    'instance': PyInstanceObjectPtr,
+                    'NoneType': PyNoneStructPtr,
+                    'frame': PyFrameObjectPtr,
+                    'set' : PySetObjectPtr,
+                    'frozenset' : PySetObjectPtr,
+                    'builtin_function_or_method' : PyCFunctionObjectPtr,
+                    }
+        if tp_name in name_map:
+            return name_map[tp_name]
+
+        # The heap-type flag is tested before the *_SUBCLASS flags, so a type
+        # carrying both is handled by HeapTypeObjectPtr
+        if tp_flags & Py_TPFLAGS_HEAPTYPE:
+            return HeapTypeObjectPtr
+
+        # First matching *_SUBCLASS flag wins, in the order listed here
+        if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
+            return PyIntObjectPtr
+        if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
+            return PyLongObjectPtr
+        if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
+            return PyListObjectPtr
+        if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
+            return PyTupleObjectPtr
+        if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
+            return PyStringObjectPtr
+        if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
+            return PyUnicodeObjectPtr
+        if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
+            return PyDictObjectPtr
+        if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
+            return PyBaseExceptionObjectPtr
+        #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
+        #    return PyTypeObjectPtr
+
+        # Use the base class:
+        return cls
+
+    @classmethod
+    def from_pyobject_ptr(cls, gdbval):
+        '''
+        Try to locate the appropriate derived class dynamically, and cast
+        the pointer accordingly.
+
+        The subclass is chosen by subclass_from_type() from the object's
+        type, and gdbval is cast to that subclass's get_gdb_type().  If a
+        RuntimeError is raised along the way, the value is wrapped without
+        a cast instead.
+        '''
+        try:
+            p = PyObjectPtr(gdbval)
+            cls = cls.subclass_from_type(p.type())
+            return cls(gdbval, cast_to=cls.get_gdb_type())
+        except RuntimeError:
+            # Handle any kind of error e.g. NULL ptrs by simply using the base
+            # class
+            pass
+        # NOTE(review): "cls" may already have been rebound to the subclass
+        # picked above (when the RuntimeError came from get_gdb_type() or the
+        # constructor), so this is not necessarily the class the method was
+        # called on -- confirm that this is intended.
+        return cls(gdbval)
+
+    @classmethod
+    def get_gdb_type(cls):
+        '''
+        Return the gdb type for a pointer to the C struct named by this
+        class's _typename attribute.
+        '''
+        struct_type = gdb.lookup_type(cls._typename)
+        return struct_type.pointer()
+
+    def as_address(self):
+        '''Return the wrapped gdb value converted to a Python long'''
+        address = long(self._gdbval)
+        return address
+
+
+class ProxyAlreadyVisited(object):
+    '''
+    Stand-in proxy returned when an object is reached a second time while
+    walking the object graph, so that reference loops cannot cause infinite
+    recursion.
+
+    Plays the same role as the placeholder text produced by code using
+    Py_ReprEnter and Py_ReprLeave: repr() simply gives back the text the
+    instance was created with.
+    '''
+    def __init__(self, rep):
+        # Text returned verbatim by repr(), e.g. '[...]'
+        self._rep = rep
+
+    def __repr__(self):
+        placeholder = self._rep
+        return placeholder
+
+
+def _write_instance_repr(out, visited, name, pyop_attrdict, address):
+    '''Write "<name(attr=value, ...) at remote 0xADDR>" to the file-like
+    object "out"; shared by old-style and new-style class instances.
+
+    The parenthesized attribute list is only written when pyop_attrdict is
+    a PyDictObjectPtr.'''
+    out.write('<')
+    out.write(name)
+
+    if isinstance(pyop_attrdict, PyDictObjectPtr):
+        # Instance attributes, comma-separated:
+        out.write('(')
+        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
+            if index:
+                out.write(', ')
+            out.write(pyop_arg.proxyval(visited))
+            out.write('=')
+            pyop_val.write_repr(out, visited)
+        out.write(')')
+
+    out.write(' at remote 0x%x>' % address)
+
+
+class InstanceProxy(object):
+    '''
+    Local stand-in for a class instance living in the inferior process; its
+    repr() shows the class name, the attributes (when they are available as
+    a dict) and the remote address.
+    '''
+
+    def __init__(self, cl_name, attrdict, address):
+        self.cl_name = cl_name
+        self.attrdict = attrdict
+        self.address = address
+
+    def __repr__(self):
+        if not isinstance(self.attrdict, dict):
+            # No usable attribute dictionary: name and address only
+            return '<%s at remote 0x%x>' % (self.cl_name, self.address)
+
+        pairs = []
+        for arg, val in self.attrdict.iteritems():
+            pairs.append("%s=%r" % (arg, val))
+        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
+                                            ', '.join(pairs),
+                                            self.address)
+
+def _PyObject_VAR_SIZE(typeobj, nitems):
+    '''
+    Compute tp_basicsize + nitems * tp_itemsize for the given type object,
+    adjusted with the (x + a - 1) & ~(a - 1) mask where a is SIZEOF_VOID_P,
+    and return the result cast to size_t.
+    '''
+    align_mask = SIZEOF_VOID_P - 1
+    unaligned = ( typeobj.field('tp_basicsize') +
+                  nitems * typeobj.field('tp_itemsize') +
+                  align_mask )
+    aligned = unaligned & ~align_mask
+    return aligned.cast(gdb.lookup_type('size_t'))
+
+class HeapTypeObjectPtr(PyObjectPtr):
+    # Wrapper selected by PyObjectPtr.subclass_from_type() for objects whose
+    # type has Py_TPFLAGS_HEAPTYPE set; the pointer is handled as a plain
+    # (PyObject*).
+    _typename = 'PyObject'
+
+    def get_attr_dict(self):
+        '''
+        Get the PyDictObject ptr representing the attribute dictionary
+        (or None if there's a problem)
+        '''
+        try:
+            typeobj = self.type()
+            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
+            # A zero tp_dictoffset falls through to "return None" below
+            if dictoffset != 0:
+                if dictoffset < 0:
+                    # A negative offset is made positive by adding the
+                    # object's size, computed from the absolute value of
+                    # its ob_size
+                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
+                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
+                    if tsize < 0:
+                        tsize = -tsize
+                    size = _PyObject_VAR_SIZE(typeobj, tsize)
+                    dictoffset += size
+                    # NOTE(review): an AssertionError raised here is not
+                    # caught by the "except RuntimeError" below
+                    assert dictoffset > 0
+                    assert dictoffset % SIZEOF_VOID_P == 0
+
+                # Byte-wise pointer arithmetic: go through a char pointer,
+                # then reinterpret the slot as a pointer to a PyObject pointer
+                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
+                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
+                dictptr = dictptr.cast(PyObjectPtrPtr)
+                return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
+        except RuntimeError:
+            # Corrupt data somewhere; fail safe
+            pass
+
+        # Not found, or some kind of error:
+        return None
+
+    def proxyval(self, visited):
+        '''
+        Support for new-style classes.
+
+        Currently we just locate the dictionary using a transliteration to
+        python of _PyObject_GetDictPtr, ignoring descriptors
+
+        Returns an InstanceProxy, or a ProxyAlreadyVisited placeholder if
+        this object's address is already in "visited".
+        '''
+        # Guard against infinite loops:
+        if self.as_address() in visited:
+            return ProxyAlreadyVisited('<...>')
+        visited.add(self.as_address())
+
+        pyop_attr_dict = self.get_attr_dict()
+        if pyop_attr_dict:
+            attr_dict = pyop_attr_dict.proxyval(visited)
+        else:
+            # No attribute dictionary could be located: show no attributes
+            attr_dict = {}
+        tp_name = self.safe_tp_name()
+
+        # New-style class:
+        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
+
+    def write_repr(self, out, visited):
+        '''
+        Write the instance representation to "out" via
+        _write_instance_repr(), or '<...>' if this object's address is
+        already in "visited".
+        '''
+        # Guard against infinite loops:
+        if self.as_address() in visited:
+            out.write('<...>')
+            return
+        visited.add(self.as_address())
+
+        pyop_attrdict = self.get_attr_dict()
+        _write_instance_repr(out, visited,
+                             self.safe_tp_name(), pyop_attrdict, self.as_address())
+
+class ProxyException(Exception):
+    '''
+    Local stand-in for an exception instance in the inferior process,
+    holding the remote type name and the proxied args.
+    '''
+    def __init__(self, tp_name, args):
+        self.tp_name = tp_name
+        self.args = args
+
+    def __repr__(self):
+        # Type name immediately followed by the repr of the args
+        fields = (self.tp_name, self.args)
+        return '%s%r' % fields
+
+class PyBaseExceptionObjectPtr(PyObjectPtr):
+    """
+    Wrapper around a gdb.Value holding a PyBaseExceptionObject*, that is, an
+    exception instance inside the process being debugged.
+    """
+    _typename = 'PyBaseExceptionObject'
+
+    def proxyval(self, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            return ProxyAlreadyVisited('(...)')
+        visited.add(address)
+
+        pyop_args = self.pyop_field('args')
+        arg_proxy = pyop_args.proxyval(visited)
+        return ProxyException(self.safe_tp_name(), arg_proxy)
+
+    def write_repr(self, out, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            out.write('(...)')
+            return
+        visited.add(address)
+
+        # Type name followed directly by the repr of the "args" field
+        tp_name = self.safe_tp_name()
+        out.write(tp_name)
+        self.write_field_repr('args', out, visited)
+
+class PyBoolObjectPtr(PyObjectPtr):
+    """
+    Wrapper around a gdb.Value holding a PyBoolObject*, that is, one of the
+    two <bool> instances (Py_True/Py_False) inside the process being
+    debugged.
+    """
+    _typename = 'PyBoolObject'
+
+    def proxyval(self, visited):
+        # Any non-zero ob_ival maps to True, zero to False
+        ob_ival = int_from_int(self.field('ob_ival'))
+        return bool(ob_ival)
+
+
+class PyClassObjectPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
+    instance within the process being debugged.
+    """
+    # Name of the C struct; looked up by get_gdb_type() to build the
+    # pointer type used for casting
+    _typename = 'PyClassObject'
+
+
+class BuiltInFunctionProxy(object):
+    '''Local stand-in for a built-in function of the inferior process'''
+    def __init__(self, ml_name):
+        self.ml_name = ml_name
+
+    def __repr__(self):
+        ml_name = self.ml_name
+        return "<built-in function %s>" % ml_name
+
+class BuiltInMethodProxy(object):
+    '''
+    Local stand-in for a built-in method of the inferior process, bound to
+    the object wrapped by pyop_m_self.
+    '''
+    def __init__(self, ml_name, pyop_m_self):
+        self.ml_name = ml_name
+        self.pyop_m_self = pyop_m_self
+
+    def __repr__(self):
+        owner = self.pyop_m_self
+        fields = (self.ml_name,
+                  owner.safe_tp_name(),
+                  owner.as_address())
+        return '<built-in method %s of %s object at remote 0x%x>' % fields
+
+class PyCFunctionObjectPtr(PyObjectPtr):
+    """
+    Wrapper around a gdb.Value holding a PyCFunctionObject*
+    (see Include/methodobject.h and Objects/methodobject.c)
+    """
+    _typename = 'PyCFunctionObject'
+
+    def proxyval(self, visited):
+        method_def = self.field('m_ml') # a (PyMethodDef*)
+        ml_name = method_def['ml_name'].string()
+
+        # A NULL m_self means a plain function rather than a bound method
+        pyop_m_self = self.pyop_field('m_self')
+        if not pyop_m_self.is_null():
+            return BuiltInMethodProxy(ml_name, pyop_m_self)
+        return BuiltInFunctionProxy(ml_name)
+
+
+class PyCodeObjectPtr(PyObjectPtr):
+    """
+    Wrapper around a gdb.Value holding a PyCodeObject*, that is, a <code>
+    instance inside the process being debugged.
+    """
+    _typename = 'PyCodeObject'
+
+    def addr2line(self, addrq):
+        '''
+        Map the bytecode offset "addrq" to a source line number.
+
+        Counterpart of PyCode_Addr2Line, following the pseudocode in
+        Objects/lnotab_notes.txt
+        '''
+        co_lnotab = self.pyop_field('co_lnotab').proxyval(set())
+
+        # Start from co_firstlineno, as PyCode_Addr2Line does (rather than
+        # from 0, as in the lnotab_notes.txt pseudocode):
+        lineno = int_from_int(self.field('co_firstlineno'))
+
+        # co_lnotab alternates address increments and line increments
+        addr_incrs = co_lnotab[::2]
+        line_incrs = co_lnotab[1::2]
+
+        addr = 0
+        for addr_incr, line_incr in zip(addr_incrs, line_incrs):
+            addr += ord(addr_incr)
+            if addr > addrq:
+                break
+            lineno += ord(line_incr)
+        return lineno
+
+
+class PyDictObjectPtr(PyObjectPtr):
+    """
+    Wrapper around a gdb.Value holding a PyDictObject*, that is, a dict
+    instance inside the process being debugged.
+    """
+    _typename = 'PyDictObject'
+
+    def iteritems(self):
+        '''
+        Generate (PyObjectPtr key, PyObjectPtr value) pairs, in the manner
+        of dict.iteritems(); table slots whose value is NULL are skipped.
+        '''
+        for i in safe_range(self.field('ma_mask') + 1):
+            ep = self.field('ma_table') + i
+            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
+            if pyop_value.is_null():
+                continue
+            pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
+            yield (pyop_key, pyop_value)
+
+    def proxyval(self, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            return ProxyAlreadyVisited('{...}')
+        visited.add(address)
+
+        proxies = {}
+        for pyop_key, pyop_value in self.iteritems():
+            # The key is proxied before the value
+            key = pyop_key.proxyval(visited)
+            value = pyop_value.proxyval(visited)
+            proxies[key] = value
+        return proxies
+
+    def write_repr(self, out, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            out.write('{...}')
+            return
+        visited.add(address)
+
+        out.write('{')
+        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
+            if index:
+                out.write(', ')
+            pyop_key.write_repr(out, visited)
+            out.write(': ')
+            pyop_value.write_repr(out, visited)
+        out.write('}')
+
+class PyInstanceObjectPtr(PyObjectPtr):
+    '''
+    Wrapper around a gdb.Value holding a PyInstanceObject*, that is, an
+    instance of an old-style class.
+    '''
+    _typename = 'PyInstanceObject'
+
+    def proxyval(self, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            return ProxyAlreadyVisited('<...>')
+        visited.add(address)
+
+        # Class name first, then the instance attribute dictionary:
+        pyop_class = self.pyop_field('in_class')
+        cl_name = pyop_class.pyop_field('cl_name').proxyval(visited)
+        in_dict = self.pyop_field('in_dict').proxyval(visited)
+
+        # Old-style class:
+        return InstanceProxy(cl_name, in_dict, long(self._gdbval))
+
+    def write_repr(self, out, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            out.write('<...>')
+            return
+        visited.add(address)
+
+        # Old-style class: class name, then the (unproxied) dictionary of
+        # instance attributes
+        pyop_class = self.pyop_field('in_class')
+        cl_name = pyop_class.pyop_field('cl_name').proxyval(visited)
+        pyop_in_dict = self.pyop_field('in_dict')
+
+        _write_instance_repr(out, visited,
+                             cl_name, pyop_in_dict, self.as_address())
+
+class PyIntObjectPtr(PyObjectPtr):
+    '''Wrapper around a gdb.Value holding a PyIntObject*'''
+    _typename = 'PyIntObject'
+
+    def proxyval(self, visited):
+        # The proxy is simply the integer stored in ob_ival
+        return int_from_int(self.field('ob_ival'))
+
+class PyListObjectPtr(PyObjectPtr):
+    '''Wrapper around a gdb.Value holding a PyListObject*'''
+    _typename = 'PyListObject'
+
+    def __getitem__(self, i):
+        # gdb.Value for the (PyObject*) stored at index i of ob_item
+        return self.field('ob_item')[i]
+
+    def proxyval(self, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            return ProxyAlreadyVisited('[...]')
+        visited.add(address)
+
+        proxies = []
+        for i in safe_range(int_from_int(self.field('ob_size'))):
+            pyop_item = PyObjectPtr.from_pyobject_ptr(self[i])
+            proxies.append(pyop_item.proxyval(visited))
+        return proxies
+
+    def write_repr(self, out, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            out.write('[...]')
+            return
+        visited.add(address)
+
+        out.write('[')
+        for i in safe_range(int_from_int(self.field('ob_size'))):
+            if i > 0:
+                out.write(', ')
+            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
+        out.write(']')
+
+class PyLongObjectPtr(PyObjectPtr):
+    '''Wrapper around a gdb.Value holding a PyLongObject*'''
+    _typename = 'PyLongObject'
+
+    def proxyval(self, visited):
+        '''
+        Rebuild the long value from its digits.
+
+        Python's Include/longobjrep.h declares:
+           struct _longobject {
+               PyObject_VAR_HEAD
+               digit ob_digit[1];
+           };
+
+        and describes it as follows:
+            The absolute value of a number is equal to
+                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
+            Negative numbers are represented with ob_size < 0;
+            zero is represented by ob_size == 0.
+
+        where SHIFT is one of:
+            #define PyLong_SHIFT        30
+            #define PyLong_SHIFT        15
+        '''
+        ob_size = long(self.field('ob_size'))
+        if ob_size == 0:
+            return 0L
+
+        ob_digit = self.field('ob_digit')
+
+        # A two-byte "digit" type means 15-bit digits, otherwise 30-bit
+        if gdb.lookup_type('digit').sizeof == 2:
+            SHIFT = 15L
+        else:
+            SHIFT = 30L
+
+        magnitude = 0
+        for i in safe_range(abs(ob_size)):
+            magnitude += long(ob_digit[i]) * 2**(SHIFT*i)
+
+        if ob_size < 0:
+            return -magnitude
+        return magnitude
+
+
+class PyNoneStructPtr(PyObjectPtr):
+    """
+    Class wrapping a gdb.Value that's a PyObject* pointing to the
+    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
+    """
+    _typename = 'PyObject'
+
+    def proxyval(self, visited):
+        # The local proxy for the inferior's None is None itself; "visited"
+        # is not consulted
+        return None
+
+
+class PyFrameObjectPtr(PyObjectPtr):
+    '''
+    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a Python frame
+    within the process being debugged.
+
+    Unless the frame is optimized out, the constructor caches the code
+    object and a few frequently used fields as attributes (co, co_name,
+    co_filename, f_lineno, f_lasti, co_nlocals, co_varnames).
+    '''
+    _typename = 'PyFrameObject'
+
+    def __init__(self, gdbval, cast_to=None):
+        # cast_to is optional so that the class can be instantiated with a
+        # single argument, as the fallback path of
+        # PyObjectPtr.from_pyobject_ptr() does with cls(gdbval); previously
+        # that call raised a TypeError for frames.
+        # NOTE(review): assumes PyObjectPtr.__init__ treats None as "no
+        # cast" -- confirm against the base class.
+        PyObjectPtr.__init__(self, gdbval, cast_to)
+
+        if not self.is_optimized_out():
+            self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
+            self.co_name = self.co.pyop_field('co_name')
+            self.co_filename = self.co.pyop_field('co_filename')
+
+            self.f_lineno = int_from_int(self.field('f_lineno'))
+            self.f_lasti = int_from_int(self.field('f_lasti'))
+            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
+            self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
+
+    def iter_locals(self):
+        '''
+        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
+        the local variables of this frame
+
+        Slots of f_localsplus holding NULL are skipped; nothing is yielded
+        for an optimized-out frame.
+        '''
+        if self.is_optimized_out():
+            return
+
+        f_localsplus = self.field('f_localsplus')
+        for i in safe_range(self.co_nlocals):
+            pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
+            if not pyop_value.is_null():
+                pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
+                yield (pyop_name, pyop_value)
+
+    def iter_globals(self):
+        '''
+        Return an iterable of (name,value) pairs of PyObjectPtr instances,
+        for the global variables of this frame
+        '''
+        if self.is_optimized_out():
+            # This is not a generator, so a bare "return" would hand None to
+            # callers that iterate over the result (e.g. get_var_by_name);
+            # return an empty iterable instead
+            return ()
+
+        pyop_globals = self.pyop_field('f_globals')
+        return pyop_globals.iteritems()
+
+    def iter_builtins(self):
+        '''
+        Return an iterable of (name,value) pairs of PyObjectPtr instances,
+        for the builtin variables
+        '''
+        if self.is_optimized_out():
+            # Empty iterable rather than None; see iter_globals
+            return ()
+
+        pyop_builtins = self.pyop_field('f_builtins')
+        return pyop_builtins.iteritems()
+
+    def get_var_by_name(self, name):
+        '''
+        Look for the named local variable, returning a (PyObjectPtr, scope) pair
+        where scope is a string 'local', 'global', 'builtin'
+
+        Scopes are searched in that order.  If not found, return (None, None)
+        '''
+        for pyop_name, pyop_value in self.iter_locals():
+            if name == pyop_name.proxyval(set()):
+                return pyop_value, 'local'
+        for pyop_name, pyop_value in self.iter_globals():
+            if name == pyop_name.proxyval(set()):
+                return pyop_value, 'global'
+        for pyop_name, pyop_value in self.iter_builtins():
+            if name == pyop_name.proxyval(set()):
+                return pyop_value, 'builtin'
+        return None, None
+
+    def filename(self):
+        '''Get the path of the current Python source file, as a string'''
+        if self.is_optimized_out():
+            return '(frame information optimized out)'
+        return self.co_filename.proxyval(set())
+
+    def current_line_num(self):
+        '''Get current line number as an integer (1-based), or None if the
+        frame is optimized out
+
+        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line
+
+        See Objects/lnotab_notes.txt
+        '''
+        if self.is_optimized_out():
+            return None
+        f_trace = self.field('f_trace')
+        if long(f_trace) != 0:
+            # we have a non-NULL f_trace:
+            return self.f_lineno
+        else:
+            #try:
+            return self.co.addr2line(self.f_lasti)
+            #except ValueError:
+            #    return self.f_lineno
+
+    def current_line(self):
+        '''Get the text of the current source line as a string, with a trailing
+        newline character'''
+        if self.is_optimized_out():
+            return '(frame information optimized out)'
+        with open(self.filename(), 'r') as f:
+            all_lines = f.readlines()
+            # Convert from 1-based current_line_num to 0-based list offset:
+            return all_lines[self.current_line_num()-1]
+
+    def write_repr(self, out, visited):
+        '''
+        Write a one-line description of the frame to "out": address, file,
+        line, function name and the local variables as name=value pairs.
+        '''
+        if self.is_optimized_out():
+            out.write('(frame information optimized out)')
+            return
+        out.write('Frame 0x%x, for file %s, line %i, in %s ('
+                  % (self.as_address(),
+                     self.co_filename,
+                     self.current_line_num(),
+                     self.co_name))
+        first = True
+        for pyop_name, pyop_value in self.iter_locals():
+            if not first:
+                out.write(', ')
+            first = False
+
+            out.write(pyop_name.proxyval(visited))
+            out.write('=')
+            pyop_value.write_repr(out, visited)
+
+        out.write(')')
+
+class PySetObjectPtr(PyObjectPtr):
+    '''
+    Class wrapping a gdb.Value that's a PySetObject* i.e. a set or frozenset
+    instance within the process being debugged (subclass_from_type() maps
+    both type names to this class).
+    '''
+    _typename = 'PySetObject'
+
+    def proxyval(self, visited):
+        '''
+        Return a local set (or frozenset, depending on the remote type name)
+        of the proxied members, or a ProxyAlreadyVisited placeholder if this
+        object's address is already in "visited".
+        '''
+        # Guard against infinite loops:
+        if self.as_address() in visited:
+            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
+        visited.add(self.as_address())
+
+        members = []
+        table = self.field('table')
+        for i in safe_range(self.field('mask')+1):
+            setentry = table[i]
+            key = setentry['key']
+            # Skip empty slots (NULL key) and slots holding the dummy key
+            if key != 0:
+                key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
+                if key_proxy != '<dummy key>':
+                    members.append(key_proxy)
+        if self.safe_tp_name() == 'frozenset':
+            return frozenset(members)
+        else:
+            return set(members)
+
+    def write_repr(self, out, visited):
+        '''
+        Write "typename([member, ...])" to the file-like object "out", or
+        "typename(...)" if this object's address is already in "visited".
+        '''
+        out.write(self.safe_tp_name())
+
+        # Guard against infinite loops:
+        if self.as_address() in visited:
+            out.write('(...)')
+            return
+        visited.add(self.as_address())
+
+        out.write('([')
+        first = True
+        table = self.field('table')
+        for i in safe_range(self.field('mask')+1):
+            setentry = table[i]
+            key = setentry['key']
+            if key != 0:
+                pyop_key = PyObjectPtr.from_pyobject_ptr(key)
+                # The proxy is only needed to recognize the dummy key.
+                # Probe with a copy of "visited": proxying a container key
+                # (e.g. a tuple) records its address, and if that landed in
+                # the shared set the write_repr() call below would take the
+                # key for a loop and print it as '(...)'.
+                key_proxy = pyop_key.proxyval(set(visited))
+                if key_proxy != '<dummy key>':
+                    if not first:
+                        out.write(', ')
+                    first = False
+                    pyop_key.write_repr(out, visited)
+        out.write('])')
+
+
+class PyStringObjectPtr(PyObjectPtr):
+    '''Wrapper around a gdb.Value holding a PyStringObject*'''
+    _typename = 'PyStringObject'
+
+    def __str__(self):
+        length = self.field('ob_size')
+        sval = self.field('ob_sval')
+        # Read the buffer one unsigned char at a time
+        bytes_ptr = sval.address.cast(_type_unsigned_char_ptr)
+        chars = []
+        for i in safe_range(length):
+            chars.append(chr(bytes_ptr[i]))
+        return ''.join(chars)
+
+    def proxyval(self, visited):
+        # The proxy is the local copy of the string contents
+        return str(self)
+
+class PyTupleObjectPtr(PyObjectPtr):
+    '''Wrapper around a gdb.Value holding a PyTupleObject*'''
+    _typename = 'PyTupleObject'
+
+    def __getitem__(self, i):
+        # gdb.Value for the (PyObject*) stored at index i of ob_item
+        return self.field('ob_item')[i]
+
+    def proxyval(self, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            return ProxyAlreadyVisited('(...)')
+        visited.add(address)
+
+        proxies = []
+        for i in safe_range(int_from_int(self.field('ob_size'))):
+            pyop_item = PyObjectPtr.from_pyobject_ptr(self[i])
+            proxies.append(pyop_item.proxyval(visited))
+        return tuple(proxies)
+
+    def write_repr(self, out, visited):
+        address = self.as_address()
+        if address in visited:
+            # Already seen: break the reference loop
+            out.write('(...)')
+            return
+        visited.add(address)
+
+        out.write('(')
+        for i in safe_range(int_from_int(self.field('ob_size'))):
+            if i > 0:
+                out.write(', ')
+            PyObjectPtr.from_pyobject_ptr(self[i]).write_repr(out, visited)
+
+        # A one-element tuple gets a trailing comma
+        if self.field('ob_size') == 1:
+            closing = ',)'
+        else:
+            closing = ')'
+        out.write(closing)
+
+class PyTypeObjectPtr(PyObjectPtr):
+    # Wrapper for a (PyTypeObject*); adds nothing to PyObjectPtr beyond the
+    # struct name used by get_gdb_type()
+    _typename = 'PyTypeObject'
+
+
+class PyUnicodeObjectPtr(PyObjectPtr):
+    # Wrapper for a (PyUnicodeObject*), i.e. a unicode instance within the
+    # process being debugged
+    _typename = 'PyUnicodeObject'
+
+    def char_width(self):
+        '''Return the size of the inferior's Py_UNICODE type, as reported
+        by gdb'''
+        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+        return _type_Py_UNICODE.sizeof
+
+    def proxyval(self, visited):
+        '''Return a local unicode instance built from the "length" code
+        units found in the "str" buffer; "visited" is not consulted'''
+        # From unicodeobject.h:
+        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
+        #     Py_UNICODE *str;    /* Raw Unicode buffer */
+        field_length = long(self.field('length'))
+        field_str = self.field('str')
+
+        # Gather a list of ints from the Py_UNICODE array; these are either
+        # UCS-2 or UCS-4 code points:
+        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+
+        # Convert the int code points to unicode characters, and generate a
+        # local unicode instance:
+        result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
+        return result
+
+    def write_repr(self, out, visited):
+        '''Write the repr() of the proxied unicode value to "out"; when
+        Py_UNICODE is two bytes wide, surrogate pairs are first combined
+        into single code points'''
+        proxy = self.proxyval(visited)
+        if self.char_width() == 2:
+            # sizeof(Py_UNICODE)==2: join surrogates
+            proxy2 = []
+            i = 0
+            while i < len(proxy):
+                ch = proxy[i]
+                i += 1
+                # "i" now indexes the character following "ch": look for a
+                # high surrogate followed by a low surrogate
+                if (i < len(proxy)
+                and 0xD800 <= ord(ch) < 0xDC00 \
+                and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+                    # Get code point from surrogate pair
+                    ch2 = proxy[i]
+                    code = (ord(ch) & 0x03FF) << 10
+                    code |= ord(ch2) & 0x03FF
+                    code += 0x00010000
+                    # Consume the low surrogate as well
+                    i += 1
+                    proxy2.append(unichr(code))
+                else:
+                    proxy2.append(ch)
+            proxy = u''.join(proxy2)
+        out.write(repr(proxy))
+
+
+def int_from_int(gdbval):
+    '''Convert a gdb value to a Python int by parsing its string form'''
+    as_text = str(gdbval)
+    return int(as_text)
+
+
+def stringify(val):
+    '''Return the string form of "val" used for display'''
+    # TODO: repr() keeps everything on a single line; pformat can look nicer
+    # but may give very long results.  The choice is isolated in this
+    # function, and repr() is the one currently selected.
+    use_repr = True
+    if not use_repr:
+        from pprint import pformat
+        return pformat(val)
+    return repr(val)
+
+
+class PyObjectPtrPrinter:
+    "Prints a (PyObject*)"
+
+    def __init__ (self, gdbval):
+        # The gdb value to print; handed out by pretty_printer_lookup()
+        self.gdbval = gdbval
+
+    def to_string (self):
+        # Wrap the value in the most specific PyObjectPtr subclass and return
+        # its repr, truncated to MAX_OUTPUT_LEN
+        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
+        if True:
+            return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
+        else:
+            # Currently disabled alternative (the branch above always runs):
+            # Generate full proxy value then stringify it.
+            # Doing so could be expensive
+            proxyval = pyop.proxyval(set())
+            return stringify(proxyval)
+
+def pretty_printer_lookup(gdbval):
+    '''
+    Return a PyObjectPtrPrinter for values whose type is a pointer to
+    PyObject or PyFrameObject; return None for anything else.
+    '''
+    val_type = gdbval.type.unqualified()
+    if val_type.code != gdb.TYPE_CODE_PTR:
+        return None
+
+    target_name = str(val_type.target().unqualified())
+    if target_name in ("PyObject", "PyFrameObject"):
+        return PyObjectPtrPrinter(gdbval)
+
+"""
+During development, I've been manually invoking the code in this way:
+(gdb) python
+
+import sys
+sys.path.append('/home/david/coding/python-gdb')
+import libpython
+end
+
+then reloading it after each edit like this:
+(gdb) python reload(libpython)
+
+The following code should ensure that the prettyprinter is registered
+if the code is autoloaded by gdb when visiting libpython.so, provided
+that this python file is installed to the same path as the library (or its
+.debug file) plus a "-gdb.py" suffix, e.g:
+  /usr/lib/libpython2.6.so.1.0-gdb.py
+  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
+"""
+def register (obj):
+    '''
+    Append pretty_printer_lookup to the pretty_printers list of "obj", or
+    of the gdb module itself when "obj" is None.
+    '''
+    # Identity test: None is a singleton, and "==" would defer to whatever
+    # comparison the object passed in happens to define
+    if obj is None:
+        obj = gdb
+
+    # Wire up the pretty-printer
+    obj.pretty_printers.append(pretty_printer_lookup)
+
+# Register at load time against the objfile gdb is currently handling
+register (gdb.current_objfile ())
+
+
+
+# Unfortunately, the exact API exposed by the gdb module varies somewhat
+# from build to build
+# See http://bugs.python.org/issue8279?#msg102276
+
+class Frame(object):
+    '''
+    Wrapper for gdb.Frame, adding various methods
+    '''
+    def __init__(self, gdbframe):
+        self._gdbframe = gdbframe
+
+    def older(self):
+        '''Return the Frame wrapping the next older gdb frame, or None'''
+        older = self._gdbframe.older()
+        if older:
+            return Frame(older)
+        else:
+            return None
+
+    def newer(self):
+        '''Return the Frame wrapping the next newer gdb frame, or None'''
+        newer = self._gdbframe.newer()
+        if newer:
+            return Frame(newer)
+        else:
+            return None
+
+    def select(self):
+        '''If supported, select this frame and return True; return False if unsupported
+
+        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
+        onwards, but absent on Ubuntu buildbot'''
+        if not hasattr(self._gdbframe, 'select'):
+            print ('Unable to select frame: '
+                   'this build of gdb does not expose a gdb.Frame.select method')
+            return False
+        self._gdbframe.select()
+        return True
+
+    def get_index(self):
+        '''Calculate index of frame, starting at 0 for the newest frame within
+        this thread'''
+        index = 0
+        # Count the frames newer than this one, asking gdb for each of them
+        # only once:
+        newer_frame = self.newer()
+        while newer_frame:
+            index += 1
+            newer_frame = newer_frame.newer()
+        return index
+
+    def is_evalframeex(self):
+        '''Is this a PyEval_EvalFrameEx frame?'''
+        if self._gdbframe.name() == 'PyEval_EvalFrameEx':
+            # I believe we also need to filter on the inline
+            # struct frame_id.inline_depth, only regarding frames with
+            # an inline depth of 0 as actually being this function
+            #
+            # So we reject those with type gdb.INLINE_FRAME
+            if self._gdbframe.type() == gdb.NORMAL_FRAME:
+                # We have a PyEval_EvalFrameEx frame:
+                return True
+
+        return False
+
+    def get_pyop(self):
+        '''Return a PyFrameObjectPtr for the variable "f" of this frame, or
+        None if reading it raises a ValueError'''
+        try:
+            f = self._gdbframe.read_var('f')
+            return PyFrameObjectPtr.from_pyobject_ptr(f)
+        except ValueError:
+            return None
+
+    @classmethod
+    def get_selected_frame(cls):
+        '''Return a Frame wrapping gdb's selected frame, or None'''
+        _gdbframe = gdb.selected_frame()
+        if _gdbframe:
+            return Frame(_gdbframe)
+        return None
+
+    @classmethod
+    def get_selected_python_frame(cls):
+        '''Try to obtain the Frame for the python code in the selected frame,
+        or None'''
+        frame = cls.get_selected_frame()
+
+        # Walk towards older frames until a PyEval_EvalFrameEx frame is found
+        while frame:
+            if frame.is_evalframeex():
+                return frame
+            frame = frame.older()
+
+        # Not found:
+        return None
+
+    def print_summary(self):
+        '''Write a short description of this frame to sys.stdout: its index
+        and, for PyEval_EvalFrameEx frames whose python frame can be read,
+        the truncated frame repr followed by the current source line'''
+        if self.is_evalframeex():
+            pyop = self.get_pyop()
+            if pyop:
+                sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
+                sys.stdout.write(pyop.current_line())
+            else:
+                sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
+        else:
+            sys.stdout.write('#%i\n' % self.get_index())
+
+class PyList(gdb.Command):
+    '''List the current Python source code, if any
+
+    Use
+       py-list START
+    to list at a different line number within the python source.
+
+    Use
+       py-list START, END
+    to list a specific range of lines within the python source.
+    '''
+
+    def __init__(self):
+        gdb.Command.__init__ (self,
+                              "py-list",
+                              gdb.COMMAND_FILES,
+                              gdb.COMPLETE_NONE)
+
+
+    def invoke(self, args, from_tty):
+        '''Print the requested source lines of the selected python frame to
+        sys.stdout, marking the current line with ">".
+
+        Without arguments, the lines from 5 before to 5 after the current
+        line are listed; with only START, the lines START to START + 10.
+        '''
+        import re
+
+        start = None
+        end = None
+
+        m = re.match(r'\s*(\d+)\s*', args)
+        if m:
+            # Use the captured digits rather than the whole match, which
+            # also includes the surrounding whitespace
+            start = int(m.group(1))
+            end = start + 10
+
+        # The "START, END" form overrides the single-number match above
+        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
+        if m:
+            start, end = map(int, m.groups())
+
+        frame = Frame.get_selected_python_frame()
+        if not frame:
+            print 'Unable to locate python frame'
+            return
+
+        pyop = frame.get_pyop()
+        # An optimized-out frame has no usable filename, and its
+        # current_line_num() is None, which would make the arithmetic below
+        # fail with a TypeError
+        if not pyop or pyop.is_optimized_out():
+            print 'Unable to read information on python frame'
+            return
+
+        filename = pyop.filename()
+        lineno = pyop.current_line_num()
+
+        if start is None:
+            start = lineno - 5
+            end = lineno + 5
+
+        if start<1:
+            start = 1
+
+        with open(filename, 'r') as f:
+            all_lines = f.readlines()
+            # start and end are 1-based, all_lines is 0-based;
+            # so [start-1:end] as a python slice gives us [start, end] as a
+            # closed interval
+            for i, line in enumerate(all_lines[start-1:end]):
+                linestr = str(i+start)
+                # Highlight current line:
+                if i + start == lineno:
+                    linestr = '>' + linestr
+                sys.stdout.write('%4s    %s' % (linestr, line))
+
+
+# ...and register the command:
+PyList()
+
+def move_in_stack(move_up):
+    '''Select the neighbouring python frame (for py-up/py-down).
+
+    Starting from the selected python frame, step towards older frames when
+    move_up is true and towards newer frames otherwise.  The first
+    PyEval_EvalFrameEx frame found is selected and, if selecting it
+    succeeded, its summary is printed.  If no such frame exists a message
+    is printed instead.
+    '''
+    current = Frame.get_selected_python_frame()
+    while current:
+        neighbour = current.older() if move_up else current.newer()
+        if not neighbour:
+            # Reached the end of the stack in this direction
+            break
+
+        if neighbour.is_evalframeex():
+            # Found the target frame; only summarize it if gdb selected it
+            if neighbour.select():
+                neighbour.print_summary()
+            return
+
+        current = neighbour
+
+    if move_up:
+        message = 'Unable to find an older python frame'
+    else:
+        message = 'Unable to find a newer python frame'
+    print message
+
+class PyUp(gdb.Command):
+    'Select and print the python stack frame that called this one (if any)'
+    # The docstring above is what gdb shows as help text for "py-up".
+
+    def __init__(self):
+        # Register as "py-up" among gdb's stack commands, without completion.
+        gdb.Command.__init__(self, "py-up",
+                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
+
+    def invoke(self, args, from_tty):
+        # Both arguments are ignored; the command always moves one python
+        # frame towards the caller.
+        move_in_stack(move_up=True)
+
+class PyDown(gdb.Command):
+    'Select and print the python stack frame called by this one (if any)'
+    # The docstring above is what gdb shows as help text for "py-down".
+
+    def __init__(self):
+        # Register as "py-down" among gdb's stack commands, without
+        # completion.
+        gdb.Command.__init__(self, "py-down",
+                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
+
+    def invoke(self, args, from_tty):
+        # Both arguments are ignored; the command always moves one python
+        # frame towards the callee.
+        move_in_stack(move_up=False)
+
+# Not all builds of gdb have gdb.Frame.select.  py-up and py-down need to
+# select frames, so they are only registered when it is available.
+if hasattr(gdb.Frame, 'select'):
+    PyUp()
+    PyDown()
+
+class PyBacktrace(gdb.Command):
+    'Display the current python frame and all the frames within its call stack (if any)'
+    # The docstring above is what gdb shows as help text for "py-bt".
+
+    def __init__(self):
+        # Register as "py-bt" among gdb's stack commands, without completion.
+        gdb.Command.__init__(self, "py-bt",
+                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)
+
+    def invoke(self, args, from_tty):
+        # Walk from the selected python frame towards older frames and
+        # summarize every PyEval_EvalFrameEx frame on the way; frames of
+        # any other function are skipped silently.  Both arguments are
+        # ignored.
+        current = Frame.get_selected_python_frame()
+        while current:
+            if current.is_evalframeex():
+                current.print_summary()
+            current = current.older()
+
+# Instantiating the command registers it with gdb:
+PyBacktrace()
+
+class PyPrint(gdb.Command):
+    'Look up the given python variable name, and print it'
+    # The docstring above is what gdb shows as help text for "py-print".
+
+    def __init__(self):
+        # Register as "py-print" among gdb's data commands, without
+        # completion.
+        gdb.Command.__init__(self, "py-print",
+                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
+
+    def invoke(self, args, from_tty):
+        # The whole argument string is taken as the variable name;
+        # from_tty is ignored.
+        name = str(args)
+
+        frame = Frame.get_selected_python_frame()
+        if not frame:
+            print 'Unable to locate python frame'
+            return
+
+        frame_object = frame.get_pyop()
+        if not frame_object:
+            print 'Unable to read information on python frame'
+            return
+
+        # get_var_by_name returns the variable together with the name of
+        # the scope it was found in; a false variable means "not found".
+        variable, scope = frame_object.get_var_by_name(name)
+        if not variable:
+            print '%r not found' % name
+            return
+
+        value_repr = variable.get_truncated_repr(MAX_OUTPUT_LEN)
+        print ('%s %r = %s' % (scope, name, value_repr))
+
+# Instantiating the command registers it with gdb:
+PyPrint()
+
+class PyLocals(gdb.Command):
+    'Print the local variables of the selected python frame'
+    # The docstring above is what gdb shows as help text for "py-locals",
+    # so it has to describe this command (it used to be a copy of the
+    # py-print help text).
+
+    def __init__(self):
+        # Register as "py-locals" among gdb's data commands, without
+        # completion.
+        gdb.Command.__init__ (self,
+                              "py-locals",
+                              gdb.COMMAND_DATA,
+                              gdb.COMPLETE_NONE)
+
+
+    def invoke(self, args, from_tty):
+        '''Print "name = value" for every local of the selected python frame.
+
+        Both args and from_tty are ignored.  A message is printed, and
+        nothing else, when no python frame can be located or read.
+        '''
+        frame = Frame.get_selected_python_frame()
+        if not frame:
+            print 'Unable to locate python frame'
+            return
+
+        pyop_frame = frame.get_pyop()
+        if not pyop_frame:
+            print 'Unable to read information on python frame'
+            return
+
+        for pyop_name, pyop_value in pyop_frame.iter_locals():
+            # The name is converted to a plain python value; the value is
+            # shown as a repr truncated to MAX_OUTPUT_LEN.
+            print ('%s = %s'
+                   % (pyop_name.proxyval(set()),
+                      pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))
+
+PyLocals()
\ No newline at end of file
index 397f0972f081bcfdbc8025376fdd9db04f14830e..98416ff6194d5de3503b01672ebf44b941f05e5b 100644 (file)
@@ -46,10 +46,13 @@ class build_ext(_build_ext.build_ext):
             "generate .pxi file for public declarations"),
         ('pyrex-directives=', None,
             "compiler directive overrides"),
+        ('pyrex-debug', None,
+         "generate debug information for cygdb"),
         ])
 
     boolean_options.extend([
-        'pyrex-cplus', 'pyrex-create-listing', 'pyrex-line-directives', 'pyrex-c-in-temp'
+        'pyrex-cplus', 'pyrex-create-listing', 'pyrex-line-directives', 
+        'pyrex-c-in-temp', 'pyrex-debug',
     ])
 
     def initialize_options(self):
@@ -61,6 +64,7 @@ class build_ext(_build_ext.build_ext):
         self.pyrex_directives = None
         self.pyrex_c_in_temp = 0
         self.pyrex_gen_pxi = 0
+        self.pyrex_debug = False
 
     def finalize_options (self):
         _build_ext.build_ext.finalize_options(self)
@@ -127,7 +131,7 @@ class build_ext(_build_ext.build_ext):
         cplus = self.pyrex_cplus or getattr(extension, 'pyrex_cplus', 0) or \
                 (extension.language and extension.language.lower() == 'c++')
         pyrex_gen_pxi = self.pyrex_gen_pxi or getattr(extension, 'pyrex_gen_pxi', 0)
-
+        pyrex_debug = self.pyrex_debug or getattr(extension, 'pyrex_debug', False)
         # Set up the include_path for the Cython compiler:
         #    1.    Start with the command line option.
         #    2.    Add in any (unique) paths from the extension
@@ -207,7 +211,8 @@ class build_ext(_build_ext.build_ext):
                     output_file = target,
                     cplus = cplus,
                     emit_linenums = line_directives,
-                    generate_pxi = pyrex_gen_pxi)
+                    generate_pxi = pyrex_gen_pxi,
+                    debug = pyrex_debug)
                 result = cython_compile(source, options=options,
                                         full_module_name=module_name)
             else:
index 23b54698af636ac5848a8fdc5da0cfa92749a745..fca822ccc988ded227d642089ccd991ed12806ba 100644 (file)
@@ -31,6 +31,8 @@ class Extension(_Extension.Extension):
         put generated C files in temp directory.
     pyrex_gen_pxi : boolean
         generate .pxi file for public declarations
+    pyrex_debug : boolean
+        generate Cython debug information for this extension for cygdb
     """
 
     # When adding arguments to this constructor, be sure to update
@@ -56,6 +58,7 @@ class Extension(_Extension.Extension):
             pyrex_cplus = 0,
             pyrex_c_in_temp = 0,
             pyrex_gen_pxi = 0,
+            pyrex_debug = False,
             **kw):
 
         _Extension.Extension.__init__(self, name, sources,
@@ -81,6 +84,7 @@ class Extension(_Extension.Extension):
         self.pyrex_cplus = pyrex_cplus
         self.pyrex_c_in_temp = pyrex_c_in_temp
         self.pyrex_gen_pxi = pyrex_gen_pxi
+        self.pyrex_debug = pyrex_debug
 
 # class Extension
 
diff --git a/bin/cygdb b/bin/cygdb
new file mode 100755 (executable)
index 0000000..227d554
--- /dev/null
+++ b/bin/cygdb
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+"""
+The Cython debugger
+
+The current directory should contain a directory named 'cython_debug', or a
+path to the cython project directory should be given (the parent directory of
+cython_debug).
+"""
+
+import os
+import sys
+import glob
+import tempfile
+import subprocess
+
+def main(import_libpython=False, path_to_debug_info=os.curdir):
+    """
+    Start the Cython debugger. This tells gdb to import the Cython and Python
+    extensions (libpython.py and libcython.py) and it enables gdb's pending
+    breakpoints.
+
+    import_libpython: if true, gdb runs a plain "import libpython" instead
+        of importing the copy shipped in Cython.Debugger.
+    path_to_debug_info: directory that contains the 'cython_debug'
+        directory (defaults to the current directory).
+
+    Exits through sys.exit() with an error message when no debug files are
+    found.  The temporary gdb command file is removed before returning,
+    even if gdb could not be started.
+    """
+    debug_files = glob.glob(
+        os.path.join(path_to_debug_info, 'cython_debug',
+                     'cython_debug_info_*'))
+
+    if not debug_files:
+        sys.exit('No debug files were found in %s. Aborting.'
+                 % os.path.abspath(path_to_debug_info))
+
+    fd, tempfilename = tempfile.mkstemp()
+    try:
+        f = os.fdopen(fd, 'w')
+        try:
+            # Every gdb command must end in a newline, otherwise the next
+            # command ends up on the same line.
+            f.write('set breakpoint pending on\n')
+            f.write('python from Cython.Debugger import libcython\n')
+            if import_libpython:
+                f.write('python import libpython\n')
+            else:
+                f.write('python from Cython.Debugger import libpython\n')
+            # One "cy import" command per debug file
+            f.write(''.join('cy import %s\n' % fn for fn in debug_files))
+        finally:
+            f.close()
+
+        p = subprocess.Popen(['gdb', '-command', tempfilename])
+        while True:
+            try:
+                p.wait()
+            except KeyboardInterrupt:
+                # presumably the interrupt is meant for the gdb session;
+                # keep waiting for gdb itself to exit
+                pass
+            else:
+                break
+    finally:
+        os.remove(tempfilename)
+
+
+# Only launch the debugger when run as a script, not when imported.
+if __name__ == '__main__':
+    main()
\ No newline at end of file