More buffer typestring; fixes #285

author Dag Sverre Seljebotn <dagss@student.matnat.uio.no>

Wed, 6 May 2009 17:20:00 +0000 (19:20 +0200)

committer Dag Sverre Seljebotn <dagss@student.matnat.uio.no>

Wed, 6 May 2009 17:20:00 +0000 (19:20 +0200)
author Dag Sverre Seljebotn <dagss@student.matnat.uio.no>
Wed, 6 May 2009 17:20:00 +0000 (19:20 +0200)
committer Dag Sverre Seljebotn <dagss@student.matnat.uio.no>
Wed, 6 May 2009 17:20:00 +0000 (19:20 +0200)
diff --git a/Cython/Compiler/Buffer.py b/Cython/Compiler/Buffer.py

index f9cfb06fca3c5be8a7fc5d0beb47c87561635a8b..7724fadef01101d4ddff098cb366f8e3d8f84833 100644 (file)
--- a/Cython/Compiler/Buffer.py
+++ b/Cython/Compiler/Buffer.py
@@ -1002,6 +1002,14 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) {
    return 0;    
  }
  
+/* Gate for pack-mode characters in a buffer format string.
+ * Returns 0 while ctx->enc_type is still 0 and ctx->packmode is still '@';
+ * in every other state it sets a ValueError and returns -1. */
+static int __Pyx_BufFmt_FirstPack(__Pyx_BufFmt_Context* ctx) {
+  if (ctx->enc_type == 0 && ctx->packmode == '@') {
+    return 0;
+  }
+  PyErr_SetString(PyExc_ValueError, "Buffer packing mode currently only allowed at beginning of format string (this is a defect)");
+  return -1;
+}
+
  static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) {
    int got_Z = 0;
    while (1) {
@@ -1027,6 +1035,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
            PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler");
            return NULL;
          }
+        if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
          ctx->packmode = '=';
          ++ts;
          break;
@@ -1036,13 +1045,15 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha
            PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler");
            return NULL;
          }
+        if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
          ctx->packmode = '=';
          ++ts;
          break;
        case '=':
        case '@':
        case '^':
-      ctx->packmode = *ts++;
+        if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL;
+        ctx->packmode = *ts++;
          break;
        case 'T': /* substruct */
          {
diff --git a/Cython/Includes/numpy.pxd b/Cython/Includes/numpy.pxd

index d5b122748c333bf0bcd9f4f0ae26a3bddfe57d05..ea9921cc2827e4e6f75f07955ad81c5503cb3c0c 100644 (file)
--- a/Cython/Includes/numpy.pxd
+++ b/Cython/Includes/numpy.pxd
@@ -16,7 +16,7 @@ cimport stdlib
  
  cdef extern from "Python.h":
      ctypedef int Py_intptr_t
-    
+
  cdef extern from "numpy/arrayobject.h":
      ctypedef Py_intptr_t npy_intp
          
@@ -68,6 +68,9 @@ cdef extern from "numpy/arrayobject.h":
              # In particular strided access is always provided regardless
              # of flags
              cdef int copy_shape, i, ndim
+            cdef int endian_detector = 1
+            cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+            
              ndim = PyArray_NDIM(self)
              
              if sizeof(npy_intp) != sizeof(Py_ssize_t):
@@ -105,7 +108,6 @@ cdef extern from "numpy/arrayobject.h":
              cdef dtype descr = self.descr
              cdef list stack
              cdef int offset
-            cdef char byteorder = 0
  
              cdef bint hasfields = PyDataType_HASFIELDS(descr)
  
@@ -118,6 +120,9 @@ cdef extern from "numpy/arrayobject.h":
  
              if not hasfields:
                  t = descr.type_num
+                if ((descr.byteorder == '>' and little_endian) or
+                    (descr.byteorder == '<' and not little_endian)):
+                    raise ValueError("Non-native byte order not supported")
                  if   t == NPY_BYTE:        f = "b"
                  elif t == NPY_UBYTE:       f = "B"
                  elif t == NPY_SHORT:       f = "h"
@@ -141,10 +146,11 @@ cdef extern from "numpy/arrayobject.h":
                  return
              else:
                  info.format = <char*>stdlib.malloc(_buffer_format_string_len)
+                info.format[0] = '^' # Native data types, manual alignment
                  offset = 0
-                f = _util_dtypestring(descr, info.format,
+                f = _util_dtypestring(descr, info.format + 1,
                                        info.format + _buffer_format_string_len,
-                                      &offset, &byteorder)
+                                      &offset)
                  f[0] = 0 # Terminate format string
  
          def __releasebuffer__(ndarray self, Py_buffer* info):
@@ -257,39 +263,45 @@ ctypedef npy_cdouble     cdouble_t
  ctypedef npy_clongdouble clongdouble_t
  
  
-cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset, char* byteorder) except NULL:
+cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
      # Recursive utility function used in __getbuffer__ to get format
      # string. The new location in the format string is returned.
  
      cdef dtype child
      cdef int delta_offset
      cdef tuple i
-    cdef char new_byteorder
+    # Host byte-order probe: the first byte of the C int 1 is nonzero only
+    # on a little-endian machine (same test as in ndarray.__getbuffer__).
+    cdef int endian_detector = 1
+    cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+    
      for i in descr.fields.itervalues():
          child = i[0]
          new_offset = i[1]
  
-        if (end - f) - (new_offset - offset[0]) < 15: # this should leave room for "T{" and "}" as well
+        if (end - f) - (new_offset - offset[0]) < 15:
              raise RuntimeError("Format string allocated too short, see comment in numpy.pxd")
  
-#        new_byteorder = child.byteorder
-#        if new_byteorder == '|': new_byteorder = '='
-#        if byteorder[0] != new_byteorder:
-#            f[0] = new_byteorder
-#            f += 1
-#            byteorder[0] = new_byteorder
-
+        if ((child.byteorder == '>' and little_endian) or
+            (child.byteorder == '<' and not little_endian)):
+            raise ValueError("Non-native byte order not supported")
+            # One could encode it in the format string and have Cython
+            # complain instead, BUT: < and > in format strings also imply
+            # standardized sizes for datatypes, and we rely on native in
+            # order to avoid reencoding data types based on their size.
+            #
+            # A proper PEP 3118 exporter for other clients than Cython
+            # must deal properly with this!
+        
          # Output padding bytes
-#        while offset[0] < new_offset:
-#            f[0] = 120 # "x"; pad byte
-#            f += 1
-#            offset[0] += 1
+        while offset[0] < new_offset:
+            f[0] = 120 # "x"; pad byte
+            f += 1
+            offset[0] += 1
  
          offset[0] += child.itemsize
              
          if not PyDataType_HASFIELDS(child):
              t = child.type_num
-            if end - f < 15: # this should leave room for "T{" and "}" as well
+            if end - f < 5:
                  raise RuntimeError("Format string allocated too short.")
  
              # Until ticket #99 is fixed, use integers to avoid warnings
@@ -314,11 +326,8 @@ cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset
                  raise ValueError("unknown dtype code in numpy.pxd (%d)" % t)
              f += 1
          else:
-            f[0] = 84 #"T"
-            f[1] = 123 #"{"
-            f += 2
-            f = _util_dtypestring(child, f, end, offset, byteorder)
-            f[0] = 125 #"}"
-            f += 1
+            # Cython ignores struct boundary information ("T{...}"),
+            # so don't output it
+            f = _util_dtypestring(child, f, end, offset)
      return f
                  
diff --git a/tests/run/buffmt.pyx b/tests/run/buffmt.pyx

index 245e41f94596748371601d056cc580e320b77741..4afb34e1bb30a46d2c748f036fc30e376d6ac6d6 100644 (file)
--- a/tests/run/buffmt.pyx
+++ b/tests/run/buffmt.pyx
@@ -26,10 +26,10 @@ cdef struct align_of_int_helper:
      int i
  double_align = sizeof(align_of_double_helper) - sizeof(double)
  int_align = sizeof(align_of_int_helper) - sizeof(int)
-if double_align != 8:
-    raise RuntimeError("Alignment of double is %d on this system, please report to cython-dev for a testcase fix" % double_align)
-if int_align != 4:
-    raise RuntimeError("Alignment of int is %d on this system, please report to cython-dev for a testcase fix" % int_align)
+# The format strings in this file hard-code 8-byte doubles and 4-byte ints,
+# so bail out early (reporting both measured values) on any other platform.
+if double_align != 8 or sizeof(double) != 8:
+    raise RuntimeError("Alignment/size of double is %d/%d on this system, please report to cython-dev for a testcase fix" % (double_align, sizeof(double)))
+if int_align != 4 or sizeof(int) != 4:
+    raise RuntimeError("Alignment/size of int is %d/%d on this system, please report to cython-dev for a testcase fix" % (int_align, sizeof(int)))
  
   
  cdef class MockBuffer:
@@ -152,7 +152,8 @@ def char3int(fmt):
      >>> char3int("c1i1i1i")    
      >>> char3int("c3i")
      >>> char3int("ci2i")
-    >>> char3int("c@i@2i")
+
+    #TODO > char3int("c@i@2i")
  
      Extra pad bytes (assuming int size is 4 or more)
      >>> char3int("cxiii")
@@ -161,11 +162,12 @@ def char3int(fmt):
  
      Standard alignment (assming int size is 4)
      >>> char3int("=c3xiii")
-    >>> char3int("=cxxx@iii")
      >>> char3int("=ciii")
      Traceback (most recent call last):
          ...
      ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected
+
+    #TODO char3int("=cxxx@iii")
      
      Error:
      >>> char3int("cii")
@@ -222,7 +224,6 @@ def complex_test(fmt):
  def alignment_string(fmt, exc=None):
      """
      >>> alignment_string("@i")
-    >>> alignment_string("@i@@")
      >>> alignment_string("%si" % current_endian)
      >>> alignment_string("%si" % other_endian, "X-endian buffer not supported on X-endian compiler")
      >>> alignment_string("=i")
@@ -268,6 +269,39 @@ def mixed_complex_struct():
      """
      cdef object[MixedComplex] buf = MockBuffer("Zd", sizeof(MixedComplex))
  
- 
+
+# Inner record (char followed by int) declared packed; embedded as the
+# `sub` member of PackedStruct below.
+cdef packed struct PackedSubStruct:
+    char x
+    int y
+
+# Outer packed record used by packed_struct(); its doctests expect the
+# field after `a` at offset 1, i.e. no padding before `b`.
+cdef packed struct PackedStruct:
+    char a
+    int b
+    PackedSubStruct sub
+    
+
+@testcase
+def packed_struct(fmt):
+    """
+    Assuming int is four bytes:
+    
+    >>> packed_struct("^cici")
+    >>> packed_struct("=cibi")
+
+    >>> packed_struct("^c@i^ci")
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer packing mode currently only allowed at beginning of format string (this is a defect)
+    
+    However aligned access won't work:
+    >>> packed_struct("@cici")
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected
+
+    """
+    # Binding the mock buffer to a PackedStruct-typed buffer variable is the
+    # entire test; the docstring lists the expected outcome per format string.
+    cdef object[PackedStruct] buf = MockBuffer(fmt, sizeof(PackedStruct))
+
  # TODO: empty struct
  # TODO: Incomplete structs
+# TODO: mixed structs
diff --git a/tests/run/numpy_test.pyx b/tests/run/numpy_test.pyx

index 7220d6909a5157e58806f225716ca5bd31b63777..17a1f28c70352b04fd237b970c7d39e70b9f3b24 100644 (file)
--- a/tests/run/numpy_test.pyx
+++ b/tests/run/numpy_test.pyx
@@ -2,6 +2,17 @@
  
  cimport numpy as np
  
+def little_endian():
+    # True when the first byte of the C int 1 is nonzero.
+    cdef int probe = 1
+    cdef char* first_byte = <char*>&probe
+    return first_byte[0] != 0
+
+if little_endian():
+    my_endian = '<'
+    other_endian = '>'
+else:
+    my_endian = '>'
+    other_endian = '<'
+
  try:
      import numpy as np
      __doc__ = u"""
@@ -130,23 +141,49 @@ try:
      >>> test_dtype(np.int32, inc1_int32_t)
      >>> test_dtype(np.float64, inc1_float64_t)
  
+    Endian tests:
+    >>> test_dtype('%si' % my_endian, inc1_int)
+    >>> test_dtype('%si' % other_endian, inc1_int)
+    Traceback (most recent call last):
+       ...
+    ValueError: Non-native byte order not supported
+    
+
+
      >>> test_recordarray()
      
-    >>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
+    >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
              ('a', np.dtype('i,i')),\
              ('b', np.dtype('i,i'))\
-        ])))
+        ]))))
      array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))], 
-          dtype=[('a', [('f0', '<i4'), ('f1', '<i4')]), ('b', [('f0', '<i4'), ('f1', '<i4')])])
+          dtype=[('a', [('f0', '!i4'), ('f1', '!i4')]), ('b', [('f0', '!i4'), ('f1', '!i4')])])
  
-    >>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
+    >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\
              ('a', np.dtype('i,f')),\
              ('b', np.dtype('i,i'))\
-        ])))
+        ]))))
      Traceback (most recent call last):
          ...
      ValueError: Buffer dtype mismatch, expected 'int' but got 'float' in 'DoubleInt.y'
  
+    >>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=False))))
+    array([(22, 23)], 
+          dtype=[('f0', '|i1'), ('f1', '!i4')])
+    >>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=True))))
+    array([(22, 23)], 
+          dtype=[('f0', '|i1'), ('', '|V3'), ('f1', '!i4')])
+
+    >>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=True))))
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected
+
+    >>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=False))))
+    Traceback (most recent call last):
+        ...
+    ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected
+
  
      >>> test_good_cast()
      True
@@ -300,7 +337,7 @@ def test_nested_dtypes(obj):
      arr[1].b.x = arr[0].a.y + 1
      arr[1].b.y = 4
      arr[2] = arr[1]
-    return arr
+    return repr(arr).replace('<', '!').replace('>', '!')
  
  def test_bad_nested_dtypes():
      cdef object[BadNestedStruct] arr
@@ -314,3 +351,21 @@ def test_good_cast():
  def test_bad_cast():
      # This should raise an exception
      cdef np.ndarray[long, cast=True] arr = np.array([1], dtype=b'b')
+
+# char + int record declared packed; element type of test_packed_align().
+cdef packed struct PackedStruct:
+    char a
+    int b
+
+# Same fields without `packed`; element type of test_unpacked_align().
+cdef struct UnpackedStruct:
+    char a
+    int b
+
+def test_packed_align(np.ndarray[PackedStruct] arr):
+    # Store known values in element 0, then return repr(arr) with both
+    # '<' and '>' replaced by '!'.
+    arr[0].a = 22
+    arr[0].b = 23
+    text = repr(arr)
+    text = text.replace('<', '!')
+    return text.replace('>', '!')
+
+def test_unpacked_align(np.ndarray[UnpackedStruct] arr):
+    # Store known values in element 0, then return repr(arr) with both
+    # '<' and '>' replaced by '!'.
+    arr[0].a = 22
+    arr[0].b = 23
+    text = repr(arr)
+    text = text.replace('<', '!')
+    return text.replace('>', '!')
author	Dag Sverre Seljebotn <dagss@student.matnat.uio.no>
	Wed, 6 May 2009 17:20:00 +0000 (19:20 +0200)
committer	Dag Sverre Seljebotn <dagss@student.matnat.uio.no>
	Wed, 6 May 2009 17:20:00 +0000 (19:20 +0200)
Cython/Compiler/Buffer.py		patch \| blob \| history
Cython/Includes/numpy.pxd		patch \| blob \| history
tests/run/buffmt.pyx		patch \| blob \| history
tests/run/numpy_test.pyx		patch \| blob \| history