From 15d052fce274246fb68fda3fd29ff3fddeddeacc Mon Sep 17 00:00:00 2001 From: Dag Sverre Seljebotn Date: Wed, 6 May 2009 19:20:00 +0200 Subject: [PATCH] More buffer typestring; fixes #285 --- Cython/Compiler/Buffer.py | 13 +++++++- Cython/Includes/numpy.pxd | 59 +++++++++++++++++++--------------- tests/run/buffmt.pyx | 50 ++++++++++++++++++++++++----- tests/run/numpy_test.pyx | 67 +++++++++++++++++++++++++++++++++++---- 4 files changed, 149 insertions(+), 40 deletions(-) diff --git a/Cython/Compiler/Buffer.py b/Cython/Compiler/Buffer.py index f9cfb06f..7724fade 100644 --- a/Cython/Compiler/Buffer.py +++ b/Cython/Compiler/Buffer.py @@ -1002,6 +1002,14 @@ static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { return 0; } +static int __Pyx_BufFmt_FirstPack(__Pyx_BufFmt_Context* ctx) { + if (ctx->enc_type != 0 || ctx->packmode != '@') { + PyErr_SetString(PyExc_ValueError, "Buffer packing mode currently only allowed at beginning of format string (this is a defect)"); + return -1; + } + return 0; +} + static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { int got_Z = 0; while (1) { @@ -1027,6 +1035,7 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); return NULL; } + if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL; ctx->packmode = '='; ++ts; break; @@ -1036,13 +1045,15 @@ static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const cha PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); return NULL; } + if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL; ctx->packmode = '='; ++ts; break; case '=': case '@': case '^': - ctx->packmode = *ts++; + if (__Pyx_BufFmt_FirstPack(ctx) == -1) return NULL; + ctx->packmode = *ts++; break; case 'T': /* substruct */ { diff --git a/Cython/Includes/numpy.pxd b/Cython/Includes/numpy.pxd index 
d5b12274..ea9921cc 100644 --- a/Cython/Includes/numpy.pxd +++ b/Cython/Includes/numpy.pxd @@ -16,7 +16,7 @@ cimport stdlib cdef extern from "Python.h": ctypedef int Py_intptr_t - + cdef extern from "numpy/arrayobject.h": ctypedef Py_intptr_t npy_intp @@ -68,6 +68,9 @@ cdef extern from "numpy/arrayobject.h": # In particular strided access is always provided regardless # of flags cdef int copy_shape, i, ndim + cdef int endian_detector = 1 + cdef bint little_endian = ((<char*>&endian_detector)[0] != 0) + ndim = PyArray_NDIM(self) if sizeof(npy_intp) != sizeof(Py_ssize_t): @@ -105,7 +108,6 @@ cdef extern from "numpy/arrayobject.h": cdef dtype descr = self.descr cdef list stack cdef int offset - cdef char byteorder = 0 cdef bint hasfields = PyDataType_HASFIELDS(descr) @@ -118,6 +120,9 @@ cdef extern from "numpy/arrayobject.h": if not hasfields: t = descr.type_num + if ((descr.byteorder == '>' and little_endian) or + (descr.byteorder == '<' and not little_endian)): + raise ValueError("Non-native byte order not supported") if t == NPY_BYTE: f = "b" elif t == NPY_UBYTE: f = "B" elif t == NPY_SHORT: f = "h" @@ -141,10 +146,11 @@ cdef extern from "numpy/arrayobject.h": return else: info.format = <char*>stdlib.malloc(_buffer_format_string_len) + info.format[0] = '^' # Native data types, manual alignment offset = 0 - f = _util_dtypestring(descr, info.format, + f = _util_dtypestring(descr, info.format + 1, info.format + _buffer_format_string_len, - &offset, &byteorder) + &offset) f[0] = 0 # Terminate format string def __releasebuffer__(ndarray self, Py_buffer* info): @@ -257,39 +263,45 @@ ctypedef npy_cdouble cdouble_t ctypedef npy_clongdouble clongdouble_t -cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset, char* byteorder) except NULL: +cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # Recursive utility function used in __getbuffer__ to get format # string. The new location in the format string is returned. 
cdef dtype child cdef int delta_offset cdef tuple i - cdef char new_byteorder + cdef int endian_detector = 1 + cdef bint little_endian = ((<char*>&endian_detector)[0] != 0) + for i in descr.fields.itervalues(): child = i[0] new_offset = i[1] - if (end - f) - (new_offset - offset[0]) < 15: # this should leave room for "T{" and "}" as well + if (end - f) - (new_offset - offset[0]) < 15: raise RuntimeError("Format string allocated too short, see comment in numpy.pxd") -# new_byteorder = child.byteorder -# if new_byteorder == '|': new_byteorder = '=' -# if byteorder[0] != new_byteorder: -# f[0] = new_byteorder -# f += 1 -# byteorder[0] = new_byteorder - + if ((child.byteorder == '>' and little_endian) or + (child.byteorder == '<' and not little_endian)): + raise ValueError("Non-native byte order not supported") + # One could encode it in the format string and have Cython + # complain instead, BUT: < and > in format strings also imply + # standardized sizes for datatypes, and we rely on native in + # order to avoid reencoding data types based on their size. + # + # A proper PEP 3118 exporter for other clients than Cython + # must deal properly with this! 
+ # Output padding bytes -# while offset[0] < new_offset: -# f[0] = 120 # "x"; pad byte -# f += 1 -# offset[0] += 1 + while offset[0] < new_offset: + f[0] = 120 # "x"; pad byte + f += 1 + offset[0] += 1 offset[0] += child.itemsize if not PyDataType_HASFIELDS(child): t = child.type_num - if end - f < 15: # this should leave room for "T{" and "}" as well + if end - f < 5: raise RuntimeError("Format string allocated too short.") # Until ticket #99 is fixed, use integers to avoid warnings @@ -314,11 +326,8 @@ cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset raise ValueError("unknown dtype code in numpy.pxd (%d)" % t) f += 1 else: - f[0] = 84 #"T" - f[1] = 123 #"{" - f += 2 - f = _util_dtypestring(child, f, end, offset, byteorder) - f[0] = 125 #"}" - f += 1 + # Cython ignores struct boundary information ("T{...}"), + # so don't output it + f = _util_dtypestring(child, f, end, offset) return f diff --git a/tests/run/buffmt.pyx b/tests/run/buffmt.pyx index 245e41f9..4afb34e1 100644 --- a/tests/run/buffmt.pyx +++ b/tests/run/buffmt.pyx @@ -26,10 +26,10 @@ cdef struct align_of_int_helper: int i double_align = sizeof(align_of_double_helper) - sizeof(double) int_align = sizeof(align_of_int_helper) - sizeof(int) -if double_align != 8: - raise RuntimeError("Alignment of double is %d on this system, please report to cython-dev for a testcase fix" % double_align) -if int_align != 4: - raise RuntimeError("Alignment of int is %d on this system, please report to cython-dev for a testcase fix" % int_align) +if double_align != 8 or sizeof(double) != 8: + raise RuntimeError("Alignment or size of double is %d on this system, please report to cython-dev for a testcase fix" % double_align) +if int_align != 4 or sizeof(int) != 4: + raise RuntimeError("Alignment or size of int is %d on this system, please report to cython-dev for a testcase fix" % int_align) cdef class MockBuffer: @@ -152,7 +152,8 @@ def char3int(fmt): >>> char3int("c1i1i1i") >>> 
char3int("c3i") >>> char3int("ci2i") - >>> char3int("c@i@2i") + + #TODO > char3int("c@i@2i") Extra pad bytes (assuming int size is 4 or more) >>> char3int("cxiii") @@ -161,11 +162,12 @@ def char3int(fmt): Standard alignment (assming int size is 4) >>> char3int("=c3xiii") - >>> char3int("=cxxx@iii") >>> char3int("=ciii") Traceback (most recent call last): ... ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected + + #TODO char3int("=cxxx@iii") Error: >>> char3int("cii") @@ -222,7 +224,6 @@ def complex_test(fmt): def alignment_string(fmt, exc=None): """ >>> alignment_string("@i") - >>> alignment_string("@i@@") >>> alignment_string("%si" % current_endian) >>> alignment_string("%si" % other_endian, "X-endian buffer not supported on X-endian compiler") >>> alignment_string("=i") @@ -268,6 +269,39 @@ def mixed_complex_struct(): """ cdef object[MixedComplex] buf = MockBuffer("Zd", sizeof(MixedComplex)) - + +cdef packed struct PackedSubStruct: + char x + int y + +cdef packed struct PackedStruct: + char a + int b + PackedSubStruct sub + + +@testcase +def packed_struct(fmt): + """ + Assuming int is four bytes: + + >>> packed_struct("^cici") + >>> packed_struct("=cibi") + + >>> packed_struct("^c@i^ci") + Traceback (most recent call last): + ... + ValueError: Buffer packing mode currently only allowed at beginning of format string (this is a defect) + + However aligned access won't work: + >>> packed_struct("@cici") + Traceback (most recent call last): + ... 
+ ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected + + """ + cdef object[PackedStruct] buf = MockBuffer(fmt, sizeof(PackedStruct)) + # TODO: empty struct # TODO: Incomplete structs +# TODO: mixed structs diff --git a/tests/run/numpy_test.pyx b/tests/run/numpy_test.pyx index 7220d690..17a1f28c 100644 --- a/tests/run/numpy_test.pyx +++ b/tests/run/numpy_test.pyx @@ -2,6 +2,17 @@ cimport numpy as np +def little_endian(): + cdef int endian_detector = 1 + return (<char*>&endian_detector)[0] != 0 + +if little_endian(): + my_endian = '<' + other_endian = '>' +else: + my_endian = '>' + other_endian = '<' + try: import numpy as np __doc__ = u""" @@ -130,23 +141,49 @@ try: >>> test_dtype(np.int32, inc1_int32_t) >>> test_dtype(np.float64, inc1_float64_t) + Endian tests: + >>> test_dtype('%si' % my_endian, inc1_int) + >>> test_dtype('%si' % other_endian, inc1_int) + Traceback (most recent call last): + ... + ValueError: Non-native byte order not supported + + + >>> test_recordarray() - >>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\ + >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\ ('a', np.dtype('i,i')),\ ('b', np.dtype('i,i'))\ - ]))) + ])))) array([((0, 0), (0, 0)), ((1, 2), (1, 4)), ((1, 2), (1, 4))], - dtype=[('a', [('f0', '<i4'), ('f1', '<i4')]), ('b', [('f0', '<i4'), ('f1', '<i4')])]) + dtype=[('a', [('f0', '!i4'), ('f1', '!i4')]), ('b', [('f0', '!i4'), ('f1', '!i4')])]) - >>> test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\ + >>> print(test_nested_dtypes(np.zeros((3,), dtype=np.dtype([\ ('a', np.dtype('i,f')),\ ('b', np.dtype('i,i'))\ - ]))) + ])))) Traceback (most recent call last): ... 
ValueError: Buffer dtype mismatch, expected 'int' but got 'float' in 'DoubleInt.y' + >>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=False)))) + array([(22, 23)], + dtype=[('f0', '|i1'), ('f1', '!i4')]) + >>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=True)))) + array([(22, 23)], + dtype=[('f0', '|i1'), ('', '|V3'), ('f1', '!i4')]) + + >>> print(test_packed_align(np.zeros((1,), dtype=np.dtype('b,i', align=True)))) + Traceback (most recent call last): + ... + ValueError: Buffer dtype mismatch; next field is at offset 4 but 1 expected + + >>> print(test_unpacked_align(np.zeros((1,), dtype=np.dtype('b,i', align=False)))) + Traceback (most recent call last): + ... + ValueError: Buffer dtype mismatch; next field is at offset 1 but 4 expected + >>> test_good_cast() True @@ -300,7 +337,7 @@ def test_nested_dtypes(obj): arr[1].b.x = arr[0].a.y + 1 arr[1].b.y = 4 arr[2] = arr[1] - return arr + return repr(arr).replace('<', '!').replace('>', '!') def test_bad_nested_dtypes(): cdef object[BadNestedStruct] arr @@ -314,3 +351,21 @@ def test_good_cast(): def test_bad_cast(): # This should raise an exception cdef np.ndarray[long, cast=True] arr = np.array([1], dtype=b'b') + +cdef packed struct PackedStruct: + char a + int b + +cdef struct UnpackedStruct: + char a + int b + +def test_packed_align(np.ndarray[PackedStruct] arr): + arr[0].a = 22 + arr[0].b = 23 + return repr(arr).replace('<', '!').replace('>', '!') + +def test_unpacked_align(np.ndarray[UnpackedStruct] arr): + arr[0].a = 22 + arr[0].b = 23 + return repr(arr).replace('<', '!').replace('>', '!') -- 2.26.2