From e26883a6117bfad282adf22027b6a604cef79fef Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Sat, 24 Oct 2009 15:37:28 -0400 Subject: [PATCH] freeze: fix for compiling with Python3 To get the output of cython_freeze to compile with both Python 2 and Python 3, I copied the contents of Python-3.1.1/Modules/python.c, with some slight modifications. The main issue is that Python 3 uses wchar_t, while Python 2 uses char. It also appears that the Py_FlushLine/PyErr_Clear is not needed in Python 3. Related to #434: "main() function generated by --embed doesn't compile in Py3" --- Demos/freeze/Makefile | 6 +- bin/cython_freeze | 145 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 5 deletions(-) diff --git a/Demos/freeze/Makefile b/Demos/freeze/Makefile index de593dac..f8027c3b 100644 --- a/Demos/freeze/Makefile +++ b/Demos/freeze/Makefile @@ -4,9 +4,9 @@ CYTHON_FREEZE = ../../bin/cython_freeze PYTHON = python RST2HTML = rst2html -PY_LDFLAGS = $(shell $(PYTHON) -c 'from distutils.sysconfig import get_config_var as g; print " ".join([g("LINKFORSHARED"), "-L"+g("LIBPL")])') -PY_CPPFLAGS = $(shell $(PYTHON) -c 'from distutils.sysconfig import *; print "-I"+get_python_inc()') -PY_LDLIBS = $(shell $(PYTHON) -c 'from distutils.sysconfig import get_config_var as g; print " ".join(["-lpython"+g("VERSION"), g("SYSLIBS"), g("LIBS"), g("LOCALMODLIBS")])') +PY_LDFLAGS = $(shell $(PYTHON) -c 'from distutils.sysconfig import get_config_var as g; import sys; sys.stdout.write(" ".join([g("LINKFORSHARED"), "-L"+g("LIBPL")]) + "\n")') +PY_CPPFLAGS = $(shell $(PYTHON) -c 'from distutils.sysconfig import *; import sys; sys.stdout.write("-I"+get_python_inc() + "\n")') +PY_LDLIBS = $(shell $(PYTHON) -c 'from distutils.sysconfig import get_config_var as g; import sys; sys.stdout.write(" ".join(["-lpython"+g("VERSION"), g("SYSLIBS"), g("LIBS"), g("LOCALMODLIBS")]) + "\n")') CFLAGS = -fPIC -fno-strict-aliasing -g -O2 -Wall -Wextra CPPFLAGS = $(PY_CPPFLAGS) diff 
--git a/bin/cython_freeze b/bin/cython_freeze index 631c3924..0123b0ba 100755 --- a/bin/cython_freeze +++ b/bin/cython_freeze @@ -38,6 +38,7 @@ modules = [format_modname(x) for x in args] print """\ #include <Python.h> +#include <locale.h> #include <stdio.h> #include <stdlib.h> @@ -69,10 +70,12 @@ if not options.pymain: print "\nextern int __pyx_module_is_main_%s;" % modules[0] print """ -#if PY_MAJOR_VERSION < 3 || (!defined(WIN32) && !defined(MS_WINDOWS)) +#if PY_MAJOR_VERSION < 3 int main(int argc, char** argv) { -#else +#elif defined(WIN32) || defined(MS_WINDOWS) int wmain(int argc, wchar_t **argv) { +#else +static int python_main(int argc, wchar_t **argv) { #endif """, if not options.pymain: @@ -112,11 +115,149 @@ else: if (!m) { r = 1; PyErr_Print(); /* This exits with the right code if SystemExit. */ +#if PY_MAJOR_VERSION < 3 if (Py_FlushLine()) PyErr_Clear(); +#endif } Py_XDECREF(m); Py_Finalize(); return r; } """ % {'main' : modules[0]}, + +print r""" +#if PY_MAJOR_VERSION >= 3 && !defined(WIN32) && !defined(MS_WINDOWS) +static wchar_t* +char2wchar(char* arg) +{ + wchar_t *res; +#ifdef HAVE_BROKEN_MBSTOWCS + /* Some platforms have a broken implementation of + * mbstowcs which does not count the characters that + * would result from conversion. Use an upper bound. + */ + size_t argsize = strlen(arg); +#else + size_t argsize = mbstowcs(NULL, arg, 0); +#endif + size_t count; + unsigned char *in; + wchar_t *out; +#ifdef HAVE_MBRTOWC + mbstate_t mbs; +#endif + if (argsize != (size_t)-1) { + res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t)); + if (!res) + goto oom; + count = mbstowcs(res, arg, argsize+1); + if (count != (size_t)-1) { + wchar_t *tmp; + /* Only use the result if it contains no + surrogate characters. */ + for (tmp = res; *tmp != 0 && + (*tmp < 0xd800 || *tmp > 0xdfff); tmp++) + ; + if (*tmp == 0) + return res; + } + PyMem_Free(res); + } + /* Conversion failed. Fall back to escaping with surrogateescape. 
*/ +#ifdef HAVE_MBRTOWC + /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */ + + /* Overallocate; as multi-byte characters are in the argument, the + actual output could use less memory. */ + argsize = strlen(arg) + 1; + res = PyMem_Malloc(argsize*sizeof(wchar_t)); + if (!res) goto oom; + in = (unsigned char*)arg; + out = res; + memset(&mbs, 0, sizeof mbs); + while (argsize) { + size_t converted = mbrtowc(out, (char*)in, argsize, &mbs); + if (converted == 0) + /* Reached end of string; null char stored. */ + break; + if (converted == (size_t)-2) { + /* Incomplete character. This should never happen, + since we provide everything that we have - + unless there is a bug in the C library, or I + misunderstood how mbrtowc works. */ + fprintf(stderr, "unexpected mbrtowc result -2\n"); + return NULL; + } + if (converted == (size_t)-1) { + /* Conversion error. Escape as UTF-8b, and start over + in the initial shift state. */ + *out++ = 0xdc00 + *in++; + argsize--; + memset(&mbs, 0, sizeof mbs); + continue; + } + if (*out >= 0xd800 && *out <= 0xdfff) { + /* Surrogate character. Escape the original + byte sequence with surrogateescape. */ + argsize -= converted; + while (converted--) + *out++ = 0xdc00 + *in++; + continue; + } + /* successfully converted some bytes */ + in += converted; + argsize -= converted; + out++; + } +#else + /* Cannot use C locale for escaping; manually escape as if charset + is ASCII (i.e. escape all bytes >= 128). This will still roundtrip + correctly in the locale's charset, which must be an ASCII superset. 
*/ + res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t)); + if (!res) goto oom; + in = (unsigned char*)arg; + out = res; + while(*in) + if(*in < 128) + *out++ = *in++; + else + *out++ = 0xdc00 + *in++; + *out = 0; +#endif + return res; +oom: + fprintf(stderr, "out of memory\n"); + return NULL; +} + +int +main(int argc, char **argv) +{ + wchar_t **argv_copy = (wchar_t **)PyMem_Malloc(sizeof(wchar_t*)*argc); + /* We need a second copy, as Python might modify the first one. */ + wchar_t **argv_copy2 = (wchar_t **)PyMem_Malloc(sizeof(wchar_t*)*argc); + int i, res; + char *oldloc; + if (!argv_copy || !argv_copy2) { + fprintf(stderr, "out of memory\n"); + return 1; + } + oldloc = strdup(setlocale(LC_ALL, NULL)); + setlocale(LC_ALL, ""); + for (i = 0; i < argc; i++) { + argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]); + if (!argv_copy[i]) + return 1; + } + setlocale(LC_ALL, oldloc); + free(oldloc); + res = python_main(argc, argv_copy); + for (i = 0; i < argc; i++) { + PyMem_Free(argv_copy2[i]); + } + PyMem_Free(argv_copy); + PyMem_Free(argv_copy2); + return res; +} +#endif""" -- 2.26.2