From: Mickaël Guérin Date: Mon, 5 May 2008 15:08:51 +0000 (+0200) Subject: using a table for _speedups escape X-Git-Tag: 2.0rc1~95 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c0d40d345b146596ed1215b8f52d607f395ac539;p=jinja2.git using a table for _speedups escape --HG-- branch : trunk --- diff --git a/jinja2/_speedups.c b/jinja2/_speedups.c index 5a7e9aa..ca3497e 100644 --- a/jinja2/_speedups.c +++ b/jinja2/_speedups.c @@ -13,19 +13,30 @@ #include <Python.h> - -static const char *samp = "&amp;", *slt = "&lt;", *sgt = "&gt;", *sqt = "&quot;"; -static Py_UNICODE *amp, *lt, *gt, *qt; static PyObject* markup; +#define ESCAPED_CHARS_TABLE_SIZE 63 + +static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE]; +static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE]; static int init_constants(void) { - amp = ((PyUnicodeObject*)PyUnicode_DecodeASCII(samp, 5, NULL))->str; - lt = ((PyUnicodeObject*)PyUnicode_DecodeASCII(slt, 4, NULL))->str; - gt = ((PyUnicodeObject*)PyUnicode_DecodeASCII(sgt, 4, NULL))->str; - qt = ((PyUnicodeObject*)PyUnicode_DecodeASCII(sqt, 6, NULL))->str; + memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len)); + /* memset(escaped_chars_repl, 0, sizeof (escaped_chars_repl)); */ + + escaped_chars_delta_len['"'] = 5; + escaped_chars_repl['"'] = ((PyUnicodeObject*)PyUnicode_DecodeASCII("&quot;", 6, NULL))->str; + + escaped_chars_delta_len['&'] = 3; + escaped_chars_repl['&'] = ((PyUnicodeObject*)PyUnicode_DecodeASCII("&amp;", 5, NULL))->str; + + escaped_chars_delta_len['<'] = 3; + escaped_chars_repl['<'] = ((PyUnicodeObject*)PyUnicode_DecodeASCII("&lt;", 4, NULL))->str; + + escaped_chars_delta_len['>'] = 3; + escaped_chars_repl['>'] = ((PyUnicodeObject*)PyUnicode_DecodeASCII("&gt;", 4, NULL))->str; PyObject *module = PyImport_ImportModule("jinja2.utils"); if (!module) @@ -40,29 +51,20 @@ static PyObject* escape_unicode(PyUnicodeObject *in) { PyUnicodeObject *out; + Py_UNICODE *inp = in->str; + const Py_UNICODE *inp_end = in->str + in->length; + Py_UNICODE 
*next_escp; Py_UNICODE *outp; + Py_ssize_t delta=0, erepl=0, delta_len=0; /* First we need to figure out how long the escaped string will be */ - int len = 0, erepl = 0, repl = 0; - Py_UNICODE *inp = in->str; - while (*(inp) || in->length > inp - in->str) - switch (*inp++) { - case '&': - len += 5; - ++erepl; - break; - case '"': - len += 6; + while (*(inp) || inp < inp_end) { + if (*inp < ESCAPED_CHARS_TABLE_SIZE && escaped_chars_delta_len[*inp]) { + delta += escaped_chars_delta_len[*inp]; ++erepl; - break; - case '<': - case '>': - len += 4; - ++erepl; - break; - default: - ++len; } + ++inp; + } /* Do we need to escape anything at all? */ if (!erepl) { @@ -70,44 +72,37 @@ escape_unicode(PyUnicodeObject *in) return (PyObject*)in; } - out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, len); + out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, in->length + delta); if (!out) return NULL; outp = out->str; inp = in->str; - while (*(inp) || in->length > inp - in->str) { - /* copy rest of string if we have replaced everything */ - if (repl == erepl) { - Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str)); - break; + while (erepl-- > 0) { + /* look for the next substitution */ + next_escp = inp; + while (next_escp < inp_end) { + if (*next_escp < ESCAPED_CHARS_TABLE_SIZE && (delta_len = escaped_chars_delta_len[*next_escp])) { + ++delta_len; + break; + } + ++next_escp; } - /* regular replacements */ - switch (*inp) { - case '&': - Py_UNICODE_COPY(outp, amp, 5); - outp += 5; - ++repl; - break; - case '"': - Py_UNICODE_COPY(outp, qt, 6); - outp += 6; - ++repl; - break; - case '<': - Py_UNICODE_COPY(outp, lt, 4); - outp += 4; - ++repl; - break; - case '>': - Py_UNICODE_COPY(outp, gt, 4); - outp += 4; - ++repl; - break; - default: - *outp++ = *inp; - }; - ++inp; + + if (next_escp > inp) { + /* copy unescaped chars between inp and next_escp */ + Py_UNICODE_COPY(outp, inp, next_escp-inp); + outp += next_escp-inp; + } + + /* escape 'next_escp' */ + Py_UNICODE_COPY(outp, 
escaped_chars_repl[*next_escp], delta_len); + outp += delta_len; + + inp = next_escp + 1; + } + if (inp < inp_end) { + Py_UNICODE_COPY(outp, inp, in->length - (inp - in->str)); } return (PyObject*)out;