swig/swig-3.0.12-Add-missing-checks-for-failures-in-calls-to-PyUnicod.patch

347 lines
12 KiB
Diff

From b0e29fbdf31bb94b11cb8a7cc830b4a76467afa3 Mon Sep 17 00:00:00 2001
From: William S Fulton <wsf@fultondesigns.co.uk>
Date: Mon, 4 Dec 2017 18:41:55 +0000
Subject: [PATCH] Add missing checks for failures in calls to
PyUnicode_AsUTF8String.
Previously a seg fault could occur when passing invalid UTF8 strings (low
surrogates), eg passing u"\udcff" to the C layer (Python 3).
---
CHANGES.current | 8 ++++++-
Doc/Manual/Python.html | 22 ++++++++++++++++---
Doc/Manual/Varargs.html | 5 ++++-
Examples/python/multimap/example.i | 12 +++++++++-
.../python/unicode_strings_runme.py | 10 +++++++++
Examples/test-suite/python_varargs_typemap.i | 5 ++++-
Examples/test-suite/unicode_strings.i | 2 ++
Lib/python/pyerrors.swg | 11 ++++++----
Lib/python/pyhead.swg | 16 ++++++++------
Lib/python/pyinit.swg | 4 ++--
Lib/python/pyrun.swg | 10 ++++++---
Lib/python/pystrings.swg | 12 ++++++++--
12 files changed, 92 insertions(+), 25 deletions(-)
#diff --git a/CHANGES.current b/CHANGES.current
#index 5cab80172..06b958f18 100644
#--- a/CHANGES.current
#+++ b/CHANGES.current
#@@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
#
# Version 4.0.0 (in progress)
# ===========================
#+
#+2017-12-04: wsfulton
#+ [Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a
#+ seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing
#+ u"\udcff" to the C layer (Python 3).
#+
# 2017-11-24: joequant
#- Fix github #1124 and return R_NilValue for null pointers
#+ Fix #1124 and return R_NilValue for null pointers
#
# 2017-11-29: wsfulton
# [Java] director exception handling improvements.
#diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html
#index 0c0023dea..27ce084bd 100644
#--- a/Doc/Manual/Python.html
#+++ b/Doc/Manual/Python.html
#@@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
# <div class="code"><pre>
# %module example
#
#-%include &lt;std_string.i&gt;
#-
# %inline %{
#
#-const char* non_utf8_c_str(void) {
#+const char * non_utf8_c_str(void) {
# return "h\xe9llo w\xc3\xb6rld";
# }
#
#+void instring(const char *s) {
#+ ...
#+}
#+
# %}
# </pre></div>
#
#@@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
# <a href="https://www.python.org/dev/peps/pep-0383/">PEP 383</a>.
# </p>
#
#+<p>
#+When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too.
#+For example, when the <tt>instring</tt> method above is wrapped and called, any invalid UTF8 Unicode code strings
#+will result in a TypeError because the attempted conversion fails:
#+</p>
#+
#+<div class="targetlang"><pre>
#+&gt;&gt;&gt; example.instring('h\xe9llo')
#+&gt;&gt;&gt; example.instring('h\udce9llo')
#+Traceback (most recent call last):
#+ File "&lt;stdin&gt;", line 1, in &lt;module&gt;
#+TypeError: in method 'instring', argument 1 of type 'char const *'
#+</pre></div>
#+
# <p>
# In some cases, users may wish to instead handle all byte strings as bytes
# objects in Python 3. This can be accomplished by adding
#diff --git a/Doc/Manual/Varargs.html b/Doc/Manual/Varargs.html
#index eba816382..014a38cae 100644
#--- a/Doc/Manual/Varargs.html
#+++ b/Doc/Manual/Varargs.html
#@@ -529,8 +529,11 @@ like this:
# SWIG_fail;
# }
# pystr = PyUnicode_AsUTF8String(pyobj);
#+ if (!pystr) {
#+ SWIG_fail;
#+ }
# str = strdup(PyBytes_AsString(pystr));
#- Py_XDECREF(pystr);
#+ Py_DECREF(pystr);
# %#else
# if (!PyString_Check(pyobj)) {
# PyErr_SetString(PyExc_ValueError, "Expected a string");
diff --git a/Examples/python/multimap/example.i b/Examples/python/multimap/example.i
index 66c0f74c6..3ff5d52c0 100644
--- a/Examples/python/multimap/example.i
+++ b/Examples/python/multimap/example.i
@@ -39,7 +39,11 @@ extern int gcd(int x, int y);
%#if PY_VERSION_HEX >= 0x03000000
{
PyObject *utf8str = PyUnicode_AsUTF8String(s);
- const char *cstr = PyBytes_AsString(utf8str);
+ const char *cstr;
+ if (!utf8str) {
+ SWIG_fail;
+ }
+ cstr = PyBytes_AsString(utf8str);
$2[i] = strdup(cstr);
Py_DECREF(utf8str);
}
@@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
SWIG_fail;
}
utf8str = PyUnicode_AsUTF8String($input);
+ if (!utf8str) {
+ SWIG_fail;
+ }
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
$2 = (int)len;
@@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
char *cstr;
Py_ssize_t len;
PyObject *utf8str = PyUnicode_AsUTF8String($input);
+ if (!utf8str) {
+ SWIG_fail;
+ }
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
$2 = (int)len;
diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py
index fa9c51437..39e93b0fc 100644
--- a/Examples/test-suite/python/unicode_strings_runme.py
+++ b/Examples/test-suite/python/unicode_strings_runme.py
@@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
check(unicode_strings.charstring(unicode("hello4")), "hello4")
unicode_strings.charstring(u"hell\xb05")
unicode_strings.charstring(u"hell\u00f66")
+
+low_surrogate_string = u"\udcff"
+try:
+ unicode_strings.instring(low_surrogate_string)
+ # Will succeed with Python 2
+except TypeError, e:
+ # Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
+ # The real error is actually:
+ # UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
+ pass
diff --git a/Examples/test-suite/python_varargs_typemap.i b/Examples/test-suite/python_varargs_typemap.i
index f05fb98eb..d809bf1fa 100644
--- a/Examples/test-suite/python_varargs_typemap.i
+++ b/Examples/test-suite/python_varargs_typemap.i
@@ -23,8 +23,11 @@
SWIG_fail;
}
pystr = PyUnicode_AsUTF8String(pyobj);
+ if (!pystr) {
+ SWIG_fail;
+ }
str = strdup(PyBytes_AsString(pystr));
- Py_XDECREF(pystr);
+ Py_DECREF(pystr);
%#else
if (!PyString_Check(pyobj)) {
PyErr_SetString(PyExc_ValueError, "Expected a string");
diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i
index 9be3748e6..e7266266e 100644
--- a/Examples/test-suite/unicode_strings.i
+++ b/Examples/test-suite/unicode_strings.i
@@ -20,4 +20,6 @@ char *charstring(char *s) {
return s;
}
+void instring(const char *s) {
+}
%}
diff --git a/Lib/python/pyerrors.swg b/Lib/python/pyerrors.swg
index fe7313554..463afae15 100644
--- a/Lib/python/pyerrors.swg
+++ b/Lib/python/pyerrors.swg
@@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
PyObject *value = 0;
PyObject *traceback = 0;
- if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
+ if (PyErr_Occurred())
+ PyErr_Fetch(&type, &value, &traceback);
if (value) {
- char *tmp;
PyObject *old_str = PyObject_Str(value);
+ const char *tmp = SWIG_Python_str_AsChar(old_str);
PyErr_Clear();
Py_XINCREF(type);
-
- PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
+ if (tmp)
+ PyErr_Format(type, "%s %s", tmp, mesg);
+ else
+ PyErr_Format(type, "%s", mesg);
SWIG_Python_str_DelForPy3(tmp);
Py_DECREF(old_str);
Py_DECREF(value);
diff --git a/Lib/python/pyhead.swg b/Lib/python/pyhead.swg
index 55eb95a6d..2fa8b5b4c 100644
--- a/Lib/python/pyhead.swg
+++ b/Lib/python/pyhead.swg
@@ -38,14 +38,16 @@ SWIGINTERN char*
SWIG_Python_str_AsChar(PyObject *str)
{
#if PY_VERSION_HEX >= 0x03000000
- char *cstr;
- char *newstr;
- Py_ssize_t len;
+ char *newstr = 0;
str = PyUnicode_AsUTF8String(str);
- PyBytes_AsStringAndSize(str, &cstr, &len);
- newstr = (char *) malloc(len+1);
- memcpy(newstr, cstr, len+1);
- Py_XDECREF(str);
+ if (str) {
+ char *cstr;
+ Py_ssize_t len;
+ PyBytes_AsStringAndSize(str, &cstr, &len);
+ newstr = (char *) malloc(len+1);
+ memcpy(newstr, cstr, len+1);
+ Py_XDECREF(str);
+ }
return newstr;
#else
return PyString_AsString(str);
diff --git a/Lib/python/pyinit.swg b/Lib/python/pyinit.swg
index fe45ac941..826f8411b 100644
--- a/Lib/python/pyinit.swg
+++ b/Lib/python/pyinit.swg
@@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
SWIGINTERN int
swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
- char *tmp;
PyObject *str = swig_varlink_str(v);
+ const char *tmp = SWIG_Python_str_AsChar(str);
fprintf(fp,"Swig global variables ");
- fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
+ fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
SWIG_Python_str_DelForPy3(tmp);
Py_DECREF(str);
return 0;
diff --git a/Lib/python/pyrun.swg b/Lib/python/pyrun.swg
index efc476613..430d3af18 100644
--- a/Lib/python/pyrun.swg
+++ b/Lib/python/pyrun.swg
@@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
PyObject *traceback = 0;
PyErr_Fetch(&type, &value, &traceback);
if (value) {
- char *tmp;
PyObject *old_str = PyObject_Str(value);
+ const char *tmp = SWIG_Python_str_AsChar(old_str);
+ if (!tmp)
+ tmp = "Invalid error message";
Py_XINCREF(type);
PyErr_Clear();
if (infront) {
- PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
+ PyErr_Format(type, "%s %s", mesg, tmp);
} else {
- PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
+ PyErr_Format(type, "%s %s", tmp, mesg);
}
SWIG_Python_str_DelForPy3(tmp);
Py_DECREF(old_str);
@@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
Py_INCREF(name);
} else {
encoded_name = PyUnicode_AsUTF8String(name);
+ if (!encoded_name)
+ return -1;
}
PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
Py_DECREF(encoded_name);
diff --git a/Lib/python/pystrings.swg b/Lib/python/pystrings.swg
index fd37855eb..301e0f3e1 100644
--- a/Lib/python/pystrings.swg
+++ b/Lib/python/pystrings.swg
@@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
%#endif
{
char *cstr; Py_ssize_t len;
+ int ret = SWIG_OK;
%#if PY_VERSION_HEX>=0x03000000
%#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
if (!alloc && cptr) {
@@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
return SWIG_RuntimeError;
}
obj = PyUnicode_AsUTF8String(obj);
- if(alloc) *alloc = SWIG_NEWOBJ;
+ if (!obj)
+ return SWIG_TypeError;
+ if (alloc)
+ *alloc = SWIG_NEWOBJ;
%#endif
PyBytes_AsStringAndSize(obj, &cstr, &len);
%#else
@@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
%#endif
%#else
*cptr = SWIG_Python_str_AsChar(obj);
+ if (!*cptr)
+ ret = SWIG_TypeError;
%#endif
}
}
@@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
%#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
Py_XDECREF(obj);
%#endif
- return SWIG_OK;
+ return ret;
} else {
%#if defined(SWIG_PYTHON_2_UNICODE)
%#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
@@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
return SWIG_RuntimeError;
}
obj = PyUnicode_AsUTF8String(obj);
+ if (!obj)
+ return SWIG_TypeError;
if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
if (cptr) {
if (alloc) *alloc = SWIG_NEWOBJ;
--
2.21.1