347 lines
12 KiB
Diff
347 lines
12 KiB
Diff
From b0e29fbdf31bb94b11cb8a7cc830b4a76467afa3 Mon Sep 17 00:00:00 2001
|
|
From: William S Fulton <wsf@fultondesigns.co.uk>
|
|
Date: Mon, 4 Dec 2017 18:41:55 +0000
|
|
Subject: [PATCH] Add missing checks for failures in calls to
|
|
PyUnicode_AsUTF8String.
|
|
|
|
Previously a seg fault could occur when passing invalid UTF8 strings (low
|
|
surrogates), e.g. passing u"\udcff" to the C layer (Python 3).
|
|
---
|
|
CHANGES.current | 8 ++++++-
|
|
Doc/Manual/Python.html | 22 ++++++++++++++++---
|
|
Doc/Manual/Varargs.html | 5 ++++-
|
|
Examples/python/multimap/example.i | 12 +++++++++-
|
|
.../python/unicode_strings_runme.py | 10 +++++++++
|
|
Examples/test-suite/python_varargs_typemap.i | 5 ++++-
|
|
Examples/test-suite/unicode_strings.i | 2 ++
|
|
Lib/python/pyerrors.swg | 11 ++++++----
|
|
Lib/python/pyhead.swg | 16 ++++++++------
|
|
Lib/python/pyinit.swg | 4 ++--
|
|
Lib/python/pyrun.swg | 10 ++++++---
|
|
Lib/python/pystrings.swg | 12 ++++++++--
|
|
12 files changed, 92 insertions(+), 25 deletions(-)
|
|
|
|
#diff --git a/CHANGES.current b/CHANGES.current
|
|
#index 5cab80172..06b958f18 100644
|
|
#--- a/CHANGES.current
|
|
#+++ b/CHANGES.current
|
|
#@@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
|
|
#
|
|
# Version 4.0.0 (in progress)
|
|
# ===========================
|
|
#+
|
|
#+2017-12-04: wsfulton
|
|
#+ [Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a
|
|
#+ seg fault could occur when passing invalid UTF8 strings (low surrogates), e.g. passing
|
|
#+ u"\udcff" to the C layer (Python 3).
|
|
#+
|
|
# 2017-11-24: joequant
|
|
#- Fix github #1124 and return R_NilValue for null pointers
|
|
#+ Fix #1124 and return R_NilValue for null pointers
|
|
#
|
|
# 2017-11-29: wsfulton
|
|
# [Java] director exception handling improvements.
|
|
#diff --git a/Doc/Manual/Python.html b/Doc/Manual/Python.html
|
|
#index 0c0023dea..27ce084bd 100644
|
|
#--- a/Doc/Manual/Python.html
|
|
#+++ b/Doc/Manual/Python.html
|
|
#@@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
|
|
# <div class="code"><pre>
|
|
# %module example
|
|
#
|
|
#-%include <std_string.i>
|
|
#-
|
|
# %inline %{
|
|
#
|
|
#-const char* non_utf8_c_str(void) {
|
|
#+const char * non_utf8_c_str(void) {
|
|
# return "h\xe9llo w\xc3\xb6rld";
|
|
# }
|
|
#
|
|
#+void instring(const char *s) {
|
|
#+ ...
|
|
#+}
|
|
#+
|
|
# %}
|
|
# </pre></div>
|
|
#
|
|
#@@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
|
|
# <a href="https://www.python.org/dev/peps/pep-0383/">PEP 383</a>.
|
|
# </p>
|
|
#
|
|
#+<p>
|
|
#+When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too.
|
|
#+For example, when the <tt>instring</tt> method above is wrapped and called, any invalid UTF8 Unicode strings
|
|
#+will result in a TypeError because the attempted conversion fails:
|
|
#+</p>
|
|
#+
|
|
#+<div class="targetlang"><pre>
|
|
#+>>> example.instring('h\xe9llo')
|
|
#+>>> example.instring('h\udce9llo')
|
|
#+Traceback (most recent call last):
|
|
#+ File "<stdin>", line 1, in <module>
|
|
#+TypeError: in method 'instring', argument 1 of type 'char const *'
|
|
#+</pre></div>
|
|
#+
|
|
# <p>
|
|
# In some cases, users may wish to instead handle all byte strings as bytes
|
|
# objects in Python 3. This can be accomplished by adding
|
|
#diff --git a/Doc/Manual/Varargs.html b/Doc/Manual/Varargs.html
|
|
#index eba816382..014a38cae 100644
|
|
#--- a/Doc/Manual/Varargs.html
|
|
#+++ b/Doc/Manual/Varargs.html
|
|
#@@ -529,8 +529,11 @@ like this:
|
|
# SWIG_fail;
|
|
# }
|
|
# pystr = PyUnicode_AsUTF8String(pyobj);
|
|
#+ if (!pystr) {
|
|
#+ SWIG_fail;
|
|
#+ }
|
|
# str = strdup(PyBytes_AsString(pystr));
|
|
#- Py_XDECREF(pystr);
|
|
#+ Py_DECREF(pystr);
|
|
# %#else
|
|
# if (!PyString_Check(pyobj)) {
|
|
# PyErr_SetString(PyExc_ValueError, "Expected a string");
|
|
diff --git a/Examples/python/multimap/example.i b/Examples/python/multimap/example.i
|
|
index 66c0f74c6..3ff5d52c0 100644
|
|
--- a/Examples/python/multimap/example.i
|
|
+++ b/Examples/python/multimap/example.i
|
|
@@ -39,7 +39,11 @@ extern int gcd(int x, int y);
|
|
%#if PY_VERSION_HEX >= 0x03000000
|
|
{
|
|
PyObject *utf8str = PyUnicode_AsUTF8String(s);
|
|
- const char *cstr = PyBytes_AsString(utf8str);
|
|
+ const char *cstr;
|
|
+ if (!utf8str) {
|
|
+ SWIG_fail;
|
|
+ }
|
|
+ cstr = PyBytes_AsString(utf8str);
|
|
$2[i] = strdup(cstr);
|
|
Py_DECREF(utf8str);
|
|
}
|
|
@@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
|
|
SWIG_fail;
|
|
}
|
|
utf8str = PyUnicode_AsUTF8String($input);
|
|
+ if (!utf8str) {
|
|
+ SWIG_fail;
|
|
+ }
|
|
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
|
|
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
|
|
$2 = (int)len;
|
|
@@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
|
|
char *cstr;
|
|
Py_ssize_t len;
|
|
PyObject *utf8str = PyUnicode_AsUTF8String($input);
|
|
+ if (!utf8str) {
|
|
+ SWIG_fail;
|
|
+ }
|
|
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
|
|
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
|
|
$2 = (int)len;
|
|
diff --git a/Examples/test-suite/python/unicode_strings_runme.py b/Examples/test-suite/python/unicode_strings_runme.py
|
|
index fa9c51437..39e93b0fc 100644
|
|
--- a/Examples/test-suite/python/unicode_strings_runme.py
|
|
+++ b/Examples/test-suite/python/unicode_strings_runme.py
|
|
@@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
|
|
check(unicode_strings.charstring(unicode("hello4")), "hello4")
|
|
unicode_strings.charstring(u"hell\xb05")
|
|
unicode_strings.charstring(u"hell\u00f66")
|
|
+
|
|
+low_surrogate_string = u"\udcff"
|
|
+try:
|
|
+ unicode_strings.instring(low_surrogate_string)
|
|
+ # Will succeed with Python 2
|
|
+except TypeError, e:
|
|
+ # Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
|
|
+ # The real error is actually:
|
|
+ # UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
|
|
+ pass
|
|
diff --git a/Examples/test-suite/python_varargs_typemap.i b/Examples/test-suite/python_varargs_typemap.i
|
|
index f05fb98eb..d809bf1fa 100644
|
|
--- a/Examples/test-suite/python_varargs_typemap.i
|
|
+++ b/Examples/test-suite/python_varargs_typemap.i
|
|
@@ -23,8 +23,11 @@
|
|
SWIG_fail;
|
|
}
|
|
pystr = PyUnicode_AsUTF8String(pyobj);
|
|
+ if (!pystr) {
|
|
+ SWIG_fail;
|
|
+ }
|
|
str = strdup(PyBytes_AsString(pystr));
|
|
- Py_XDECREF(pystr);
|
|
+ Py_DECREF(pystr);
|
|
%#else
|
|
if (!PyString_Check(pyobj)) {
|
|
PyErr_SetString(PyExc_ValueError, "Expected a string");
|
|
diff --git a/Examples/test-suite/unicode_strings.i b/Examples/test-suite/unicode_strings.i
|
|
index 9be3748e6..e7266266e 100644
|
|
--- a/Examples/test-suite/unicode_strings.i
|
|
+++ b/Examples/test-suite/unicode_strings.i
|
|
@@ -20,4 +20,6 @@ char *charstring(char *s) {
|
|
return s;
|
|
}
|
|
|
|
+void instring(const char *s) {
|
|
+}
|
|
%}
|
|
diff --git a/Lib/python/pyerrors.swg b/Lib/python/pyerrors.swg
|
|
index fe7313554..463afae15 100644
|
|
--- a/Lib/python/pyerrors.swg
|
|
+++ b/Lib/python/pyerrors.swg
|
|
@@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
|
|
PyObject *value = 0;
|
|
PyObject *traceback = 0;
|
|
|
|
- if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
|
|
+ if (PyErr_Occurred())
|
|
+ PyErr_Fetch(&type, &value, &traceback);
|
|
if (value) {
|
|
- char *tmp;
|
|
PyObject *old_str = PyObject_Str(value);
|
|
+ const char *tmp = SWIG_Python_str_AsChar(old_str);
|
|
PyErr_Clear();
|
|
Py_XINCREF(type);
|
|
-
|
|
- PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
|
|
+ if (tmp)
|
|
+ PyErr_Format(type, "%s %s", tmp, mesg);
|
|
+ else
|
|
+ PyErr_Format(type, "%s", mesg);
|
|
SWIG_Python_str_DelForPy3(tmp);
|
|
Py_DECREF(old_str);
|
|
Py_DECREF(value);
|
|
diff --git a/Lib/python/pyhead.swg b/Lib/python/pyhead.swg
|
|
index 55eb95a6d..2fa8b5b4c 100644
|
|
--- a/Lib/python/pyhead.swg
|
|
+++ b/Lib/python/pyhead.swg
|
|
@@ -38,14 +38,16 @@ SWIGINTERN char*
|
|
SWIG_Python_str_AsChar(PyObject *str)
|
|
{
|
|
#if PY_VERSION_HEX >= 0x03000000
|
|
- char *cstr;
|
|
- char *newstr;
|
|
- Py_ssize_t len;
|
|
+ char *newstr = 0;
|
|
str = PyUnicode_AsUTF8String(str);
|
|
- PyBytes_AsStringAndSize(str, &cstr, &len);
|
|
- newstr = (char *) malloc(len+1);
|
|
- memcpy(newstr, cstr, len+1);
|
|
- Py_XDECREF(str);
|
|
+ if (str) {
|
|
+ char *cstr;
|
|
+ Py_ssize_t len;
|
|
+ PyBytes_AsStringAndSize(str, &cstr, &len);
|
|
+ newstr = (char *) malloc(len+1);
|
|
+ memcpy(newstr, cstr, len+1);
|
|
+ Py_XDECREF(str);
|
|
+ }
|
|
return newstr;
|
|
#else
|
|
return PyString_AsString(str);
|
|
diff --git a/Lib/python/pyinit.swg b/Lib/python/pyinit.swg
|
|
index fe45ac941..826f8411b 100644
|
|
--- a/Lib/python/pyinit.swg
|
|
+++ b/Lib/python/pyinit.swg
|
|
@@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
|
|
|
|
SWIGINTERN int
|
|
swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
|
|
- char *tmp;
|
|
PyObject *str = swig_varlink_str(v);
|
|
+ const char *tmp = SWIG_Python_str_AsChar(str);
|
|
fprintf(fp,"Swig global variables ");
|
|
- fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
|
|
+ fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
|
|
SWIG_Python_str_DelForPy3(tmp);
|
|
Py_DECREF(str);
|
|
return 0;
|
|
diff --git a/Lib/python/pyrun.swg b/Lib/python/pyrun.swg
|
|
index efc476613..430d3af18 100644
|
|
--- a/Lib/python/pyrun.swg
|
|
+++ b/Lib/python/pyrun.swg
|
|
@@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
|
|
PyObject *traceback = 0;
|
|
PyErr_Fetch(&type, &value, &traceback);
|
|
if (value) {
|
|
- char *tmp;
|
|
PyObject *old_str = PyObject_Str(value);
|
|
+ const char *tmp = SWIG_Python_str_AsChar(old_str);
|
|
+ if (!tmp)
|
|
+ tmp = "Invalid error message";
|
|
Py_XINCREF(type);
|
|
PyErr_Clear();
|
|
if (infront) {
|
|
- PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
|
|
+ PyErr_Format(type, "%s %s", mesg, tmp);
|
|
} else {
|
|
- PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
|
|
+ PyErr_Format(type, "%s %s", tmp, mesg);
|
|
}
|
|
SWIG_Python_str_DelForPy3(tmp);
|
|
Py_DECREF(old_str);
|
|
@@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
|
|
Py_INCREF(name);
|
|
} else {
|
|
encoded_name = PyUnicode_AsUTF8String(name);
|
|
+ if (!encoded_name)
|
|
+ return -1;
|
|
}
|
|
PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
|
|
Py_DECREF(encoded_name);
|
|
diff --git a/Lib/python/pystrings.swg b/Lib/python/pystrings.swg
|
|
index fd37855eb..301e0f3e1 100644
|
|
--- a/Lib/python/pystrings.swg
|
|
+++ b/Lib/python/pystrings.swg
|
|
@@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|
%#endif
|
|
{
|
|
char *cstr; Py_ssize_t len;
|
|
+ int ret = SWIG_OK;
|
|
%#if PY_VERSION_HEX>=0x03000000
|
|
%#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
|
|
if (!alloc && cptr) {
|
|
@@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|
return SWIG_RuntimeError;
|
|
}
|
|
obj = PyUnicode_AsUTF8String(obj);
|
|
- if(alloc) *alloc = SWIG_NEWOBJ;
|
|
+ if (!obj)
|
|
+ return SWIG_TypeError;
|
|
+ if (alloc)
|
|
+ *alloc = SWIG_NEWOBJ;
|
|
%#endif
|
|
PyBytes_AsStringAndSize(obj, &cstr, &len);
|
|
%#else
|
|
@@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|
%#endif
|
|
%#else
|
|
*cptr = SWIG_Python_str_AsChar(obj);
|
|
+ if (!*cptr)
|
|
+ ret = SWIG_TypeError;
|
|
%#endif
|
|
}
|
|
}
|
|
@@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|
%#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
|
|
Py_XDECREF(obj);
|
|
%#endif
|
|
- return SWIG_OK;
|
|
+ return ret;
|
|
} else {
|
|
%#if defined(SWIG_PYTHON_2_UNICODE)
|
|
%#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
|
|
@@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|
return SWIG_RuntimeError;
|
|
}
|
|
obj = PyUnicode_AsUTF8String(obj);
|
|
+ if (!obj)
|
|
+ return SWIG_TypeError;
|
|
if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
|
|
if (cptr) {
|
|
if (alloc) *alloc = SWIG_NEWOBJ;
|
|
--
|
|
2.21.1
|
|
|