From 31fe33b583978fe0075269527bf0eba08d233db9 Mon Sep 17 00:00:00 2001 From: Charalampos Stratakis Date: Sat, 6 May 2017 23:27:11 +0200 Subject: [PATCH] Update PEP 538 to the latest upstream implementation --- 00262-pep538_coerce_legacy_c_locale.patch | 328 ++++++++++++++-------- python3.spec | 5 +- 2 files changed, 222 insertions(+), 111 deletions(-) diff --git a/00262-pep538_coerce_legacy_c_locale.patch b/00262-pep538_coerce_legacy_c_locale.patch index a13b644..462f2aa 100644 --- a/00262-pep538_coerce_legacy_c_locale.patch +++ b/00262-pep538_coerce_legacy_c_locale.patch @@ -1,34 +1,39 @@ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst -index c0e64d6..0bb28da 100644 +index 195f63f..0d0a127 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst -@@ -711,6 +711,35 @@ conflict. +@@ -713,6 +713,40 @@ conflict. .. versionadded:: 3.6 + +.. envvar:: PYTHONCOERCECLOCALE + -+ If set to a non-empty string, causes the main Python command line application ++ If set to the value ``0``, causes the main Python command line application + to skip coercing the legacy ASCII-based C locale to a more capable UTF-8 + based alternative. Note that this setting is checked even when the + :option:`-E` or :option:`-I` options are used, as it is handled prior to + the processing of command line options. + -+ If this variable is *not* set, and the current locale reported for the -+ ``LC_CTYPE`` category is the default ``C`` locale, then the Python CLI will -+ attempt to configure one of the following locales for the given locale -+ categories before loading the interpreter runtime: ++ If this variable is *not* set, or is set to a value other than ``0``, and ++ the current locale reported for the ``LC_CTYPE`` category is the default ++ ``C`` locale, then the Python CLI will attempt to configure one of the ++ following locales for the given locale categories before loading the ++ interpreter runtime: + -+ * ``C.UTF-8` (``LC_ALL``) -+ * ``C.utf8` (``LC_ALL``) -+ * ``UTF-8` (``LC_CTYPE``) ++ * ``C.UTF-8`` (``LC_ALL``) ++ * ``C.utf8`` (``LC_ALL``) ++ * ``UTF-8`` (``LC_CTYPE``) + + If setting one of these locale categories succeeds, then the matching -+ environment variables will be set (both ``LC_ALL` and ``LANG`` for the -+ ``LC_ALL`` category, and ``LC_CTYPE`` for the ``LC_CTYPE`` category), -+ and (if not already set to a non-empty string) :envvar:`PYTHONIOENCODING` -+ will be set to ``utf-8:surrogateescape``. ++ environment variables will be set (both ``LC_ALL`` and ``LANG`` for the ++ ``LC_ALL`` category, and ``LC_CTYPE`` for the ``LC_CTYPE`` category) in ++ the current process environment before the Python runtime is initialized. ++ ++ Configuring one of these locales (either explicitly or via the above ++ implicit locale coercion) will automatically set the error handler for ++ :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This ++ behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual. + + Availability: \*nix + @@ -39,7 +44,7 @@ index c0e64d6..0bb28da 100644 ~~~~~~~~~~~~~~~~~~~~ diff --git a/Lib/test/support/script_helper.py b/Lib/test/support/script_helper.py -index 80889b1..1a1a862 100644 +index ca5f9c2..7aa460b 100644 --- a/Lib/test/support/script_helper.py +++ b/Lib/test/support/script_helper.py @@ -51,8 +51,35 @@ def interpreter_requires_environment(): @@ -80,7 +85,7 @@ index 80889b1..1a1a862 100644 # Executing the interpreter in a subprocess -@@ -99,30 +126,7 @@ def run_python_until_end(*args, **env_vars): +@@ -110,30 +137,7 @@ def run_python_until_end(*args, **env_vars): def _assert_python(expected_success, *args, **env_vars): res, cmd_line = run_python_until_end(*args, **env_vars) if (res.rc and expected_success) or (not res.rc and not expected_success): @@ -113,10 +118,28 @@ index 80889b1..1a1a862 100644 def assert_python_ok(*args, **env_vars): diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py -index 2a53f3d..ece84af 100644 +index 2a53f3d..391ca15 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py -@@ -386,7 +386,7 @@ class EmbeddingTests(unittest.TestCase): +@@ -369,14 +369,15 @@ class EmbeddingTests(unittest.TestCase): + def tearDown(self): + os.chdir(self.oldcwd) + +- def run_embedded_interpreter(self, *args): ++ def run_embedded_interpreter(self, *args, env=None): + """Runs a test in the embedded interpreter""" + cmd = [self.test_exe] + cmd.extend(args) + p = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, +- universal_newlines=True) ++ universal_newlines=True, ++ env=env) + (out, err) = p.communicate() + self.assertEqual(p.returncode, 0, + "bad returncode %d, stderr is %r" % +@@ -386,7 +387,7 @@ class EmbeddingTests(unittest.TestCase): def test_subinterps(self): # This is just a "don't crash" test out, err = self.run_embedded_interpreter("repeated_init_and_subinterpreters") @@ -125,11 +148,14 @@ index 2a53f3d..ece84af 100644 print() print(out) print(err) -@@ -404,14 +404,15 @@ class EmbeddingTests(unittest.TestCase): +@@ -403,13 +404,14 @@ class EmbeddingTests(unittest.TestCase): + def test_forced_io_encoding(self): # Checks forced configuration of embedded interpreter IO streams - out, err = self.run_embedded_interpreter("forced_io_encoding") +- out, err = self.run_embedded_interpreter("forced_io_encoding") - if support.verbose: ++ env = {"PYTHONIOENCODING": "UTF-8:surrogateescape"} ++ out, err = self.run_embedded_interpreter("forced_io_encoding", env=env) + if support.verbose > 1: print() print(out) @@ -140,12 +166,9 @@ index 2a53f3d..ece84af 100644 + expected_stdin_encoding = "UTF-8" expected_pipe_encoding = self._get_default_pipe_encoding() expected_output = '\n'.join([ -+ "Setting PYTHONIOENCODING=UTF-8:surrogateescape", "--- Use defaults ---", - "Expected encoding: default", - "Expected errors: default", diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py -index b71bb9f..56867fc 100644 +index ae2bcd4..0a302ff 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -9,8 +9,9 @@ import sys @@ -181,7 +204,7 @@ index df9ebd4..63145e4 100644 'import sys', 'def dump(name):', diff --git a/Programs/_testembed.c b/Programs/_testembed.c -index a68d4fa..1494452 100644 +index a68d4fa..e28de1c 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1,4 +1,5 @@ @@ -191,17 +214,7 @@ index a68d4fa..1494452 100644 #include /********************************************************* -@@ -106,6 +107,9 @@ static void check_stdio_details(const char *encoding, const char * errors) - - static int test_forced_io_encoding(void) - { -+ /* Ensure consistent "defaults" */ -+ printf("Setting PYTHONIOENCODING=UTF-8:surrogateescape\n"); -+ setenv("PYTHONIOENCODING", "UTF-8:surrogateescape", 1); - /* Check various combinations */ - printf("--- Use defaults ---\n"); - check_stdio_details(NULL, NULL); -@@ -126,6 +130,20 @@ static int test_forced_io_encoding(void) +@@ -126,6 +127,20 @@ static int test_forced_io_encoding(void) return 0; } @@ -222,7 +235,7 @@ index a68d4fa..1494452 100644 /* ********************************************************* * List of test cases and the function that implements it. * -@@ -147,6 +165,7 @@ struct TestCase +@@ -147,6 +162,7 @@ struct TestCase static struct TestCase TestCases[] = { { "forced_io_encoding", test_forced_io_encoding }, { "repeated_init_and_subinterpreters", test_repeated_init_and_subinterpreters }, @@ -231,14 +244,14 @@ index a68d4fa..1494452 100644 }; diff --git a/Programs/python.c b/Programs/python.c -index a7afbc7..b5edebb 100644 +index a7afbc7..03f8295 100644 --- a/Programs/python.c +++ b/Programs/python.c -@@ -15,6 +15,110 @@ wmain(int argc, wchar_t **argv) +@@ -15,6 +15,21 @@ wmain(int argc, wchar_t **argv) } #else -+/* Helpers to better handle the legacy C locale ++/* Access private pylifecycle helper API to better handle the legacy C locale + * + * The legacy C locale assumes ASCII as the default text encoding, which + * causes problems not only for the CPython runtime, but also other @@ -250,11 +263,100 @@ index a7afbc7..b5edebb 100644 + * See the documentation of the PYTHONCOERCECLOCALE setting for more details. + * + */ ++extern int _Py_LegacyLocaleDetected(void); ++extern void _Py_CoerceLegacyLocale(void); + -+#ifdef PY_COERCE_C_LOCALE -+static const char *_C_LOCALE_COERCION_WARNING = -+ "Python detected LC_CTYPE=C: %.20s coerced to %.20s (set another locale " -+ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behaviour).\n"; + int + main(int argc, char **argv) + { +@@ -25,7 +40,11 @@ main(int argc, char **argv) + char *oldloc; + + /* Force malloc() allocator to bootstrap Python */ ++#ifdef Py_DEBUG ++ (void)_PyMem_SetupAllocators("malloc_debug"); ++# else + (void)_PyMem_SetupAllocators("malloc"); ++# endif + + argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1)); + argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1)); +@@ -49,7 +68,21 @@ main(int argc, char **argv) + return 1; + } + ++#ifdef __ANDROID__ ++ /* Passing "" to setlocale() on Android requests the C locale rather ++ * than checking environment variables, so request C.UTF-8 explicitly ++ */ ++ setlocale(LC_ALL, "C.UTF-8"); ++#else ++ /* Reconfigure the locale to the default for this process */ + setlocale(LC_ALL, ""); ++#endif ++ ++ if (_Py_LegacyLocaleDetected()) { ++ _Py_CoerceLegacyLocale(); ++ } ++ ++ /* Convert from char to wchar_t based on the locale settings */ + for (i = 0; i < argc; i++) { + argv_copy[i] = Py_DecodeLocale(argv[i], NULL); + if (!argv_copy[i]) { +@@ -70,7 +103,11 @@ main(int argc, char **argv) + + /* Force again malloc() allocator to release memory blocks allocated + before Py_Main() */ ++#ifdef Py_DEBUG ++ (void)_PyMem_SetupAllocators("malloc_debug"); ++# else + (void)_PyMem_SetupAllocators("malloc"); ++# endif + + for (i = 0; i < argc; i++) { + PyMem_RawFree(argv_copy2[i]); +diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c +index a4f7f82..261ed34 100644 +--- a/Python/pylifecycle.c ++++ b/Python/pylifecycle.c +@@ -167,6 +167,7 @@ Py_SetStandardStreamEncoding(const char *encoding, const char *errors) + return 0; + } + ++ + /* Global initializations. Can be undone by Py_FinalizeEx(). Don't + call this twice without an intervening Py_FinalizeEx() call. When + initializations fail, a fatal error is issued and the function does +@@ -301,6 +302,173 @@ import_init(PyInterpreterState *interp, PyObject *sysmod) + } + + ++/* Helper functions to better handle the legacy C locale ++ * ++ * The legacy C locale assumes ASCII as the default text encoding, which ++ * causes problems not only for the CPython runtime, but also other ++ * components like GNU readline. ++ * ++ * Accordingly, when the CLI detects it, it attempts to coerce it to a ++ * more capable UTF-8 based alternative as follows: ++ * ++ * if (_Py_LegacyLocaleDetected()) { ++ * _Py_CoerceLegacyLocale(); ++ * } ++ * ++ * See the documentation of the PYTHONCOERCECLOCALE setting for more details. ++ * ++ * Locale coercion also impacts the default error handler for the standard ++ * streams: while the usual default is "strict", the default for the legacy ++ * C locale and for any of the coercion target locales is "surrogateescape". ++ */ ++ ++int ++_Py_LegacyLocaleDetected(void) ++{ ++ const char *ctype_loc = setlocale(LC_CTYPE, NULL); ++ return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; ++} + +typedef struct _CandidateLocale { + const char *locale_name; @@ -268,7 +370,35 @@ index a7afbc7..b5edebb 100644 + { NULL, 0 } +}; + -+void ++static char * ++get_default_standard_stream_error_handler(void) ++{ ++ const char *ctype_loc = setlocale(LC_CTYPE, NULL); ++ if (ctype_loc != NULL) { ++ /* "surrogateescape" is the default in the legacy C locale */ ++ if (strcmp(ctype_loc, "C") == 0) { ++ return "surrogateescape"; ++ } ++ ++ /* "surrogateescape" is the default in locale coercion target locales */ ++ const _LocaleCoercionTarget *target = NULL; ++ for (target = _TARGET_LOCALES; target->locale_name; target++) { ++ if (strcmp(ctype_loc, target->locale_name) == 0) { ++ return "surrogateescape"; ++ } ++ } ++ } ++ ++ /* Otherwise return NULL to request the typical default error handler */ ++ return NULL; ++} ++ ++#ifdef PY_COERCE_C_LOCALE ++static const char *_C_LOCALE_COERCION_WARNING = ++ "Python detected LC_CTYPE=C: %.20s coerced to %.20s (set another locale " ++ "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n"; ++ ++static void +_coerce_default_locale_settings(const _LocaleCoercionTarget *target) +{ + const char *newloc = target->locale_name; @@ -303,28 +433,36 @@ index a7afbc7..b5edebb 100644 + return; + } + -+ /* Set PYTHONIOENCODING if not already set */ -+ if (setenv("PYTHONIOENCODING", "utf-8:surrogateescape", 0)) { -+ fprintf(stderr, -+ "Error setting PYTHONIOENCODING during C locale coercion\n"); -+ } -+ + /* Reconfigure with the overridden environment variables */ + setlocale(LC_ALL, ""); +} + -+void -+_handle_legacy_c_locale(void) ++static int ++c_locale_coercion_is_expected(void) +{ ++ /* This may be called prior to Py_Initialize, so we don't call any other ++ * Python APIs, and we ignore the -E and -I flags ++ */ + const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); -+ /* We ignore the Python -E and -I flags here, as we need to sort out ++ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { ++ return 1; ++ } ++ return 0; ++} ++#endif ++ ++void ++_Py_CoerceLegacyLocale(void) ++{ ++#ifdef PY_COERCE_C_LOCALE ++ /* We ignore the Python -E and -I flags here, as the CLI needs to sort out + * the locale settings *before* we try to do anything with the command + * line arguments. For cross-platform debugging purposes, we also need + * to give end users a way to force even scripts that are otherwise + * isolated from their environment to use the legacy ASCII-centric C + * locale. + */ -+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { ++ if (c_locale_coercion_is_expected()) { + /* PYTHONCOERCECLOCALE is not set, or is not set to exactly "0" */ + const _LocaleCoercionTarget *target = NULL; + for (target = _TARGET_LOCALES; target->locale_name; target++) { @@ -336,50 +474,12 @@ index a7afbc7..b5edebb 100644 + return; + } + } -+ + } + /* No C locale warning here, as Py_Initialize will emit one later */ ++#endif +} -+#endif + - int - main(int argc, char **argv) - { -@@ -49,7 +153,26 @@ main(int argc, char **argv) - return 1; - } - -+#ifdef __ANDROID__ -+ /* Passing "" to setlocale() on Android requests the C locale rather -+ * than checking environment variables, so request C.UTF-8 explicitly -+ */ -+ setlocale(LC_ALL, "C.UTF-8"); -+#else -+ /* Reconfigure the locale to the default for this process */ - setlocale(LC_ALL, ""); -+#endif + -+#ifdef PY_COERCE_C_LOCALE -+ /* When the LC_CTYPE category still claims to be using the C locale, -+ assume configuration error and try for a UTF-8 based locale instead */ -+ const char *ctype_loc = setlocale(LC_CTYPE, NULL); -+ if (ctype_loc != NULL && strcmp(ctype_loc, "C") == 0) { -+ _handle_legacy_c_locale(); -+ } -+#endif -+ -+ /* Convert from char to wchar_t based on the locale settings */ - for (i = 0; i < argc; i++) { - argv_copy[i] = Py_DecodeLocale(argv[i], NULL); - if (!argv_copy[i]) { -diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c -index a4f7f82..dd58dc9 100644 ---- a/Python/pylifecycle.c -+++ b/Python/pylifecycle.c -@@ -301,6 +301,31 @@ import_init(PyInterpreterState *interp, PyObject *sysmod) - } - - +#ifdef PY_WARN_ON_C_LOCALE +static const char *_C_LOCALE_WARNING = + "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " @@ -390,15 +490,8 @@ index a4f7f82..dd58dc9 100644 +static void +_emit_stderr_warning_for_c_locale(void) +{ -+ const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); -+ /* We don't emit a warning if locale coercion has been explicitly disabled. -+ * -+ * For consistency with the corresponding check in Programs/python.c -+ * we ignore the Python -E and -I flags here. -+ */ -+ if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { -+ const char *ctype_loc = setlocale(LC_CTYPE, NULL); -+ if (ctype_loc != NULL && strcmp(ctype_loc, "C") == 0) { ++ if (c_locale_coercion_is_expected()) { ++ if (_Py_LegacyLocaleDetected()) { + fprintf(stderr, "%s", _C_LOCALE_WARNING); + } + } @@ -408,7 +501,7 @@ index a4f7f82..dd58dc9 100644 void _Py_InitializeEx_Private(int install_sigs, int install_importlib) { -@@ -315,11 +340,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) +@@ -315,11 +483,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) initialized = 1; _Py_Finalizing = NULL; @@ -429,8 +522,23 @@ index a4f7f82..dd58dc9 100644 #endif if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0') +@@ -1242,12 +1418,8 @@ initstdio(void) + } + } + if (!errors && !(pythonioencoding && *pythonioencoding)) { +- /* When the LC_CTYPE locale is the POSIX locale ("C locale"), +- stdin and stdout use the surrogateescape error handler by +- default, instead of the strict error handler. */ +- char *loc = setlocale(LC_CTYPE, NULL); +- if (loc != NULL && strcmp(loc, "C") == 0) +- errors = "surrogateescape"; ++ /* Choose the default error handler based on the current locale */ ++ errors = get_default_standard_stream_error_handler(); + } + } + diff --git a/configure b/configure -index 6bcddb7..13052d6 100755 +index 2915246..39e5a27 100755 --- a/configure +++ b/configure @@ -834,6 +834,8 @@ with_thread @@ -455,7 +563,7 @@ index 6bcddb7..13052d6 100755 --with-valgrind Enable Valgrind support --with(out)-dtrace disable/enable DTrace support --with-fpectl enable SIGFPE catching -@@ -11016,6 +11024,52 @@ fi +@@ -11010,6 +11018,52 @@ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_pymalloc" >&5 $as_echo "$with_pymalloc" >&6; } @@ -509,10 +617,10 @@ index 6bcddb7..13052d6 100755 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-valgrind" >&5 $as_echo_n "checking for --with-valgrind... " >&6; } diff --git a/configure.ac b/configure.ac -index e222c21..a1653e7 100644 +index 67dfba3..b9c9f04 100644 --- a/configure.ac +++ b/configure.ac -@@ -3287,6 +3287,40 @@ then +@@ -3279,6 +3279,40 @@ then fi AC_MSG_RESULT($with_pymalloc) @@ -554,10 +662,10 @@ index e222c21..a1653e7 100644 AC_MSG_CHECKING([for --with-valgrind]) AC_ARG_WITH([valgrind], diff --git a/pyconfig.h.in b/pyconfig.h.in -index e7a836c..11e0798 100644 +index b10c57f..0a6f3e2 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in -@@ -1241,9 +1241,15 @@ +@@ -1244,9 +1244,15 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT diff --git a/python3.spec b/python3.spec index 996c52e..aec30b2 100644 --- a/python3.spec +++ b/python3.spec @@ -123,7 +123,7 @@ Summary: Version 3 of the Python programming language aka Python 3000 Name: python3 Version: %{pybasever}.1 -Release: 4%{?dist} +Release: 5%{?dist} License: Python Group: Development/Languages @@ -1659,6 +1659,9 @@ fi # ====================================================== %changelog +* Fri May 05 2017 Charalampos Stratakis - 3.6.1-5 +- Update PEP 538 to the latest upstream implementation + * Tue Apr 18 2017 Charalampos Stratakis - 3.6.1-4 - Enable link time optimizations - Move windows executables to the devel subpackage (rhbz#1426257)