Import glibc snapshot from rawhide (glibc-2.33.9000-52.fc35)

Related: #1983628
2021-07-26 09:33:11 +02:00 · 2021-07-26 09:33:11 +02:00 · 0bc856b55b
commit 0bc856b55b
parent 39f6d21b86
4 changed files with 1183 additions and 144 deletions
--- a/glibc-c-utf8-locale.patch
+++ b/glibc-c-utf8-locale.patch
@ -1,48 +1,370 @@
-Short description: Add C.UTF-8 support.
-Author(s): Fedora glibc team <glibc@lists.fedoraproject.org>
-Origin: PATCH
-Upstream status: not-submitted
+Submission: https://sourceware.org/pipermail/libc-alpha/2021-July/129456.html

-This patch needs to upstream as part of Carlos O'Donell
-<carlos@redhat.com>'s work on enabling upstream C.UTF-8 support. This
-work is currently blocked on cleaning up the test results to prove that
-full code-point sorting is working as intended.
+Author: Carlos O'Donell <carlos@redhat.com>
+Date:   Sun Jul 25 22:14:08 2021 -0400

-Note that this patch does not provide full code-point sorting as
-expected.
-
-This patch needs to upstream as soon as possible since it would be nice
-to have this in F29 and fixed.
-
-From 2eda7b462b415105f5a05c1323372d4e39d46439 Mon Sep 17 00:00:00 2001
-From: Mike FABIAN <mfabian@redhat.com>
-Date: Mon, 10 Aug 2015 15:58:12 +0200
-Subject: [PATCH] Add a C.UTF-8 locale
-
---
- localedata/SUPPORTED |   1 +
- localedata/locales/C | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 239 insertions(+)
- create mode 100644 localedata/locales/C
+    Add generic C.UTF-8 locale (Bug 17318)
+    
+    We add a new C.UTF-8 locale.  This locale is not builtin to glibc, but
+    is provided as a distinct locale.  The locale provides full support
+    for UTF-8 and this includes full code point sorting via strcmp-based
+    collation.
+    
+    The collation uses a new keyword 'strcmp_collation' which drops all
+    collation rules and generates an empty zero rules collation to enable
+    strcmp usage in collation. This ensures that we get full code point
+    sorting for C.UTF-8 with a minimal 92 bytes of overhead (LC_COLLATE
+    structure information).
+    
+    The new locale is added to SUPPORTED.  Minimal test data for specific
+    code points (minus those not supported by collate-test) is provided
+    in C.UTF-8.in, and this verifies code point sorting is working
+    reasonably across the range.  The locale was tested manually with the
+    full set of code points without failure.
+    
+    The locale is harmonized with locales already shipping in Gentoo,
+    Debian, Ubuntu, Fedora, CentOS Stream, and RHEL. A new tst-iconv9 test
+    is added which verifies the C.UTF-8 locale is generally usable.
+    
+    Tested on x86_64 and i686 without regression.

+diff --git a/iconv/Makefile b/iconv/Makefile
+index 07d77c9ecaafba1f..9993f2d3f3cd7498 100644
+--- a/iconv/Makefile
+++ b/iconv/Makefile
+@@ -43,8 +43,19 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
+ CFLAGS-linereader.c += -DNO_TRANSLITERATION
+ CFLAGS-simple-hash.c += -I../locale
+ 
+-tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
+-	  tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt
+tests = \
+	tst-iconv1 \
+	tst-iconv2 \
+	tst-iconv3 \
+	tst-iconv4 \
+	tst-iconv5 \
+	tst-iconv6 \
+	tst-iconv7 \
+	tst-iconv8 \
+	tst-iconv9 \
+	tst-iconv-mt \
+	tst-iconv-opt \
+	# tests
+ 
+ others		= iconv_prog iconvconfig
+ install-others-programs	= $(inst_bindir)/iconv
+@@ -83,10 +94,15 @@ endif
+ include ../Rules
+ 
+ ifeq ($(run-built-tests),yes)
+-LOCALES := en_US.UTF-8
+# We have to generate locales (list sorted alphabetically)
+LOCALES := \
+	C.UTF-8 \
+	en_US.UTF-8 \
+	# LOCALES
+ include ../gen-locales.mk
+ 
+ $(objpfx)tst-iconv-opt.out: $(gen-locales)
+$(objpfx)tst-iconv9.out: $(gen-locales)
+ endif
+ 
+ $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
+diff --git a/iconv/tst-iconv9.c b/iconv/tst-iconv9.c
+new file mode 100644
+index 0000000000000000..78a532427993d1c1
+--- /dev/null
+++ b/iconv/tst-iconv9.c
+@@ -0,0 +1,87 @@
+/* Verify that using C.UTF-8 works.
+
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/support.h>
+#include <support/check.h>
+
+/* This test does two things:
+   (1) Verify that we have likely included translit_combining in C.UTF-8.
+   (2) Verify default_missing is '?' as expected.  */
+
+/* ISO-8859-1 encoding of "für".  */
+char iso88591_in[] = { 0x66, 0xfc, 0x72, 0x0 };
+/* ASCII transliteration is "fur" with C.UTF-8 translit_combining.  */
+char ascii_exp[] = { 0x66, 0x75, 0x72, 0x0 };
+
+/* First 3-byte UTF-8 code point.  */
+char utf8_in[] = { 0xe0, 0xa0, 0x80, 0x0 };
+/* There is no ASCII transliteration for SAMARITAN LETTER ALAF
+   so we get default_missing used which is '?'.  */
+char default_missing_exp[] = { 0x3f, 0x0 };
+
+static int
+do_test (void)
+{
+  char ascii_out[5];
+  iconv_t cd;
+  char *inbuf;
+  char *outbuf;
+  size_t inbytes;
+  size_t outbytes;
+  size_t n;
+
+  /* The C.UTF-8 locale should include translit_combining, which provides
+     the transliteration for "LATIN SMALL LETTER U WITH DIAERESIS" which
+     is not provided by locale/C-translit.h.in.  */
+  xsetlocale (LC_ALL, "C.UTF-8");
+
+  /* From ISO-8859-1 to ASCII. */
+  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "ISO-8859-1");
+  TEST_VERIFY (cd != (iconv_t) -1);
+  inbuf = iso88591_in;
+  inbytes = 3;
+  outbuf = ascii_out;
+  outbytes = 3;
+  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
+  TEST_VERIFY (n != -1);
+  *outbuf = '\0';
+  TEST_COMPARE_BLOB (ascii_out, 3, ascii_exp, 3);
+  TEST_VERIFY (iconv_close (cd) == 0);
+
+  /* From UTF-8 to ASCII. */
+  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "UTF-8");
+  TEST_VERIFY (cd != (iconv_t) -1);
+  inbuf = utf8_in;
+  inbytes = 3;
+  outbuf = ascii_out;
+  outbytes = 3;
+  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
+  TEST_VERIFY (n != -1);
+  *outbuf = '\0';
+  TEST_COMPARE_BLOB (ascii_out, 1, default_missing_exp, 1);
+  TEST_VERIFY (iconv_close (cd) == 0);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
+diff --git a/localedata/C.UTF-8.in b/localedata/C.UTF-8.in
+new file mode 100644
+index 0000000000000000..c31dcc2aa045ee61
+--- /dev/null
+++ b/localedata/C.UTF-8.in
+@@ -0,0 +1,157 @@
+ ; <U1>
+ ; <U2>
+ ; <U3>
+ ; <U4>
+ ; <U5>
+ ; <U6>
+ ; <U7>
+ ; <U8>
+ ; <UE>
+ ; <UF>
+ ; <U10>
+ ; <U11>
+ ; <U12>
+ ; <U13>
+ ; <U14>
+ ; <U15>
+ ; <U16>
+ ; <U17>
+ ; <U18>
+ ; <U19>
+ ; <U1A>
+ ; <U1B>
+ ; <U1C>
+ ; <U1D>
+ ; <U1E>
+ ; <U1F>
+! ; <U21>
+" ; <U22>
+# ; <U23>
+$ ; <U24>
+% ; <U25>
+& ; <U26>
+' ; <U27>
+) ; <U29>
+* ; <U2A>
++ ; <U2B>
+, ; <U2C>
+- ; <U2D>
+. ; <U2E>
+/ ; <U2F>
+0 ; <U30>
+1 ; <U31>
+2 ; <U32>
+3 ; <U33>
+4 ; <U34>
+5 ; <U35>
+6 ; <U36>
+7 ; <U37>
+8 ; <U38>
+9 ; <U39>
+< ; <U3C>
+= ; <U3D>
+> ; <U3E>
+? ; <U3F>
+@ ; <U40>
+A ; <U41>
+B ; <U42>
+C ; <U43>
+D ; <U44>
+E ; <U45>
+F ; <U46>
+G ; <U47>
+H ; <U48>
+I ; <U49>
+J ; <U4A>
+K ; <U4B>
+L ; <U4C>
+M ; <U4D>
+N ; <U4E>
+O ; <U4F>
+P ; <U50>
+Q ; <U51>
+R ; <U52>
+S ; <U53>
+T ; <U54>
+U ; <U55>
+V ; <U56>
+W ; <U57>
+X ; <U58>
+Y ; <U59>
+Z ; <U5A>
+[ ; <U5B>
+\ ; <U5C>
+] ; <U5D>
+^ ; <U5E>
+_ ; <U5F>
+` ; <U60>
+a ; <U61>
+b ; <U62>
+c ; <U63>
+d ; <U64>
+e ; <U65>
+f ; <U66>
+g ; <U67>
+h ; <U68>
+i ; <U69>
+j ; <U6A>
+k ; <U6B>
+l ; <U6C>
+m ; <U6D>
+n ; <U6E>
+o ; <U6F>
+p ; <U70>
+q ; <U71>
+r ; <U72>
+s ; <U73>
+t ; <U74>
+u ; <U75>
+v ; <U76>
+w ; <U77>
+x ; <U78>
+y ; <U79>
+z ; <U7A>
+{ ; <U7B>
+| ; <U7C>
+} ; <U7D>
+~ ; <U7E>
+ ; <U7F>
+ ; <U80>
+ÿ ; <UFF>
+Ā ; <U100>
+࿿ ; <UFFF>
+က ; <U1000>
+<2B> ; <UFFFD>
+ ; <UFFFF>
+𐀀 ; <U10000>
+🿿 ; <U1FFFF>
+𠀀 ; <U20000>
+𯿿 ; <U2FFFF>
+𰀀 ; <U30000>
+𿿾 ; <U3FFFE>
+񀀀 ; <U40000>
+񏿿 ; <U4FFFF>
+񐀀 ; <U50000>
+񟿿 ; <U5FFFF>
+񠀀 ; <U60000>
+񯿿 ; <U6FFFF>
+񰀀 ; <U70000>
+񿿿 ; <U7FFFF>
+򀀀 ; <U80000>
+򏿿 ; <U8FFFF>
+򐀀 ; <U90000>
+򟿿 ; <U9FFFF>
+򠀀 ; <UA0000>
+򯿿 ; <UAFFFF>
+򰀀 ; <UB0000>
+򿿿 ; <UBFFFF>
+󀀁 ; <UC0001>
+󏿌 ; <UCFFCC>
+󐀎 ; <UD000E>
+󟿿 ; <UDFFFF>
+󠀁 ; <UE0001>
+󯿿 ; <UEFFFF>
+󰀁 ; <UF0001>
+󿿿 ; <UFFFFF>
+􀀁 ; <U100001>
+􏿿 ; <U10FFFF>
+diff --git a/localedata/Makefile b/localedata/Makefile
+index 0341528b0407ae3b..c9dd5a954e8194cc 100644
+--- a/localedata/Makefile
+++ b/localedata/Makefile
+@@ -47,6 +47,7 @@ test-input := \
+ 	bg_BG.UTF-8 \
+ 	br_FR.UTF-8 \
+ 	bs_BA.UTF-8 \
+	C.UTF-8 \
+ 	ckb_IQ.UTF-8 \
+ 	cmn_TW.UTF-8 \
+ 	crh_UA.UTF-8 \
+@@ -206,6 +207,7 @@ LOCALES := \
+ 	bg_BG.UTF-8 \
+ 	br_FR.UTF-8 \
+ 	bs_BA.UTF-8 \
+	C.UTF-8 \
+ 	ckb_IQ.UTF-8 \
+ 	cmn_TW.UTF-8 \
+ 	crh_UA.UTF-8 \
 diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
-index 8ca023e..2a78391 100644
+index 34f7a7c3fe2b6526..546ce6cea16a8fdb 100644
 --- a/localedata/SUPPORTED
 +++ b/localedata/SUPPORTED
-@@ -1,6 +1,7 @@
- # This file names the currently supported and somewhat tested locales.
- # If you have any additions please file a glibc bug report.
- SUPPORTED-LOCALES=\
+@@ -79,6 +79,7 @@ brx_IN/UTF-8 \
+ bs_BA.UTF-8/UTF-8 \
+ bs_BA/ISO-8859-2 \
+ byn_ER/UTF-8 \
 +C.UTF-8/UTF-8 \
- aa_DJ.UTF-8/UTF-8 \
- aa_DJ/ISO-8859-1 \
- aa_ER/UTF-8 \
+ ca_AD.UTF-8/UTF-8 \
+ ca_AD/ISO-8859-15 \
+ ca_ES.UTF-8/UTF-8 \
 diff --git a/localedata/locales/C b/localedata/locales/C
 new file mode 100644
-index 0000000..fdf460e
+index 0000000000000000..651691c72424cf38
 --- /dev/null
 +++ b/localedata/locales/C
-@@ -0,0 +1,238 @@
+@@ -0,0 +1,194 @@
 +escape_char /
 +comment_char %
 +% Locale for C locale in UTF-8
@ -52,14 +374,13 @@ index 0000000..fdf460e
 +source     ""
 +address    ""
 +contact    ""
-+email      "mfabian@redhat.com"
+email      "bug-glibc-locales@gnu.org"
 +tel        ""
 +fax        ""
-+language   "C"
+language   ""
 +territory  ""
-+revision   "1.0"
-+date       "2015-08-10"
-+%
+revision   "2.0"
+date       "2020-06-28"
 +category  "i18n:2012";LC_IDENTIFICATION
 +category  "i18n:2012";LC_CTYPE
 +category  "i18n:2012";LC_COLLATE
@ -75,8 +396,23 @@ index 0000000..fdf460e
 +END LC_IDENTIFICATION
 +
 +LC_CTYPE
-+copy "i18n"
+% Include only the i18n character type classes without any of the
+% transliteration that i18n uses by default.
+copy "i18n_ctype"
 +
+% Include the neutral transliterations.  The builtin C and
+% POSIX locales have +1600 transliterations that are built into
+% the locales, and these are a superset of those.
+translit_start
+include "translit_neutral";""
+% We must use '?' for default_missing because the transliteration
+% framework includes it directly into the output and so it must
+% be compatible with ASCII if that is the target character set.
+default_missing <U003F>
+translit_end
+
+% Include the transliterations that can convert combined characters.
+% These are generally expected by users.
 +translit_start
 +include "translit_combining";""
 +translit_end
@ -84,48 +420,26 @@ index 0000000..fdf460e
 +END LC_CTYPE
 +
 +LC_COLLATE
-+order_start forward
-+<U0000>
-+..
-+<UFFFF>
-+<U00010000>
-+..
-+<U0001FFFF>
-+<U00020000>
-+..
-+<U0002FFFF>
-+<U000E0000>
-+..
-+<U000EFFFF>
-+<U000F0000>
-+..
-+<U000FFFFF>
-+<U00100000>
-+..
-+<U0010FFFF>
-+UNDEFINED
-+order_end
+% The keyword 'strcmp_collation' in any part of any LC_COLLATE
+% immediately discards all collation information and causes the
+% locale to use strcmp for collation comparison.  This is exactly
+% what is needed for C (ASCII) or C.UTF-8.
+strcmp_collation
 +END LC_COLLATE
 +
 +LC_MONETARY
-+% This is the 14652 i18n fdcc-set definition for
-+% the LC_MONETARY category
-+% (except for the int_curr_symbol and currency_symbol, they are empty in
-+% the 14652 i18n fdcc-set definition and also empty in
-+% glibc/locale/C-monetary.c. But localedef complains in that case).
-+%
-+% Using "USD" for int_curr_symbol. But maybe "XXX" would be better?
-+% XXX is "No currency" (https://en.wikipedia.org/wiki/ISO_4217)
-+int_curr_symbol     "<U0055><U0053><U0044><U0020>"
-+% Using "$" for currency_symbol. But maybe <U00A4> would be better?
-+% U+00A4 is the "generic currency symbol"
-+% (https://en.wikipedia.org/wiki/Currency_sign_%28typography%29)
-+currency_symbol     "<U0024>"
-+mon_decimal_point   "<U002E>"
+
+% This is the 14652 i18n fdcc-set definition for the LC_MONETARY
+% category (except for the int_curr_symbol and currency_symbol, they are
+% empty in the 14652 i18n fdcc-set definition and also empty in
+% glibc/locale/C-monetary.c).
+int_curr_symbol     ""
+currency_symbol     ""
+mon_decimal_point   "."
 +mon_thousands_sep   ""
 +mon_grouping        -1
 +positive_sign       ""
-+negative_sign       "<U002D>"
+negative_sign       "-"
 +int_frac_digits     -1
 +frac_digits         -1
 +p_cs_precedes       -1
@ -143,51 +457,29 @@ index 0000000..fdf460e
 +% This is the POSIX Locale definition for
 +% the LC_NUMERIC category.
 +%
-+decimal_point   "<U002E>"
+decimal_point   "."
 +thousands_sep   ""
 +grouping        -1
 +END LC_NUMERIC
 +
 +LC_TIME
-+% This is the POSIX Locale definition for
-+% the LC_TIME category.
+% This is the POSIX Locale definition for the LC_TIME category with the
+% exception that time is per ISO 8601 and 24-hour.
 +%
 +% Abbreviated weekday names (%a)
-+abday       "<U0053><U0075><U006E>";"<U004D><U006F><U006E>";/
-+            "<U0054><U0075><U0065>";"<U0057><U0065><U0064>";/
-+            "<U0054><U0068><U0075>";"<U0046><U0072><U0069>";/
-+            "<U0053><U0061><U0074>"
+abday       "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat"
 +
 +% Full weekday names (%A)
-+day         "<U0053><U0075><U006E><U0064><U0061><U0079>";/
-+            "<U004D><U006F><U006E><U0064><U0061><U0079>";/
-+            "<U0054><U0075><U0065><U0073><U0064><U0061><U0079>";/
-+            "<U0057><U0065><U0064><U006E><U0065><U0073><U0064><U0061><U0079>";/
-+            "<U0054><U0068><U0075><U0072><U0073><U0064><U0061><U0079>";/
-+            "<U0046><U0072><U0069><U0064><U0061><U0079>";/
-+            "<U0053><U0061><U0074><U0075><U0072><U0064><U0061><U0079>"
+day         "Sunday";"Monday";"Tuesday";"Wednesday";"Thursday";/
+            "Friday";"Saturday"
 +
 +% Abbreviated month names (%b)
-+abmon       "<U004A><U0061><U006E>";"<U0046><U0065><U0062>";/
-+            "<U004D><U0061><U0072>";"<U0041><U0070><U0072>";/
-+            "<U004D><U0061><U0079>";"<U004A><U0075><U006E>";/
-+            "<U004A><U0075><U006C>";"<U0041><U0075><U0067>";/
-+            "<U0053><U0065><U0070>";"<U004F><U0063><U0074>";/
-+            "<U004E><U006F><U0076>";"<U0044><U0065><U0063>"
+abmon       "Jan";"Feb";"Mar";"Apr";"May";"Jun";"Jul";"Aug";"Sep";/
+            "Oct";"Nov";"Dec"
 +
 +% Full month names (%B)
-+mon         "<U004A><U0061><U006E><U0075><U0061><U0072><U0079>";/
-+            "<U0046><U0065><U0062><U0072><U0075><U0061><U0072><U0079>";/
-+            "<U004D><U0061><U0072><U0063><U0068>";/
-+            "<U0041><U0070><U0072><U0069><U006C>";/
-+            "<U004D><U0061><U0079>";/
-+            "<U004A><U0075><U006E><U0065>";/
-+            "<U004A><U0075><U006C><U0079>";/
-+            "<U0041><U0075><U0067><U0075><U0073><U0074>";/
-+            "<U0053><U0065><U0070><U0074><U0065><U006D><U0062><U0065><U0072>";/
-+            "<U004F><U0063><U0074><U006F><U0062><U0065><U0072>";/
-+            "<U004E><U006F><U0076><U0065><U006D><U0062><U0065><U0072>";/
-+            "<U0044><U0065><U0063><U0065><U006D><U0062><U0065><U0072>"
+mon         "January";"February";"March";"April";"May";"June";"July";/
+            "August";"September";"October";"November";"December"
 +
 +% Week description, consists of three fields:
 +% 1. Number of days in a week.
@ -198,40 +490,35 @@ index 0000000..fdf460e
 +% Monday), and 4 (Thursday), respectively.
 +week    7;19971201;4
 +first_weekday	1
-+first_workday	1
+first_workday	2
 +
 +% Appropriate date and time representation (%c)
-+%	"%a %b %e %H:%M:%S %Y"
-+d_t_fmt "<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065><U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U0059>"
+d_t_fmt "%a %b %e %H:%M:%S %Y"
 +
 +% Appropriate date representation (%x)
-+%	"%m/%d/%y"
-+d_fmt   "<U0025><U006D><U002F><U0025><U0064><U002F><U0025><U0079>"
+d_fmt   "%m/%d/%y"
 +
 +% Appropriate time representation (%X)
-+%	"%H:%M:%S"
-+t_fmt   "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
+t_fmt   "%H:%M:%S"
 +
 +% Appropriate AM/PM time representation (%r)
-+%	"%I:%M:%S %p"
-+t_fmt_ampm "<U0025><U0049><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U0070>"
+t_fmt_ampm "%I:%M:%S %p"
 +
-+% Equivalent of AM/PM (%p)      "AM"/"PM"
-+%
-+am_pm	"<U0041><U004D>";"<U0050><U004D>"
+% Equivalent of AM/PM (%p)
+am_pm	"AM";"PM"
 +
 +% Appropriate date representation (date(1))   "%a %b %e %H:%M:%S %Z %Y"
-+date_fmt	"<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065><U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U005A><U0020><U0025><U0059>"
+date_fmt	"%a %b %e %H:%M:%S %Z %Y"
 +END LC_TIME
 +
 +LC_MESSAGES
 +% This is the POSIX Locale definition for
 +% the LC_MESSAGES category.
 +%
-+yesexpr "<U005E><U005B><U0079><U0059><U005D>"
-+noexpr  "<U005E><U005B><U006E><U004E><U005D>"
-+yesstr  "<U0059><U0065><U0073>"
-+nostr   "<U004E><U006F>"
+yesexpr "^[yY]"
+noexpr  "^[nN]"
+yesstr  "Yes"
+nostr   "No"
 +END LC_MESSAGES
 +
 +LC_PAPER
@ -246,30 +533,22 @@ index 0000000..fdf460e
 +LC_NAME
 +% This is the ISO/IEC 14652 "i18n" definition for
 +% the LC_NAME category.
-+% "%p%t%g%t%m%t%f"
 +% (also used in the built in C/POSIX locale in glibc/locale/C-name.c)
-+name_fmt    "<U0025><U0070><U0025><U0074><U0025><U0067><U0025><U0074>/
-+<U0025><U006D><U0025><U0074><U0025><U0066>"
+name_fmt    "%p%t%g%t%m%t%f"
 +END LC_NAME
 +
 +LC_ADDRESS
 +% This is the ISO/IEC 14652 "i18n" definition for
 +% the LC_ADDRESS category.
-+% "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
 +% (also used in the built in C/POSIX locale in glibc/locale/C-address.c)
-+postal_fmt    "<U0025><U0061><U0025><U004E><U0025><U0066><U0025><U004E>/
-+<U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025><U0073>/
-+<U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072><U0025>/
-+<U004E><U0025><U0043><U002D><U0025><U007A><U0020><U0025><U0054><U0025>/
-+<U004E><U0025><U0063><U0025><U004E>"
+postal_fmt    "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
 +END LC_ADDRESS
 +
 +LC_TELEPHONE
 +% This is the ISO/IEC 14652 "i18n" definition for
 +% the LC_TELEPHONE category.
 +% "+%c %a %l"
-+tel_int_fmt    "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025>/
-+<U006C>"
+tel_int_fmt    "+%c %a %l"
 +% (also used in the built in C/POSIX locale in glibc/locale/C-telephone.c)
 +END LC_TELEPHONE
 +
@ -280,7 +559,3 @@ index 0000000..fdf460e
 +%metric
 +measurement    1
 +END LC_MEASUREMENT
-+
-- 
-2.4.3
-
--- a/glibc-c-utf8-strcmp_collation.patch
+++ b/glibc-c-utf8-strcmp_collation.patch
@ -0,0 +1,754 @@
+Submission: https://sourceware.org/pipermail/libc-alpha/2021-July/129455.html
+
+Author: Carlos O'Donell <carlos@redhat.com>
+Date:   Sun Jul 25 22:10:17 2021 -0400
+
+    Add 'strcmp_collation' support for LC_COLLATE.
+    
+    Support a new directive 'strcmp_collation' in the LC_COLLATE
+    section of a locale source file. This new directive causes all
+    collation rules to be dropped and instead 'strcmp' is used for
+    collation of the input character set. This is required to allow
+    for a C.UTF-8 that contains zero collation rules (minimal size)
+    and sorts using code point sorting.
+    
+    Tested on x86_64 and i686 without regression.
+
+diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
+index b6406b775d3a81ad..ec778e23d3c4beb7 100644
+--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
+@@ -195,6 +195,9 @@ struct name_list
+ /* The real definition of the struct for the LC_COLLATE locale.  */
+ struct locale_collate_t
+ {
+  /* Does the locale use strcmp to compare the encoding?  */
+  bool strcmp_collation;
+
+   int col_weight_max;
+   int cur_weight_max;
+ 
+@@ -1510,6 +1513,7 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale,
+ 	  obstack_init (&collate->mempool);
+ 
+ 	  collate->col_weight_max = -1;
+	  collate->strcmp_collation = false;
+ 	}
+       else
+ 	/* Reuse the copy_locale's data structures.  */
+@@ -1568,6 +1572,10 @@ collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
+       return;
+     }
+ 
+  /* No data required.  */
+  if (collate->strcmp_collation)
+    return;
+
+   /* If this assertion is hit change the type in `element_t'.  */
+   assert (nrules <= sizeof (runp->used_in_level) * 8);
+ 
+@@ -2115,7 +2123,7 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
+   add_locale_uint32 (&file, nrules);
+ 
+   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
+-  if (collate == NULL)
+  if (collate == NULL || collate->strcmp_collation)
+     {
+       size_t idx;
+       for (idx = 1; idx < nelems; idx++)
+@@ -2123,6 +2131,10 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
+ 	  /* The words have to be handled specially.  */
+ 	  if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
+ 	    add_locale_uint32 (&file, 0);
+	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
+		   && collate != NULL)
+	    /* A valid LC_COLLATE must have a code set name.  */
+	    add_locale_string (&file, charmap->code_set_name);
+ 	  else
+ 	    add_locale_empty (&file);
+ 	}
+@@ -2672,6 +2684,10 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
+ 
+       switch (nowtok)
+ 	{
+	case tok_strcmp_collation:
+	  collate->strcmp_collation = true;
+	  break;
+
+ 	case tok_copy:
+ 	  /* Allow copying other locales.  */
+ 	  now = lr_token (ldfile, charmap, result, NULL, verbose);
+@@ -3742,9 +3758,11 @@ error while adding equivalent collating symbol"));
+ 	  /* Next we assume `LC_COLLATE'.  */
+ 	  if (!ignore_content)
+ 	    {
+-	      if (state == 0 && copy_locale == NULL)
+	      if (state == 0
+		  && copy_locale == NULL
+		  && !collate->strcmp_collation)
+ 		/* We must either see a copy statement or have
+-		   ordering values.  */
+		   ordering values, or strcmp_collation.  */
+ 		lr_error (ldfile,
+ 			  _("%s: empty category description not allowed"),
+ 			  "LC_COLLATE");
+diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
+index bcded15ddb4c44bb..0ae72673409b3545 100644
+--- a/locale/programs/locfile-kw.gperf
+++ b/locale/programs/locfile-kw.gperf
+@@ -54,6 +54,7 @@ translit_end,           tok_translit_end,           0
+ translit_ignore,        tok_translit_ignore,        0
+ default_missing,        tok_default_missing,        0
+ LC_COLLATE,             tok_lc_collate,             0
+strcmp_collation,       tok_strcmp_collation,       0
+ coll_weight_max,        tok_coll_weight_max,        0
+ section-symbol,         tok_section_symbol,         0
+ collating-element,      tok_collating_element,      0
+diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h
+index bc1cb8f0845852ad..f7af5c8a0a4dbeeb 100644
+--- a/locale/programs/locfile-kw.h
+++ b/locale/programs/locfile-kw.h
+@@ -54,7 +54,7 @@
+ #line 24 "locfile-kw.gperf"
+ struct keyword_t ;
+ 
+-#define TOTAL_KEYWORDS 178
+#define TOTAL_KEYWORDS 179
+ #define MIN_WORD_LENGTH 3
+ #define MAX_WORD_LENGTH 22
+ #define MIN_HASH_VALUE 3
+@@ -78,7 +78,7 @@ hash (register const char *str, register size_t len)
+       631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+       631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+       631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
+-        5,   0, 631, 631, 631, 631, 631, 631, 631, 631,
+       10,   5, 631, 631, 631, 631, 631, 631, 631, 631,
+       631, 631, 631, 631, 631,   5, 631,   0,   0,   0,
+         0,   0,  10,   0, 631, 631,   0, 631,   0,   5,
+       631, 631,   0,   0,   0,  10, 631, 631, 631,   0,
+@@ -134,92 +134,92 @@ locfile_hash (register const char *str, register size_t len)
+ #line 31 "locfile-kw.gperf"
+       {"END",                    tok_end,                    0},
+       {""}, {""},
+-#line 70 "locfile-kw.gperf"
+#line 71 "locfile-kw.gperf"
+       {"IGNORE",                 tok_ignore,                 0},
+-#line 129 "locfile-kw.gperf"
+#line 130 "locfile-kw.gperf"
+       {"LC_TIME",                tok_lc_time,                0},
+ #line 30 "locfile-kw.gperf"
+       {"LC_CTYPE",               tok_lc_ctype,               0},
+       {""},
+-#line 168 "locfile-kw.gperf"
+#line 169 "locfile-kw.gperf"
+       {"LC_ADDRESS",             tok_lc_address,             0},
+-#line 153 "locfile-kw.gperf"
+#line 154 "locfile-kw.gperf"
+       {"LC_MESSAGES",            tok_lc_messages,            0},
+-#line 161 "locfile-kw.gperf"
+#line 162 "locfile-kw.gperf"
+       {"LC_NAME",                tok_lc_name,                0},
+-#line 158 "locfile-kw.gperf"
+#line 159 "locfile-kw.gperf"
+       {"LC_PAPER",               tok_lc_paper,               0},
+-#line 186 "locfile-kw.gperf"
+#line 187 "locfile-kw.gperf"
+       {"LC_MEASUREMENT",         tok_lc_measurement,         0},
+ #line 56 "locfile-kw.gperf"
+       {"LC_COLLATE",             tok_lc_collate,             0},
+       {""},
+-#line 188 "locfile-kw.gperf"
+#line 189 "locfile-kw.gperf"
+       {"LC_IDENTIFICATION",      tok_lc_identification,      0},
+-#line 201 "locfile-kw.gperf"
+#line 202 "locfile-kw.gperf"
+       {"revision",               tok_revision,               0},
+-#line 69 "locfile-kw.gperf"
+#line 70 "locfile-kw.gperf"
+       {"UNDEFINED",              tok_undefined,              0},
+-#line 125 "locfile-kw.gperf"
+#line 126 "locfile-kw.gperf"
+       {"LC_NUMERIC",             tok_lc_numeric,             0},
+-#line 82 "locfile-kw.gperf"
+#line 83 "locfile-kw.gperf"
+       {"LC_MONETARY",            tok_lc_monetary,            0},
+-#line 181 "locfile-kw.gperf"
+#line 182 "locfile-kw.gperf"
+       {"LC_TELEPHONE",           tok_lc_telephone,           0},
+       {""}, {""}, {""},
+-#line 75 "locfile-kw.gperf"
+#line 76 "locfile-kw.gperf"
+       {"define",                 tok_define,                 0},
+-#line 154 "locfile-kw.gperf"
+#line 155 "locfile-kw.gperf"
+       {"yesexpr",                tok_yesexpr,                0},
+-#line 141 "locfile-kw.gperf"
+#line 142 "locfile-kw.gperf"
+       {"era_year",               tok_era_year,               0},
+       {""},
+ #line 54 "locfile-kw.gperf"
+       {"translit_ignore",        tok_translit_ignore,        0},
+-#line 156 "locfile-kw.gperf"
+#line 157 "locfile-kw.gperf"
+       {"yesstr",                 tok_yesstr,                 0},
+       {""},
+-#line 89 "locfile-kw.gperf"
+#line 90 "locfile-kw.gperf"
+       {"negative_sign",          tok_negative_sign,          0},
+       {""},
+-#line 137 "locfile-kw.gperf"
+#line 138 "locfile-kw.gperf"
+       {"t_fmt",                  tok_t_fmt,                  0},
+-#line 159 "locfile-kw.gperf"
+#line 160 "locfile-kw.gperf"
+       {"height",                 tok_height,                 0},
+       {""}, {""},
+ #line 52 "locfile-kw.gperf"
+       {"translit_start",         tok_translit_start,         0},
+-#line 136 "locfile-kw.gperf"
+#line 137 "locfile-kw.gperf"
+       {"d_fmt",                  tok_d_fmt,                  0},
+       {""},
+ #line 53 "locfile-kw.gperf"
+       {"translit_end",           tok_translit_end,           0},
+-#line 94 "locfile-kw.gperf"
+#line 95 "locfile-kw.gperf"
+       {"n_cs_precedes",          tok_n_cs_precedes,          0},
+-#line 144 "locfile-kw.gperf"
+#line 145 "locfile-kw.gperf"
+       {"era_t_fmt",              tok_era_t_fmt,              0},
+ #line 39 "locfile-kw.gperf"
+       {"space",                  tok_space,                  0},
+-#line 72 "locfile-kw.gperf"
+-      {"reorder-end",            tok_reorder_end,            0},
+ #line 73 "locfile-kw.gperf"
+      {"reorder-end",            tok_reorder_end,            0},
+#line 74 "locfile-kw.gperf"
+       {"reorder-sections-after", tok_reorder_sections_after, 0},
+       {""},
+-#line 142 "locfile-kw.gperf"
+#line 143 "locfile-kw.gperf"
+       {"era_d_fmt",              tok_era_d_fmt,              0},
+-#line 189 "locfile-kw.gperf"
+#line 190 "locfile-kw.gperf"
+       {"title",                  tok_title,                  0},
+       {""}, {""},
+-#line 149 "locfile-kw.gperf"
+#line 150 "locfile-kw.gperf"
+       {"timezone",               tok_timezone,               0},
+       {""},
+-#line 74 "locfile-kw.gperf"
+#line 75 "locfile-kw.gperf"
+       {"reorder-sections-end",   tok_reorder_sections_end,   0},
+       {""}, {""}, {""},
+-#line 95 "locfile-kw.gperf"
+#line 96 "locfile-kw.gperf"
+       {"n_sep_by_space",         tok_n_sep_by_space,         0},
+       {""}, {""},
+-#line 100 "locfile-kw.gperf"
+#line 101 "locfile-kw.gperf"
+       {"int_n_cs_precedes",      tok_int_n_cs_precedes,      0},
+       {""}, {""}, {""},
+ #line 26 "locfile-kw.gperf"
+@@ -233,147 +233,147 @@ locfile_hash (register const char *str, register size_t len)
+       {"print",                  tok_print,                  0},
+ #line 44 "locfile-kw.gperf"
+       {"xdigit",                 tok_xdigit,                 0},
+-#line 110 "locfile-kw.gperf"
+#line 111 "locfile-kw.gperf"
+       {"duo_n_cs_precedes",      tok_duo_n_cs_precedes,      0},
+-#line 127 "locfile-kw.gperf"
+#line 128 "locfile-kw.gperf"
+       {"thousands_sep",          tok_thousands_sep,          0},
+-#line 197 "locfile-kw.gperf"
+#line 198 "locfile-kw.gperf"
+       {"territory",              tok_territory,              0},
+ #line 36 "locfile-kw.gperf"
+       {"digit",                  tok_digit,                  0},
+       {""}, {""},
+-#line 92 "locfile-kw.gperf"
+#line 93 "locfile-kw.gperf"
+       {"p_cs_precedes",          tok_p_cs_precedes,          0},
+       {""}, {""},
+-#line 62 "locfile-kw.gperf"
+#line 63 "locfile-kw.gperf"
+       {"script",                 tok_script,                 0},
+ #line 29 "locfile-kw.gperf"
+       {"include",                tok_include,                0},
+       {""},
+-#line 78 "locfile-kw.gperf"
+#line 79 "locfile-kw.gperf"
+       {"else",                   tok_else,                   0},
+-#line 184 "locfile-kw.gperf"
+#line 185 "locfile-kw.gperf"
+       {"int_select",             tok_int_select,             0},
+       {""}, {""}, {""},
+-#line 132 "locfile-kw.gperf"
+#line 133 "locfile-kw.gperf"
+       {"week",                   tok_week,                   0},
+ #line 33 "locfile-kw.gperf"
+       {"upper",                  tok_upper,                  0},
+       {""}, {""},
+-#line 194 "locfile-kw.gperf"
+#line 195 "locfile-kw.gperf"
+       {"tel",                    tok_tel,                    0},
+-#line 93 "locfile-kw.gperf"
+#line 94 "locfile-kw.gperf"
+       {"p_sep_by_space",         tok_p_sep_by_space,         0},
+-#line 160 "locfile-kw.gperf"
+#line 161 "locfile-kw.gperf"
+       {"width",                  tok_width,                  0},
+       {""},
+-#line 98 "locfile-kw.gperf"
+#line 99 "locfile-kw.gperf"
+       {"int_p_cs_precedes",      tok_int_p_cs_precedes,      0},
+       {""}, {""},
+ #line 41 "locfile-kw.gperf"
+       {"punct",                  tok_punct,                  0},
+       {""}, {""},
+-#line 101 "locfile-kw.gperf"
+#line 102 "locfile-kw.gperf"
+       {"int_n_sep_by_space",     tok_int_n_sep_by_space,     0},
+       {""}, {""}, {""},
+-#line 108 "locfile-kw.gperf"
+#line 109 "locfile-kw.gperf"
+       {"duo_p_cs_precedes",      tok_duo_p_cs_precedes,      0},
+ #line 48 "locfile-kw.gperf"
+       {"charconv",               tok_charconv,               0},
+       {""},
+ #line 47 "locfile-kw.gperf"
+       {"class",                  tok_class,                  0},
+-#line 114 "locfile-kw.gperf"
+-      {"duo_int_n_cs_precedes",  tok_duo_int_n_cs_precedes,  0},
+ #line 115 "locfile-kw.gperf"
+      {"duo_int_n_cs_precedes",  tok_duo_int_n_cs_precedes,  0},
+#line 116 "locfile-kw.gperf"
+       {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0},
+-#line 111 "locfile-kw.gperf"
+#line 112 "locfile-kw.gperf"
+       {"duo_n_sep_by_space",     tok_duo_n_sep_by_space,     0},
+-#line 119 "locfile-kw.gperf"
+#line 120 "locfile-kw.gperf"
+       {"duo_int_n_sign_posn",    tok_duo_int_n_sign_posn,    0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""},
+-#line 58 "locfile-kw.gperf"
+#line 59 "locfile-kw.gperf"
+       {"section-symbol",         tok_section_symbol,         0},
+-#line 185 "locfile-kw.gperf"
+#line 186 "locfile-kw.gperf"
+       {"int_prefix",             tok_int_prefix,             0},
+       {""}, {""}, {""}, {""},
+ #line 42 "locfile-kw.gperf"
+       {"graph",                  tok_graph,                  0},
+       {""}, {""},
+-#line 99 "locfile-kw.gperf"
+#line 100 "locfile-kw.gperf"
+       {"int_p_sep_by_space",     tok_int_p_sep_by_space,     0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 112 "locfile-kw.gperf"
+-      {"duo_int_p_cs_precedes",  tok_duo_int_p_cs_precedes,  0},
+ #line 113 "locfile-kw.gperf"
+      {"duo_int_p_cs_precedes",  tok_duo_int_p_cs_precedes,  0},
+#line 114 "locfile-kw.gperf"
+       {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0},
+-#line 109 "locfile-kw.gperf"
+#line 110 "locfile-kw.gperf"
+       {"duo_p_sep_by_space",     tok_duo_p_sep_by_space,     0},
+-#line 118 "locfile-kw.gperf"
+#line 119 "locfile-kw.gperf"
+       {"duo_int_p_sign_posn",    tok_duo_int_p_sign_posn,    0},
+-#line 157 "locfile-kw.gperf"
+#line 158 "locfile-kw.gperf"
+       {"nostr",                  tok_nostr,                  0},
+       {""}, {""},
+-#line 140 "locfile-kw.gperf"
+#line 141 "locfile-kw.gperf"
+       {"era",                    tok_era,                    0},
+       {""},
+-#line 84 "locfile-kw.gperf"
+#line 85 "locfile-kw.gperf"
+       {"currency_symbol",        tok_currency_symbol,        0},
+       {""},
+-#line 167 "locfile-kw.gperf"
+#line 168 "locfile-kw.gperf"
+       {"name_ms",                tok_name_ms,                0},
+-#line 165 "locfile-kw.gperf"
+-      {"name_mrs",               tok_name_mrs,               0},
+ #line 166 "locfile-kw.gperf"
+      {"name_mrs",               tok_name_mrs,               0},
+#line 167 "locfile-kw.gperf"
+       {"name_miss",              tok_name_miss,              0},
+-#line 83 "locfile-kw.gperf"
+#line 84 "locfile-kw.gperf"
+       {"int_curr_symbol",        tok_int_curr_symbol,        0},
+-#line 190 "locfile-kw.gperf"
+#line 191 "locfile-kw.gperf"
+       {"source",                 tok_source,                 0},
+-#line 164 "locfile-kw.gperf"
+#line 165 "locfile-kw.gperf"
+       {"name_mr",                tok_name_mr,                0},
+-#line 163 "locfile-kw.gperf"
+#line 164 "locfile-kw.gperf"
+       {"name_gen",               tok_name_gen,               0},
+-#line 202 "locfile-kw.gperf"
+#line 203 "locfile-kw.gperf"
+       {"date",                   tok_date,                   0},
+       {""}, {""},
+-#line 191 "locfile-kw.gperf"
+#line 192 "locfile-kw.gperf"
+       {"address",                tok_address,                0},
+-#line 162 "locfile-kw.gperf"
+#line 163 "locfile-kw.gperf"
+       {"name_fmt",               tok_name_fmt,               0},
+ #line 32 "locfile-kw.gperf"
+       {"copy",                   tok_copy,                   0},
+-#line 103 "locfile-kw.gperf"
+#line 104 "locfile-kw.gperf"
+       {"int_n_sign_posn",        tok_int_n_sign_posn,        0},
+       {""}, {""},
+-#line 131 "locfile-kw.gperf"
+#line 132 "locfile-kw.gperf"
+       {"day",                    tok_day,                    0},
+-#line 105 "locfile-kw.gperf"
+#line 106 "locfile-kw.gperf"
+       {"duo_currency_symbol",    tok_duo_currency_symbol,    0},
+       {""}, {""}, {""},
+-#line 150 "locfile-kw.gperf"
+#line 151 "locfile-kw.gperf"
+       {"date_fmt",               tok_date_fmt,               0},
+-#line 64 "locfile-kw.gperf"
+#line 65 "locfile-kw.gperf"
+       {"order_end",              tok_order_end,              0},
+-#line 117 "locfile-kw.gperf"
+#line 118 "locfile-kw.gperf"
+       {"duo_n_sign_posn",        tok_duo_n_sign_posn,        0},
+       {""},
+-#line 170 "locfile-kw.gperf"
+#line 171 "locfile-kw.gperf"
+       {"country_name",           tok_country_name,           0},
+-#line 71 "locfile-kw.gperf"
+#line 72 "locfile-kw.gperf"
+       {"reorder-after",          tok_reorder_after,          0},
+       {""}, {""},
+-#line 155 "locfile-kw.gperf"
+#line 156 "locfile-kw.gperf"
+       {"noexpr",                 tok_noexpr,                 0},
+ #line 50 "locfile-kw.gperf"
+       {"tolower",                tok_tolower,                0},
+-#line 198 "locfile-kw.gperf"
+#line 199 "locfile-kw.gperf"
+       {"audience",               tok_audience,               0},
+       {""}, {""}, {""},
+ #line 49 "locfile-kw.gperf"
+       {"toupper",                tok_toupper,                0},
+-#line 68 "locfile-kw.gperf"
+#line 69 "locfile-kw.gperf"
+       {"position",               tok_position,               0},
+       {""},
+ #line 40 "locfile-kw.gperf"
+@@ -381,196 +381,198 @@ locfile_hash (register const char *str, register size_t len)
+       {""},
+ #line 27 "locfile-kw.gperf"
+       {"comment_char",           tok_comment_char,           0},
+-#line 88 "locfile-kw.gperf"
+#line 89 "locfile-kw.gperf"
+       {"positive_sign",          tok_positive_sign,          0},
+       {""}, {""}, {""}, {""},
+-#line 61 "locfile-kw.gperf"
+#line 62 "locfile-kw.gperf"
+       {"symbol-equivalence",     tok_symbol_equivalence,     0},
+       {""},
+-#line 102 "locfile-kw.gperf"
+#line 103 "locfile-kw.gperf"
+       {"int_p_sign_posn",        tok_int_p_sign_posn,        0},
+-#line 175 "locfile-kw.gperf"
+#line 176 "locfile-kw.gperf"
+       {"country_car",            tok_country_car,            0},
+       {""}, {""},
+-#line 104 "locfile-kw.gperf"
+#line 105 "locfile-kw.gperf"
+       {"duo_int_curr_symbol",    tok_duo_int_curr_symbol,    0},
+       {""}, {""},
+-#line 135 "locfile-kw.gperf"
+#line 136 "locfile-kw.gperf"
+       {"d_t_fmt",                tok_d_t_fmt,                0},
+       {""}, {""},
+-#line 116 "locfile-kw.gperf"
+#line 117 "locfile-kw.gperf"
+       {"duo_p_sign_posn",        tok_duo_p_sign_posn,        0},
+-#line 187 "locfile-kw.gperf"
+#line 188 "locfile-kw.gperf"
+       {"measurement",            tok_measurement,            0},
+-#line 176 "locfile-kw.gperf"
+#line 177 "locfile-kw.gperf"
+       {"country_isbn",           tok_country_isbn,           0},
+ #line 37 "locfile-kw.gperf"
+       {"outdigit",               tok_outdigit,               0},
+       {""}, {""},
+-#line 143 "locfile-kw.gperf"
+#line 144 "locfile-kw.gperf"
+       {"era_d_t_fmt",            tok_era_d_t_fmt,            0},
+       {""}, {""}, {""},
+ #line 34 "locfile-kw.gperf"
+       {"lower",                  tok_lower,                  0},
+-#line 183 "locfile-kw.gperf"
+#line 184 "locfile-kw.gperf"
+       {"tel_dom_fmt",            tok_tel_dom_fmt,            0},
+-#line 171 "locfile-kw.gperf"
+#line 172 "locfile-kw.gperf"
+       {"country_post",           tok_country_post,           0},
+-#line 148 "locfile-kw.gperf"
+#line 149 "locfile-kw.gperf"
+       {"cal_direction",          tok_cal_direction,          0},
+       {""},
+-#line 139 "locfile-kw.gperf"
+#line 140 "locfile-kw.gperf"
+       {"t_fmt_ampm",             tok_t_fmt_ampm,             0},
+-#line 91 "locfile-kw.gperf"
+#line 92 "locfile-kw.gperf"
+       {"frac_digits",            tok_frac_digits,            0},
+       {""}, {""},
+-#line 177 "locfile-kw.gperf"
+#line 178 "locfile-kw.gperf"
+       {"lang_name",              tok_lang_name,              0},
+-#line 90 "locfile-kw.gperf"
+#line 91 "locfile-kw.gperf"
+       {"int_frac_digits",        tok_int_frac_digits,        0},
+       {""},
+-#line 121 "locfile-kw.gperf"
+#line 122 "locfile-kw.gperf"
+       {"uno_valid_to",           tok_uno_valid_to,           0},
+-#line 126 "locfile-kw.gperf"
+#line 127 "locfile-kw.gperf"
+       {"decimal_point",          tok_decimal_point,          0},
+       {""},
+-#line 133 "locfile-kw.gperf"
+#line 134 "locfile-kw.gperf"
+       {"abmon",                  tok_abmon,                  0},
+       {""}, {""}, {""}, {""},
+-#line 107 "locfile-kw.gperf"
+#line 108 "locfile-kw.gperf"
+       {"duo_frac_digits",        tok_duo_frac_digits,        0},
+-#line 182 "locfile-kw.gperf"
+#line 183 "locfile-kw.gperf"
+       {"tel_int_fmt",            tok_tel_int_fmt,            0},
+-#line 123 "locfile-kw.gperf"
+#line 124 "locfile-kw.gperf"
+       {"duo_valid_to",           tok_duo_valid_to,           0},
+-#line 146 "locfile-kw.gperf"
+#line 147 "locfile-kw.gperf"
+       {"first_weekday",          tok_first_weekday,          0},
+       {""},
+-#line 130 "locfile-kw.gperf"
+#line 131 "locfile-kw.gperf"
+       {"abday",                  tok_abday,                  0},
+       {""},
+-#line 200 "locfile-kw.gperf"
+#line 201 "locfile-kw.gperf"
+       {"abbreviation",           tok_abbreviation,           0},
+-#line 147 "locfile-kw.gperf"
+#line 148 "locfile-kw.gperf"
+       {"first_workday",          tok_first_workday,          0},
+       {""}, {""},
+-#line 97 "locfile-kw.gperf"
+#line 98 "locfile-kw.gperf"
+       {"n_sign_posn",            tok_n_sign_posn,            0},
+       {""}, {""}, {""},
+-#line 145 "locfile-kw.gperf"
+#line 146 "locfile-kw.gperf"
+       {"alt_digits",             tok_alt_digits,             0},
+       {""}, {""},
+-#line 128 "locfile-kw.gperf"
+#line 129 "locfile-kw.gperf"
+       {"grouping",               tok_grouping,               0},
+       {""},
+ #line 45 "locfile-kw.gperf"
+       {"blank",                  tok_blank,                  0},
+       {""}, {""},
+-#line 196 "locfile-kw.gperf"
+#line 197 "locfile-kw.gperf"
+       {"language",               tok_language,               0},
+-#line 120 "locfile-kw.gperf"
+#line 121 "locfile-kw.gperf"
+       {"uno_valid_from",         tok_uno_valid_from,         0},
+       {""},
+-#line 199 "locfile-kw.gperf"
+#line 200 "locfile-kw.gperf"
+       {"application",            tok_application,            0},
+       {""},
+-#line 80 "locfile-kw.gperf"
+#line 81 "locfile-kw.gperf"
+       {"elifndef",               tok_elifndef,               0},
+       {""}, {""}, {""}, {""}, {""},
+-#line 122 "locfile-kw.gperf"
+#line 123 "locfile-kw.gperf"
+       {"duo_valid_from",         tok_duo_valid_from,         0},
+-#line 57 "locfile-kw.gperf"
+#line 58 "locfile-kw.gperf"
+       {"coll_weight_max",        tok_coll_weight_max,        0},
+       {""},
+-#line 79 "locfile-kw.gperf"
+#line 80 "locfile-kw.gperf"
+       {"elifdef",                tok_elifdef,                0},
+-#line 67 "locfile-kw.gperf"
+#line 68 "locfile-kw.gperf"
+       {"backward",               tok_backward,               0},
+-#line 106 "locfile-kw.gperf"
+#line 107 "locfile-kw.gperf"
+       {"duo_int_frac_digits",    tok_duo_int_frac_digits,    0},
+       {""}, {""}, {""}, {""}, {""}, {""},
+-#line 96 "locfile-kw.gperf"
+#line 97 "locfile-kw.gperf"
+       {"p_sign_posn",            tok_p_sign_posn,            0},
+       {""},
+-#line 203 "locfile-kw.gperf"
+#line 204 "locfile-kw.gperf"
+       {"category",               tok_category,               0},
+       {""}, {""}, {""}, {""},
+-#line 134 "locfile-kw.gperf"
+#line 135 "locfile-kw.gperf"
+       {"mon",                    tok_mon,                    0},
+       {""},
+-#line 124 "locfile-kw.gperf"
+#line 125 "locfile-kw.gperf"
+       {"conversion_rate",        tok_conversion_rate,        0},
+       {""}, {""}, {""}, {""}, {""},
+-#line 63 "locfile-kw.gperf"
+#line 64 "locfile-kw.gperf"
+       {"order_start",            tok_order_start,            0},
+       {""}, {""}, {""}, {""}, {""},
+-#line 178 "locfile-kw.gperf"
+#line 179 "locfile-kw.gperf"
+       {"lang_ab",                tok_lang_ab,                0},
+-#line 180 "locfile-kw.gperf"
+#line 181 "locfile-kw.gperf"
+       {"lang_lib",               tok_lang_lib,               0},
+       {""}, {""}, {""},
+-#line 192 "locfile-kw.gperf"
+#line 193 "locfile-kw.gperf"
+       {"contact",                tok_contact,                0},
+       {""}, {""}, {""},
+-#line 173 "locfile-kw.gperf"
+-      {"country_ab3",            tok_country_ab3,            0},
+#line 57 "locfile-kw.gperf"
+      {"strcmp_collation",       tok_strcmp_collation,       0},
+       {""}, {""}, {""},
+-#line 193 "locfile-kw.gperf"
+#line 194 "locfile-kw.gperf"
+       {"email",                  tok_email,                  0},
+-#line 172 "locfile-kw.gperf"
+-      {"country_ab2",            tok_country_ab2,            0},
+#line 174 "locfile-kw.gperf"
+      {"country_ab3",            tok_country_ab3,            0},
+       {""}, {""}, {""},
+ #line 55 "locfile-kw.gperf"
+       {"default_missing",        tok_default_missing,        0},
+-      {""}, {""},
+-#line 195 "locfile-kw.gperf"
+#line 173 "locfile-kw.gperf"
+      {"country_ab2",            tok_country_ab2,            0},
+      {""},
+#line 196 "locfile-kw.gperf"
+       {"fax",                    tok_fax,                    0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 174 "locfile-kw.gperf"
+#line 175 "locfile-kw.gperf"
+       {"country_num",            tok_country_num,            0},
+       {""}, {""}, {""}, {""}, {""}, {""},
+ #line 51 "locfile-kw.gperf"
+       {"map",                    tok_map,                    0},
+-#line 65 "locfile-kw.gperf"
+#line 66 "locfile-kw.gperf"
+       {"from",                   tok_from,                   0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 86 "locfile-kw.gperf"
+#line 87 "locfile-kw.gperf"
+       {"mon_thousands_sep",      tok_mon_thousands_sep,      0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""},
+-#line 81 "locfile-kw.gperf"
+#line 82 "locfile-kw.gperf"
+       {"endif",                  tok_endif,                  0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 151 "locfile-kw.gperf"
+#line 152 "locfile-kw.gperf"
+       {"alt_mon",                tok_alt_mon,                0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 76 "locfile-kw.gperf"
+#line 77 "locfile-kw.gperf"
+       {"undef",                  tok_undef,                  0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 59 "locfile-kw.gperf"
+#line 60 "locfile-kw.gperf"
+       {"collating-element",      tok_collating_element,      0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 152 "locfile-kw.gperf"
+#line 153 "locfile-kw.gperf"
+       {"ab_alt_mon",             tok_ab_alt_mon,             0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 66 "locfile-kw.gperf"
+#line 67 "locfile-kw.gperf"
+       {"forward",                tok_forward,                0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""},
+-#line 85 "locfile-kw.gperf"
+#line 86 "locfile-kw.gperf"
+       {"mon_decimal_point",      tok_mon_decimal_point,      0},
+       {""}, {""},
+-#line 169 "locfile-kw.gperf"
+#line 170 "locfile-kw.gperf"
+       {"postal_fmt",             tok_postal_fmt,             0},
+       {""}, {""}, {""}, {""}, {""},
+-#line 60 "locfile-kw.gperf"
+#line 61 "locfile-kw.gperf"
+       {"collating-symbol",       tok_collating_symbol,       0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+@@ -583,15 +585,15 @@ locfile_hash (register const char *str, register size_t len)
+ #line 38 "locfile-kw.gperf"
+       {"alnum",                  tok_alnum,                  0},
+       {""},
+-#line 87 "locfile-kw.gperf"
+#line 88 "locfile-kw.gperf"
+       {"mon_grouping",           tok_mon_grouping,           0},
+       {""},
+-#line 179 "locfile-kw.gperf"
+#line 180 "locfile-kw.gperf"
+       {"lang_term",              tok_lang_term,              0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""},
+-#line 77 "locfile-kw.gperf"
+#line 78 "locfile-kw.gperf"
+       {"ifdef",                  tok_ifdef,                  0},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+@@ -599,7 +601,7 @@ locfile_hash (register const char *str, register size_t len)
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+       {""}, {""}, {""}, {""},
+-#line 138 "locfile-kw.gperf"
+#line 139 "locfile-kw.gperf"
+       {"am_pm",                  tok_am_pm,                  0}
+     };
+ 
+diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
+index 414ad3076223e971..0ea73c51f14e839d 100644
+--- a/locale/programs/locfile-token.h
+++ b/locale/programs/locfile-token.h
+@@ -91,6 +91,7 @@ enum token_t
+   tok_translit_ignore,
+   tok_default_missing,
+   tok_lc_collate,
+  tok_strcmp_collation,
+   tok_coll_weight_max,
+   tok_section_symbol,
+   tok_collating_element,
--- a/glibc.spec
+++ b/glibc.spec
@ -1,4 +1,4 @@
-%define glibcsrcdir glibc-2.33.9000-982-gd34ed66f96
+%define glibcsrcdir glibc-2.33.9000-984-gddcc612ce9
 %define glibcversion 2.33.9000
 # Pre-release tarballs are pulled in from git using a command that is
 # effectively:
@ -111,7 +111,7 @@
 Summary: The GNU libc libraries
 Name: glibc
 Version: %{glibcversion}
-Release: 50%{?dist}
+Release: 52%{?dist}

 # In general, GPLv2+ is used by programs, LGPLv2+ is used for
 # libraries.
@ -185,7 +185,8 @@ Patch13: glibc-fedora-localedata-rh61908.patch
 Patch15: glibc-rh1070416.patch
 Patch16: glibc-nscd-sysconfig.patch
 Patch17: glibc-cs-path.patch
-Patch18: glibc-c-utf8-locale.patch
+Patch18: glibc-c-utf8-strcmp_collation.patch
+Patch19: glibc-c-utf8-locale.patch
 Patch23: glibc-python3.patch
 Patch29: glibc-fedora-nsswitch.patch
 Patch30: glibc-deprecated-selinux-makedb.patch
@ -2242,6 +2243,15 @@ fi
 %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared

 %changelog
+* Mon Jul 26 2021 Florian Weimer <fweimer@redhat.com> - 2.33.9000-52
+- Switch to new version of C.UTF-8 locale
+
+* Mon Jul 26 2021 Florian Weimer <fweimer@redhat.com> - 2.33.9000-51
+- Auto-sync with upstream branch master,
+  commit ddcc612ce923038b867083a0c55d6e034951155a:
+- Exclude static tests for mcheck and malloc-check
+- i386: Regenerate ulps
+
 * Sat Jul 24 2021 Florian Weimer <fweimer@redhat.com> - 2.33.9000-50
 - Auto-sync with upstream branch master,
  commit d34ed66f96fa9316654d7adb2afcce4be1d1c4f5:
--- a/2
+++ b/2
@ -1 +1 @@
-SHA512 (glibc-2.33.9000-982-gd34ed66f96.tar.xz) = 9c6b0e4ffc7d389f8df9a18c8f27c622fa9f55ccc7b09598ea809bbb95653f22f9c32435f5e912f77bb6173c45a5bd401197e3524b78ca247715f73b40a463d0
+SHA512 (glibc-2.33.9000-984-gddcc612ce9.tar.xz) = e6bfc0bb082e4e2d80ed900317eddb0a2ff778e9ee7817095a9bde7715e8858208fcb69a75f097dc96eeeddc7ae0b7a7b2c4de23e9a76a9068b3825328def259