Switch to new version of C.UTF-8 locale

2021-07-26 09:26:07 +02:00 · 2021-07-26 09:26:07 +02:00 · 8f6143c826
commit 8f6143c826
parent c4a47573b4
3 changed files with 1175 additions and 142 deletions
--- a/glibc-c-utf8-locale.patch
+++ b/glibc-c-utf8-locale.patch
@ -1,48 +1,370 @@
-Short description: Add C.UTF-8 support.
+Submission: https://sourceware.org/pipermail/libc-alpha/2021-July/129456.html
 Author(s): Fedora glibc team <glibc@lists.fedoraproject.org>
 Origin: PATCH
 Upstream status: not-submitted
-This patch needs to upstream as part of Carlos O'Donell
+Author: Carlos O'Donell <carlos@redhat.com>
-<carlos@redhat.com>'s work on enabling upstream C.UTF-8 support. This
+Date:   Sun Jul 25 22:14:08 2021 -0400
 work is currently blocked on cleaning up the test results to prove that
 full code-point sorting is working as intended.
-Note that this patch does not provide full code-point sorting as
+    Add generic C.UTF-8 locale (Bug 17318)
 expected.
-This patch needs to upstream as soon as possible since it would be nice
+    We add a new C.UTF-8 locale.  This locale is not builtin to glibc, but
-to have this in F29 and fixed.
+    is provided as a distinct locale.  The locale provides full support
    for UTF-8 and this includes full code point sorting via strcmp-based
    collation.
-From 2eda7b462b415105f5a05c1323372d4e39d46439 Mon Sep 17 00:00:00 2001
+    The collation uses a new keyword 'strcmp_collation' which drops all
-From: Mike FABIAN <mfabian@redhat.com>
+    collation rules and generates an empty zero rules collation to enable
-Date: Mon, 10 Aug 2015 15:58:12 +0200
+    strcmp usage in collation. This ensures that we get full code point
-Subject: [PATCH] Add a C.UTF-8 locale
+    sorting for C.UTF-8 with a minimal 92 bytes of overhead (LC_COLLATE
    structure information).
---
+    The new locale is added to SUPPORTED.  Minimal test data for specific
- localedata/SUPPORTED |   1 +
+    code points (minus those not supported by collate-test) is provided
- localedata/locales/C | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++
+    in C.UTF-8.in, and this verifies code point sorting is working
- 2 files changed, 239 insertions(+)
+    reasonably across the range.  The locale was tested manually with the
- create mode 100644 localedata/locales/C
+    full set of code points without failure.
    The locale is harmonized with locales already shipping in Gentoo,
    Debian, Ubuntu, Fedora, CentOS Stream, and RHEL. A new tst-iconv9 test
    is added which verifies the C.UTF-8 locale is generally usable.
    Tested on x86_64 and i686 without regression.
 diff --git a/iconv/Makefile b/iconv/Makefile
 index 07d77c9ecaafba1f..9993f2d3f3cd7498 100644
 --- a/iconv/Makefile
 +++ b/iconv/Makefile
@@ -43,8 +43,19 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
 CFLAGS-linereader.c += -DNO_TRANSLITERATION
 CFLAGS-simple-hash.c += -I../locale
 -tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
 -	  tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt
 +tests = \
 +	tst-iconv1 \
 +	tst-iconv2 \
 +	tst-iconv3 \
 +	tst-iconv4 \
 +	tst-iconv5 \
 +	tst-iconv6 \
 +	tst-iconv7 \
 +	tst-iconv8 \
 +	tst-iconv9 \
 +	tst-iconv-mt \
 +	tst-iconv-opt \
 +	# tests
 others		= iconv_prog iconvconfig
 install-others-programs	= $(inst_bindir)/iconv
@@ -83,10 +94,15 @@ endif
 include ../Rules
 ifeq ($(run-built-tests),yes)
 -LOCALES := en_US.UTF-8
 +# We have to generate locales (list sorted alphabetically)
 +LOCALES := \
 +	C.UTF-8 \
 +	en_US.UTF-8 \
 +	# LOCALES
 include ../gen-locales.mk
 $(objpfx)tst-iconv-opt.out: $(gen-locales)
 +$(objpfx)tst-iconv9.out: $(gen-locales)
 endif
 $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
 diff --git a/iconv/tst-iconv9.c b/iconv/tst-iconv9.c
 new file mode 100644
 index 0000000000000000..78a532427993d1c1
 --- /dev/null
 +++ b/iconv/tst-iconv9.c
@@ -0,0 +1,87 @@
 +/* Verify that using C.UTF-8 works.
 +
 +   Copyright (C) 2021 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <https://www.gnu.org/licenses/>.  */
 +
 +#include <iconv.h>
 +#include <stddef.h>
 +#include <stdio.h>
 +#include <string.h>
 +#include <support/support.h>
 +#include <support/check.h>
 +
 +/* This test does two things:
 +   (1) Verify that we have likely included translit_combining in C.UTF-8.
 +   (2) Verify default_missing is '?' as expected.  */
 +
 +/* ISO-8859-1 encoding of "für".  */
 +char iso88591_in[] = { 0x66, 0xfc, 0x72, 0x0 };
 +/* ASCII transliteration is "fur" with C.UTF-8 translit_combining.  */
 +char ascii_exp[] = { 0x66, 0x75, 0x72, 0x0 };
 +
 +/* First 3-byte UTF-8 code point.  */
 +char utf8_in[] = { 0xe0, 0xa0, 0x80, 0x0 };
 +/* There is no ASCII transliteration for SAMARITAN LETTER ALAF
 +   so we get default_missing used which is '?'.  */
 +char default_missing_exp[] = { 0x3f, 0x0 };
 +
 +static int
 +do_test (void)
 +{
 +  char ascii_out[5];
 +  iconv_t cd;
 +  char *inbuf;
 +  char *outbuf;
 +  size_t inbytes;
 +  size_t outbytes;
 +  size_t n;
 +
 +  /* The C.UTF-8 locale should include translit_combining, which provides
 +     the transliteration for "LATIN SMALL LETTER U WITH DIAERESIS" which
 +     is not provided by locale/C-translit.h.in.  */
 +  xsetlocale (LC_ALL, "C.UTF-8");
 +
 +  /* From ISO-8859-1 to ASCII. */
 +  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "ISO-8859-1");
 +  TEST_VERIFY (cd != (iconv_t) -1);
 +  inbuf = iso88591_in;
 +  inbytes = 3;
 +  outbuf = ascii_out;
 +  outbytes = 3;
 +  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
 +  TEST_VERIFY (n != -1);
 +  *outbuf = '\0';
 +  TEST_COMPARE_BLOB (ascii_out, 3, ascii_exp, 3);
 +  TEST_VERIFY (iconv_close (cd) == 0);
 +
 +  /* From UTF-8 to ASCII. */
 +  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "UTF-8");
 +  TEST_VERIFY (cd != (iconv_t) -1);
 +  inbuf = utf8_in;
 +  inbytes = 3;
 +  outbuf = ascii_out;
 +  outbytes = 3;
 +  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
 +  TEST_VERIFY (n != -1);
 +  *outbuf = '\0';
 +  TEST_COMPARE_BLOB (ascii_out, 1, default_missing_exp, 1);
 +  TEST_VERIFY (iconv_close (cd) == 0);
 +
 +  return 0;
 +}
 +
 +#include <support/test-driver.c>
 diff --git a/localedata/C.UTF-8.in b/localedata/C.UTF-8.in
 new file mode 100644
 index 0000000000000000..c31dcc2aa045ee61
 --- /dev/null
 +++ b/localedata/C.UTF-8.in
@@ -0,0 +1,157 @@
 + ; <U1>
 + ; <U2>
 + ; <U3>
 + ; <U4>
 + ; <U5>
 + ; <U6>
 + ; <U7>
 + ; <U8>
 + ; <UE>
 + ; <UF>
 + ; <U10>
 + ; <U11>
 + ; <U12>
 + ; <U13>
 + ; <U14>
 + ; <U15>
 + ; <U16>
 + ; <U17>
 + ; <U18>
 + ; <U19>
 + ; <U1A>
 + ; <U1B>
 + ; <U1C>
 + ; <U1D>
 + ; <U1E>
 + ; <U1F>
 +! ; <U21>
 +" ; <U22>
 +# ; <U23>
 +$ ; <U24>
 +% ; <U25>
 +& ; <U26>
 +' ; <U27>
 +) ; <U29>
 +* ; <U2A>
 ++ ; <U2B>
 +, ; <U2C>
 +- ; <U2D>
 +. ; <U2E>
 +/ ; <U2F>
 +0 ; <U30>
 +1 ; <U31>
 +2 ; <U32>
 +3 ; <U33>
 +4 ; <U34>
 +5 ; <U35>
 +6 ; <U36>
 +7 ; <U37>
 +8 ; <U38>
 +9 ; <U39>
 +< ; <U3C>
 += ; <U3D>
 +> ; <U3E>
 +? ; <U3F>
 +@ ; <U40>
 +A ; <U41>
 +B ; <U42>
 +C ; <U43>
 +D ; <U44>
 +E ; <U45>
 +F ; <U46>
 +G ; <U47>
 +H ; <U48>
 +I ; <U49>
 +J ; <U4A>
 +K ; <U4B>
 +L ; <U4C>
 +M ; <U4D>
 +N ; <U4E>
 +O ; <U4F>
 +P ; <U50>
 +Q ; <U51>
 +R ; <U52>
 +S ; <U53>
 +T ; <U54>
 +U ; <U55>
 +V ; <U56>
 +W ; <U57>
 +X ; <U58>
 +Y ; <U59>
 +Z ; <U5A>
 +[ ; <U5B>
 +\ ; <U5C>
 +] ; <U5D>
 +^ ; <U5E>
 +_ ; <U5F>
 +` ; <U60>
 +a ; <U61>
 +b ; <U62>
 +c ; <U63>
 +d ; <U64>
 +e ; <U65>
 +f ; <U66>
 +g ; <U67>
 +h ; <U68>
 +i ; <U69>
 +j ; <U6A>
 +k ; <U6B>
 +l ; <U6C>
 +m ; <U6D>
 +n ; <U6E>
 +o ; <U6F>
 +p ; <U70>
 +q ; <U71>
 +r ; <U72>
 +s ; <U73>
 +t ; <U74>
 +u ; <U75>
 +v ; <U76>
 +w ; <U77>
 +x ; <U78>
 +y ; <U79>
 +z ; <U7A>
 +{ ; <U7B>
 +| ; <U7C>
 +} ; <U7D>
 +~ ; <U7E>
 + ; <U7F>
 + ; <U80>
 +ÿ ; <UFF>
 +Ā ; <U100>
 +࿿ ; <UFFF>
 +က ; <U1000>
 +<2B> ; <UFFFD>
 + ; <UFFFF>
 +𐀀 ; <U10000>
 +🿿 ; <U1FFFF>
 +𠀀 ; <U20000>
 +𯿿 ; <U2FFFF>
 +𰀀 ; <U30000>
 +𿿾 ; <U3FFFE>
 +񀀀 ; <U40000>
 +񏿿 ; <U4FFFF>
 +񐀀 ; <U50000>
 +񟿿 ; <U5FFFF>
 +񠀀 ; <U60000>
 +񯿿 ; <U6FFFF>
 +񰀀 ; <U70000>
 +񿿿 ; <U7FFFF>
 +򀀀 ; <U80000>
 +򏿿 ; <U8FFFF>
 +򐀀 ; <U90000>
 +򟿿 ; <U9FFFF>
 +򠀀 ; <UA0000>
 +򯿿 ; <UAFFFF>
 +򰀀 ; <UB0000>
 +򿿿 ; <UBFFFF>
 +󀀁 ; <UC0001>
 +󏿌 ; <UCFFCC>
 +󐀎 ; <UD000E>
 +󟿿 ; <UDFFFF>
 +󠀁 ; <UE0001>
 +󯿿 ; <UEFFFF>
 +󰀁 ; <UF0001>
 +󿿿 ; <UFFFFF>
 +􀀁 ; <U100001>
 +􏿿 ; <U10FFFF>
 diff --git a/localedata/Makefile b/localedata/Makefile
 index 0341528b0407ae3b..c9dd5a954e8194cc 100644
 --- a/localedata/Makefile
 +++ b/localedata/Makefile
@@ -47,6 +47,7 @@ test-input := \
 	bg_BG.UTF-8 \
 	br_FR.UTF-8 \
 	bs_BA.UTF-8 \
 +	C.UTF-8 \
 	ckb_IQ.UTF-8 \
 	cmn_TW.UTF-8 \
 	crh_UA.UTF-8 \
@@ -206,6 +207,7 @@ LOCALES := \
 	bg_BG.UTF-8 \
 	br_FR.UTF-8 \
 	bs_BA.UTF-8 \
 +	C.UTF-8 \
 	ckb_IQ.UTF-8 \
 	cmn_TW.UTF-8 \
 	crh_UA.UTF-8 \
 diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
-index 8ca023e..2a78391 100644
+index 34f7a7c3fe2b6526..546ce6cea16a8fdb 100644
 --- a/localedata/SUPPORTED
 +++ b/localedata/SUPPORTED
-@@ -1,6 +1,7 @@
+@@ -79,6 +79,7 @@ brx_IN/UTF-8 \
- # This file names the currently supported and somewhat tested locales.
+ bs_BA.UTF-8/UTF-8 \
- # If you have any additions please file a glibc bug report.
+ bs_BA/ISO-8859-2 \
- SUPPORTED-LOCALES=\
+ byn_ER/UTF-8 \
 +C.UTF-8/UTF-8 \
- aa_DJ.UTF-8/UTF-8 \
+ ca_AD.UTF-8/UTF-8 \
- aa_DJ/ISO-8859-1 \
+ ca_AD/ISO-8859-15 \
- aa_ER/UTF-8 \
+ ca_ES.UTF-8/UTF-8 \
 diff --git a/localedata/locales/C b/localedata/locales/C
 new file mode 100644
-index 0000000..fdf460e
+index 0000000000000000..651691c72424cf38
 --- /dev/null
 +++ b/localedata/locales/C
-@@ -0,0 +1,238 @@
+@@ -0,0 +1,194 @@
 +escape_char /
 +comment_char %
 +% Locale for C locale in UTF-8
@ -52,14 +374,13 @@ index 0000000..fdf460e
 +source     ""
 +address    ""
 +contact    ""
-+email      "mfabian@redhat.com"
+email      "bug-glibc-locales@gnu.org"
 +tel        ""
 +fax        ""
-+language   "C"
+language   ""
 +territory  ""
-+revision   "1.0"
+revision   "2.0"
-+date       "2015-08-10"
+date       "2020-06-28"
 +%
 +category  "i18n:2012";LC_IDENTIFICATION
 +category  "i18n:2012";LC_CTYPE
 +category  "i18n:2012";LC_COLLATE
@ -75,8 +396,23 @@ index 0000000..fdf460e
 +END LC_IDENTIFICATION
 +
 +LC_CTYPE
-+copy "i18n"
+% Include only the i18n character type classes without any of the
 +% transliteration that i18n uses by default.
 +copy "i18n_ctype"
 +
 +% Include the neutral transliterations.  The builtin C and
 +% POSIX locales have +1600 transliterations that are built into
 +% the locales, and these are a superset of those.
 +translit_start
 +include "translit_neutral";""
 +% We must use '?' for default_missing because the transliteration
 +% framework includes it directly into the output and so it must
 +% be compatible with ASCII if that is the target character set.
 +default_missing <U003F>
 +translit_end
 +
 +% Include the transliterations that can convert combined characters.
 +% These are generally expected by users.
 +translit_start
 +include "translit_combining";""
 +translit_end
@ -84,48 +420,26 @@ index 0000000..fdf460e
 +END LC_CTYPE
 +
 +LC_COLLATE
-+order_start forward
+% The keyword 'strcmp_collation' in any part of any LC_COLLATE
-+<U0000>
+% immediately discards all collation information and causes the
-+..
+% locale to use strcmp for collation comparison.  This is exactly
-+<UFFFF>
+% what is needed for C (ASCII) or C.UTF-8.
-+<U00010000>
+strcmp_collation
 +..
 +<U0001FFFF>
 +<U00020000>
 +..
 +<U0002FFFF>
 +<U000E0000>
 +..
 +<U000EFFFF>
 +<U000F0000>
 +..
 +<U000FFFFF>
 +<U00100000>
 +..
 +<U0010FFFF>
 +UNDEFINED
 +order_end
 +END LC_COLLATE
 +
 +LC_MONETARY
-+% This is the 14652 i18n fdcc-set definition for
+
-+% the LC_MONETARY category
+% This is the 14652 i18n fdcc-set definition for the LC_MONETARY
-+% (except for the int_curr_symbol and currency_symbol, they are empty in
+% category (except for the int_curr_symbol and currency_symbol, they are
-+% the 14652 i18n fdcc-set definition and also empty in
+% empty in the 14652 i18n fdcc-set definition and also empty in
-+% glibc/locale/C-monetary.c. But localedef complains in that case).
+% glibc/locale/C-monetary.c).
-+%
+int_curr_symbol     ""
-+% Using "USD" for int_curr_symbol. But maybe "XXX" would be better?
+currency_symbol     ""
-+% XXX is "No currency" (https://en.wikipedia.org/wiki/ISO_4217)
+mon_decimal_point   "."
 +int_curr_symbol     "<U0055><U0053><U0044><U0020>"
 +% Using "$" for currency_symbol. But maybe <U00A4> would be better?
 +% U+00A4 is the "generic currency symbol"
 +% (https://en.wikipedia.org/wiki/Currency_sign_%28typography%29)
 +currency_symbol     "<U0024>"
 +mon_decimal_point   "<U002E>"
 +mon_thousands_sep   ""
 +mon_grouping        -1
 +positive_sign       ""
-+negative_sign       "<U002D>"
+negative_sign       "-"
 +int_frac_digits     -1
 +frac_digits         -1
 +p_cs_precedes       -1
@ -143,51 +457,29 @@ index 0000000..fdf460e
 +% This is the POSIX Locale definition for
 +% the LC_NUMERIC category.
 +%
-+decimal_point   "<U002E>"
+decimal_point   "."
 +thousands_sep   ""
 +grouping        -1
 +END LC_NUMERIC
 +
 +LC_TIME
-+% This is the POSIX Locale definition for
+% This is the POSIX Locale definition for the LC_TIME category with the
-+% the LC_TIME category.
+% exception that time is per ISO 8601 and 24-hour.
 +%
 +% Abbreviated weekday names (%a)
-+abday       "<U0053><U0075><U006E>";"<U004D><U006F><U006E>";/
+abday       "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat"
 +            "<U0054><U0075><U0065>";"<U0057><U0065><U0064>";/
 +            "<U0054><U0068><U0075>";"<U0046><U0072><U0069>";/
 +            "<U0053><U0061><U0074>"
 +
 +% Full weekday names (%A)
-+day         "<U0053><U0075><U006E><U0064><U0061><U0079>";/
+day         "Sunday";"Monday";"Tuesday";"Wednesday";"Thursday";/
-+            "<U004D><U006F><U006E><U0064><U0061><U0079>";/
+            "Friday";"Saturday"
 +            "<U0054><U0075><U0065><U0073><U0064><U0061><U0079>";/
 +            "<U0057><U0065><U0064><U006E><U0065><U0073><U0064><U0061><U0079>";/
 +            "<U0054><U0068><U0075><U0072><U0073><U0064><U0061><U0079>";/
 +            "<U0046><U0072><U0069><U0064><U0061><U0079>";/
 +            "<U0053><U0061><U0074><U0075><U0072><U0064><U0061><U0079>"
 +
 +% Abbreviated month names (%b)
-+abmon       "<U004A><U0061><U006E>";"<U0046><U0065><U0062>";/
+abmon       "Jan";"Feb";"Mar";"Apr";"May";"Jun";"Jul";"Aug";"Sep";/
-+            "<U004D><U0061><U0072>";"<U0041><U0070><U0072>";/
+            "Oct";"Nov";"Dec"
 +            "<U004D><U0061><U0079>";"<U004A><U0075><U006E>";/
 +            "<U004A><U0075><U006C>";"<U0041><U0075><U0067>";/
 +            "<U0053><U0065><U0070>";"<U004F><U0063><U0074>";/
 +            "<U004E><U006F><U0076>";"<U0044><U0065><U0063>"
 +
 +% Full month names (%B)
-+mon         "<U004A><U0061><U006E><U0075><U0061><U0072><U0079>";/
+mon         "January";"February";"March";"April";"May";"June";"July";/
-+            "<U0046><U0065><U0062><U0072><U0075><U0061><U0072><U0079>";/
+            "August";"September";"October";"November";"December"
 +            "<U004D><U0061><U0072><U0063><U0068>";/
 +            "<U0041><U0070><U0072><U0069><U006C>";/
 +            "<U004D><U0061><U0079>";/
 +            "<U004A><U0075><U006E><U0065>";/
 +            "<U004A><U0075><U006C><U0079>";/
 +            "<U0041><U0075><U0067><U0075><U0073><U0074>";/
 +            "<U0053><U0065><U0070><U0074><U0065><U006D><U0062><U0065><U0072>";/
 +            "<U004F><U0063><U0074><U006F><U0062><U0065><U0072>";/
 +            "<U004E><U006F><U0076><U0065><U006D><U0062><U0065><U0072>";/
 +            "<U0044><U0065><U0063><U0065><U006D><U0062><U0065><U0072>"
 +
 +% Week description, consists of three fields:
 +% 1. Number of days in a week.
@ -198,40 +490,35 @@ index 0000000..fdf460e
 +% Monday), and 4 (Thursday), respectively.
 +week    7;19971201;4
 +first_weekday	1
-+first_workday	1
+first_workday	2
 +
 +% Appropriate date and time representation (%c)
-+%	"%a %b %e %H:%M:%S %Y"
+d_t_fmt "%a %b %e %H:%M:%S %Y"
 +d_t_fmt "<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065><U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U0059>"
 +
 +% Appropriate date representation (%x)
-+%	"%m/%d/%y"
+d_fmt   "%m/%d/%y"
 +d_fmt   "<U0025><U006D><U002F><U0025><U0064><U002F><U0025><U0079>"
 +
 +% Appropriate time representation (%X)
-+%	"%H:%M:%S"
+t_fmt   "%H:%M:%S"
 +t_fmt   "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
 +
 +% Appropriate AM/PM time representation (%r)
-+%	"%I:%M:%S %p"
+t_fmt_ampm "%I:%M:%S %p"
 +t_fmt_ampm "<U0025><U0049><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U0070>"
 +
-+% Equivalent of AM/PM (%p)      "AM"/"PM"
+% Equivalent of AM/PM (%p)
-+%
+am_pm	"AM";"PM"
 +am_pm	"<U0041><U004D>";"<U0050><U004D>"
 +
 +% Appropriate date representation (date(1))   "%a %b %e %H:%M:%S %Z %Y"
-+date_fmt	"<U0025><U0061><U0020><U0025><U0062><U0020><U0025><U0065><U0020><U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053><U0020><U0025><U005A><U0020><U0025><U0059>"
+date_fmt	"%a %b %e %H:%M:%S %Z %Y"
 +END LC_TIME
 +
 +LC_MESSAGES
 +% This is the POSIX Locale definition for
 +% the LC_MESSAGES category.
 +%
-+yesexpr "<U005E><U005B><U0079><U0059><U005D>"
+yesexpr "^[yY]"
-+noexpr  "<U005E><U005B><U006E><U004E><U005D>"
+noexpr  "^[nN]"
-+yesstr  "<U0059><U0065><U0073>"
+yesstr  "Yes"
-+nostr   "<U004E><U006F>"
+nostr   "No"
 +END LC_MESSAGES
 +
 +LC_PAPER
@ -246,30 +533,22 @@ index 0000000..fdf460e
 +LC_NAME
 +% This is the ISO/IEC 14652 "i18n" definition for
 +% the LC_NAME category.
 +% "%p%t%g%t%m%t%f"
 +% (also used in the built in C/POSIX locale in glibc/locale/C-name.c)
-+name_fmt    "<U0025><U0070><U0025><U0074><U0025><U0067><U0025><U0074>/
+name_fmt    "%p%t%g%t%m%t%f"
 +<U0025><U006D><U0025><U0074><U0025><U0066>"
 +END LC_NAME
 +
 +LC_ADDRESS
 +% This is the ISO/IEC 14652 "i18n" definition for
 +% the LC_ADDRESS category.
 +% "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
 +% (also used in the built in C/POSIX locale in glibc/locale/C-address.c)
-+postal_fmt    "<U0025><U0061><U0025><U004E><U0025><U0066><U0025><U004E>/
+postal_fmt    "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
 +<U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025><U0073>/
 +<U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072><U0025>/
 +<U004E><U0025><U0043><U002D><U0025><U007A><U0020><U0025><U0054><U0025>/
 +<U004E><U0025><U0063><U0025><U004E>"
 +END LC_ADDRESS
 +
 +LC_TELEPHONE
 +% This is the ISO/IEC 14652 "i18n" definition for
 +% the LC_TELEPHONE category.
 +% "+%c %a %l"
-+tel_int_fmt    "<U002B><U0025><U0063><U0020><U0025><U0061><U0020><U0025>/
+tel_int_fmt    "+%c %a %l"
 +<U006C>"
 +% (also used in the built in C/POSIX locale in glibc/locale/C-telephone.c)
 +END LC_TELEPHONE
 +
@ -280,7 +559,3 @@ index 0000000..fdf460e
 +%metric
 +measurement    1
 +END LC_MEASUREMENT
 +
 -- 
 2.4.3
--- a/glibc-c-utf8-strcmp_collation.patch
+++ b/glibc-c-utf8-strcmp_collation.patch
@ -0,0 +1,754 @@
 Submission: https://sourceware.org/pipermail/libc-alpha/2021-July/129455.html
 Author: Carlos O'Donell <carlos@redhat.com>
 Date:   Sun Jul 25 22:10:17 2021 -0400
    Add 'strcmp_collation' support for LC_COLLATE.
    Support a new directive 'strcmp_collation' in the LC_COLLATE
    section of a locale source file. This new directive causes all
    collation rules to be dropped and instead 'strcmp' is used for
    collation of the input character set. This is required to allow
    for a C.UTF-8 that contains zero collation rules (minimal size)
    and sorts using code point sorting.
    Tested on x86_64 and i686 without regression.
 diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
 index b6406b775d3a81ad..ec778e23d3c4beb7 100644
 --- a/locale/programs/ld-collate.c
 +++ b/locale/programs/ld-collate.c
@@ -195,6 +195,9 @@ struct name_list
 /* The real definition of the struct for the LC_COLLATE locale.  */
 struct locale_collate_t
 {
 +  /* Does the locale use strcmp to compare the encoding?  */
 +  bool strcmp_collation;
 +
   int col_weight_max;
   int cur_weight_max;
@@ -1510,6 +1513,7 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale,
 	  obstack_init (&collate->mempool);
 	  collate->col_weight_max = -1;
 +	  collate->strcmp_collation = false;
 	}
       else
 	/* Reuse the copy_locale's data structures.  */
@@ -1568,6 +1572,10 @@ collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
       return;
     }
 +  /* No data required.  */
 +  if (collate->strcmp_collation)
 +    return;
 +
   /* If this assertion is hit change the type in `element_t'.  */
   assert (nrules <= sizeof (runp->used_in_level) * 8);
@@ -2115,7 +2123,7 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
   add_locale_uint32 (&file, nrules);
   /* If we have no LC_COLLATE data emit only the number of rules as zero.  */
 -  if (collate == NULL)
 +  if (collate == NULL || collate->strcmp_collation)
     {
       size_t idx;
       for (idx = 1; idx < nelems; idx++)
@@ -2123,6 +2131,10 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
 	  /* The words have to be handled specially.  */
 	  if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
 	    add_locale_uint32 (&file, 0);
 +	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
 +		   && collate != NULL)
 +	    /* A valid LC_COLLATE must have a code set name.  */
 +	    add_locale_string (&file, charmap->code_set_name);
 	  else
 	    add_locale_empty (&file);
 	}
@@ -2672,6 +2684,10 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
       switch (nowtok)
 	{
 +	case tok_strcmp_collation:
 +	  collate->strcmp_collation = true;
 +	  break;
 +
 	case tok_copy:
 	  /* Allow copying other locales.  */
 	  now = lr_token (ldfile, charmap, result, NULL, verbose);
@@ -3742,9 +3758,11 @@ error while adding equivalent collating symbol"));
 	  /* Next we assume `LC_COLLATE'.  */
 	  if (!ignore_content)
 	    {
 -	      if (state == 0 && copy_locale == NULL)
 +	      if (state == 0
 +		  && copy_locale == NULL
 +		  && !collate->strcmp_collation)
 		/* We must either see a copy statement or have
 -		   ordering values.  */
 +		   ordering values, or strcmp_collation.  */
 		lr_error (ldfile,
 			  _("%s: empty category description not allowed"),
 			  "LC_COLLATE");
 diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
 index bcded15ddb4c44bb..0ae72673409b3545 100644
 --- a/locale/programs/locfile-kw.gperf
 +++ b/locale/programs/locfile-kw.gperf
@@ -54,6 +54,7 @@ translit_end,           tok_translit_end,           0
 translit_ignore,        tok_translit_ignore,        0
 default_missing,        tok_default_missing,        0
 LC_COLLATE,             tok_lc_collate,             0
 +strcmp_collation,       tok_strcmp_collation,       0
 coll_weight_max,        tok_coll_weight_max,        0
 section-symbol,         tok_section_symbol,         0
 collating-element,      tok_collating_element,      0
 diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h
 index bc1cb8f0845852ad..f7af5c8a0a4dbeeb 100644
 --- a/locale/programs/locfile-kw.h
 +++ b/locale/programs/locfile-kw.h
@@ -54,7 +54,7 @@
 #line 24 "locfile-kw.gperf"
 struct keyword_t ;
 -#define TOTAL_KEYWORDS 178
 +#define TOTAL_KEYWORDS 179
 #define MIN_WORD_LENGTH 3
 #define MAX_WORD_LENGTH 22
 #define MIN_HASH_VALUE 3
@@ -78,7 +78,7 @@ hash (register const char *str, register size_t len)
       631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
       631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
       631, 631, 631, 631, 631, 631, 631, 631, 631, 631,
 -        5,   0, 631, 631, 631, 631, 631, 631, 631, 631,
 +       10,   5, 631, 631, 631, 631, 631, 631, 631, 631,
       631, 631, 631, 631, 631,   5, 631,   0,   0,   0,
         0,   0,  10,   0, 631, 631,   0, 631,   0,   5,
       631, 631,   0,   0,   0,  10, 631, 631, 631,   0,
@@ -134,92 +134,92 @@ locfile_hash (register const char *str, register size_t len)
 #line 31 "locfile-kw.gperf"
       {"END",                    tok_end,                    0},
       {""}, {""},
 -#line 70 "locfile-kw.gperf"
 +#line 71 "locfile-kw.gperf"
       {"IGNORE",                 tok_ignore,                 0},
 -#line 129 "locfile-kw.gperf"
 +#line 130 "locfile-kw.gperf"
       {"LC_TIME",                tok_lc_time,                0},
 #line 30 "locfile-kw.gperf"
       {"LC_CTYPE",               tok_lc_ctype,               0},
       {""},
 -#line 168 "locfile-kw.gperf"
 +#line 169 "locfile-kw.gperf"
       {"LC_ADDRESS",             tok_lc_address,             0},
 -#line 153 "locfile-kw.gperf"
 +#line 154 "locfile-kw.gperf"
       {"LC_MESSAGES",            tok_lc_messages,            0},
 -#line 161 "locfile-kw.gperf"
 +#line 162 "locfile-kw.gperf"
       {"LC_NAME",                tok_lc_name,                0},
 -#line 158 "locfile-kw.gperf"
 +#line 159 "locfile-kw.gperf"
       {"LC_PAPER",               tok_lc_paper,               0},
 -#line 186 "locfile-kw.gperf"
 +#line 187 "locfile-kw.gperf"
       {"LC_MEASUREMENT",         tok_lc_measurement,         0},
 #line 56 "locfile-kw.gperf"
       {"LC_COLLATE",             tok_lc_collate,             0},
       {""},
 -#line 188 "locfile-kw.gperf"
 +#line 189 "locfile-kw.gperf"
       {"LC_IDENTIFICATION",      tok_lc_identification,      0},
 -#line 201 "locfile-kw.gperf"
 +#line 202 "locfile-kw.gperf"
       {"revision",               tok_revision,               0},
 -#line 69 "locfile-kw.gperf"
 +#line 70 "locfile-kw.gperf"
       {"UNDEFINED",              tok_undefined,              0},
 -#line 125 "locfile-kw.gperf"
 +#line 126 "locfile-kw.gperf"
       {"LC_NUMERIC",             tok_lc_numeric,             0},
 -#line 82 "locfile-kw.gperf"
 +#line 83 "locfile-kw.gperf"
       {"LC_MONETARY",            tok_lc_monetary,            0},
 -#line 181 "locfile-kw.gperf"
 +#line 182 "locfile-kw.gperf"
       {"LC_TELEPHONE",           tok_lc_telephone,           0},
       {""}, {""}, {""},
 -#line 75 "locfile-kw.gperf"
 +#line 76 "locfile-kw.gperf"
       {"define",                 tok_define,                 0},
 -#line 154 "locfile-kw.gperf"
 +#line 155 "locfile-kw.gperf"
       {"yesexpr",                tok_yesexpr,                0},
 -#line 141 "locfile-kw.gperf"
 +#line 142 "locfile-kw.gperf"
       {"era_year",               tok_era_year,               0},
       {""},
 #line 54 "locfile-kw.gperf"
       {"translit_ignore",        tok_translit_ignore,        0},
 -#line 156 "locfile-kw.gperf"
 +#line 157 "locfile-kw.gperf"
       {"yesstr",                 tok_yesstr,                 0},
       {""},
 -#line 89 "locfile-kw.gperf"
 +#line 90 "locfile-kw.gperf"
       {"negative_sign",          tok_negative_sign,          0},
       {""},
 -#line 137 "locfile-kw.gperf"
 +#line 138 "locfile-kw.gperf"
       {"t_fmt",                  tok_t_fmt,                  0},
 -#line 159 "locfile-kw.gperf"
 +#line 160 "locfile-kw.gperf"
       {"height",                 tok_height,                 0},
       {""}, {""},
 #line 52 "locfile-kw.gperf"
       {"translit_start",         tok_translit_start,         0},
 -#line 136 "locfile-kw.gperf"
 +#line 137 "locfile-kw.gperf"
       {"d_fmt",                  tok_d_fmt,                  0},
       {""},
 #line 53 "locfile-kw.gperf"
       {"translit_end",           tok_translit_end,           0},
 -#line 94 "locfile-kw.gperf"
 +#line 95 "locfile-kw.gperf"
       {"n_cs_precedes",          tok_n_cs_precedes,          0},
 -#line 144 "locfile-kw.gperf"
 +#line 145 "locfile-kw.gperf"
       {"era_t_fmt",              tok_era_t_fmt,              0},
 #line 39 "locfile-kw.gperf"
       {"space",                  tok_space,                  0},
 -#line 72 "locfile-kw.gperf"
 -      {"reorder-end",            tok_reorder_end,            0},
 #line 73 "locfile-kw.gperf"
 +      {"reorder-end",            tok_reorder_end,            0},
 +#line 74 "locfile-kw.gperf"
       {"reorder-sections-after", tok_reorder_sections_after, 0},
       {""},
 -#line 142 "locfile-kw.gperf"
 +#line 143 "locfile-kw.gperf"
       {"era_d_fmt",              tok_era_d_fmt,              0},
 -#line 189 "locfile-kw.gperf"
 +#line 190 "locfile-kw.gperf"
       {"title",                  tok_title,                  0},
       {""}, {""},
 -#line 149 "locfile-kw.gperf"
 +#line 150 "locfile-kw.gperf"
       {"timezone",               tok_timezone,               0},
       {""},
 -#line 74 "locfile-kw.gperf"
 +#line 75 "locfile-kw.gperf"
       {"reorder-sections-end",   tok_reorder_sections_end,   0},
       {""}, {""}, {""},
 -#line 95 "locfile-kw.gperf"
 +#line 96 "locfile-kw.gperf"
       {"n_sep_by_space",         tok_n_sep_by_space,         0},
       {""}, {""},
 -#line 100 "locfile-kw.gperf"
 +#line 101 "locfile-kw.gperf"
       {"int_n_cs_precedes",      tok_int_n_cs_precedes,      0},
       {""}, {""}, {""},
 #line 26 "locfile-kw.gperf"
@@ -233,147 +233,147 @@ locfile_hash (register const char *str, register size_t len)
       {"print",                  tok_print,                  0},
 #line 44 "locfile-kw.gperf"
       {"xdigit",                 tok_xdigit,                 0},
 -#line 110 "locfile-kw.gperf"
 +#line 111 "locfile-kw.gperf"
       {"duo_n_cs_precedes",      tok_duo_n_cs_precedes,      0},
 -#line 127 "locfile-kw.gperf"
 +#line 128 "locfile-kw.gperf"
       {"thousands_sep",          tok_thousands_sep,          0},
 -#line 197 "locfile-kw.gperf"
 +#line 198 "locfile-kw.gperf"
       {"territory",              tok_territory,              0},
 #line 36 "locfile-kw.gperf"
       {"digit",                  tok_digit,                  0},
       {""}, {""},
 -#line 92 "locfile-kw.gperf"
 +#line 93 "locfile-kw.gperf"
       {"p_cs_precedes",          tok_p_cs_precedes,          0},
       {""}, {""},
 -#line 62 "locfile-kw.gperf"
 +#line 63 "locfile-kw.gperf"
       {"script",                 tok_script,                 0},
 #line 29 "locfile-kw.gperf"
       {"include",                tok_include,                0},
       {""},
 -#line 78 "locfile-kw.gperf"
 +#line 79 "locfile-kw.gperf"
       {"else",                   tok_else,                   0},
 -#line 184 "locfile-kw.gperf"
 +#line 185 "locfile-kw.gperf"
       {"int_select",             tok_int_select,             0},
       {""}, {""}, {""},
 -#line 132 "locfile-kw.gperf"
 +#line 133 "locfile-kw.gperf"
       {"week",                   tok_week,                   0},
 #line 33 "locfile-kw.gperf"
       {"upper",                  tok_upper,                  0},
       {""}, {""},
 -#line 194 "locfile-kw.gperf"
 +#line 195 "locfile-kw.gperf"
       {"tel",                    tok_tel,                    0},
 -#line 93 "locfile-kw.gperf"
 +#line 94 "locfile-kw.gperf"
       {"p_sep_by_space",         tok_p_sep_by_space,         0},
 -#line 160 "locfile-kw.gperf"
 +#line 161 "locfile-kw.gperf"
       {"width",                  tok_width,                  0},
       {""},
 -#line 98 "locfile-kw.gperf"
 +#line 99 "locfile-kw.gperf"
       {"int_p_cs_precedes",      tok_int_p_cs_precedes,      0},
       {""}, {""},
 #line 41 "locfile-kw.gperf"
       {"punct",                  tok_punct,                  0},
       {""}, {""},
 -#line 101 "locfile-kw.gperf"
 +#line 102 "locfile-kw.gperf"
       {"int_n_sep_by_space",     tok_int_n_sep_by_space,     0},
       {""}, {""}, {""},
 -#line 108 "locfile-kw.gperf"
 +#line 109 "locfile-kw.gperf"
       {"duo_p_cs_precedes",      tok_duo_p_cs_precedes,      0},
 #line 48 "locfile-kw.gperf"
       {"charconv",               tok_charconv,               0},
       {""},
 #line 47 "locfile-kw.gperf"
       {"class",                  tok_class,                  0},
 -#line 114 "locfile-kw.gperf"
 -      {"duo_int_n_cs_precedes",  tok_duo_int_n_cs_precedes,  0},
 #line 115 "locfile-kw.gperf"
 +      {"duo_int_n_cs_precedes",  tok_duo_int_n_cs_precedes,  0},
 +#line 116 "locfile-kw.gperf"
       {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0},
 -#line 111 "locfile-kw.gperf"
 +#line 112 "locfile-kw.gperf"
       {"duo_n_sep_by_space",     tok_duo_n_sep_by_space,     0},
 -#line 119 "locfile-kw.gperf"
 +#line 120 "locfile-kw.gperf"
       {"duo_int_n_sign_posn",    tok_duo_int_n_sign_posn,    0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""},
 -#line 58 "locfile-kw.gperf"
 +#line 59 "locfile-kw.gperf"
       {"section-symbol",         tok_section_symbol,         0},
 -#line 185 "locfile-kw.gperf"
 +#line 186 "locfile-kw.gperf"
       {"int_prefix",             tok_int_prefix,             0},
       {""}, {""}, {""}, {""},
 #line 42 "locfile-kw.gperf"
       {"graph",                  tok_graph,                  0},
       {""}, {""},
 -#line 99 "locfile-kw.gperf"
 +#line 100 "locfile-kw.gperf"
       {"int_p_sep_by_space",     tok_int_p_sep_by_space,     0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 112 "locfile-kw.gperf"
 -      {"duo_int_p_cs_precedes",  tok_duo_int_p_cs_precedes,  0},
 #line 113 "locfile-kw.gperf"
 +      {"duo_int_p_cs_precedes",  tok_duo_int_p_cs_precedes,  0},
 +#line 114 "locfile-kw.gperf"
       {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0},
 -#line 109 "locfile-kw.gperf"
 +#line 110 "locfile-kw.gperf"
       {"duo_p_sep_by_space",     tok_duo_p_sep_by_space,     0},
 -#line 118 "locfile-kw.gperf"
 +#line 119 "locfile-kw.gperf"
       {"duo_int_p_sign_posn",    tok_duo_int_p_sign_posn,    0},
 -#line 157 "locfile-kw.gperf"
 +#line 158 "locfile-kw.gperf"
       {"nostr",                  tok_nostr,                  0},
       {""}, {""},
 -#line 140 "locfile-kw.gperf"
 +#line 141 "locfile-kw.gperf"
       {"era",                    tok_era,                    0},
       {""},
 -#line 84 "locfile-kw.gperf"
 +#line 85 "locfile-kw.gperf"
       {"currency_symbol",        tok_currency_symbol,        0},
       {""},
 -#line 167 "locfile-kw.gperf"
 +#line 168 "locfile-kw.gperf"
       {"name_ms",                tok_name_ms,                0},
 -#line 165 "locfile-kw.gperf"
 -      {"name_mrs",               tok_name_mrs,               0},
 #line 166 "locfile-kw.gperf"
 +      {"name_mrs",               tok_name_mrs,               0},
 +#line 167 "locfile-kw.gperf"
       {"name_miss",              tok_name_miss,              0},
 -#line 83 "locfile-kw.gperf"
 +#line 84 "locfile-kw.gperf"
       {"int_curr_symbol",        tok_int_curr_symbol,        0},
 -#line 190 "locfile-kw.gperf"
 +#line 191 "locfile-kw.gperf"
       {"source",                 tok_source,                 0},
 -#line 164 "locfile-kw.gperf"
 +#line 165 "locfile-kw.gperf"
       {"name_mr",                tok_name_mr,                0},
 -#line 163 "locfile-kw.gperf"
 +#line 164 "locfile-kw.gperf"
       {"name_gen",               tok_name_gen,               0},
 -#line 202 "locfile-kw.gperf"
 +#line 203 "locfile-kw.gperf"
       {"date",                   tok_date,                   0},
       {""}, {""},
 -#line 191 "locfile-kw.gperf"
 +#line 192 "locfile-kw.gperf"
       {"address",                tok_address,                0},
 -#line 162 "locfile-kw.gperf"
 +#line 163 "locfile-kw.gperf"
       {"name_fmt",               tok_name_fmt,               0},
 #line 32 "locfile-kw.gperf"
       {"copy",                   tok_copy,                   0},
 -#line 103 "locfile-kw.gperf"
 +#line 104 "locfile-kw.gperf"
       {"int_n_sign_posn",        tok_int_n_sign_posn,        0},
       {""}, {""},
 -#line 131 "locfile-kw.gperf"
 +#line 132 "locfile-kw.gperf"
       {"day",                    tok_day,                    0},
 -#line 105 "locfile-kw.gperf"
 +#line 106 "locfile-kw.gperf"
       {"duo_currency_symbol",    tok_duo_currency_symbol,    0},
       {""}, {""}, {""},
 -#line 150 "locfile-kw.gperf"
 +#line 151 "locfile-kw.gperf"
       {"date_fmt",               tok_date_fmt,               0},
 -#line 64 "locfile-kw.gperf"
 +#line 65 "locfile-kw.gperf"
       {"order_end",              tok_order_end,              0},
 -#line 117 "locfile-kw.gperf"
 +#line 118 "locfile-kw.gperf"
       {"duo_n_sign_posn",        tok_duo_n_sign_posn,        0},
       {""},
 -#line 170 "locfile-kw.gperf"
 +#line 171 "locfile-kw.gperf"
       {"country_name",           tok_country_name,           0},
 -#line 71 "locfile-kw.gperf"
 +#line 72 "locfile-kw.gperf"
       {"reorder-after",          tok_reorder_after,          0},
       {""}, {""},
 -#line 155 "locfile-kw.gperf"
 +#line 156 "locfile-kw.gperf"
       {"noexpr",                 tok_noexpr,                 0},
 #line 50 "locfile-kw.gperf"
       {"tolower",                tok_tolower,                0},
 -#line 198 "locfile-kw.gperf"
 +#line 199 "locfile-kw.gperf"
       {"audience",               tok_audience,               0},
       {""}, {""}, {""},
 #line 49 "locfile-kw.gperf"
       {"toupper",                tok_toupper,                0},
 -#line 68 "locfile-kw.gperf"
 +#line 69 "locfile-kw.gperf"
       {"position",               tok_position,               0},
       {""},
 #line 40 "locfile-kw.gperf"
@@ -381,196 +381,198 @@ locfile_hash (register const char *str, register size_t len)
       {""},
 #line 27 "locfile-kw.gperf"
       {"comment_char",           tok_comment_char,           0},
 -#line 88 "locfile-kw.gperf"
 +#line 89 "locfile-kw.gperf"
       {"positive_sign",          tok_positive_sign,          0},
       {""}, {""}, {""}, {""},
 -#line 61 "locfile-kw.gperf"
 +#line 62 "locfile-kw.gperf"
       {"symbol-equivalence",     tok_symbol_equivalence,     0},
       {""},
 -#line 102 "locfile-kw.gperf"
 +#line 103 "locfile-kw.gperf"
       {"int_p_sign_posn",        tok_int_p_sign_posn,        0},
 -#line 175 "locfile-kw.gperf"
 +#line 176 "locfile-kw.gperf"
       {"country_car",            tok_country_car,            0},
       {""}, {""},
 -#line 104 "locfile-kw.gperf"
 +#line 105 "locfile-kw.gperf"
       {"duo_int_curr_symbol",    tok_duo_int_curr_symbol,    0},
       {""}, {""},
 -#line 135 "locfile-kw.gperf"
 +#line 136 "locfile-kw.gperf"
       {"d_t_fmt",                tok_d_t_fmt,                0},
       {""}, {""},
 -#line 116 "locfile-kw.gperf"
 +#line 117 "locfile-kw.gperf"
       {"duo_p_sign_posn",        tok_duo_p_sign_posn,        0},
 -#line 187 "locfile-kw.gperf"
 +#line 188 "locfile-kw.gperf"
       {"measurement",            tok_measurement,            0},
 -#line 176 "locfile-kw.gperf"
 +#line 177 "locfile-kw.gperf"
       {"country_isbn",           tok_country_isbn,           0},
 #line 37 "locfile-kw.gperf"
       {"outdigit",               tok_outdigit,               0},
       {""}, {""},
 -#line 143 "locfile-kw.gperf"
 +#line 144 "locfile-kw.gperf"
       {"era_d_t_fmt",            tok_era_d_t_fmt,            0},
       {""}, {""}, {""},
 #line 34 "locfile-kw.gperf"
       {"lower",                  tok_lower,                  0},
 -#line 183 "locfile-kw.gperf"
 +#line 184 "locfile-kw.gperf"
       {"tel_dom_fmt",            tok_tel_dom_fmt,            0},
 -#line 171 "locfile-kw.gperf"
 +#line 172 "locfile-kw.gperf"
       {"country_post",           tok_country_post,           0},
 -#line 148 "locfile-kw.gperf"
 +#line 149 "locfile-kw.gperf"
       {"cal_direction",          tok_cal_direction,          0},
       {""},
 -#line 139 "locfile-kw.gperf"
 +#line 140 "locfile-kw.gperf"
       {"t_fmt_ampm",             tok_t_fmt_ampm,             0},
 -#line 91 "locfile-kw.gperf"
 +#line 92 "locfile-kw.gperf"
       {"frac_digits",            tok_frac_digits,            0},
       {""}, {""},
 -#line 177 "locfile-kw.gperf"
 +#line 178 "locfile-kw.gperf"
       {"lang_name",              tok_lang_name,              0},
 -#line 90 "locfile-kw.gperf"
 +#line 91 "locfile-kw.gperf"
       {"int_frac_digits",        tok_int_frac_digits,        0},
       {""},
 -#line 121 "locfile-kw.gperf"
 +#line 122 "locfile-kw.gperf"
       {"uno_valid_to",           tok_uno_valid_to,           0},
 -#line 126 "locfile-kw.gperf"
 +#line 127 "locfile-kw.gperf"
       {"decimal_point",          tok_decimal_point,          0},
       {""},
 -#line 133 "locfile-kw.gperf"
 +#line 134 "locfile-kw.gperf"
       {"abmon",                  tok_abmon,                  0},
       {""}, {""}, {""}, {""},
 -#line 107 "locfile-kw.gperf"
 +#line 108 "locfile-kw.gperf"
       {"duo_frac_digits",        tok_duo_frac_digits,        0},
 -#line 182 "locfile-kw.gperf"
 +#line 183 "locfile-kw.gperf"
       {"tel_int_fmt",            tok_tel_int_fmt,            0},
 -#line 123 "locfile-kw.gperf"
 +#line 124 "locfile-kw.gperf"
       {"duo_valid_to",           tok_duo_valid_to,           0},
 -#line 146 "locfile-kw.gperf"
 +#line 147 "locfile-kw.gperf"
       {"first_weekday",          tok_first_weekday,          0},
       {""},
 -#line 130 "locfile-kw.gperf"
 +#line 131 "locfile-kw.gperf"
       {"abday",                  tok_abday,                  0},
       {""},
 -#line 200 "locfile-kw.gperf"
 +#line 201 "locfile-kw.gperf"
       {"abbreviation",           tok_abbreviation,           0},
 -#line 147 "locfile-kw.gperf"
 +#line 148 "locfile-kw.gperf"
       {"first_workday",          tok_first_workday,          0},
       {""}, {""},
 -#line 97 "locfile-kw.gperf"
 +#line 98 "locfile-kw.gperf"
       {"n_sign_posn",            tok_n_sign_posn,            0},
       {""}, {""}, {""},
 -#line 145 "locfile-kw.gperf"
 +#line 146 "locfile-kw.gperf"
       {"alt_digits",             tok_alt_digits,             0},
       {""}, {""},
 -#line 128 "locfile-kw.gperf"
 +#line 129 "locfile-kw.gperf"
       {"grouping",               tok_grouping,               0},
       {""},
 #line 45 "locfile-kw.gperf"
       {"blank",                  tok_blank,                  0},
       {""}, {""},
 -#line 196 "locfile-kw.gperf"
 +#line 197 "locfile-kw.gperf"
       {"language",               tok_language,               0},
 -#line 120 "locfile-kw.gperf"
 +#line 121 "locfile-kw.gperf"
       {"uno_valid_from",         tok_uno_valid_from,         0},
       {""},
 -#line 199 "locfile-kw.gperf"
 +#line 200 "locfile-kw.gperf"
       {"application",            tok_application,            0},
       {""},
 -#line 80 "locfile-kw.gperf"
 +#line 81 "locfile-kw.gperf"
       {"elifndef",               tok_elifndef,               0},
       {""}, {""}, {""}, {""}, {""},
 -#line 122 "locfile-kw.gperf"
 +#line 123 "locfile-kw.gperf"
       {"duo_valid_from",         tok_duo_valid_from,         0},
 -#line 57 "locfile-kw.gperf"
 +#line 58 "locfile-kw.gperf"
       {"coll_weight_max",        tok_coll_weight_max,        0},
       {""},
 -#line 79 "locfile-kw.gperf"
 +#line 80 "locfile-kw.gperf"
       {"elifdef",                tok_elifdef,                0},
 -#line 67 "locfile-kw.gperf"
 +#line 68 "locfile-kw.gperf"
       {"backward",               tok_backward,               0},
 -#line 106 "locfile-kw.gperf"
 +#line 107 "locfile-kw.gperf"
       {"duo_int_frac_digits",    tok_duo_int_frac_digits,    0},
       {""}, {""}, {""}, {""}, {""}, {""},
 -#line 96 "locfile-kw.gperf"
 +#line 97 "locfile-kw.gperf"
       {"p_sign_posn",            tok_p_sign_posn,            0},
       {""},
 -#line 203 "locfile-kw.gperf"
 +#line 204 "locfile-kw.gperf"
       {"category",               tok_category,               0},
       {""}, {""}, {""}, {""},
 -#line 134 "locfile-kw.gperf"
 +#line 135 "locfile-kw.gperf"
       {"mon",                    tok_mon,                    0},
       {""},
 -#line 124 "locfile-kw.gperf"
 +#line 125 "locfile-kw.gperf"
       {"conversion_rate",        tok_conversion_rate,        0},
       {""}, {""}, {""}, {""}, {""},
 -#line 63 "locfile-kw.gperf"
 +#line 64 "locfile-kw.gperf"
       {"order_start",            tok_order_start,            0},
       {""}, {""}, {""}, {""}, {""},
 -#line 178 "locfile-kw.gperf"
 +#line 179 "locfile-kw.gperf"
       {"lang_ab",                tok_lang_ab,                0},
 -#line 180 "locfile-kw.gperf"
 +#line 181 "locfile-kw.gperf"
       {"lang_lib",               tok_lang_lib,               0},
       {""}, {""}, {""},
 -#line 192 "locfile-kw.gperf"
 +#line 193 "locfile-kw.gperf"
       {"contact",                tok_contact,                0},
       {""}, {""}, {""},
 -#line 173 "locfile-kw.gperf"
 -      {"country_ab3",            tok_country_ab3,            0},
 +#line 57 "locfile-kw.gperf"
 +      {"strcmp_collation",       tok_strcmp_collation,       0},
       {""}, {""}, {""},
 -#line 193 "locfile-kw.gperf"
 +#line 194 "locfile-kw.gperf"
       {"email",                  tok_email,                  0},
 -#line 172 "locfile-kw.gperf"
 -      {"country_ab2",            tok_country_ab2,            0},
 +#line 174 "locfile-kw.gperf"
 +      {"country_ab3",            tok_country_ab3,            0},
       {""}, {""}, {""},
 #line 55 "locfile-kw.gperf"
       {"default_missing",        tok_default_missing,        0},
 -      {""}, {""},
 -#line 195 "locfile-kw.gperf"
 +#line 173 "locfile-kw.gperf"
 +      {"country_ab2",            tok_country_ab2,            0},
 +      {""},
 +#line 196 "locfile-kw.gperf"
       {"fax",                    tok_fax,                    0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 174 "locfile-kw.gperf"
 +#line 175 "locfile-kw.gperf"
       {"country_num",            tok_country_num,            0},
       {""}, {""}, {""}, {""}, {""}, {""},
 #line 51 "locfile-kw.gperf"
       {"map",                    tok_map,                    0},
 -#line 65 "locfile-kw.gperf"
 +#line 66 "locfile-kw.gperf"
       {"from",                   tok_from,                   0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 86 "locfile-kw.gperf"
 +#line 87 "locfile-kw.gperf"
       {"mon_thousands_sep",      tok_mon_thousands_sep,      0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""},
 -#line 81 "locfile-kw.gperf"
 +#line 82 "locfile-kw.gperf"
       {"endif",                  tok_endif,                  0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 151 "locfile-kw.gperf"
 +#line 152 "locfile-kw.gperf"
       {"alt_mon",                tok_alt_mon,                0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 76 "locfile-kw.gperf"
 +#line 77 "locfile-kw.gperf"
       {"undef",                  tok_undef,                  0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 59 "locfile-kw.gperf"
 +#line 60 "locfile-kw.gperf"
       {"collating-element",      tok_collating_element,      0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 152 "locfile-kw.gperf"
 +#line 153 "locfile-kw.gperf"
       {"ab_alt_mon",             tok_ab_alt_mon,             0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 66 "locfile-kw.gperf"
 +#line 67 "locfile-kw.gperf"
       {"forward",                tok_forward,                0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""},
 -#line 85 "locfile-kw.gperf"
 +#line 86 "locfile-kw.gperf"
       {"mon_decimal_point",      tok_mon_decimal_point,      0},
       {""}, {""},
 -#line 169 "locfile-kw.gperf"
 +#line 170 "locfile-kw.gperf"
       {"postal_fmt",             tok_postal_fmt,             0},
       {""}, {""}, {""}, {""}, {""},
 -#line 60 "locfile-kw.gperf"
 +#line 61 "locfile-kw.gperf"
       {"collating-symbol",       tok_collating_symbol,       0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
@@ -583,15 +585,15 @@ locfile_hash (register const char *str, register size_t len)
 #line 38 "locfile-kw.gperf"
       {"alnum",                  tok_alnum,                  0},
       {""},
 -#line 87 "locfile-kw.gperf"
 +#line 88 "locfile-kw.gperf"
       {"mon_grouping",           tok_mon_grouping,           0},
       {""},
 -#line 179 "locfile-kw.gperf"
 +#line 180 "locfile-kw.gperf"
       {"lang_term",              tok_lang_term,              0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""},
 -#line 77 "locfile-kw.gperf"
 +#line 78 "locfile-kw.gperf"
       {"ifdef",                  tok_ifdef,                  0},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
@@ -599,7 +601,7 @@ locfile_hash (register const char *str, register size_t len)
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
       {""}, {""}, {""}, {""},
 -#line 138 "locfile-kw.gperf"
 +#line 139 "locfile-kw.gperf"
       {"am_pm",                  tok_am_pm,                  0}
     };
 diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
 index 414ad3076223e971..0ea73c51f14e839d 100644
 --- a/locale/programs/locfile-token.h
 +++ b/locale/programs/locfile-token.h
@@ -91,6 +91,7 @@ enum token_t
   tok_translit_ignore,
   tok_default_missing,
   tok_lc_collate,
 +  tok_strcmp_collation,
   tok_coll_weight_max,
   tok_section_symbol,
   tok_collating_element,
--- a/glibc.spec
+++ b/glibc.spec
@ -111,7 +111,7 @@
 Summary: The GNU libc libraries
 Name: glibc
 Version: %{glibcversion}
-Release: 51%{?dist}
+Release: 52%{?dist}
 # In general, GPLv2+ is used by programs, LGPLv2+ is used for
 # libraries.
@ -185,7 +185,8 @@ Patch13: glibc-fedora-localedata-rh61908.patch
 Patch15: glibc-rh1070416.patch
 Patch16: glibc-nscd-sysconfig.patch
 Patch17: glibc-cs-path.patch
-Patch18: glibc-c-utf8-locale.patch
+Patch18: glibc-c-utf8-strcmp_collation.patch
+Patch19: glibc-c-utf8-locale.patch
 Patch23: glibc-python3.patch
 Patch29: glibc-fedora-nsswitch.patch
 Patch30: glibc-deprecated-selinux-makedb.patch
@ -2242,6 +2243,9 @@ fi
 %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
 %changelog
+* Mon Jul 26 2021 Florian Weimer <fweimer@redhat.com> - 2.33.9000-52
+- Switch to new version of C.UTF-8 locale
 * Mon Jul 26 2021 Florian Weimer <fweimer@redhat.com> - 2.33.9000-51
 - Auto-sync with upstream branch master,
  commit ddcc612ce923038b867083a0c55d6e034951155a: