Make glibc.spec self-contained for parsing

Store the locales list as a Lua table in the spec file. Add Lua code and a new Python script, parse-SUPPORTED.py, to compute a common representation from it.
2020-10-14 15:19:39 +02:00 · 2020-10-14 15:19:39 +02:00 · a45fef2f84
commit a45fef2f84
parent 1bf34fb3df
4 changed files with 370 additions and 607 deletions
--- a/496
+++ b/496
@ -1,496 +0,0 @@
 # This file names the currently supported and somewhat tested locales.
 # If you have any additions please file a glibc bug report.
 SUPPORTED-LOCALES=\
 C.UTF-8/UTF-8 \
 aa_DJ.UTF-8/UTF-8 \
 aa_DJ/ISO-8859-1 \
 aa_ER/UTF-8 \
 aa_ER@saaho/UTF-8 \
 aa_ET/UTF-8 \
 af_ZA.UTF-8/UTF-8 \
 af_ZA/ISO-8859-1 \
 agr_PE/UTF-8 \
 ak_GH/UTF-8 \
 am_ET/UTF-8 \
 an_ES.UTF-8/UTF-8 \
 an_ES/ISO-8859-15 \
 anp_IN/UTF-8 \
 ar_AE.UTF-8/UTF-8 \
 ar_AE/ISO-8859-6 \
 ar_BH.UTF-8/UTF-8 \
 ar_BH/ISO-8859-6 \
 ar_DZ.UTF-8/UTF-8 \
 ar_DZ/ISO-8859-6 \
 ar_EG.UTF-8/UTF-8 \
 ar_EG/ISO-8859-6 \
 ar_IN/UTF-8 \
 ar_IQ.UTF-8/UTF-8 \
 ar_IQ/ISO-8859-6 \
 ar_JO.UTF-8/UTF-8 \
 ar_JO/ISO-8859-6 \
 ar_KW.UTF-8/UTF-8 \
 ar_KW/ISO-8859-6 \
 ar_LB.UTF-8/UTF-8 \
 ar_LB/ISO-8859-6 \
 ar_LY.UTF-8/UTF-8 \
 ar_LY/ISO-8859-6 \
 ar_MA.UTF-8/UTF-8 \
 ar_MA/ISO-8859-6 \
 ar_OM.UTF-8/UTF-8 \
 ar_OM/ISO-8859-6 \
 ar_QA.UTF-8/UTF-8 \
 ar_QA/ISO-8859-6 \
 ar_SA.UTF-8/UTF-8 \
 ar_SA/ISO-8859-6 \
 ar_SD.UTF-8/UTF-8 \
 ar_SD/ISO-8859-6 \
 ar_SS/UTF-8 \
 ar_SY.UTF-8/UTF-8 \
 ar_SY/ISO-8859-6 \
 ar_TN.UTF-8/UTF-8 \
 ar_TN/ISO-8859-6 \
 ar_YE.UTF-8/UTF-8 \
 ar_YE/ISO-8859-6 \
 ayc_PE/UTF-8 \
 az_AZ/UTF-8 \
 az_IR/UTF-8 \
 as_IN/UTF-8 \
 ast_ES.UTF-8/UTF-8 \
 ast_ES/ISO-8859-15 \
 be_BY.UTF-8/UTF-8 \
 be_BY/CP1251 \
 be_BY@latin/UTF-8 \
 bem_ZM/UTF-8 \
 ber_DZ/UTF-8 \
 ber_MA/UTF-8 \
 bg_BG.UTF-8/UTF-8 \
 bg_BG/CP1251 \
 bhb_IN.UTF-8/UTF-8 \
 bho_IN/UTF-8 \
 bho_NP/UTF-8 \
 bi_VU/UTF-8 \
 bn_BD/UTF-8 \
 bn_IN/UTF-8 \
 bo_CN/UTF-8 \
 bo_IN/UTF-8 \
 br_FR.UTF-8/UTF-8 \
 br_FR/ISO-8859-1 \
 br_FR@euro/ISO-8859-15 \
 brx_IN/UTF-8 \
 bs_BA.UTF-8/UTF-8 \
 bs_BA/ISO-8859-2 \
 byn_ER/UTF-8 \
 ca_AD.UTF-8/UTF-8 \
 ca_AD/ISO-8859-15 \
 ca_ES.UTF-8/UTF-8 \
 ca_ES/ISO-8859-1 \
 ca_ES@euro/ISO-8859-15 \
 ca_ES@valencia/UTF-8 \
 ca_FR.UTF-8/UTF-8 \
 ca_FR/ISO-8859-15 \
 ca_IT.UTF-8/UTF-8 \
 ca_IT/ISO-8859-15 \
 ce_RU/UTF-8 \
 chr_US/UTF-8 \
 ckb_IQ/UTF-8 \
 cmn_TW/UTF-8 \
 crh_UA/UTF-8 \
 cs_CZ.UTF-8/UTF-8 \
 cs_CZ/ISO-8859-2 \
 csb_PL/UTF-8 \
 cv_RU/UTF-8 \
 cy_GB.UTF-8/UTF-8 \
 cy_GB/ISO-8859-14 \
 da_DK.UTF-8/UTF-8 \
 da_DK/ISO-8859-1 \
 da_DK.ISO-8859-15/ISO-8859-15 \
 de_AT.UTF-8/UTF-8 \
 de_AT/ISO-8859-1 \
 de_AT@euro/ISO-8859-15 \
 de_BE.UTF-8/UTF-8 \
 de_BE/ISO-8859-1 \
 de_BE@euro/ISO-8859-15 \
 de_CH.UTF-8/UTF-8 \
 de_CH/ISO-8859-1 \
 de_DE.UTF-8/UTF-8 \
 de_DE/ISO-8859-1 \
 de_DE@euro/ISO-8859-15 \
 de_IT.UTF-8/UTF-8 \
 de_IT/ISO-8859-1 \
 de_LI.UTF-8/UTF-8 \
 de_LU.UTF-8/UTF-8 \
 de_LU/ISO-8859-1 \
 de_LU@euro/ISO-8859-15 \
 doi_IN/UTF-8 \
 dsb_DE/UTF-8 \
 dv_MV/UTF-8 \
 dz_BT/UTF-8 \
 el_GR.UTF-8/UTF-8 \
 el_GR/ISO-8859-7 \
 el_GR@euro/ISO-8859-7 \
 el_CY.UTF-8/UTF-8 \
 el_CY/ISO-8859-7 \
 en_AG/UTF-8 \
 en_AU.UTF-8/UTF-8 \
 en_AU/ISO-8859-1 \
 en_BW.UTF-8/UTF-8 \
 en_BW/ISO-8859-1 \
 en_CA.UTF-8/UTF-8 \
 en_CA/ISO-8859-1 \
 en_DK.UTF-8/UTF-8 \
 en_DK/ISO-8859-1 \
 en_GB.UTF-8/UTF-8 \
 en_GB/ISO-8859-1 \
 en_GB.ISO-8859-15/ISO-8859-15 \
 en_HK.UTF-8/UTF-8 \
 en_HK/ISO-8859-1 \
 en_IE.UTF-8/UTF-8 \
 en_IE/ISO-8859-1 \
 en_IE@euro/ISO-8859-15 \
 en_IL/UTF-8 \
 en_IN/UTF-8 \
 en_NG/UTF-8 \
 en_NZ.UTF-8/UTF-8 \
 en_NZ/ISO-8859-1 \
 en_PH.UTF-8/UTF-8 \
 en_PH/ISO-8859-1 \
 en_SC.UTF-8/UTF-8 \
 en_SG.UTF-8/UTF-8 \
 en_SG/ISO-8859-1 \
 en_US.UTF-8/UTF-8 \
 en_US/ISO-8859-1 \
 en_US.ISO-8859-15/ISO-8859-15 \
 en_ZA.UTF-8/UTF-8 \
 en_ZA/ISO-8859-1 \
 en_ZM/UTF-8 \
 en_ZW.UTF-8/UTF-8 \
 en_ZW/ISO-8859-1 \
 eo/UTF-8 \
 es_AR.UTF-8/UTF-8 \
 es_AR/ISO-8859-1 \
 es_BO.UTF-8/UTF-8 \
 es_BO/ISO-8859-1 \
 es_CL.UTF-8/UTF-8 \
 es_CL/ISO-8859-1 \
 es_CO.UTF-8/UTF-8 \
 es_CO/ISO-8859-1 \
 es_CR.UTF-8/UTF-8 \
 es_CR/ISO-8859-1 \
 es_CU/UTF-8 \
 es_DO.UTF-8/UTF-8 \
 es_DO/ISO-8859-1 \
 es_EC.UTF-8/UTF-8 \
 es_EC/ISO-8859-1 \
 es_ES.UTF-8/UTF-8 \
 es_ES/ISO-8859-1 \
 es_ES@euro/ISO-8859-15 \
 es_GT.UTF-8/UTF-8 \
 es_GT/ISO-8859-1 \
 es_HN.UTF-8/UTF-8 \
 es_HN/ISO-8859-1 \
 es_MX.UTF-8/UTF-8 \
 es_MX/ISO-8859-1 \
 es_NI.UTF-8/UTF-8 \
 es_NI/ISO-8859-1 \
 es_PA.UTF-8/UTF-8 \
 es_PA/ISO-8859-1 \
 es_PE.UTF-8/UTF-8 \
 es_PE/ISO-8859-1 \
 es_PR.UTF-8/UTF-8 \
 es_PR/ISO-8859-1 \
 es_PY.UTF-8/UTF-8 \
 es_PY/ISO-8859-1 \
 es_SV.UTF-8/UTF-8 \
 es_SV/ISO-8859-1 \
 es_US.UTF-8/UTF-8 \
 es_US/ISO-8859-1 \
 es_UY.UTF-8/UTF-8 \
 es_UY/ISO-8859-1 \
 es_VE.UTF-8/UTF-8 \
 es_VE/ISO-8859-1 \
 et_EE.UTF-8/UTF-8 \
 et_EE/ISO-8859-1 \
 et_EE.ISO-8859-15/ISO-8859-15 \
 eu_ES.UTF-8/UTF-8 \
 eu_ES/ISO-8859-1 \
 eu_ES@euro/ISO-8859-15 \
 fa_IR/UTF-8 \
 ff_SN/UTF-8 \
 fi_FI.UTF-8/UTF-8 \
 fi_FI/ISO-8859-1 \
 fi_FI@euro/ISO-8859-15 \
 fil_PH/UTF-8 \
 fo_FO.UTF-8/UTF-8 \
 fo_FO/ISO-8859-1 \
 fr_BE.UTF-8/UTF-8 \
 fr_BE/ISO-8859-1 \
 fr_BE@euro/ISO-8859-15 \
 fr_CA.UTF-8/UTF-8 \
 fr_CA/ISO-8859-1 \
 fr_CH.UTF-8/UTF-8 \
 fr_CH/ISO-8859-1 \
 fr_FR.UTF-8/UTF-8 \
 fr_FR/ISO-8859-1 \
 fr_FR@euro/ISO-8859-15 \
 fr_LU.UTF-8/UTF-8 \
 fr_LU/ISO-8859-1 \
 fr_LU@euro/ISO-8859-15 \
 fur_IT/UTF-8 \
 fy_NL/UTF-8 \
 fy_DE/UTF-8 \
 ga_IE.UTF-8/UTF-8 \
 ga_IE/ISO-8859-1 \
 ga_IE@euro/ISO-8859-15 \
 gd_GB.UTF-8/UTF-8 \
 gd_GB/ISO-8859-15 \
 gez_ER/UTF-8 \
 gez_ER@abegede/UTF-8 \
 gez_ET/UTF-8 \
 gez_ET@abegede/UTF-8 \
 gl_ES.UTF-8/UTF-8 \
 gl_ES/ISO-8859-1 \
 gl_ES@euro/ISO-8859-15 \
 gu_IN/UTF-8 \
 gv_GB.UTF-8/UTF-8 \
 gv_GB/ISO-8859-1 \
 ha_NG/UTF-8 \
 hak_TW/UTF-8 \
 he_IL.UTF-8/UTF-8 \
 he_IL/ISO-8859-8 \
 hi_IN/UTF-8 \
 hif_FJ/UTF-8 \
 hne_IN/UTF-8 \
 hr_HR.UTF-8/UTF-8 \
 hr_HR/ISO-8859-2 \
 hsb_DE/ISO-8859-2 \
 hsb_DE.UTF-8/UTF-8 \
 ht_HT/UTF-8 \
 hu_HU.UTF-8/UTF-8 \
 hu_HU/ISO-8859-2 \
 hy_AM/UTF-8 \
 hy_AM.ARMSCII-8/ARMSCII-8 \
 ia_FR/UTF-8 \
 id_ID.UTF-8/UTF-8 \
 id_ID/ISO-8859-1 \
 ig_NG/UTF-8 \
 ik_CA/UTF-8 \
 is_IS.UTF-8/UTF-8 \
 is_IS/ISO-8859-1 \
 it_CH.UTF-8/UTF-8 \
 it_CH/ISO-8859-1 \
 it_IT.UTF-8/UTF-8 \
 it_IT/ISO-8859-1 \
 it_IT@euro/ISO-8859-15 \
 iu_CA/UTF-8 \
 ja_JP.EUC-JP/EUC-JP \
 ja_JP.UTF-8/UTF-8 \
 ka_GE.UTF-8/UTF-8 \
 ka_GE/GEORGIAN-PS \
 kab_DZ/UTF-8 \
 kk_KZ.UTF-8/UTF-8 \
 kk_KZ/PT154 \
 kl_GL.UTF-8/UTF-8 \
 kl_GL/ISO-8859-1 \
 km_KH/UTF-8 \
 kn_IN/UTF-8 \
 ko_KR.EUC-KR/EUC-KR \
 ko_KR.UTF-8/UTF-8 \
 kok_IN/UTF-8 \
 ks_IN/UTF-8 \
 ks_IN@devanagari/UTF-8 \
 ku_TR.UTF-8/UTF-8 \
 ku_TR/ISO-8859-9 \
 kw_GB.UTF-8/UTF-8 \
 kw_GB/ISO-8859-1 \
 ky_KG/UTF-8 \
 lb_LU/UTF-8 \
 lg_UG.UTF-8/UTF-8 \
 lg_UG/ISO-8859-10 \
 li_BE/UTF-8 \
 li_NL/UTF-8 \
 lij_IT/UTF-8 \
 ln_CD/UTF-8 \
 lo_LA/UTF-8 \
 lt_LT.UTF-8/UTF-8 \
 lt_LT/ISO-8859-13 \
 lv_LV.UTF-8/UTF-8 \
 lv_LV/ISO-8859-13 \
 lzh_TW/UTF-8 \
 mag_IN/UTF-8 \
 mai_IN/UTF-8 \
 mai_NP/UTF-8 \
 mfe_MU/UTF-8 \
 mg_MG.UTF-8/UTF-8 \
 mg_MG/ISO-8859-15 \
 mhr_RU/UTF-8 \
 mi_NZ.UTF-8/UTF-8 \
 mi_NZ/ISO-8859-13 \
 miq_NI/UTF-8 \
 mjw_IN/UTF-8 \
 mk_MK.UTF-8/UTF-8 \
 mk_MK/ISO-8859-5 \
 ml_IN/UTF-8 \
 mn_MN/UTF-8 \
 mni_IN/UTF-8 \
 mnw_MM/UTF-8 \
 mr_IN/UTF-8 \
 ms_MY.UTF-8/UTF-8 \
 ms_MY/ISO-8859-1 \
 mt_MT.UTF-8/UTF-8 \
 mt_MT/ISO-8859-3 \
 my_MM/UTF-8 \
 nan_TW/UTF-8 \
 nan_TW@latin/UTF-8 \
 nb_NO.UTF-8/UTF-8 \
 nb_NO/ISO-8859-1 \
 nds_DE/UTF-8 \
 nds_NL/UTF-8 \
 ne_NP/UTF-8 \
 nhn_MX/UTF-8 \
 niu_NU/UTF-8 \
 niu_NZ/UTF-8 \
 nl_AW/UTF-8 \
 nl_BE.UTF-8/UTF-8 \
 nl_BE/ISO-8859-1 \
 nl_BE@euro/ISO-8859-15 \
 nl_NL.UTF-8/UTF-8 \
 nl_NL/ISO-8859-1 \
 nl_NL@euro/ISO-8859-15 \
 nn_NO.UTF-8/UTF-8 \
 nn_NO/ISO-8859-1 \
 nr_ZA/UTF-8 \
 nso_ZA/UTF-8 \
 oc_FR.UTF-8/UTF-8 \
 oc_FR/ISO-8859-1 \
 om_ET/UTF-8 \
 om_KE.UTF-8/UTF-8 \
 om_KE/ISO-8859-1 \
 or_IN/UTF-8 \
 os_RU/UTF-8 \
 pa_IN/UTF-8 \
 pa_PK/UTF-8 \
 pap_AW/UTF-8 \
 pap_CW/UTF-8 \
 pl_PL.UTF-8/UTF-8 \
 pl_PL/ISO-8859-2 \
 ps_AF/UTF-8 \
 pt_BR.UTF-8/UTF-8 \
 pt_BR/ISO-8859-1 \
 pt_PT.UTF-8/UTF-8 \
 pt_PT/ISO-8859-1 \
 pt_PT@euro/ISO-8859-15 \
 quz_PE/UTF-8 \
 raj_IN/UTF-8 \
 ro_RO.UTF-8/UTF-8 \
 ro_RO/ISO-8859-2 \
 ru_RU.KOI8-R/KOI8-R \
 ru_RU.UTF-8/UTF-8 \
 ru_RU/ISO-8859-5 \
 ru_UA.UTF-8/UTF-8 \
 ru_UA/KOI8-U \
 rw_RW/UTF-8 \
 sa_IN/UTF-8 \
 sah_RU/UTF-8 \
 sat_IN/UTF-8 \
 sc_IT/UTF-8 \
 sd_IN/UTF-8 \
 sd_IN@devanagari/UTF-8 \
 se_NO/UTF-8 \
 sgs_LT/UTF-8 \
 shn_MM/UTF-8 \
 shs_CA/UTF-8 \
 si_LK/UTF-8 \
 sid_ET/UTF-8 \
 sk_SK.UTF-8/UTF-8 \
 sk_SK/ISO-8859-2 \
 sl_SI.UTF-8/UTF-8 \
 sl_SI/ISO-8859-2 \
 sm_WS/UTF-8 \
 so_DJ.UTF-8/UTF-8 \
 so_DJ/ISO-8859-1 \
 so_ET/UTF-8 \
 so_KE.UTF-8/UTF-8 \
 so_KE/ISO-8859-1 \
 so_SO.UTF-8/UTF-8 \
 so_SO/ISO-8859-1 \
 sq_AL.UTF-8/UTF-8 \
 sq_AL/ISO-8859-1 \
 sq_MK/UTF-8 \
 sr_ME/UTF-8 \
 sr_RS/UTF-8 \
 sr_RS@latin/UTF-8 \
 ss_ZA/UTF-8 \
 st_ZA.UTF-8/UTF-8 \
 st_ZA/ISO-8859-1 \
 sv_FI.UTF-8/UTF-8 \
 sv_FI/ISO-8859-1 \
 sv_FI@euro/ISO-8859-15 \
 sv_SE.UTF-8/UTF-8 \
 sv_SE/ISO-8859-1 \
 sv_SE.ISO-8859-15/ISO-8859-15 \
 sw_KE/UTF-8 \
 sw_TZ/UTF-8 \
 szl_PL/UTF-8 \
 ta_IN/UTF-8 \
 ta_LK/UTF-8 \
 tcy_IN.UTF-8/UTF-8 \
 te_IN/UTF-8 \
 tg_TJ.UTF-8/UTF-8 \
 tg_TJ/KOI8-T \
 th_TH.UTF-8/UTF-8 \
 th_TH/TIS-620 \
 the_NP/UTF-8 \
 ti_ER/UTF-8 \
 ti_ET/UTF-8 \
 tig_ER/UTF-8 \
 tk_TM/UTF-8 \
 tl_PH.UTF-8/UTF-8 \
 tl_PH/ISO-8859-1 \
 tn_ZA/UTF-8 \
 to_TO/UTF-8 \
 tpi_PG/UTF-8 \
 tr_CY.UTF-8/UTF-8 \
 tr_CY/ISO-8859-9 \
 tr_TR.UTF-8/UTF-8 \
 tr_TR/ISO-8859-9 \
 ts_ZA/UTF-8 \
 tt_RU/UTF-8 \
 tt_RU@iqtelif/UTF-8 \
 ug_CN/UTF-8 \
 uk_UA.UTF-8/UTF-8 \
 uk_UA/KOI8-U \
 unm_US/UTF-8 \
 ur_IN/UTF-8 \
 ur_PK/UTF-8 \
 uz_UZ.UTF-8/UTF-8 \
 uz_UZ/ISO-8859-1 \
 uz_UZ@cyrillic/UTF-8 \
 ve_ZA/UTF-8 \
 vi_VN/UTF-8 \
 wa_BE/ISO-8859-1 \
 wa_BE@euro/ISO-8859-15 \
 wa_BE.UTF-8/UTF-8 \
 wae_CH/UTF-8 \
 wal_ET/UTF-8 \
 wo_SN/UTF-8 \
 xh_ZA.UTF-8/UTF-8 \
 xh_ZA/ISO-8859-1 \
 yi_US.UTF-8/UTF-8 \
 yi_US/CP1255 \
 yo_NG/UTF-8 \
 yue_HK/UTF-8 \
 yuw_PG/UTF-8 \
 zh_CN.GB18030/GB18030 \
 zh_CN.GBK/GBK \
 zh_CN.UTF-8/UTF-8 \
 zh_CN/GB2312 \
 zh_HK.UTF-8/UTF-8 \
 zh_HK/BIG5-HKSCS \
 zh_SG.UTF-8/UTF-8 \
 zh_SG.GBK/GBK \
 zh_SG/GB2312 \
 zh_TW.EUC-TW/EUC-TW \
 zh_TW.UTF-8/UTF-8 \
 zh_TW/BIG5 \
 zu_ZA.UTF-8/UTF-8 \
 zu_ZA/ISO-8859-1 \
--- a/convnames.py
+++ b/convnames.py
@ -1,18 +0,0 @@
 #!/usr/bin/python3
 # Helper invoked by glibc.spec (through Lua) to turn language codes
 # into language names.  The lookup is delegated to langtable, whose
 # data is a harmonization of CLDR and glibc lang_name data.
 #
 # Every command-line argument is treated as one language code; one
 # line is printed per argument, in argument order.
 import sys

 try:
     import langtable
 except ImportError:
     # Without langtable nothing is translated: codes are echoed back.
     langtable = None


 def _display_name(code):
     """Return the English name for *code*, or *code* itself as fallback."""
     if not langtable:
         return code
     translated = langtable.language_name(languageId=code, languageIdQuery='en')
     # langtable may have no name for this code; fall back to the code.
     return translated or code


 for code in sys.argv[1:]:
     print(_display_name(code))
--- a/glibc.spec
+++ b/glibc.spec
@ -96,7 +96,7 @@
 Summary: The GNU libc libraries
 Name: glibc
 Version: %{glibcversion}
-Release: 8%{?dist}
+Release: 9%{?dist}
 # In general, GPLv2+ is used by programs, LGPLv2+ is used for
 # libraries.
@ -130,20 +130,9 @@ Source0: %{?glibc_release_url}%{glibcsrcdir}.tar.xz
 Source1: nscd.conf
 Source2: bench.mk
 Source3: glibc-bench-compare
-# A copy of localedata/SUPPORTED in the Source0 tarball.  The
+Source11: parse-SUPPORTED.py
 # SUPPORTED file is used below to generate the list of locale
 # packages, using a Lua snippet.
 # When the upstream SUPPORTED is out of sync with our copy, the
 # prep phase will fail and you will need to update the local
 # copy.
 Source11: SUPPORTED
 # Include in the source RPM for reference.
 Source12: ChangeLog.old
 # Provide ISO language code to name translation using Python's
 # langtable. The langtable data is maintained by the Fedora
 # i18n team and is a harmonization of CLDR and glibc lang_name
 # data in a more accessible API (also used by Anaconda).
 Source13: convnames.py
 ##############################################################################
 # Patches:
@ -238,7 +227,6 @@ BuildRequires: systemd
 # distributions, python3 does not actually install /usr/bin/python3,
 # so we also depend on python3-devel.
 BuildRequires: python3 python3-devel
 BuildRequires: python3dist(langtable)
 # This GCC version is needed for -fstack-clash-protection support.
 BuildRequires: gcc >= 7.2.1-6
@ -432,84 +420,331 @@ If you are building custom locales you will most likely use
 these sources as the basis for your new locale.
 %{lua:
-- Array of languages (ISO-639 codes).
+-- To make lua-mode happy: '
 local languages = {}
 -- Dictionary from language codes (as in the languages array) to arrays
 -- of regions.
 local supplements = {}
 do
   -- Parse the SUPPORTED file.  Eliminate duplicates.
   local lang_region_seen = {}
   for line in io.lines(rpm.expand("%{SOURCE11}")) do
      -- Match lines which contain a language (eo) or language/region
      -- (en_US) strings.
      local lang_region = string.match(line, "^([a-z][^/@.]+)")
      if lang_region ~= nil then
 	 if lang_region_seen[lang_region] == nil then
 	    lang_region_seen[lang_region] = true
-	    -- Split language/region pair.
+-- List of supported locales.  This is used to generate the langpack
-	    local lang, region = string.match(lang_region, "^(.+)_(.+)")
+-- subpackages below.  This table needs adjustments if the set of
-	    if lang == nil then
+-- glibc locales changes.  "code" is the glibc code for the language
-	       -- Region is missing, use only the language.
+-- (before the "_").  "name" is the English translation of the language
-	       lang = lang_region
+-- name (for use in subpackage descriptions).  "regions" is a table of
-	    end
+-- variant specifiers (after the "_", excluding "@" and "."
-	    local suppl = supplements[lang]
+-- variants/charset specifiers).  The table must be sorted by the code
-	    if suppl == nil then
+-- field, and the regions table must be sorted as well.
-	       suppl = {}
+--
-	       supplements[lang] = suppl
+-- English translations of language names can be obtained using (for
-	       -- New language not seen before.
+-- the "aa" language in this example):
-	       languages[#languages + 1] = lang
+--
-	    end
+-- python3 -c 'import langtable; print(langtable.language_name("aa", languageIdQuery="en"))'
 	    if region ~= nil then
 	       -- New region because of the check against
 	       -- lang_region_seen above.
 	       suppl[#suppl + 1] = region
 	    end
 	 end
      end
   end
   -- Sort for determinism: both the languages array and every
   -- per-language regions array are filled in SUPPORTED file order.
   table.sort(languages)
   for _, supples in pairs(supplements) do
      -- Sort the regions array of this language.  (Sorting the
      -- supplements dictionary itself would do nothing useful: it is
      -- keyed by language code and has no array part.)
      table.sort(supples)
   end
 end
-- Compute the language names
+local locales =  {
-local langnames = {}
+  { code="aa", name="Afar", regions={ "DJ", "ER", "ET" } },
-local python3 = io.open('/usr/bin/python3', 'r')
+  { code="af", name="Afrikaans", regions={ "ZA" } },
-if python3 then
+  { code="agr", name="Aguaruna", regions={ "PE" } },
-   python3:close()
+  { code="ak", name="Akan", regions={ "GH" } },
-   local args = table.concat(languages, ' ')
+  { code="am", name="Amharic", regions={ "ET" } },
-   local file = io.popen(rpm.expand("%{SOURCE13}") .. ' ' .. args)
+  { code="an", name="Aragonese", regions={ "ES" } },
-   while true do
+  { code="anp", name="Angika", regions={ "IN" } },
-       line = file:read()
+  {
-       if line == nil then break end
+    code="ar",
-       langnames[#langnames + 1] = line
+    name="Arabic",
-   end
+    regions={
-   file:close()
+      "AE",
      "BH",
      "DZ",
      "EG",
      "IN",
      "IQ",
      "JO",
      "KW",
      "LB",
      "LY",
      "MA",
      "OM",
      "QA",
      "SA",
      "SD",
      "SS",
      "SY",
      "TN",
      "YE" 
    } 
  },
  { code="as", name="Assamese", regions={ "IN" } },
  { code="ast", name="Asturian", regions={ "ES" } },
  { code="ayc", name="Southern Aymara", regions={ "PE" } },
  { code="az", name="Azerbaijani", regions={ "AZ", "IR" } },
  { code="be", name="Belarusian", regions={ "BY" } },
  { code="bem", name="Bemba", regions={ "ZM" } },
  { code="ber", name="Berber", regions={ "DZ", "MA" } },
  { code="bg", name="Bulgarian", regions={ "BG" } },
  { code="bhb", name="Bhili", regions={ "IN" } },
  { code="bho", name="Bhojpuri", regions={ "IN", "NP" } },
  { code="bi", name="Bislama", regions={ "VU" } },
  { code="bn", name="Bangla", regions={ "BD", "IN" } },
  { code="bo", name="Tibetan", regions={ "CN", "IN" } },
  { code="br", name="Breton", regions={ "FR" } },
  { code="brx", name="Bodo", regions={ "IN" } },
  { code="bs", name="Bosnian", regions={ "BA" } },
  { code="byn", name="Blin", regions={ "ER" } },
  { code="ca", name="Catalan", regions={ "AD", "ES", "FR", "IT" } },
  { code="ce", name="Chechen", regions={ "RU" } },
  { code="chr", name="Cherokee", regions={ "US" } },
  { code="ckb", name="Central Kurdish", regions={ "IQ" } },
  { code="cmn", name="Mandarin Chinese", regions={ "TW" } },
  { code="crh", name="Crimean Turkish", regions={ "UA" } },
  { code="cs", name="Czech", regions={ "CZ" } },
  { code="csb", name="Kashubian", regions={ "PL" } },
  { code="cv", name="Chuvash", regions={ "RU" } },
  { code="cy", name="Welsh", regions={ "GB" } },
  { code="da", name="Danish", regions={ "DK" } },
  {
    code="de",
    name="German",
    regions={ "AT", "BE", "CH", "DE", "IT", "LI", "LU" } 
  },
  { code="doi", name="Dogri", regions={ "IN" } },
  { code="dsb", name="Lower Sorbian", regions={ "DE" } },
  { code="dv", name="Divehi", regions={ "MV" } },
  { code="dz", name="Dzongkha", regions={ "BT" } },
  { code="el", name="Greek", regions={ "CY", "GR" } },
  {
    code="en",
    name="English",
    regions={
      "AG",
      "AU",
      "BW",
      "CA",
      "DK",
      "GB",
      "HK",
      "IE",
      "IL",
      "IN",
      "NG",
      "NZ",
      "PH",
      "SC",
      "SG",
      "US",
      "ZA",
      "ZM",
      "ZW" 
    } 
  },
  { code="eo", name="Esperanto", regions={} },
  {
    code="es",
    name="Spanish",
    regions={
      "AR",
      "BO",
      "CL",
      "CO",
      "CR",
      "CU",
      "DO",
      "EC",
      "ES",
      "GT",
      "HN",
      "MX",
      "NI",
      "PA",
      "PE",
      "PR",
      "PY",
      "SV",
      "US",
      "UY",
      "VE" 
    } 
  },
  { code="et", name="Estonian", regions={ "EE" } },
  { code="eu", name="Basque", regions={ "ES" } },
  { code="fa", name="Persian", regions={ "IR" } },
  { code="ff", name="Fulah", regions={ "SN" } },
  { code="fi", name="Finnish", regions={ "FI" } },
  { code="fil", name="Filipino", regions={ "PH" } },
  { code="fo", name="Faroese", regions={ "FO" } },
  { code="fr", name="French", regions={ "BE", "CA", "CH", "FR", "LU" } },
  { code="fur", name="Friulian", regions={ "IT" } },
  { code="fy", name="Western Frisian", regions={ "DE", "NL" } },
  { code="ga", name="Irish", regions={ "IE" } },
  { code="gd", name="Scottish Gaelic", regions={ "GB" } },
  { code="gez", name="Geez", regions={ "ER", "ET" } },
  { code="gl", name="Galician", regions={ "ES" } },
  { code="gu", name="Gujarati", regions={ "IN" } },
  { code="gv", name="Manx", regions={ "GB" } },
  { code="ha", name="Hausa", regions={ "NG" } },
  { code="hak", name="Hakka Chinese", regions={ "TW" } },
  { code="he", name="Hebrew", regions={ "IL" } },
  { code="hi", name="Hindi", regions={ "IN" } },
  { code="hif", name="Fiji Hindi", regions={ "FJ" } },
  { code="hne", name="Chhattisgarhi", regions={ "IN" } },
  { code="hr", name="Croatian", regions={ "HR" } },
  { code="hsb", name="Upper Sorbian", regions={ "DE" } },
  { code="ht", name="Haitian Creole", regions={ "HT" } },
  { code="hu", name="Hungarian", regions={ "HU" } },
  { code="hy", name="Armenian", regions={ "AM" } },
  { code="ia", name="Interlingua", regions={ "FR" } },
  { code="id", name="Indonesian", regions={ "ID" } },
  { code="ig", name="Igbo", regions={ "NG" } },
  { code="ik", name="Inupiaq", regions={ "CA" } },
  { code="is", name="Icelandic", regions={ "IS" } },
  { code="it", name="Italian", regions={ "CH", "IT" } },
  { code="iu", name="Inuktitut", regions={ "CA" } },
  { code="ja", name="Japanese", regions={ "JP" } },
  { code="ka", name="Georgian", regions={ "GE" } },
  { code="kab", name="Kabyle", regions={ "DZ" } },
  { code="kk", name="Kazakh", regions={ "KZ" } },
  { code="kl", name="Kalaallisut", regions={ "GL" } },
  { code="km", name="Khmer", regions={ "KH" } },
  { code="kn", name="Kannada", regions={ "IN" } },
  { code="ko", name="Korean", regions={ "KR" } },
  { code="kok", name="Konkani", regions={ "IN" } },
  { code="ks", name="Kashmiri", regions={ "IN" } },
  { code="ku", name="Kurdish", regions={ "TR" } },
  { code="kw", name="Cornish", regions={ "GB" } },
  { code="ky", name="Kyrgyz", regions={ "KG" } },
  { code="lb", name="Luxembourgish", regions={ "LU" } },
  { code="lg", name="Ganda", regions={ "UG" } },
  { code="li", name="Limburgish", regions={ "BE", "NL" } },
  { code="lij", name="Ligurian", regions={ "IT" } },
  { code="ln", name="Lingala", regions={ "CD" } },
  { code="lo", name="Lao", regions={ "LA" } },
  { code="lt", name="Lithuanian", regions={ "LT" } },
  { code="lv", name="Latvian", regions={ "LV" } },
  { code="lzh", name="Literary Chinese", regions={ "TW" } },
  { code="mag", name="Magahi", regions={ "IN" } },
  { code="mai", name="Maithili", regions={ "IN", "NP" } },
  { code="mfe", name="Morisyen", regions={ "MU" } },
  { code="mg", name="Malagasy", regions={ "MG" } },
  { code="mhr", name="Meadow Mari", regions={ "RU" } },
  { code="mi", name="Maori", regions={ "NZ" } },
  { code="miq", name="Miskito", regions={ "NI" } },
  { code="mjw", name="Karbi", regions={ "IN" } },
  { code="mk", name="Macedonian", regions={ "MK" } },
  { code="ml", name="Malayalam", regions={ "IN" } },
  { code="mn", name="Mongolian", regions={ "MN" } },
  { code="mni", name="Manipuri", regions={ "IN" } },
  { code="mnw", name="Mon", regions={ "MM" } },
  { code="mr", name="Marathi", regions={ "IN" } },
  { code="ms", name="Malay", regions={ "MY" } },
  { code="mt", name="Maltese", regions={ "MT" } },
  { code="my", name="Burmese", regions={ "MM" } },
  { code="nan", name="Min Nan Chinese", regions={ "TW" } },
  { code="nb", name="Norwegian Bokmål", regions={ "NO" } },
  { code="nds", name="Low German", regions={ "DE", "NL" } },
  { code="ne", name="Nepali", regions={ "NP" } },
  { code="nhn", name="Tlaxcala-Puebla Nahuatl", regions={ "MX" } },
  { code="niu", name="Niuean", regions={ "NU", "NZ" } },
  { code="nl", name="Dutch", regions={ "AW", "BE", "NL" } },
  { code="nn", name="Norwegian Nynorsk", regions={ "NO" } },
  { code="nr", name="South Ndebele", regions={ "ZA" } },
  { code="nso", name="Northern Sotho", regions={ "ZA" } },
  { code="oc", name="Occitan", regions={ "FR" } },
  { code="om", name="Oromo", regions={ "ET", "KE" } },
  { code="or", name="Odia", regions={ "IN" } },
  { code="os", name="Ossetic", regions={ "RU" } },
  { code="pa", name="Punjabi", regions={ "IN", "PK" } },
  { code="pap", name="Papiamento", regions={ "AW", "CW" } },
  { code="pl", name="Polish", regions={ "PL" } },
  { code="ps", name="Pashto", regions={ "AF" } },
  { code="pt", name="Portuguese", regions={ "BR", "PT" } },
  { code="quz", name="Cusco Quechua", regions={ "PE" } },
  { code="raj", name="Rajasthani", regions={ "IN" } },
  { code="ro", name="Romanian", regions={ "RO" } },
  { code="ru", name="Russian", regions={ "RU", "UA" } },
  { code="rw", name="Kinyarwanda", regions={ "RW" } },
  { code="sa", name="Sanskrit", regions={ "IN" } },
  { code="sah", name="Sakha", regions={ "RU" } },
  { code="sat", name="Santali", regions={ "IN" } },
  { code="sc", name="Sardinian", regions={ "IT" } },
  { code="sd", name="Sindhi", regions={ "IN" } },
  { code="se", name="Northern Sami", regions={ "NO" } },
  { code="sgs", name="Samogitian", regions={ "LT" } },
  { code="shn", name="Shan", regions={ "MM" } },
  { code="shs", name="Shuswap", regions={ "CA" } },
  { code="si", name="Sinhala", regions={ "LK" } },
  { code="sid", name="Sidamo", regions={ "ET" } },
  { code="sk", name="Slovak", regions={ "SK" } },
  { code="sl", name="Slovenian", regions={ "SI" } },
  { code="sm", name="Samoan", regions={ "WS" } },
  { code="so", name="Somali", regions={ "DJ", "ET", "KE", "SO" } },
  { code="sq", name="Albanian", regions={ "AL", "MK" } },
  { code="sr", name="Serbian", regions={ "ME", "RS" } },
  { code="ss", name="Swati", regions={ "ZA" } },
  { code="st", name="Southern Sotho", regions={ "ZA" } },
  { code="sv", name="Swedish", regions={ "FI", "SE" } },
  { code="sw", name="Swahili", regions={ "KE", "TZ" } },
  { code="szl", name="Silesian", regions={ "PL" } },
  { code="ta", name="Tamil", regions={ "IN", "LK" } },
  { code="tcy", name="Tulu", regions={ "IN" } },
  { code="te", name="Telugu", regions={ "IN" } },
  { code="tg", name="Tajik", regions={ "TJ" } },
  { code="th", name="Thai", regions={ "TH" } },
  { code="the", name="Chitwania Tharu", regions={ "NP" } },
  { code="ti", name="Tigrinya", regions={ "ER", "ET" } },
  { code="tig", name="Tigre", regions={ "ER" } },
  { code="tk", name="Turkmen", regions={ "TM" } },
  { code="tl", name="Tagalog", regions={ "PH" } },
  { code="tn", name="Tswana", regions={ "ZA" } },
  { code="to", name="Tongan", regions={ "TO" } },
  { code="tpi", name="Tok Pisin", regions={ "PG" } },
  { code="tr", name="Turkish", regions={ "CY", "TR" } },
  { code="ts", name="Tsonga", regions={ "ZA" } },
  { code="tt", name="Tatar", regions={ "RU" } },
  { code="ug", name="Uyghur", regions={ "CN" } },
  { code="uk", name="Ukrainian", regions={ "UA" } },
  { code="unm", name="Unami language", regions={ "US" } },
  { code="ur", name="Urdu", regions={ "IN", "PK" } },
  { code="uz", name="Uzbek", regions={ "UZ" } },
  { code="ve", name="Venda", regions={ "ZA" } },
  { code="vi", name="Vietnamese", regions={ "VN" } },
  { code="wa", name="Walloon", regions={ "BE" } },
  { code="wae", name="Walser", regions={ "CH" } },
  { code="wal", name="Wolaytta", regions={ "ET" } },
  { code="wo", name="Wolof", regions={ "SN" } },
  { code="xh", name="Xhosa", regions={ "ZA" } },
  { code="yi", name="Yiddish", regions={ "US" } },
  { code="yo", name="Yoruba", regions={ "NG" } },
  { code="yue", name="Cantonese", regions={ "HK" } },
  { code="yuw", name="Yau", regions={ "PG" } },
  { code="zh", name="Mandarin Chinese", regions={ "CN", "HK", "SG", "TW" } },
  { code="zu", name="Zulu", regions={ "ZA" } } 
 }
 -- Prints a list of LANGUAGE "_" REGION pairs.  The output is expected
 -- to be identical to the output of parse-SUPPORTED.py.  Called from the %%prep section.
 function print_locale_pairs()
   for i = 1, #locales do
      local locale = locales[i]
      if #locale.regions == 0 then
 	 print(locale.code .. "\n")
      else
-   for i = 1, #languages do
+	 for j = 1, #locale.regions do
-      langnames[#langnames + 1] = languages[i]
+	    print(locale.code .. "_" .. locale.regions[j] .. "\n")
 	 end
      end
   end
 end
-- Compute the Supplements: list for a language, based on the regions.
+local function compute_supplements(locale)
-local function compute_supplements(lang)
+   local lang = locale.code
   local regions = locale.regions
   result = "langpacks-core-" .. lang
   regions = supplements[lang]
   if regions ~= nil then
   for i = 1, #regions do
      result = result .. " or langpacks-core-" .. lang .. "_" .. regions[i]
   end
   end
   return result
 end
 -- Emit the definition of a language pack package.
-local function lang_package(lang, langname)
+local function lang_package(locale)
-   local suppl = compute_supplements(lang)
+   local lang = locale.code
   local langname = locale.name
   local suppl = compute_supplements(locale)
   print(rpm.expand([[
 %package langpack-]]..lang..[[
@ -528,8 +763,8 @@ to support the ]]..langname..[[ language in your applications.
 ]]))
 end
-for i = 1, #languages do
+for i = 1, #locales do
-   lang_package(languages[i], langnames[i])
+   lang_package(locales[i])
 end
 }
@ -748,17 +983,16 @@ touch `find . -name configure`
 # Ensure *-kw.h files are current to prevent regenerating them.
 touch locale/programs/*-kw.h
-# Verify that our copy of localedata/SUPPORTED matches the glibc
+# Verify that the glibc localedata/SUPPORTED file is compatible with the locales table
-# version.
+# in the spec file.
-#
+set +x
-# The separate file copy is used by the Lua parser above.
+echo '%{lua: print_locale_pairs()}' > localedata/SUPPORTED.spec
-# Patches or new upstream versions may change the list of locales,
+set -x
-# which changes the set of langpacks we need to build.  Verify the
+python3 %{SOURCE11} localedata/SUPPORTED > localedata/SUPPORTED.glibc
-# differences then update the copy of SUPPORTED.  This approach has
+diff -u \
-# two purposes: (a) avoid spurious changes to the set of langpacks,
+  --label "spec file" localedata/SUPPORTED.spec \
-# and (b) the Lua snippet can use a fully patched-up version
+  --label "glibc localedata/SUPPORTED" localedata/SUPPORTED.glibc
-# of the localedata/SUPPORTED file.
+rm localedata/SUPPORTED.spec localedata/SUPPORTED.glibc
 diff -u %{SOURCE11} localedata/SUPPORTED
 ##############################################################################
 # Build glibc...
@ -2019,6 +2253,9 @@ fi
 %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
 %changelog
 * Wed Oct 14 2020 Florian Weimer <fweimer@redhat.com> - 2.32.9000-9
 - Make glibc.spec self-contained (#1887097)
 * Thu Oct 08 2020 Arjun Shankar <arjun@redhat.com> - 2.32.9000-8
 - Drop glibc-fix-float128-benchtests.patch; applied upstream.
 - Auto-sync with upstream branch master,
--- a/parse-SUPPORTED.py
+++ b/parse-SUPPORTED.py
@ -0,0 +1,40 @@
 #!/usr/bin/python3
 #
 # This script turns localedata/SUPPORTED (whose path is passed as the
 # first argument) into a normalized list of LANGUAGE "_" REGION pairs.
 # (If there is no REGION defined, only LANGUAGE is used.)  The list
 # is written to standard output, with one element per line.
 import sys


 def parse_supported(lines):
     """Return the sorted, de-duplicated locale names found in *lines*.

     *lines* is an iterable of text lines in the format of glibc's
     localedata/SUPPORTED file, each including its trailing newline.
     Comment lines (starting with "#") and the "SUPPORTED-LOCALES=\\"
     prefix line are skipped.  Every other line must look like
     "NAME/CHARSET \\"; the charset (".xxx") and variant ("@xxx")
     specifiers are stripped from NAME.  The "C" locale is dropped
     because it does not correspond to a language.

     Raises IOError if a locale line lacks the " \\" continuation or
     the "/" separator.
     """
     seen = set()  # Names seen so far.  Used to suppress duplicates.
     for line in lines:
         if line.startswith("#") or line == "SUPPORTED-LOCALES=\\\n":
             # Comment or prefix.
             continue
         if not line.endswith(" \\\n"):
             raise IOError("line without continuation: " + repr(line))
         spec, slash, _charset = line.partition("/")
         if not slash:
             raise IOError("line without slash: " + repr(line))
         for separator in ".@":
             # Strip charset, variant specifiers.
             spec = spec.partition(separator)[0]
         seen.add(spec)
     # The C locale does not correspond to a language.  Use discard so
     # that an input without any C locale is not treated as an error.
     seen.discard("C")
     # The glibc source file is not sorted.
     return sorted(seen)


 def main(argv):
     """Print the normalized locale list for the file named in argv[1]."""
     if len(argv) != 2:
         sys.exit("usage: " + argv[0] + " path/to/localedata/SUPPORTED")
     # Do not depend on the locale of the build environment for decoding.
     with open(argv[1], encoding="utf-8") as inp:
         specs = parse_supported(inp)
     for spec in specs:
         print(spec)
     print()  # The Lua generator produces a trailing newline.


 if __name__ == "__main__":
     main(sys.argv)