upgrade: be more lenient in locale comparison

In F20+, upgrades should succeed when only the spelling of the locale name has changed, e.g. from en_US.utf8 to en_US.UTF-8. Resolves: #1007802 Version: 9.3.5-7
2014-10-20 11:47:13 +02:00 · 2014-10-20 11:47:13 +02:00 · 7fcd42641d
commit 7fcd42641d
parent c88f76d540
3 changed files with 177 additions and 1 deletions
--- a/postgresql-upgrade-locale-spelling-2.patch
+++ b/postgresql-upgrade-locale-spelling-2.patch
@ -0,0 +1,40 @@
+From cd3e0071b8c9e082f5fe903a019d4e474be98e57 Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl@sss.pgh.pa.us>
+Date: Fri, 31 Jan 2014 02:03:30 -0500
+Subject: [PATCH] Allow unrecognized encoding names in locales, as long as
+ they're the same.
+
+The buildfarm says commit 58274728fb8e087049df67c0eee903d9743fdeda doesn't
+work so well on Windows.  This is because the encoding part of Windows
+locale names can be just a code page number, eg "1252", which we don't
+consider to be a valid encoding name.  Add a check to accept encoding
+parts that are case-insensitively string equal; this at least ensures
+that the new code doesn't reject any cases that the old code allowed.
+---
+ contrib/pg_upgrade/check.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
+index 58c2d16..faeaff5 100644
+--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
+@@ -447,8 +447,14 @@ equivalent_locale(const char *loca, const char *locb)
+ 	if (!chara || !charb)
+ 		return (pg_strcasecmp(loca, locb) == 0);
+ 
+-	/* Compare the encoding parts. */
+-	if (!equivalent_encoding(chara + 1, charb + 1))
+	/*
+	 * Compare the encoding parts.	Windows tends to use code page numbers for
+	 * the encoding part, which equivalent_encoding() won't like, so accept if
+	 * the strings are case-insensitive equal; otherwise use
+	 * equivalent_encoding() to compare.
+	 */
+	if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
+		!equivalent_encoding(chara + 1, charb + 1))
+ 		return false;
+ 
+ 	/*
+-- 
+1.9.3
+
--- a/postgresql-upgrade-locale-spelling.patch
+++ b/postgresql-upgrade-locale-spelling.patch
@ -0,0 +1,129 @@
+From 58274728fb8e087049df67c0eee903d9743fdeda Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl@sss.pgh.pa.us>
+Date: Thu, 30 Jan 2014 19:07:06 -0500
+Subject: [PATCH] Be forgiving of variant spellings of locale names in
+ pg_upgrade.
+
+Even though the server tries to canonicalize stored locale names, the
+platform often doesn't cooperate, so it's entirely possible that one DB
+thinks its locale is, say, "en_US.UTF-8" while the other has "en_US.utf8".
+Rather than failing, we should try to allow this where it's clearly OK.
+
+There is already pretty robust encoding lookup in encnames.c, so make
+use of that to compare the encoding parts of the names.  The locale
+identifier parts are just compared case-insensitively, which we were
+already doing.  The major problem known to exist in the field is variant
+encoding-name spellings, so hopefully this will be Good Enough.  If not,
+we can try being even laxer.
+
+Pavel Raiskup, reviewed by Rushabh Lathia
+---
+ contrib/pg_upgrade/check.c | 66 +++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 63 insertions(+), 3 deletions(-)
+
+diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
+index 794d22c..58c2d16 100644
+--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
+@@ -9,6 +9,7 @@
+ 
+ #include "postgres_fe.h"
+ 
+#include "mb/pg_wchar.h"
+ #include "pg_upgrade.h"
+ 
+ 
+@@ -16,6 +17,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
+ static void check_new_cluster_is_empty(void);
+ static void check_locale_and_encoding(ControlData *oldctrl,
+ 						  ControlData *newctrl);
+static bool equivalent_locale(const char *loca, const char *locb);
+static bool equivalent_encoding(const char *chara, const char *charb);
+ static void check_is_super_user(ClusterInfo *cluster);
+ static void check_for_prepared_transactions(ClusterInfo *cluster);
+ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
+@@ -412,21 +415,78 @@ check_locale_and_encoding(ControlData *oldctrl,
+ 	 * They also often use inconsistent hyphenation, which we cannot fix, e.g.
+ 	 * UTF-8 vs. UTF8, so at least we display the mismatching values.
+ 	 */
+-	if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
+	if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
+ 		pg_log(PG_FATAL,
+ 		 "lc_collate cluster values do not match:  old \"%s\", new \"%s\"\n",
+ 			   oldctrl->lc_collate, newctrl->lc_collate);
+-	if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
+	if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
+ 		pg_log(PG_FATAL,
+ 		   "lc_ctype cluster values do not match:  old \"%s\", new \"%s\"\n",
+ 			   oldctrl->lc_ctype, newctrl->lc_ctype);
+-	if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
+	if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
+ 		pg_log(PG_FATAL,
+ 		   "encoding cluster values do not match:  old \"%s\", new \"%s\"\n",
+ 			   oldctrl->encoding, newctrl->encoding);
+ }
+ 
+ 
+/*
+ * equivalent_locale()
+ *
+ * Best effort locale-name comparison.  Return false if we are not 100% sure
+ * the locales are equivalent.
+ */
+static bool
+equivalent_locale(const char *loca, const char *locb)
+{
+	const char *chara = strrchr(loca, '.');
+	const char *charb = strrchr(locb, '.');
+	int			lencmp;
+
+	/* If they don't both contain an encoding part, just do strcasecmp(). */
+	if (!chara || !charb)
+		return (pg_strcasecmp(loca, locb) == 0);
+
+	/* Compare the encoding parts. */
+	if (!equivalent_encoding(chara + 1, charb + 1))
+		return false;
+
+	/*
+	 * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
+	 *
+	 * It's tempting to ignore non-alphanumeric chars here, but for now it's
+	 * not clear that that's necessary; just do case-insensitive comparison.
+	 */
+	lencmp = chara - loca;
+	if (lencmp != charb - locb)
+		return false;
+
+	return (pg_strncasecmp(loca, locb, lencmp) == 0);
+}
+
+/*
+ * equivalent_encoding()
+ *
+ * Best effort encoding-name comparison.  Return true only if the encodings
+ * are valid server-side encodings and known equivalent.
+ *
+ * Because the lookup in pg_valid_server_encoding() does case folding and
+ * ignores non-alphanumeric characters, this will recognize many popular
+ * variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
+ */
+static bool
+equivalent_encoding(const char *chara, const char *charb)
+{
+	int			enca = pg_valid_server_encoding(chara);
+	int			encb = pg_valid_server_encoding(charb);
+
+	if (enca < 0 || encb < 0)
+		return false;
+
+	return (enca == encb);
+}
+
+
+ static void
+ check_new_cluster_is_empty(void)
+ {
+-- 
+1.9.3
+
--- a/postgresql.spec
+++ b/postgresql.spec
@ -67,7 +67,7 @@ Summary: PostgreSQL client programs
 Name: postgresql
 %global majorversion 9.3
 Version: 9.3.5
-Release: 6%{?dist}
+Release: 7%{?dist}

 # The PostgreSQL license is very similar to other MIT licenses, but the OSI
 # recognizes it as an independent license, so we do as well.
@ -119,6 +119,8 @@ Patch3: postgresql-perl-rpath.patch
 Patch4: postgresql-config-comment.patch
 Patch5: postgresql-var-run-socket.patch
 Patch6: postgresql-man.patch
+Patch7: postgresql-upgrade-locale-spelling.patch
+Patch8: postgresql-upgrade-locale-spelling-2.patch

 BuildRequires: perl(ExtUtils::MakeMaker) glibc-devel bison flex gawk help2man
 BuildRequires: perl(ExtUtils::Embed), perl-devel
@ -355,6 +357,8 @@ benchmarks.
 %patch4 -p1
 %patch5 -p1
 %patch6 -p1
+%patch7 -p1
+%patch8 -p1

 # We used to run autoconf here, but there's no longer any real need to,
 # since Postgres ships with a reasonably modern configure script.
@ -1151,6 +1155,9 @@ fi
 %endif

 %changelog
+* Mon Oct 20 2014 Pavel Raiskup <praiskup@redhat.com> - 9.3.5-7
+- be forgiving of variant spellings of locale names in pg_upgrade (#1007802)
+
 * Sun Sep 21 2014 Pavel Raiskup <praiskup@redhat.com> - 9.3.5-6
 - postgresql-setup & relatives are now in separate tarball