upgrade: be more benevolent in locale comparison

In F20+, upgrades should succeed when only the spelling of the locale name
changed, e.g. from en_US.utf8 to en_US.UTF-8.

Resolves: #1007802
Version: 9.3.5-7
This commit is contained in:
Pavel Raiskup 2014-10-20 11:47:13 +02:00
parent c88f76d540
commit 7fcd42641d
3 changed files with 177 additions and 1 deletions

View File

@ -0,0 +1,40 @@
From cd3e0071b8c9e082f5fe903a019d4e474be98e57 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 31 Jan 2014 02:03:30 -0500
Subject: [PATCH] Allow unrecognized encoding names in locales, as long as
they're the same.
The buildfarm says commit 58274728fb8e087049df67c0eee903d9743fdeda doesn't
work so well on Windows. This is because the encoding part of Windows
locale names can be just a code page number, eg "1252", which we don't
consider to be a valid encoding name. Add a check to accept encoding
parts that are case-insensitively string equal; this at least ensures
that the new code doesn't reject any cases that the old code allowed.
---
contrib/pg_upgrade/check.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
index 58c2d16..faeaff5 100644
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -447,8 +447,14 @@ equivalent_locale(const char *loca, const char *locb)
if (!chara || !charb)
return (pg_strcasecmp(loca, locb) == 0);
- /* Compare the encoding parts. */
- if (!equivalent_encoding(chara + 1, charb + 1))
+ /*
+ * Compare the encoding parts. Windows tends to use code page numbers for
+ * the encoding part, which equivalent_encoding() won't like, so accept if
+ * the strings are case-insensitive equal; otherwise use
+ * equivalent_encoding() to compare.
+ */
+ if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
+ !equivalent_encoding(chara + 1, charb + 1))
return false;
/*
--
1.9.3

View File

@ -0,0 +1,129 @@
From 58274728fb8e087049df67c0eee903d9743fdeda Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 30 Jan 2014 19:07:06 -0500
Subject: [PATCH] Be forgiving of variant spellings of locale names in
pg_upgrade.
Even though the server tries to canonicalize stored locale names, the
platform often doesn't cooperate, so it's entirely possible that one DB
thinks its locale is, say, "en_US.UTF-8" while the other has "en_US.utf8".
Rather than failing, we should try to allow this where it's clearly OK.
There is already pretty robust encoding lookup in encnames.c, so make
use of that to compare the encoding parts of the names. The locale
identifier parts are just compared case-insensitively, which we were
already doing. The major problem known to exist in the field is variant
encoding-name spellings, so hopefully this will be Good Enough. If not,
we can try being even laxer.
Pavel Raiskup, reviewed by Rushabh Lathia
---
contrib/pg_upgrade/check.c | 66 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
index 794d22c..58c2d16 100644
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -9,6 +9,7 @@
#include "postgres_fe.h"
+#include "mb/pg_wchar.h"
#include "pg_upgrade.h"
@@ -16,6 +17,8 @@ static void set_locale_and_encoding(ClusterInfo *cluster);
static void check_new_cluster_is_empty(void);
static void check_locale_and_encoding(ControlData *oldctrl,
ControlData *newctrl);
+static bool equivalent_locale(const char *loca, const char *locb);
+static bool equivalent_encoding(const char *chara, const char *charb);
static void check_is_super_user(ClusterInfo *cluster);
static void check_for_prepared_transactions(ClusterInfo *cluster);
static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
@@ -412,21 +415,78 @@ check_locale_and_encoding(ControlData *oldctrl,
* They also often use inconsistent hyphenation, which we cannot fix, e.g.
* UTF-8 vs. UTF8, so at least we display the mismatching values.
*/
- if (pg_strcasecmp(oldctrl->lc_collate, newctrl->lc_collate) != 0)
+ if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
pg_log(PG_FATAL,
"lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
oldctrl->lc_collate, newctrl->lc_collate);
- if (pg_strcasecmp(oldctrl->lc_ctype, newctrl->lc_ctype) != 0)
+ if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
pg_log(PG_FATAL,
"lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
oldctrl->lc_ctype, newctrl->lc_ctype);
- if (pg_strcasecmp(oldctrl->encoding, newctrl->encoding) != 0)
+ if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
pg_log(PG_FATAL,
"encoding cluster values do not match: old \"%s\", new \"%s\"\n",
oldctrl->encoding, newctrl->encoding);
}
+/*
+ * equivalent_locale()
+ *
+ * Best effort locale-name comparison. Return false if we are not 100% sure
+ * the locales are equivalent.
+ *
+ * Each name is split at its last '.' character: the text after it is
+ * treated as the encoding part and the text before it as the locale
+ * identifier. Two names are considered equivalent when their encoding
+ * parts satisfy equivalent_encoding() and their identifiers have the same
+ * length and match case-insensitively. If either name has no '.', the
+ * whole names are compared case-insensitively instead.
+ *
+ * loca, locb: the locale names to compare; both are passed straight to
+ * strrchr(), so neither may be NULL.
+ *
+ * Returns true if the names are considered equivalent, false otherwise.
+ */
+static bool
+equivalent_locale(const char *loca, const char *locb)
+{
+ const char *chara = strrchr(loca, '.');
+ const char *charb = strrchr(locb, '.');
+ int lencmp;
+
+ /*
+ * If either name lacks an encoding part (no '.' found), fall back to a
+ * case-insensitive comparison of the complete names.
+ */
+ if (!chara || !charb)
+ return (pg_strcasecmp(loca, locb) == 0);
+
+ /* Compare the encoding parts, i.e. the text following the last '.'. */
+ if (!equivalent_encoding(chara + 1, charb + 1))
+ return false;
+
+ /*
+ * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
+ *
+ * It's tempting to ignore non-alphanumeric chars here, but for now it's
+ * not clear that that's necessary; just do case-insensitive comparison.
+ *
+ * The identifier lengths are checked first because the bounded
+ * comparison below only looks at the first lencmp characters; without
+ * the length check an identifier that is a prefix of the other would
+ * be accepted.
+ */
+ lencmp = chara - loca;
+ if (lencmp != charb - locb)
+ return false;
+
+ return (pg_strncasecmp(loca, locb, lencmp) == 0);
+}
+
+/*
+ * equivalent_encoding()
+ *
+ * Best effort encoding-name comparison. Return true only if the encodings
+ * are valid server-side encodings and known equivalent.
+ *
+ * Because the lookup in pg_valid_server_encoding() does case folding and
+ * ignores non-alphanumeric characters, this will recognize many popular
+ * variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
+ *
+ * chara, charb: the encoding names to compare.
+ *
+ * Returns true when both lookups yield the same non-negative encoding ID.
+ * A negative lookup result for either name makes the function return
+ * false, so two identical names that the lookup does not accept are
+ * reported as not equivalent.
+ */
+static bool
+equivalent_encoding(const char *chara, const char *charb)
+{
+ int enca = pg_valid_server_encoding(chara);
+ int encb = pg_valid_server_encoding(charb);
+
+ if (enca < 0 || encb < 0)
+ return false;
+
+ return (enca == encb);
+}
+
+
static void
check_new_cluster_is_empty(void)
{
--
1.9.3

View File

@ -67,7 +67,7 @@ Summary: PostgreSQL client programs
Name: postgresql
%global majorversion 9.3
Version: 9.3.5
Release: 6%{?dist}
Release: 7%{?dist}
# The PostgreSQL license is very similar to other MIT licenses, but the OSI
# recognizes it as an independent license, so we do as well.
@ -119,6 +119,8 @@ Patch3: postgresql-perl-rpath.patch
Patch4: postgresql-config-comment.patch
Patch5: postgresql-var-run-socket.patch
Patch6: postgresql-man.patch
Patch7: postgresql-upgrade-locale-spelling.patch
Patch8: postgresql-upgrade-locale-spelling-2.patch
BuildRequires: perl(ExtUtils::MakeMaker) glibc-devel bison flex gawk help2man
BuildRequires: perl(ExtUtils::Embed), perl-devel
@ -355,6 +357,8 @@ benchmarks.
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
# We used to run autoconf here, but there's no longer any real need to,
# since Postgres ships with a reasonably modern configure script.
@ -1151,6 +1155,9 @@ fi
%endif
%changelog
* Mon Oct 20 2014 Pavel Raiskup <praiskup@redhat.com> - 9.3.5-7
- be forgiving of variant spellings of locale names in pg_upgrade (#1007802)
* Sun Sep 21 2014 Pavel Raiskup <praiskup@redhat.com> - 9.3.5-6
- postgresql-setup & relatives are now in separate tarball