diff --git a/SOURCES/backport-cve-2025-1094.patch b/SOURCES/backport-cve-2025-1094.patch
new file mode 100644
index 0000000..4db01d7
--- /dev/null
+++ b/SOURCES/backport-cve-2025-1094.patch
@@ -0,0 +1,3670 @@
+From 62235454d50a62138341a87be065e4681684753a Mon Sep 17 00:00:00 2001
+From: Andres Freund <andres@anarazel.de>
+Date: Mon, 10 Feb 2025 10:03:37 -0500
+Subject: [PATCH 1/8] Backport upstream commit
+ 4dc28963533704fc7dd922b9447467466a233d89 Add pg_encoding_set_invalid()
+
+There are cases where we cannot / do not want to error out for invalidly
+encoded input. In such cases it can be useful to replace e.g. an incomplete
+multi-byte character with bytes that will trigger an error when getting
+validated as part of a larger string.
+
+Unfortunately, until now, for some encodings no such sequence existed. For
+those encodings this commit removes one previously accepted input combination
+- we consider that to be ok, as the chosen bytes are outside of the valid
+ranges for the encodings, we just previously failed to detect that.
+
+As we cannot add a new field to pg_wchar_table without breaking ABI, this is
+implemented "in-line" in the newly added function.
+---
+ src/backend/utils/mb/wchar.c | 55 +++++++++++++++++++++++++++++++++++-
+ src/include/mb/pg_wchar.h    |  1 +
+ 2 files changed, 55 insertions(+), 1 deletion(-)
+
+diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
+index 1b5ce1740c0..872241cc804 100644
+--- a/src/backend/utils/mb/wchar.c
++++ b/src/backend/utils/mb/wchar.c
+@@ -14,6 +14,25 @@
+ #include "mb/pg_wchar.h"
+ 
+ 
++/*
++ * In today's multibyte encodings other than UTF8, this two-byte sequence
++ * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
++ *
++ * For historical reasons, several verifychar implementations opt to reject
++ * this pair specifically.  Byte pair range constraints, in encoding
++ * originator documentation, always excluded this pair.  No core conversion
++ * could translate it.  However, longstanding verifychar implementations
++ * accepted any non-NUL byte.  big5_to_euc_tw and big5_to_mic even translate
++ * pairs not valid per encoding originator documentation.  To avoid tightening
++ * core or non-core conversions in a security patch, we sought this one pair.
++ *
++ * PQescapeString() historically used spaces for BYTE1; many other values
++ * could suffice for BYTE1.
++ */
++#define NONUTF8_INVALID_BYTE0 (0x8d)
++#define NONUTF8_INVALID_BYTE1 (' ')
++
++
+ /*
+  * Operations on multi-byte encodings are driven by a table of helper
+  * functions.
+@@ -1394,6 +1413,11 @@ pg_big5_verifier(const unsigned char *s, int len)
+ 	if (len < l)
+ 		return -1;
+ 
++	if (l == 2 &&
++		s[0] == NONUTF8_INVALID_BYTE0 &&
++		s[1] == NONUTF8_INVALID_BYTE1)
++		return -1;
++
+ 	while (--l > 0)
+ 	{
+ 		if (*++s == '\0')
+@@ -1414,6 +1438,11 @@ pg_gbk_verifier(const unsigned char *s, int len)
+ 	if (len < l)
+ 		return -1;
+ 
++	if (l == 2 &&
++		s[0] == NONUTF8_INVALID_BYTE0 &&
++		s[1] == NONUTF8_INVALID_BYTE1)
++		return -1;
++
+ 	while (--l > 0)
+ 	{
+ 		if (*++s == '\0')
+@@ -1434,6 +1463,11 @@ pg_uhc_verifier(const unsigned char *s, int len)
+ 	if (len < l)
+ 		return -1;
+ 
++	if (l == 2 &&
++		s[0] == NONUTF8_INVALID_BYTE0 &&
++		s[1] == NONUTF8_INVALID_BYTE1)
++		return -1;
++
+ 	while (--l > 0)
+ 	{
+ 		if (*++s == '\0')
+@@ -1768,6 +1802,19 @@ pg_eucjp_increment(unsigned char *charptr, int length)
+ #endif							/* !FRONTEND */
+ 
+ 
++/*
++ * Fills the provided buffer with two bytes such that:
++ *   pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
++ */
++void
++pg_encoding_set_invalid(int encoding, char *dst)
++{
++	Assert(pg_encoding_max_length(encoding) > 1);
++
++	dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
++	dst[1] = NONUTF8_INVALID_BYTE1;
++}
++
+ /*
+  *-------------------------------------------------------------------
+  * encoding info table
+@@ -1869,7 +1916,13 @@ pg_encoding_max_length(int encoding)
+ {
+ 	Assert(PG_VALID_ENCODING(encoding));
+ 
+-	return pg_wchar_table[encoding].maxmblen;
++	/*
++	 * Check for the encoding despite the assert, due to some mingw versions
++	 * otherwise issuing bogus warnings.
++	 */
++	return PG_VALID_ENCODING(encoding) ?
++		pg_wchar_table[encoding].maxmblen :
++		pg_wchar_table[PG_SQL_ASCII].maxmblen;
+ }
+ 
+ #ifndef FRONTEND
+diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
+index ec101a834ef..07b316fae1d 100644
+--- a/src/include/mb/pg_wchar.h
++++ b/src/include/mb/pg_wchar.h
+@@ -520,6 +520,7 @@ extern int	pg_valid_server_encoding_id(int encoding);
+  * Remaining functions are not considered part of libpq's API, though many
+  * of them do exist inside libpq.
+  */
++extern void pg_encoding_set_invalid(int encoding, char *dst);
+ extern int	pg_mb2wchar(const char *from, pg_wchar *to);
+ extern int	pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
+ extern int	pg_encoding_mb2wchar_with_len(int encoding,
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 581adbfe8c9db2e641705b308a74e5b6d89c61a6 Mon Sep 17 00:00:00 2001
+From: Andres Freund <andres@anarazel.de>
+Date: Mon, 10 Feb 2025 10:03:37 -0500
+Subject: [PATCH 2/8] Backport upstream
+ commit 3e98c8ce50e46d58b91bf3ea806e995296dc5b91 Specify the encoding of input
+ to fmtId()
+
+This commit adds fmtIdEnc() and fmtQualifiedIdEnc(), which allow specifying
+the encoding as an explicit argument.  Additionally setFmtEncoding() is
+provided, which defines the encoding when no explicit encoding is provided, to
+avoid breaking all code using fmtId().
+
+All users of fmtId()/fmtQualifiedId() are either converted to the explicit
+version or a call to setFmtEncoding() has been added.
+
+This commit does not yet utilize the now well-defined encoding; that will
+happen in a subsequent commit.
+---
+ src/bin/pg_dump/pg_backup_archiver.c |  1 +
+ src/bin/pg_dump/pg_dump.c            |  1 +
+ src/bin/pg_dump/pg_dumpall.c         |  1 +
+ src/bin/psql/command.c               |  3 +
+ src/bin/scripts/common.c             |  5 +-
+ src/bin/scripts/createdb.c           |  2 +
+ src/bin/scripts/createuser.c         |  2 +
+ src/bin/scripts/dropdb.c             |  8 ++-
+ src/bin/scripts/dropuser.c           |  3 +-
+ src/bin/scripts/reindexdb.c          |  4 +-
+ src/bin/scripts/vacuumdb.c           |  5 +-
+ src/fe_utils/string_utils.c          | 84 ++++++++++++++++++++++++++--
+ src/include/fe_utils/string_utils.h  |  5 +-
+ 13 files changed, 109 insertions(+), 15 deletions(-)
+
+diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
+index 6476f7119af..489a84aca3d 100644
+--- a/src/bin/pg_dump/pg_backup_archiver.c
++++ b/src/bin/pg_dump/pg_backup_archiver.c
+@@ -2731,6 +2731,7 @@ processEncodingEntry(ArchiveHandle *AH, TocEntry *te)
+ 			fatal("unrecognized encoding \"%s\"",
+ 				  ptr1);
+ 		AH->public.encoding = encoding;
++		setFmtEncoding(encoding);
+ 	}
+ 	else
+ 		fatal("invalid ENCODING item: %s",
+diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
+index 67a3714c62c..53fc95f3033 100644
+--- a/src/bin/pg_dump/pg_dump.c
++++ b/src/bin/pg_dump/pg_dump.c
+@@ -1085,6 +1085,7 @@ setup_connection(Archive *AH, const char *dumpencoding,
+ 	 * we know how to escape strings.
+ 	 */
+ 	AH->encoding = PQclientEncoding(conn);
++	setFmtEncoding(AH->encoding);
+ 
+ 	std_strings = PQparameterStatus(conn, "standard_conforming_strings");
+ 	AH->std_strings = (std_strings && strcmp(std_strings, "on") == 0);
+diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
+index 27093220ab9..a44cd765c45 100644
+--- a/src/bin/pg_dump/pg_dumpall.c
++++ b/src/bin/pg_dump/pg_dumpall.c
+@@ -508,6 +508,7 @@ main(int argc, char *argv[])
+ 	 * we know how to escape strings.
+ 	 */
+ 	encoding = PQclientEncoding(conn);
++	setFmtEncoding(encoding);
+ 	std_strings = PQparameterStatus(conn, "standard_conforming_strings");
+ 	if (!std_strings)
+ 		std_strings = "off";
+diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
+index 8889f833714..66e7fb4cf31 100644
+--- a/src/bin/psql/command.c
++++ b/src/bin/psql/command.c
+@@ -1183,6 +1183,7 @@ exec_command_encoding(PsqlScanState scan_state, bool active_branch)
+ 				/* save encoding info into psql internal data */
+ 				pset.encoding = PQclientEncoding(pset.db);
+ 				pset.popt.topt.encoding = pset.encoding;
++				setFmtEncoding(pset.encoding);
+ 				SetVariable(pset.vars, "ENCODING",
+ 							pg_encoding_to_char(pset.encoding));
+ 			}
+@@ -3467,6 +3468,8 @@ SyncVariables(void)
+ 	pset.popt.topt.encoding = pset.encoding;
+ 	pset.sversion = PQserverVersion(pset.db);
+ 
++	setFmtEncoding(pset.encoding);
++
+ 	SetVariable(pset.vars, "DBNAME", PQdb(pset.db));
+ 	SetVariable(pset.vars, "USER", PQuser(pset.db));
+ 	SetVariable(pset.vars, "HOST", PQhost(pset.db));
+diff --git a/src/bin/scripts/common.c b/src/bin/scripts/common.c
+index 2de696c19ef..ff79421a31d 100644
+--- a/src/bin/scripts/common.c
++++ b/src/bin/scripts/common.c
+@@ -376,8 +376,9 @@ appendQualifiedRelation(PQExpBuffer buf, const char *spec,
+ 		exit(1);
+ 	}
+ 	appendPQExpBufferStr(buf,
+-						 fmtQualifiedId(PQgetvalue(res, 0, 1),
+-										PQgetvalue(res, 0, 0)));
++						 fmtQualifiedIdEnc(PQgetvalue(res, 0, 1),
++										   PQgetvalue(res, 0, 0),
++										   PQclientEncoding(conn)));
+ 	appendPQExpBufferStr(buf, columns);
+ 	PQclear(res);
+ 	termPQExpBuffer(&sql);
+diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
+index b4d3e134d93..d9f55cc9f5d 100644
+--- a/src/bin/scripts/createdb.c
++++ b/src/bin/scripts/createdb.c
+@@ -190,6 +190,8 @@ main(int argc, char *argv[])
+ 
+ 	conn = connectMaintenanceDatabase(&cparams, progname, echo);
+ 
++	setFmtEncoding(PQclientEncoding(conn));
++
+ 	initPQExpBuffer(&sql);
+ 
+ 	appendPQExpBuffer(&sql, "CREATE DATABASE %s",
+diff --git a/src/bin/scripts/createuser.c b/src/bin/scripts/createuser.c
+index dbc2c2a58cd..7ec8ee51be7 100644
+--- a/src/bin/scripts/createuser.c
++++ b/src/bin/scripts/createuser.c
+@@ -271,6 +271,8 @@ main(int argc, char *argv[])
+ 
+ 	conn = connectMaintenanceDatabase(&cparams, progname, echo);
+ 
++	setFmtEncoding(PQclientEncoding(conn));
++
+ 	initPQExpBuffer(&sql);
+ 
+ 	printfPQExpBuffer(&sql, "CREATE ROLE %s", fmtId(newuser));
+diff --git a/src/bin/scripts/dropdb.c b/src/bin/scripts/dropdb.c
+index ffdf12bfea7..0d636d0ef46 100644
+--- a/src/bin/scripts/dropdb.c
++++ b/src/bin/scripts/dropdb.c
+@@ -125,7 +125,7 @@ main(int argc, char *argv[])
+ 	initPQExpBuffer(&sql);
+ 
+ 	appendPQExpBuffer(&sql, "DROP DATABASE %s%s;",
+-					  (if_exists ? "IF EXISTS " : ""), fmtId(dbname));
++					  (if_exists ? "IF EXISTS " : ""), fmtIdEnc(dbname, PQclientEncoding(conn)));
+ 
+ 	/* Avoid trying to drop postgres db while we are connected to it. */
+ 	if (maintenance_db == NULL && strcmp(dbname, "postgres") == 0)
+@@ -140,6 +140,12 @@ main(int argc, char *argv[])
+ 
+ 	conn = connectMaintenanceDatabase(&cparams, progname, echo);
+ 
++	initPQExpBuffer(&sql);
++	appendPQExpBuffer(&sql, "DROP DATABASE %s%s%s;",
++					  (if_exists ? "IF EXISTS " : ""),
++					  fmtIdEnc(dbname, PQclientEncoding(conn)),
++					  force ? " WITH (FORCE)" : "");
++
+ 	if (echo)
+ 		printf("%s\n", sql.data);
+ 	result = PQexec(conn, sql.data);
+diff --git a/src/bin/scripts/dropuser.c b/src/bin/scripts/dropuser.c
+index a8be6b0784b..26523f85784 100644
+--- a/src/bin/scripts/dropuser.c
++++ b/src/bin/scripts/dropuser.c
+@@ -143,7 +143,8 @@ main(int argc, char *argv[])
+ 
+ 	initPQExpBuffer(&sql);
+ 	appendPQExpBuffer(&sql, "DROP ROLE %s%s;",
+-					  (if_exists ? "IF EXISTS " : ""), fmtId(dropuser));
++					  (if_exists ? "IF EXISTS " : ""),
++					  fmtIdEnc(dropuser, PQclientEncoding(conn)));
+ 
+ 	if (echo)
+ 		printf("%s\n", sql.data);
+diff --git a/src/bin/scripts/reindexdb.c b/src/bin/scripts/reindexdb.c
+index 39b4078b411..b96d0ff54cf 100644
+--- a/src/bin/scripts/reindexdb.c
++++ b/src/bin/scripts/reindexdb.c
+@@ -325,7 +325,7 @@ reindex_one_database(const ConnParams *cparams,
+ 	else if (strcmp(type, "SCHEMA") == 0)
+ 		appendPQExpBufferStr(&sql, name);
+ 	else if (strcmp(type, "DATABASE") == 0)
+-		appendPQExpBufferStr(&sql, fmtId(PQdb(conn)));
++		appendPQExpBufferStr(&sql, fmtIdEnc(PQdb(conn),PQclientEncoding(conn)));
+ 	appendPQExpBufferChar(&sql, ';');
+ 
+ 	if (!executeMaintenanceCommand(conn, sql.data, echo))
+@@ -403,7 +403,7 @@ reindex_system_catalogs(const ConnParams *cparams,
+ 	appendPQExpBufferStr(&sql, " SYSTEM ");
+ 	if (concurrently)
+ 		appendPQExpBuffer(&sql, "CONCURRENTLY ");
+-	appendPQExpBufferStr(&sql, fmtId(PQdb(conn)));
++	appendPQExpBufferStr(&sql, fmtIdEnc(PQdb(conn),PQclientEncoding(conn)));
+ 	appendPQExpBufferChar(&sql, ';');
+ 
+ 	if (!executeMaintenanceCommand(conn, sql.data, echo))
+diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
+index 6ade0c31a9d..8f9ce6529dc 100644
+--- a/src/bin/scripts/vacuumdb.c
++++ b/src/bin/scripts/vacuumdb.c
+@@ -602,8 +602,9 @@ vacuum_one_database(const ConnParams *cparams,
+ 	for (i = 0; i < ntups; i++)
+ 	{
+ 		appendPQExpBufferStr(&buf,
+-							 fmtQualifiedId(PQgetvalue(res, i, 1),
+-											PQgetvalue(res, i, 0)));
++							 fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
++											   PQgetvalue(res, i, 0),
++											   PQclientEncoding(conn)));
+ 
+ 		if (tables_listed && !PQgetisnull(res, i, 2))
+ 			appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2));
+diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
+index d5757becef2..05f0bd2576d 100644
+--- a/src/fe_utils/string_utils.c
++++ b/src/fe_utils/string_utils.c
+@@ -18,6 +18,7 @@
+ #include <ctype.h>
+ 
+ #include "fe_utils/string_utils.h"
++#include "mb/pg_wchar.h"
+ 
+ #include "common/keywords.h"
+ 
+@@ -28,6 +29,8 @@ static PQExpBuffer defaultGetLocalPQExpBuffer(void);
+ int			quote_all_identifiers = 0;
+ PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
+ 
++static int	fmtIdEncoding = -1;
++
+ 
+ /*
+  * Returns a temporary PQExpBuffer, valid until the next call to the function.
+@@ -56,14 +59,48 @@ defaultGetLocalPQExpBuffer(void)
+ 	return id_return;
+ }
+ 
++/*
++ * Set the encoding that fmtId() and fmtQualifiedId() use.
++ *
++ * This is not safe against multiple connections having different encodings,
++ * but there is no real other way to address the need to know the encoding for
++ * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get
++ * rid of fmtId().
++ */
++void
++setFmtEncoding(int encoding)
++{
++	fmtIdEncoding = encoding;
++}
++
++/*
++ * Return the currently configured encoding for fmtId() and fmtQualifiedId().
++ */
++static int
++getFmtEncoding(void)
++{
++	if (fmtIdEncoding != -1)
++		return fmtIdEncoding;
++
++	/*
++	 * In assertion builds it seems best to fail hard if the encoding was not
++	 * set, to make it easier to find places with missing calls. But in
++	 * production builds that seems like a bad idea, thus we instead just
++	 * default to UTF-8.
++	 */
++	Assert(fmtIdEncoding != -1);
++
++	return PG_UTF8;
++}
++
+ /*
+  *	Quotes input string if it's not a legitimate SQL identifier as-is.
+  *
+- *	Note that the returned string must be used before calling fmtId again,
++ *	Note that the returned string must be used before calling fmtIdEnc again,
+  *	since we re-use the same return buffer each time.
+  */
+ const char *
+-fmtId(const char *rawid)
++fmtIdEnc(const char *rawid, int encoding)
+ {
+ 	PQExpBuffer id_return = getLocalPQExpBuffer();
+ 
+@@ -136,7 +173,24 @@ fmtId(const char *rawid)
+ }
+ 
+ /*
+- * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
++ *	Quotes input string if it's not a legitimate SQL identifier as-is.
++ *
++ *	Note that the returned string must be used before calling fmtId again,
++ *	since we re-use the same return buffer each time.
++ *
++ *  NB: This assumes setFmtEncoding() previously has been called to configure
++ *  the encoding of rawid. It is preferable to use fmtIdEnc() with an
++ *  explicit encoding.
++ */
++const char *
++fmtId(const char *rawid)
++{
++	return fmtIdEnc(rawid, getFmtEncoding());
++}
++
++/*
++ * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
++ * needed.
+  *
+  * Like fmtId, use the result before calling again.
+  *
+@@ -144,7 +198,7 @@ fmtId(const char *rawid)
+  * use that buffer until we're finished with calling fmtId().
+  */
+ const char *
+-fmtQualifiedId(const char *schema, const char *id)
++fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
+ {
+ 	PQExpBuffer id_return;
+ 	PQExpBuffer lcl_pqexp = createPQExpBuffer();
+@@ -152,9 +206,9 @@ fmtQualifiedId(const char *schema, const char *id)
+ 	/* Some callers might fail to provide a schema name */
+ 	if (schema && *schema)
+ 	{
+-		appendPQExpBuffer(lcl_pqexp, "%s.", fmtId(schema));
++		appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
+ 	}
+-	appendPQExpBufferStr(lcl_pqexp, fmtId(id));
++	appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));
+ 
+ 	id_return = getLocalPQExpBuffer();
+ 
+@@ -164,6 +218,24 @@ fmtQualifiedId(const char *schema, const char *id)
+ 	return id_return->data;
+ }
+ 
++/*
++ * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
++ *
++ * Like fmtId, use the result before calling again.
++ *
++ * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
++ * use that buffer until we're finished with calling fmtId().
++ *
++ * NB: This assumes setFmtEncoding() previously has been called to configure
++ * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
++ * with an explicit encoding.
++ */
++const char *
++fmtQualifiedId(const char *schema, const char *id)
++{
++	return fmtQualifiedIdEnc(schema, id, getFmtEncoding());
++}
++
+ 
+ /*
+  * Format a Postgres version number (in the PG_VERSION_NUM integer format
+diff --git a/src/include/fe_utils/string_utils.h b/src/include/fe_utils/string_utils.h
+index 8c13cc0a66d..37f17f0b370 100644
+--- a/src/include/fe_utils/string_utils.h
++++ b/src/include/fe_utils/string_utils.h
+@@ -24,8 +24,11 @@ extern int	quote_all_identifiers;
+ extern PQExpBuffer (*getLocalPQExpBuffer) (void);
+ 
+ /* Functions */
+-extern const char *fmtId(const char *identifier);
++extern const char *fmtId(const char *rawid);
++extern const char *fmtIdEnc(const char *rawid, int encoding);
+ extern const char *fmtQualifiedId(const char *schema, const char *id);
++extern const char *fmtQualifiedIdEnc(const char *schema, const char *id, int encoding);
++extern void setFmtEncoding(int encoding);
+ 
+ extern char *formatPGVersionNumber(int version_number, bool include_minor,
+ 								   char *buf, size_t buflen);
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 7c56df18c1f6e48c4343f2d6d1364c5825e45278 Mon Sep 17 00:00:00 2001
+From: Andres Freund <andres@anarazel.de>
+Date: Mon, 10 Feb 2025 10:03:37 -0500
+Subject: [PATCH 3/8] Backport upstream commit:
+ 5dc1e42b4fa6a4434afa7d7cdcf0291351a7b873 Fix handling of invalidly encoded
+ data in escaping functions
+
+Previously invalidly encoded input to various escaping functions could lead to
+the escaped string getting incorrectly parsed by psql.  To be safe, escaping
+functions need to ensure that neither invalid nor incomplete multi-byte
+characters can be used to "escape" from being quoted.
+
+Functions which can report errors now return an error in more cases than
+before. Functions that cannot report errors now replace invalid input bytes
+with a byte sequence that cannot be used to escape the quotes and that is
+guaranteed to error out when a query is sent to the server.
+
+The following functions are fixed by this commit:
+- PQescapeLiteral()
+- PQescapeIdentifier()
+- PQescapeString()
+- PQescapeStringConn()
+- fmtId()
+- appendStringLiteral()
+---
+ src/fe_utils/string_utils.c    | 170 ++++++++++++++++++++++++++-------
+ src/interfaces/libpq/fe-exec.c | 114 ++++++++++++++--------
+ 2 files changed, 212 insertions(+), 72 deletions(-)
+
+diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
+index 05f0bd2576d..9f7151bd542 100644
+--- a/src/fe_utils/string_utils.c
++++ b/src/fe_utils/string_utils.c
+@@ -106,6 +106,7 @@ fmtIdEnc(const char *rawid, int encoding)
+ 
+ 	const char *cp;
+ 	bool		need_quotes = false;
++	size_t		remaining = strlen(rawid);
+ 
+ 	/*
+ 	 * These checks need to match the identifier production in scan.l. Don't
+@@ -119,7 +120,8 @@ fmtIdEnc(const char *rawid, int encoding)
+ 	else
+ 	{
+ 		/* otherwise check the entire string */
+-		for (cp = rawid; *cp; cp++)
++		cp = rawid;
++		for (size_t i = 0; i < remaining; i++, cp++)
+ 		{
+ 			if (!((*cp >= 'a' && *cp <= 'z')
+ 				  || (*cp >= '0' && *cp <= '9')
+@@ -155,17 +157,90 @@ fmtIdEnc(const char *rawid, int encoding)
+ 	else
+ 	{
+ 		appendPQExpBufferChar(id_return, '"');
+-		for (cp = rawid; *cp; cp++)
++
++		cp = &rawid[0];
++		while (remaining > 0)
+ 		{
+-			/*
+-			 * Did we find a double-quote in the string? Then make this a
+-			 * double double-quote per SQL99. Before, we put in a
+-			 * backslash/double-quote pair. - thomas 2000-08-05
+-			 */
+-			if (*cp == '"')
+-				appendPQExpBufferChar(id_return, '"');
+-			appendPQExpBufferChar(id_return, *cp);
++			int			charlen;
++
++			/* Fast path for plain ASCII */
++			if (!IS_HIGHBIT_SET(*cp))
++			{
++				/*
++				 * Did we find a double-quote in the string? Then make this a
++				 * double double-quote per SQL99. Before, we put in a
++				 * backslash/double-quote pair. - thomas 2000-08-05
++				 */
++				if (*cp == '"')
++					appendPQExpBufferChar(id_return, '"');
++				appendPQExpBufferChar(id_return, *cp);
++				remaining--;
++				cp++;
++				continue;
++			}
++
++			/* Slow path for possible multibyte characters */
++			charlen = pg_encoding_mblen(encoding, cp);
++
++			if (remaining < charlen)
++			{
++				/*
++				 * If the character is longer than the available input,
++				 * replace the string with an invalid sequence. The invalid
++				 * sequence ensures that the escaped string will trigger an
++				 * error on the server-side, even if we can't directly report
++				 * an error here.
++				 */
++				enlargePQExpBuffer(id_return, 2);
++				pg_encoding_set_invalid(encoding,
++										id_return->data + id_return->len);
++				id_return->len += 2;
++				id_return->data[id_return->len] = '\0';
++
++				/* there's no more input data, so we can stop */
++				break;
++			}
++			else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
++			{
++				/*
++				 * Multibyte character is invalid.  It's important to verify
++				 * that as invalid multi-byte characters could e.g. be used to
++				 * "skip" over quote characters, e.g. when parsing
++				 * character-by-character.
++				 *
++				 * Replace the bytes corresponding to the invalid character
++				 * with an invalid sequence, for the same reason as above.
++				 *
++				 * It would be a bit faster to verify the whole string the
++				 * first time we encounter a set highbit, but this way we can
++				 * replace just the invalid characters, which probably makes
++				 * it easier for users to find the invalidly encoded portion
++				 * of a larger string.
++				 */
++				enlargePQExpBuffer(id_return, 2);
++				pg_encoding_set_invalid(encoding,
++										id_return->data + id_return->len);
++				id_return->len += 2;
++				id_return->data[id_return->len] = '\0';
++
++				/*
++				 * Copy the rest of the string after the invalid multi-byte
++				 * character.
++				 */
++				remaining -= charlen;
++				cp += charlen;
++			}
++			else
++			{
++				for (int i = 0; i < charlen; i++)
++				{
++					appendPQExpBufferChar(id_return, *cp);
++					remaining--;
++					cp++;
++				}
++			}
+ 		}
++
+ 		appendPQExpBufferChar(id_return, '"');
+ 	}
+ 
+@@ -292,6 +367,7 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
+ 	size_t		length = strlen(str);
+ 	const char *source = str;
+ 	char	   *target;
++	size_t		remaining = length;
+ 
+ 	if (!enlargePQExpBuffer(buf, 2 * length + 2))
+ 		return;
+@@ -299,10 +375,10 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
+ 	target = buf->data + buf->len;
+ 	*target++ = '\'';
+ 
+-	while (*source != '\0')
++	while (remaining > 0)
+ 	{
+ 		char		c = *source;
+-		int			len;
++		int			charlen;
+ 		int			i;
+ 
+ 		/* Fast path for plain ASCII */
+@@ -314,39 +390,65 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
+ 			/* Copy the character */
+ 			*target++ = c;
+ 			source++;
++			remaining--;
+ 			continue;
+ 		}
+ 
+ 		/* Slow path for possible multibyte characters */
+-		len = PQmblen(source, encoding);
++		charlen = PQmblen(source, encoding);
+ 
+-		/* Copy the character */
+-		for (i = 0; i < len; i++)
++		if (remaining < charlen)
+ 		{
+-			if (*source == '\0')
+-				break;
+-			*target++ = *source++;
+-		}
++			/*
++			 * If the character is longer than the available input, replace
++			 * the string with an invalid sequence. The invalid sequence
++			 * ensures that the escaped string will trigger an error on the
++			 * server-side, even if we can't directly report an error here.
++			 *
++			 * We know there's enough space for the invalid sequence because
++			 * the "target" buffer is 2 * length + 2 long, and at worst we're
++			 * replacing a single input byte with two invalid bytes.
++			 */
++			pg_encoding_set_invalid(encoding, target);
++			target += 2;
+ 
+-		/*
+-		 * If we hit premature end of string (ie, incomplete multibyte
+-		 * character), try to pad out to the correct length with spaces. We
+-		 * may not be able to pad completely, but we will always be able to
+-		 * insert at least one pad space (since we'd not have quoted a
+-		 * multibyte character).  This should be enough to make a string that
+-		 * the server will error out on.
+-		 */
+-		if (i < len)
++			/* there's no more valid input data, so we can stop */
++			break;
++		}
++		else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
+ 		{
+-			char	   *stop = buf->data + buf->maxlen - 2;
++			/*
++			 * Multibyte character is invalid.  It's important to verify that
++			 * as invalid multi-byte characters could e.g. be used to "skip"
++			 * over quote characters, e.g. when parsing
++			 * character-by-character.
++			 *
++			 * Replace the bytes corresponding to the invalid character with
++			 * an invalid sequence, for the same reason as above.
++			 *
++			 * It would be a bit faster to verify the whole string the first
++			 * time we encounter a set highbit, but this way we can replace
++			 * just the invalid characters, which probably makes it easier for
++			 * users to find the invalidly encoded portion of a larger string.
++			 */
++			pg_encoding_set_invalid(encoding, target);
++			target += 2;
++			remaining -= charlen;
+ 
+-			for (; i < len; i++)
++			/*
++			 * Copy the rest of the string after the invalid multi-byte
++			 * character.
++			 */
++			source += charlen;
++		}
++		else
++		{
++			/* Copy the character */
++			for (i = 0; i < charlen; i++)
+ 			{
+-				if (target >= stop)
+-					break;
+-				*target++ = ' ';
++				*target++ = *source++;
++				remaining--;
+ 			}
+-			break;
+ 		}
+ 	}
+ 
+diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
+index ff101c4ca2a..97cd2c53673 100644
+--- a/src/interfaces/libpq/fe-exec.c
++++ b/src/interfaces/libpq/fe-exec.c
+@@ -3348,15 +3348,15 @@ PQescapeStringInternal(PGconn *conn,
+ {
+ 	const char *source = from;
+ 	char	   *target = to;
+-	size_t		remaining = length;
++	size_t		remaining = strnlen(from, length);
+ 
+ 	if (error)
+ 		*error = 0;
+ 
+-	while (remaining > 0 && *source != '\0')
++	while (remaining > 0)
+ 	{
+ 		char		c = *source;
+-		int			len;
++		int			charlen;
+ 		int			i;
+ 
+ 		/* Fast path for plain ASCII */
+@@ -3373,39 +3373,48 @@ PQescapeStringInternal(PGconn *conn,
+ 		}
+ 
+ 		/* Slow path for possible multibyte characters */
+-		len = pg_encoding_mblen(encoding, source);
++		charlen = pg_encoding_mblen(encoding, source);
+ 
+-		/* Copy the character */
+-		for (i = 0; i < len; i++)
+-		{
+-			if (remaining == 0 || *source == '\0')
+-				break;
+-			*target++ = *source++;
+-			remaining--;
+-		}
+-
+-		/*
+-		 * If we hit premature end of string (ie, incomplete multibyte
+-		 * character), try to pad out to the correct length with spaces. We
+-		 * may not be able to pad completely, but we will always be able to
+-		 * insert at least one pad space (since we'd not have quoted a
+-		 * multibyte character).  This should be enough to make a string that
+-		 * the server will error out on.
+-		 */
+-		if (i < len)
++		if (remaining < charlen ||
++              pg_encoding_verifymbchar(encoding, source, charlen) == -1)
+ 		{
++			/*
++			 * If the character is longer than the available input, report an
++			 * error if possible, and replace the string with an invalid
++			 * sequence. The invalid sequence ensures that the escaped string
++			 * will trigger an error on the server-side, even if we can't
++			 * directly report an error here.
++			 *
++			 * This isn't *that* crucial when we can report an error to the
++			 * caller, but if we can't, the caller will use this string
++			 * unmodified and it needs to be safe for parsing.
++			 *
++			 * We know there's enough space for the invalid sequence because
++			 * the "to" buffer needs to be at least 2 * length + 1 long, and
++			 * at worst we're replacing a single input byte with two invalid
++			 * bytes.
++			 */
+ 			if (error)
+ 				*error = 1;
+ 			if (conn)
+-				printfPQExpBuffer(&conn->errorMessage,
+-								  libpq_gettext("incomplete multibyte character\n"));
+-			for (; i < len; i++)
++				libpq_append_conn_error(conn, "incomplete multibyte character");
++
++			pg_encoding_set_invalid(encoding, target);
++			target += 2;
++            source++;
++            remaining--;
++
++			/* there's no more input data, so we can stop */
++			break;
++		}
++		else
++		{
++			/* Copy the character */
++			for (i = 0; i < charlen; i++)
+ 			{
+-				if (((size_t) (target - to)) / 2 >= length)
+-					break;
+-				*target++ = ' ';
++				*target++ = *source++;
++				remaining--;
+ 			}
+-			break;
+ 		}
+ 	}
+ 
+@@ -3451,21 +3460,27 @@ PQescapeString(char *to, const char *from, size_t length)
+ static char *
+ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
+ {
+-	const char *s;
++    const char *s;
+ 	char	   *result;
+ 	char	   *rp;
+ 	int			num_quotes = 0; /* single or double, depending on as_ident */
+ 	int			num_backslashes = 0;
+-	int			input_len;
+-	int			result_size;
++	size_t		input_len = strnlen(str, len);
++	size_t		result_size;
+ 	char		quote_char = as_ident ? '"' : '\'';
++	bool		validated_mb = false;
+ 
+ 	/* We must have a connection, else fail immediately. */
+ 	if (!conn)
+ 		return NULL;
+ 
+-	/* Scan the string for characters that must be escaped. */
+-	for (s = str; (s - str) < len && *s != '\0'; ++s)
++
++	/*
++	 * Scan the string for characters that must be escaped and for invalidly
++	 * encoded data.
++	 */
++	s = str;
++	for (size_t remaining = input_len; remaining > 0; remaining--, s++)
+ 	{
+ 		if (*s == quote_char)
+ 			++num_quotes;
+@@ -3478,21 +3493,42 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
+ 			/* Slow path for possible multibyte characters */
+ 			charlen = pg_encoding_mblen(conn->client_encoding, s);
+ 
+-			/* Multibyte character overruns allowable length. */
+-			if ((s - str) + charlen > len || memchr(s, 0, charlen) != NULL)
++			if (charlen > remaining)
+ 			{
+ 				printfPQExpBuffer(&conn->errorMessage,
+ 								  libpq_gettext("incomplete multibyte character\n"));
+ 				return NULL;
+ 			}
+ 
++			/*
++			 * If we haven't already, check that multibyte characters are
++			 * valid. It's important to verify that as invalid multi-byte
++			 * characters could e.g. be used to "skip" over quote characters,
++			 * e.g. when parsing character-by-character.
++			 *
++			 * We check validity once, for the whole remainder of the string,
++			 * when we first encounter any multi-byte character. Some
++			 * encodings have optimized implementations for longer strings.
++			 */
++			if (!validated_mb)
++			{
++				if (pg_encoding_verifymbstr(conn->client_encoding, s, remaining)
++					!= remaining)
++				{
++					printfPQExpBuffer(&conn->errorMessage,
++									  libpq_gettext("invalid multibyte character\n"));
++					return NULL;
++				}
++				validated_mb = true;
++			}
++
+ 			/* Adjust s, bearing in mind that for loop will increment it. */
+ 			s += charlen - 1;
++			remaining -= charlen - 1;
+ 		}
+ 	}
+ 
+ 	/* Allocate output buffer. */
+-	input_len = s - str;
+ 	result_size = input_len + num_quotes + 3;	/* two quotes, plus a NUL */
+ 	if (!as_ident && num_backslashes > 0)
+ 		result_size += num_backslashes + 2;
+@@ -3538,7 +3574,8 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
+ 	}
+ 	else
+ 	{
+-		for (s = str; s - str < input_len; ++s)
++		s = str;
++		for (size_t remaining = input_len; remaining > 0; remaining--, s++)
+ 		{
+ 			if (*s == quote_char || (!as_ident && *s == '\\'))
+ 			{
+@@ -3556,6 +3593,7 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
+ 					*rp++ = *s;
+ 					if (--i == 0)
+ 						break;
++					remaining--;
+ 					++s;		/* for loop will provide the final increment */
+ 				}
+ 			}
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 3751ccde18122412fcbfcc2df583cf66fefdbab0 Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl@sss.pgh.pa.us>
+Date: Mon, 10 Feb 2025 16:30:03 -0500
+Subject: [PATCH 4/8] Backport upstream commit
+ 5bf12323b6b8b05790aab6876555568898f4fc81 Adapt appendPsqlMetaConnect() to the
+ new fmtId() encoding expectations.
+
+We need to tell fmtId() what encoding to assume, but this function
+doesn't know that.  Fortunately we can fix that without changing the
+function's API, because we can just use SQL_ASCII.  That's because
+database names in connection requests are effectively binary not text:
+no encoding-aware processing will happen on them.
+
+This fixes XversionUpgrade failures seen in the buildfarm.  The
+alternative of having pg_upgrade use setFmtEncoding() is unappetizing,
+given that it's connecting to multiple databases that may have
+different encodings.
+
+Andres Freund, Noah Misch, Tom Lane
+
+Security: CVE-2025-1094
+---
+ src/fe_utils/string_utils.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
+index 9f7151bd542..a289d3001eb 100644
+--- a/src/fe_utils/string_utils.c
++++ b/src/fe_utils/string_utils.c
+@@ -792,16 +792,22 @@ appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
+ 		}
+ 	}
+ 
+-	appendPQExpBufferStr(buf, "\\connect ");
+ 	if (complex)
+ 	{
+ 		PQExpBufferData connstr;
+ 
+ 		initPQExpBuffer(&connstr);
+-		appendPQExpBuffer(&connstr, "dbname=");
+-		appendConnStrVal(&connstr, dbname);
+ 
+-		appendPQExpBuffer(buf, "-reuse-previous=on ");
++		/*
++		 * Force the target psql's encoding to SQL_ASCII.  We don't really
++		 * know the encoding of the database name, and it doesn't matter as
++		 * long as psql will forward it to the server unchanged.
++		 */
++		appendPQExpBufferStr(buf, "\\encoding SQL_ASCII\n");
++		appendPQExpBufferStr(buf, "\\connect -reuse-previous=on ");
++
++		appendPQExpBufferStr(&connstr, "dbname=");
++		appendConnStrVal(&connstr, dbname);
+ 
+ 		/*
+ 		 * As long as the name does not contain a newline, SQL identifier
+@@ -809,12 +815,15 @@ appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
+ 		 * involve psql-interpreted single quotes, which behaved differently
+ 		 * before PostgreSQL 9.2.
+ 		 */
+-		appendPQExpBufferStr(buf, fmtId(connstr.data));
++		appendPQExpBufferStr(buf, fmtIdEnc(connstr.data, PG_SQL_ASCII));
+ 
+ 		termPQExpBuffer(&connstr);
+ 	}
+ 	else
+-		appendPQExpBufferStr(buf, fmtId(dbname));
++	{
++		appendPQExpBufferStr(buf, "\\connect ");
++		appendPQExpBufferStr(buf, fmtIdEnc(dbname, PG_SQL_ASCII));
++	}
+ 	appendPQExpBufferChar(buf, '\n');
+ }
+ 
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 84b7b93568fa4523afb66d2d1776f5e24b5db1de Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl@sss.pgh.pa.us>
+Date: Sat, 15 Feb 2025 16:20:21 -0500
+Subject: [PATCH 5/8] Backport upstream commit:
+ 9f45e6a91d8460ac0b1f30e6ae3eefb185b8d0ab Make escaping functions retain
+ trailing bytes of an invalid character.
+
+Instead of dropping the trailing byte(s) of an invalid or incomplete
+multibyte character, replace only the first byte with a known-invalid
+sequence, and process the rest normally.  This seems less likely to
+confuse incautious callers than the behavior adopted in 5dc1e42b4.
+
+While we're at it, adjust PQescapeStringInternal to produce at most
+one bleat about invalid multibyte characters per string.  This
+matches the behavior of PQescapeInternal, and avoids the risk of
+producing tons of repetitive junk if a long string is simply given
+in the wrong encoding.
+
+This is a followup to the fixes for CVE-2025-1094, and should be
+included if cherry-picking those fixes.
+
+Author: Andres Freund <andres@anarazel.de>
+Co-authored-by: Tom Lane <tgl@sss.pgh.pa.us>
+Reported-by: Jeff Davis <pgsql@j-davis.com>
+Discussion: https://postgr.es/m/20250215012712.45@rfd.leadboat.com
+---
+ src/fe_utils/string_utils.c    | 91 +++++++++++++---------------------
+ src/interfaces/libpq/fe-exec.c | 22 ++++----
+ 2 files changed, 47 insertions(+), 66 deletions(-)
+
+diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
+index a289d3001eb..a2d5ccd1e28 100644
+--- a/src/fe_utils/string_utils.c
++++ b/src/fe_utils/string_utils.c
+@@ -182,40 +182,25 @@ fmtIdEnc(const char *rawid, int encoding)
+ 			/* Slow path for possible multibyte characters */
+ 			charlen = pg_encoding_mblen(encoding, cp);
+ 
+-			if (remaining < charlen)
+-			{
+-				/*
+-				 * If the character is longer than the available input,
+-				 * replace the string with an invalid sequence. The invalid
+-				 * sequence ensures that the escaped string will trigger an
+-				 * error on the server-side, even if we can't directly report
+-				 * an error here.
+-				 */
+-				enlargePQExpBuffer(id_return, 2);
+-				pg_encoding_set_invalid(encoding,
+-										id_return->data + id_return->len);
+-				id_return->len += 2;
+-				id_return->data[id_return->len] = '\0';
+-
+-				/* there's no more input data, so we can stop */
+-				break;
+-			}
+-			else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
++			if (remaining < charlen ||
++				pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
+ 			{
+ 				/*
+ 				 * Multibyte character is invalid.  It's important to verify
+-				 * that as invalid multi-byte characters could e.g. be used to
++				 * that as invalid multibyte characters could e.g. be used to
+ 				 * "skip" over quote characters, e.g. when parsing
+ 				 * character-by-character.
+ 				 *
+-				 * Replace the bytes corresponding to the invalid character
+-				 * with an invalid sequence, for the same reason as above.
++				 * Replace the character's first byte with an invalid
++				 * sequence. The invalid sequence ensures that the escaped
++				 * string will trigger an error on the server-side, even if we
++				 * can't directly report an error here.
+ 				 *
+ 				 * It would be a bit faster to verify the whole string the
+ 				 * first time we encounter a set highbit, but this way we can
+-				 * replace just the invalid characters, which probably makes
+-				 * it easier for users to find the invalidly encoded portion
+-				 * of a larger string.
++				 * replace just the invalid data, which probably makes it
++				 * easier for users to find the invalidly encoded portion of a
++				 * larger string.
+ 				 */
+ 				enlargePQExpBuffer(id_return, 2);
+ 				pg_encoding_set_invalid(encoding,
+@@ -224,11 +209,13 @@ fmtIdEnc(const char *rawid, int encoding)
+ 				id_return->data[id_return->len] = '\0';
+ 
+ 				/*
+-				 * Copy the rest of the string after the invalid multi-byte
+-				 * character.
++				 * Handle the following bytes as if this byte didn't exist.
++				 * That's safer in case the subsequent bytes contain
++				 * characters that are significant for the caller (e.g. '>' in
++				 * html).
+ 				 */
+-				remaining -= charlen;
+-				cp += charlen;
++				remaining--;
++				cp++;
+ 			}
+ 			else
+ 			{
+@@ -397,49 +384,39 @@ appendStringLiteral(PQExpBuffer buf, const char *str,
+ 		/* Slow path for possible multibyte characters */
+ 		charlen = PQmblen(source, encoding);
+ 
+-		if (remaining < charlen)
+-		{
+-			/*
+-			 * If the character is longer than the available input, replace
+-			 * the string with an invalid sequence. The invalid sequence
+-			 * ensures that the escaped string will trigger an error on the
+-			 * server-side, even if we can't directly report an error here.
+-			 *
+-			 * We know there's enough space for the invalid sequence because
+-			 * the "target" buffer is 2 * length + 2 long, and at worst we're
+-			 * replacing a single input byte with two invalid bytes.
+-			 */
+-			pg_encoding_set_invalid(encoding, target);
+-			target += 2;
+-
+-			/* there's no more valid input data, so we can stop */
+-			break;
+-		}
+-		else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
++		if (remaining < charlen ||
++			pg_encoding_verifymbchar(encoding, source, charlen) == -1)
+ 		{
+ 			/*
+ 			 * Multibyte character is invalid.  It's important to verify that
+-			 * as invalid multi-byte characters could e.g. be used to "skip"
++			 * as invalid multibyte characters could e.g. be used to "skip"
+ 			 * over quote characters, e.g. when parsing
+ 			 * character-by-character.
+ 			 *
+-			 * Replace the bytes corresponding to the invalid character with
+-			 * an invalid sequence, for the same reason as above.
++			 * Replace the character's first byte with an invalid sequence.
++			 * The invalid sequence ensures that the escaped string will
++			 * trigger an error on the server-side, even if we can't directly
++			 * report an error here.
++			 *
++			 * We know there's enough space for the invalid sequence because
++			 * the "target" buffer is 2 * length + 2 long, and at worst we're
++			 * replacing a single input byte with two invalid bytes.
+ 			 *
+ 			 * It would be a bit faster to verify the whole string the first
+ 			 * time we encounter a set highbit, but this way we can replace
+-			 * just the invalid characters, which probably makes it easier for
+-			 * users to find the invalidly encoded portion of a larger string.
++			 * just the invalid data, which probably makes it easier for users
++			 * to find the invalidly encoded portion of a larger string.
+ 			 */
+ 			pg_encoding_set_invalid(encoding, target);
+ 			target += 2;
+-			remaining -= charlen;
+ 
+ 			/*
+-			 * Copy the rest of the string after the invalid multi-byte
+-			 * character.
++			 * Handle the following bytes as if this byte didn't exist. That's
++			 * safer in case the subsequent bytes contain important characters
++			 * for the caller (e.g. '>' in html).
+ 			 */
+-			source += charlen;
++			source++;
++			remaining--;
+ 		}
+ 		else
+ 		{
+diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
+index 97cd2c53673..a29d19a6268 100644
+--- a/src/interfaces/libpq/fe-exec.c
++++ b/src/interfaces/libpq/fe-exec.c
+@@ -3349,6 +3349,7 @@ PQescapeStringInternal(PGconn *conn,
+ 	const char *source = from;
+ 	char	   *target = to;
+ 	size_t		remaining = strnlen(from, length);
++	bool		already_complained = false;
+ 
+ 	if (error)
+ 		*error = 0;
+@@ -3379,15 +3380,20 @@ PQescapeStringInternal(PGconn *conn,
+               pg_encoding_verifymbchar(encoding, source, charlen) == -1)
+ 		{
+ 			/*
+-			 * If the character is longer than the available input, report an
+-			 * error if possible, and replace the string with an invalid
+-			 * sequence. The invalid sequence ensures that the escaped string
+-			 * will trigger an error on the server-side, even if we can't
+-			 * directly report an error here.
++			 * Multibyte character is invalid.  It's important to verify that
++			 * as invalid multibyte characters could e.g. be used to "skip"
++			 * over quote characters, e.g. when parsing
++			 * character-by-character.
++			 *
++			 * Report an error if possible, and replace the character's first
++			 * byte with an invalid sequence. The invalid sequence ensures
++			 * that the escaped string will trigger an error on the
++			 * server-side, even if we can't directly report an error here.
+ 			 *
+ 			 * This isn't *that* crucial when we can report an error to the
+-			 * caller, but if we can't, the caller will use this string
+-			 * unmodified and it needs to be safe for parsing.
++			 * caller; but if we can't or the caller ignores it, the caller
++			 * will use this string unmodified and it needs to be safe for
++			 * parsing.
+ 			 *
+ 			 * We know there's enough space for the invalid sequence because
+ 			 * the "to" buffer needs to be at least 2 * length + 1 long, and
+@@ -3404,8 +3410,6 @@ PQescapeStringInternal(PGconn *conn,
+             source++;
+             remaining--;
+ 
+-			/* there's no more input data, so we can stop */
+-			break;
+ 		}
+ 		else
+ 		{
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 21118244dad366d20e1d11549df03dd56e76dbaa Mon Sep 17 00:00:00 2001
+From: Tom Lane <tgl@sss.pgh.pa.us>
+Date: Sun, 16 Feb 2025 12:46:35 -0500
+Subject: [PATCH 6/8] In fmtIdEnc(), handle failure of enlargePQExpBuffer().
+
+Coverity complained that we weren't doing that, and it's right.
+
+This fix just makes fmtIdEnc() honor the general convention that OOM
+causes a PQExpBuffer to become marked "broken", without any immediate
+error.  In the pretty-unlikely case that we actually did hit OOM here,
+the end result would be to return an empty string to the caller,
+probably resulting in invalid SQL syntax in an issued command (if
+nothing else went wrong, which is even more unlikely).  It's tempting
+to throw an "out of memory" error if the buffer becomes broken, but
+there's not a lot of point in doing that only here and not in hundreds
+of other PQExpBuffer-using places in pg_dump and similar callers.
+The whole issue could do with some non-time-crunched redesign, perhaps.
+
+This is a followup to the fixes for CVE-2025-1094, and should be
+included if cherry-picking those fixes.
+---
+ src/fe_utils/string_utils.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
+index a2d5ccd1e28..fe280df3c0f 100644
+--- a/src/fe_utils/string_utils.c
++++ b/src/fe_utils/string_utils.c
+@@ -202,11 +202,13 @@ fmtIdEnc(const char *rawid, int encoding)
+ 				 * easier for users to find the invalidly encoded portion of a
+ 				 * larger string.
+ 				 */
+-				enlargePQExpBuffer(id_return, 2);
+-				pg_encoding_set_invalid(encoding,
+-										id_return->data + id_return->len);
+-				id_return->len += 2;
+-				id_return->data[id_return->len] = '\0';
++				if (enlargePQExpBuffer(id_return, 2))
++				{
++					pg_encoding_set_invalid(encoding,
++											id_return->data + id_return->len);
++					id_return->len += 2;
++					id_return->data[id_return->len] = '\0';
++				}
+ 
+ 				/*
+ 				 * Handle the following bytes as if this byte didn't exist.
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 6f42371a3c3911299c081afe3478022c496b07a9 Mon Sep 17 00:00:00 2001
+From: Filip Janus <fjanus@redhat.com>
+Date: Mon, 17 Mar 2025 18:14:05 +0100
+Subject: [PATCH 7/8] Backport multiple changes from postgresql13; in particular,
+ wchar.c functionality was moved from the backend to the common directory, which
+ means that functionality can be used by the server and also by libpq. Due to the
+ necessary changes there are a couple of "reverts" of previous commits in
+ src/backend/utils/mb/wchar.c, but that is expected because it is now linked with
+ the implementation from common/wchar.c instead of src/backend/utils/mb/wchar.c
+
+---
+ src/backend/utils/mb/wchar.c                 |  101 +-
+ src/bin/scripts/dropdb.c                     |    5 +-
+ src/common/Makefile                          |    2 +-
+ src/common/wchar.c                           | 1728 ++++++++++++++++++
+ src/include/common/unicode_combining_table.h |  196 ++
+ src/include/mb/pg_wchar.h                    |    4 +
+ src/interfaces/libpq/fe-exec.c               |    7 +-
+ 7 files changed, 1958 insertions(+), 85 deletions(-)
+ create mode 100644 src/common/wchar.c
+ create mode 100644 src/include/common/unicode_combining_table.h
+
+diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
+index 872241cc804..1ca6094d2a3 100644
+--- a/src/backend/utils/mb/wchar.c
++++ b/src/backend/utils/mb/wchar.c
+@@ -14,25 +14,6 @@
+ #include "mb/pg_wchar.h"
+ 
+ 
+-/*
+- * In today's multibyte encodings other than UTF8, this two-byte sequence
+- * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
+- *
+- * For historical reasons, several verifychar implementations opt to reject
+- * this pair specifically.  Byte pair range constraints, in encoding
+- * originator documentation, always excluded this pair.  No core conversion
+- * could translate it.  However, longstanding verifychar implementations
+- * accepted any non-NUL byte.  big5_to_euc_tw and big5_to_mic even translate
+- * pairs not valid per encoding originator documentation.  To avoid tightening
+- * core or non-core conversions in a security patch, we sought this one pair.
+- *
+- * PQescapeString() historically used spaces for BYTE1; many other values
+- * could suffice for BYTE1.
+- */
+-#define NONUTF8_INVALID_BYTE0 (0x8d)
+-#define NONUTF8_INVALID_BYTE1 (' ')
+-
+-
+ /*
+  * Operations on multi-byte encodings are driven by a table of helper
+  * functions.
+@@ -496,7 +477,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+ /*
+  * Map a Unicode code point to UTF-8.  utf8string must have 4 bytes of
+  * space allocated.
+- */
++ *
+ unsigned char *
+ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+ {
+@@ -525,7 +506,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+ 
+ 	return utf8string;
+ }
+-
++*/
+ /*
+  * Trivial conversion from pg_wchar to UTF-8.
+  * caller should allocate enough space for "to"
+@@ -562,7 +543,7 @@ pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
+  *
+  * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
+  * other places would need to be fixed to change this.
+- */
++ *
+ int
+ pg_utf_mblen(const unsigned char *s)
+ {
+@@ -586,7 +567,7 @@ pg_utf_mblen(const unsigned char *s)
+ 		len = 1;
+ 	return len;
+ }
+-
++*/
+ /*
+  * This is an implementation of wcwidth() and wcswidth() as defined in
+  * "The Single UNIX Specification, Version 2, The Open Group, 1997"
+@@ -765,7 +746,7 @@ ucs_wcwidth(pg_wchar ucs)
+  * This is a one-character version of pg_utf2wchar_with_len.
+  *
+  * No error checks here, c must point to a long-enough string.
+- */
++ *
+ pg_wchar
+ utf8_to_unicode(const unsigned char *c)
+ {
+@@ -784,10 +765,10 @@ utf8_to_unicode(const unsigned char *c)
+ 						   ((c[2] & 0x3f) << 6) |
+ 						   (c[3] & 0x3f));
+ 	else
+-		/* that is an invalid code on purpose */
++	//	 that is an invalid code on purpose
+ 		return 0xffffffff;
+ }
+-
++*/
+ static int
+ pg_utf_dsplen(const unsigned char *s)
+ {
+@@ -917,7 +898,7 @@ pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
+ 	*to = 0;
+ 	return cnt;
+ }
+-
++/*
+ int
+ pg_mule_mblen(const unsigned char *s)
+ {
+@@ -932,9 +913,9 @@ pg_mule_mblen(const unsigned char *s)
+ 	else if (IS_LCPRV2(*s))
+ 		len = 4;
+ 	else
+-		len = 1;				/* assume ASCII */
++		len = 1;			
+ 	return len;
+-}
++}*/
+ 
+ static int
+ pg_mule_dsplen(const unsigned char *s)
+@@ -1413,11 +1394,6 @@ pg_big5_verifier(const unsigned char *s, int len)
+ 	if (len < l)
+ 		return -1;
+ 
+-	if (l == 2 &&
+-		s[0] == NONUTF8_INVALID_BYTE0 &&
+-		s[1] == NONUTF8_INVALID_BYTE1)
+-		return -1;
+-
+ 	while (--l > 0)
+ 	{
+ 		if (*++s == '\0')
+@@ -1438,11 +1414,6 @@ pg_gbk_verifier(const unsigned char *s, int len)
+ 	if (len < l)
+ 		return -1;
+ 
+-	if (l == 2 &&
+-		s[0] == NONUTF8_INVALID_BYTE0 &&
+-		s[1] == NONUTF8_INVALID_BYTE1)
+-		return -1;
+-
+ 	while (--l > 0)
+ 	{
+ 		if (*++s == '\0')
+@@ -1463,11 +1434,6 @@ pg_uhc_verifier(const unsigned char *s, int len)
+ 	if (len < l)
+ 		return -1;
+ 
+-	if (l == 2 &&
+-		s[0] == NONUTF8_INVALID_BYTE0 &&
+-		s[1] == NONUTF8_INVALID_BYTE1)
+-		return -1;
+-
+ 	while (--l > 0)
+ 	{
+ 		if (*++s == '\0')
+@@ -1535,7 +1501,7 @@ pg_utf8_verifier(const unsigned char *s, int len)
+  *
+  * length is assumed to have been obtained by pg_utf_mblen(), and the
+  * caller must have checked that that many bytes are present in the buffer.
+- */
++ *
+ bool
+ pg_utf8_islegal(const unsigned char *source, int length)
+ {
+@@ -1544,18 +1510,15 @@ pg_utf8_islegal(const unsigned char *source, int length)
+ 	switch (length)
+ 	{
+ 		default:
+-			/* reject lengths 5 and 6 for now */
+ 			return false;
+ 		case 4:
+ 			a = source[3];
+ 			if (a < 0x80 || a > 0xBF)
+ 				return false;
+-			/* FALL THRU */
+ 		case 3:
+ 			a = source[2];
+ 			if (a < 0x80 || a > 0xBF)
+ 				return false;
+-			/* FALL THRU */
+ 		case 2:
+ 			a = source[1];
+ 			switch (*source)
+@@ -1581,7 +1544,6 @@ pg_utf8_islegal(const unsigned char *source, int length)
+ 						return false;
+ 					break;
+ 			}
+-			/* FALL THRU */
+ 		case 1:
+ 			a = *source;
+ 			if (a >= 0x80 && a < 0xC2)
+@@ -1592,7 +1554,7 @@ pg_utf8_islegal(const unsigned char *source, int length)
+ 	}
+ 	return true;
+ }
+-
++*/
+ #ifndef FRONTEND
+ 
+ /*
+@@ -1802,26 +1764,13 @@ pg_eucjp_increment(unsigned char *charptr, int length)
+ #endif							/* !FRONTEND */
+ 
+ 
+-/*
+- * Fills the provided buffer with two bytes such that:
+- *   pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
+- */
+-void
+-pg_encoding_set_invalid(int encoding, char *dst)
+-{
+-	Assert(pg_encoding_max_length(encoding) > 1);
+-
+-	dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);
+-	dst[1] = NONUTF8_INVALID_BYTE1;
+-}
+-
+ /*
+  *-------------------------------------------------------------------
+  * encoding info table
+  * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
+  *-------------------------------------------------------------------
+  */
+-const pg_wchar_tbl pg_wchar_table[] = {
++const pg_wchar_tbl pg_wchar_table1[] = {
+ 	{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
+ 	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},	/* PG_EUC_JP */
+ 	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2},	/* PG_EUC_CN */
+@@ -1875,7 +1824,7 @@ pg_mic_mblen(const unsigned char *mbstr)
+ 
+ /*
+  * Returns the byte length of a multibyte character.
+- */
++ *
+ int
+ pg_encoding_mblen(int encoding, const char *mbstr)
+ {
+@@ -1883,10 +1832,10 @@ pg_encoding_mblen(int encoding, const char *mbstr)
+ 			pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
+ 			pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
+ }
+-
++*/
+ /*
+  * Returns the display length of a multibyte character.
+- */
++ *
+ int
+ pg_encoding_dsplen(int encoding, const char *mbstr)
+ {
+@@ -1894,12 +1843,12 @@ pg_encoding_dsplen(int encoding, const char *mbstr)
+ 			pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
+ 			pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
+ }
+-
++*/
+ /*
+  * Verify the first multibyte character of the given string.
+  * Return its byte length if good, -1 if bad.  (See comments above for
+  * full details of the mbverify API.)
+- */
++ *
+ int
+ pg_encoding_verifymb(int encoding, const char *mbstr, int len)
+ {
+@@ -1907,24 +1856,18 @@ pg_encoding_verifymb(int encoding, const char *mbstr, int len)
+ 			pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
+ 			pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
+ }
+-
++*/
+ /*
+  * fetch maximum length of a given encoding
+- */
++ *
+ int
+ pg_encoding_max_length(int encoding)
+ {
+ 	Assert(PG_VALID_ENCODING(encoding));
+ 
+-	/*
+-	 * Check for the encoding despite the assert, due to some mingw versions
+-	 * otherwise issuing bogus warnings.
+-	 */
+-	return PG_VALID_ENCODING(encoding) ?
+-		pg_wchar_table[encoding].maxmblen :
+-		pg_wchar_table[PG_SQL_ASCII].maxmblen;
++	return pg_wchar_table[encoding].maxmblen;
+ }
+-
++*/
+ #ifndef FRONTEND
+ 
+ /*
+diff --git a/src/bin/scripts/dropdb.c b/src/bin/scripts/dropdb.c
+index 0d636d0ef46..ed3a2c8c19a 100644
+--- a/src/bin/scripts/dropdb.c
++++ b/src/bin/scripts/dropdb.c
+@@ -141,10 +141,9 @@ main(int argc, char *argv[])
+ 	conn = connectMaintenanceDatabase(&cparams, progname, echo);
+ 
+ 	initPQExpBuffer(&sql);
+-	appendPQExpBuffer(&sql, "DROP DATABASE %s%s%s;",
++	appendPQExpBuffer(&sql, "DROP DATABASE %s%s;",
+ 					  (if_exists ? "IF EXISTS " : ""),
+-					  fmtIdEnc(dbname, PQclientEncoding(conn)),
+-					  force ? " WITH (FORCE)" : "");
++					  fmtIdEnc(dbname, PQclientEncoding(conn)));
+ 
+ 	if (echo)
+ 		printf("%s\n", sql.data);
+diff --git a/src/common/Makefile b/src/common/Makefile
+index 2f22b9b101d..c26d938b31e 100644
+--- a/src/common/Makefile
++++ b/src/common/Makefile
+@@ -50,7 +50,7 @@ OBJS_COMMON = base64.o config_info.o controldata_utils.o d2s.o exec.o f2s.o \
+ 	file_perm.o ip.o keywords.o kwlookup.o link-canary.o md5.o \
+ 	pg_lzcompress.o pgfnames.o psprintf.o relpath.o \
+ 	rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \
+-	username.o wait_error.o
++	username.o wait_error.o wchar.o
+ 
+ ifeq ($(with_openssl),yes)
+ OBJS_COMMON += sha2_openssl.o
+diff --git a/src/common/wchar.c b/src/common/wchar.c
+new file mode 100644
+index 00000000000..85822b2c3b5
+--- /dev/null
++++ b/src/common/wchar.c
+@@ -0,0 +1,1728 @@
++/*-------------------------------------------------------------------------
++ *
++ * wchar.c
++ *	  Functions for working with multibyte characters in various encodings.
++ *
++ * Portions Copyright (c) 1998-2020, PostgreSQL Global Development Group
++ *
++ * IDENTIFICATION
++ *	  src/common/wchar.c
++ *
++ *-------------------------------------------------------------------------
++ */
++#include "c.h"
++
++#include "mb/pg_wchar.h"
++
++
++/*
++ * In today's multibyte encodings other than UTF8, this two-byte sequence
++ * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
++ *
++ * For historical reasons, several verifychar implementations opt to reject
++ * this pair specifically.  Byte pair range constraints, in encoding
++ * originator documentation, always excluded this pair.  No core conversion
++ * could translate it.  However, longstanding verifychar implementations
++ * accepted any non-NUL byte.  big5_to_euc_tw and big5_to_mic even translate
++ * pairs not valid per encoding originator documentation.  To avoid tightening
++ * core or non-core conversions in a security patch, we sought this one pair.
++ *
++ * PQescapeString() historically used spaces for BYTE1; many other values
++ * could suffice for BYTE1.
++ */
++#define NONUTF8_INVALID_BYTE0 (0x8d)
++#define NONUTF8_INVALID_BYTE1 (' ')
++
++
++/*
++ * Operations on multi-byte encodings are driven by a table of helper
++ * functions.
++ *
++ * To add an encoding support, define mblen(), dsplen() and verifier() for
++ * the encoding.  For server-encodings, also define mb2wchar() and wchar2mb()
++ * conversion functions.
++ *
++ * These functions generally assume that their input is validly formed.
++ * The "verifier" functions, further down in the file, have to be more
++ * paranoid.
++ *
++ * We expect that mblen() does not need to examine more than the first byte
++ * of the character to discover the correct length.  GB18030 is an exception
++ * to that rule, though, as it also looks at second byte.  But even that
++ * behaves in a predictable way, if you only pass the first byte: it will
++ * treat 4-byte encoded characters as two 2-byte encoded characters, which is
++ * good enough for all current uses.
++ *
++ * Note: for the display output of psql to work properly, the return values
++ * of the dsplen functions must conform to the Unicode standard. In particular
++ * the NUL character is zero width and control characters are generally
++ * width -1. It is recommended that non-ASCII encodings refer their ASCII
++ * subset to the ASCII routines to ensure consistency.
++ */
++
++/*
++ * SQL/ASCII
++ */
++static int
++pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;
++
++	while (len > 0 && *from)
++	{
++		*to++ = *from++;
++		len--;
++		cnt++;
++	}
++	*to = 0;
++	return cnt;
++}
++
++static int
++pg_ascii_mblen(const unsigned char *s)
++{
++	return 1;
++}
++
++static int
++pg_ascii_dsplen(const unsigned char *s)
++{
++	if (*s == '\0')
++		return 0;
++	if (*s < 0x20 || *s == 0x7f)
++		return -1;
++
++	return 1;
++}
++
++/*
++ * EUC
++ */
++static int
++pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;
++
++	while (len > 0 && *from)
++	{
++		if (*from == SS2 && len >= 2)	/* JIS X 0201 (so called "1 byte
++										 * KANA") */
++		{
++			from++;
++			*to = (SS2 << 8) | *from++;
++			len -= 2;
++		}
++		else if (*from == SS3 && len >= 3)	/* JIS X 0212 KANJI */
++		{
++			from++;
++			*to = (SS3 << 16) | (*from++ << 8);
++			*to |= *from++;
++			len -= 3;
++		}
++		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
++		{
++			*to = *from++ << 8;
++			*to |= *from++;
++			len -= 2;
++		}
++		else					/* must be ASCII */
++		{
++			*to = *from++;
++			len--;
++		}
++		to++;
++		cnt++;
++	}
++	*to = 0;
++	return cnt;
++}
++
++static inline int
++pg_euc_mblen(const unsigned char *s)
++{
++	int			len;
++
++	if (*s == SS2)
++		len = 2;
++	else if (*s == SS3)
++		len = 3;
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;
++	else
++		len = 1;
++	return len;
++}
++
++static inline int
++pg_euc_dsplen(const unsigned char *s)
++{
++	int			len;
++
++	if (*s == SS2)
++		len = 2;
++	else if (*s == SS3)
++		len = 2;
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;
++	else
++		len = pg_ascii_dsplen(s);
++	return len;
++}
++
++/*
++ * EUC_JP
++ */
++static int
++pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	return pg_euc2wchar_with_len(from, to, len);
++}
++
++static int
++pg_eucjp_mblen(const unsigned char *s)
++{
++	return pg_euc_mblen(s);
++}
++
++static int
++pg_eucjp_dsplen(const unsigned char *s)
++{
++	int			len;
++
++	if (*s == SS2)
++		len = 1;
++	else if (*s == SS3)
++		len = 2;
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;
++	else
++		len = pg_ascii_dsplen(s);
++	return len;
++}
++
++/*
++ * EUC_KR
++ */
++static int
++pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	return pg_euc2wchar_with_len(from, to, len);
++}
++
++static int
++pg_euckr_mblen(const unsigned char *s)
++{
++	return pg_euc_mblen(s);
++}
++
++static int
++pg_euckr_dsplen(const unsigned char *s)
++{
++	return pg_euc_dsplen(s);
++}
++
++/*
++ * EUC_CN
++ *
++ */
++static int
++pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;		/* number of pg_wchars stored so far */
++
++	while (len > 0 && *from)	/* stop at end of input or at a zero byte */
++	{
++		if (*from == SS2 && len >= 3)	/* code set 2 (unused?) */
++		{
++			from++;
++			*to = (SS2 << 16) | (*from++ << 8);
++			*to |= *from++;
++			len -= 3;
++		}
++		else if (*from == SS3 && len >= 3)	/* code set 3 (unused ?) */
++		{
++			from++;
++			*to = (SS3 << 16) | (*from++ << 8);
++			*to |= *from++;
++			len -= 3;
++		}
++		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
++		{
++			*to = *from++ << 8;
++			*to |= *from++;
++			len -= 2;
++		}
++		else
++		{
++			*to = *from++;		/* single byte, or a lead byte with too few bytes left */
++			len--;
++		}
++		to++;
++		cnt++;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++static int
++pg_euccn_mblen(const unsigned char *s)
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* no special case for SS2/SS3 in this function */
++	else
++		len = 1;
++	return len;
++}
++
++static int
++pg_euccn_dsplen(const unsigned char *s)
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* any high-bit lead byte yields width 2 */
++	else
++		len = pg_ascii_dsplen(s);	/* single-byte case is delegated to the ASCII rule */
++	return len;
++}
++
++/*
++ * EUC_TW
++ *
++ */
++static int
++pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;		/* number of pg_wchars stored so far */
++
++	while (len > 0 && *from)	/* stop at end of input or at a zero byte */
++	{
++		if (*from == SS2 && len >= 4)	/* code set 2 */
++		{
++			from++;
++			*to = (((uint32) SS2) << 24) | (*from++ << 16);
++			*to |= *from++ << 8;
++			*to |= *from++;
++			len -= 4;
++		}
++		else if (*from == SS3 && len >= 3)	/* code set 3 (unused?) */
++		{
++			from++;
++			*to = (SS3 << 16) | (*from++ << 8);
++			*to |= *from++;
++			len -= 3;
++		}
++		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
++		{
++			*to = *from++ << 8;
++			*to |= *from++;
++			len -= 2;
++		}
++		else
++		{
++			*to = *from++;		/* single byte, or a lead byte with too few bytes left */
++			len--;
++		}
++		to++;
++		cnt++;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++static int
++pg_euctw_mblen(const unsigned char *s)
++{
++	int			len;
++
++	if (*s == SS2)
++		len = 4;				/* SS2 lead byte: four-byte character */
++	else if (*s == SS3)
++		len = 3;				/* SS3 lead byte: three-byte character */
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* any other high-bit lead byte: two bytes */
++	else
++		len = 1;				/* no high bit: single byte */
++	return len;
++}
++
++static int
++pg_euctw_dsplen(const unsigned char *s)
++{
++	int			len;
++
++	if (*s == SS2)
++		len = 2;
++	else if (*s == SS3)
++		len = 2;
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* every multibyte lead byte yields width 2 */
++	else
++		len = pg_ascii_dsplen(s);	/* single-byte case is delegated to the ASCII rule */
++	return len;
++}
++
++/*
++ * Convert pg_wchar to EUC_* encoding; returns the number of bytes written.
++ * Caller must allocate enough space for "to", including a trailing zero!
++ * len: length of "from" in pg_wchars; a zero pg_wchar also ends the input.
++ * "from" is not necessarily null terminated.
++ */
++static int
++pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
++{
++	int			cnt = 0;		/* bytes emitted, excluding the terminator */
++
++	while (len > 0 && *from)
++	{
++		unsigned char c;
++
++		if ((c = (*from >> 24)))	/* bits 24 and up nonzero: emit four bytes */
++		{
++			*to++ = c;
++			*to++ = (*from >> 16) & 0xff;
++			*to++ = (*from >> 8) & 0xff;
++			*to++ = *from & 0xff;
++			cnt += 4;
++		}
++		else if ((c = (*from >> 16)))	/* bits 16-23 nonzero: emit three bytes */
++		{
++			*to++ = c;
++			*to++ = (*from >> 8) & 0xff;
++			*to++ = *from & 0xff;
++			cnt += 3;
++		}
++		else if ((c = (*from >> 8)))	/* bits 8-15 nonzero: emit two bytes */
++		{
++			*to++ = c;
++			*to++ = *from & 0xff;
++			cnt += 2;
++		}
++		else
++		{
++			*to++ = *from;		/* value fits in one byte */
++			cnt++;
++		}
++		from++;
++		len--;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++
++/*
++ * JOHAB
++ */
++static int
++pg_johab_mblen(const unsigned char *s)
++{
++	return pg_euc_mblen(s);		/* JOHAB reuses the shared EUC length rule */
++}
++
++static int
++pg_johab_dsplen(const unsigned char *s)
++{
++	return pg_euc_dsplen(s);	/* JOHAB reuses the shared EUC display-width rule */
++}
++
++/*
++ * Convert a UTF8 string to pg_wchar (UCS-4); returns the number of pg_wchars.
++ * Caller must allocate enough space for "to", including a trailing zero!
++ * len: length of "from" in bytes; a zero byte also ends the input.
++ * "from" is not necessarily null terminated.
++ */
++static int
++pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;
++	uint32		c1,
++				c2,
++				c3,
++				c4;
++
++	while (len > 0 && *from)
++	{
++		if ((*from & 0x80) == 0)	/* 0xxxxxxx: one byte */
++		{
++			*to = *from++;
++			len--;
++		}
++		else if ((*from & 0xe0) == 0xc0)	/* 110xxxxx: two bytes */
++		{
++			if (len < 2)
++				break;			/* drop trailing incomplete char */
++			c1 = *from++ & 0x1f;
++			c2 = *from++ & 0x3f;	/* continuation byte is masked, not validated */
++			*to = (c1 << 6) | c2;
++			len -= 2;
++		}
++		else if ((*from & 0xf0) == 0xe0)	/* 1110xxxx: three bytes */
++		{
++			if (len < 3)
++				break;			/* drop trailing incomplete char */
++			c1 = *from++ & 0x0f;
++			c2 = *from++ & 0x3f;
++			c3 = *from++ & 0x3f;
++			*to = (c1 << 12) | (c2 << 6) | c3;
++			len -= 3;
++		}
++		else if ((*from & 0xf8) == 0xf0)	/* 11110xxx: four bytes */
++		{
++			if (len < 4)
++				break;			/* drop trailing incomplete char */
++			c1 = *from++ & 0x07;
++			c2 = *from++ & 0x3f;
++			c3 = *from++ & 0x3f;
++			c4 = *from++ & 0x3f;
++			*to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
++			len -= 4;
++		}
++		else
++		{
++			/* treat a bogus char as length 1; not ours to raise error */
++			*to = *from++;
++			len--;
++		}
++		to++;
++		cnt++;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++
++/*
++ * Map a Unicode code point to UTF-8 (1-4 bytes, no terminator written).
++ * utf8string must have 4 bytes of space allocated; it is also returned.
++ */
++unsigned char *
++unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
++{
++	if (c <= 0x7F)				/* one byte */
++	{
++		utf8string[0] = c;
++	}
++	else if (c <= 0x7FF)		/* two bytes */
++	{
++		utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
++		utf8string[1] = 0x80 | (c & 0x3F);
++	}
++	else if (c <= 0xFFFF)		/* three bytes */
++	{
++		utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
++		utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
++		utf8string[2] = 0x80 | (c & 0x3F);
++	}
++	else						/* everything larger: four bytes, no upper-bound check */
++	{
++		utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
++		utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
++		utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
++		utf8string[3] = 0x80 | (c & 0x3F);
++	}
++
++	return utf8string;
++}
++
++/*
++ * Trivial conversion from pg_wchar to UTF-8; returns the number of bytes.
++ * Caller should allocate enough space for "to", plus the trailing zero byte.
++ * len: length of "from" in pg_wchars; a zero pg_wchar also ends the input.
++ * "from" is not necessarily null terminated.
++ */
++static int
++pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
++{
++	int			cnt = 0;		/* bytes emitted, excluding the terminator */
++
++	while (len > 0 && *from)
++	{
++		int			char_len;
++
++		unicode_to_utf8(*from, to);
++		char_len = pg_utf_mblen(to);	/* length is read back from the lead byte just written */
++		cnt += char_len;
++		to += char_len;
++		from++;
++		len--;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++/*
++ * Return the byte length of a UTF8 character pointed to by s
++ *
++ * Note: in the current implementation we do not support UTF8 sequences
++ * of more than 4 bytes; hence do NOT return a value larger than 4.
++ * We return "1" for any leading byte that is either flat-out illegal or
++ * indicates a length larger than we support.
++ *
++ * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
++ * other places would need to be fixed to change this.
++ */
++int
++pg_utf_mblen(const unsigned char *s)	/* only the lead byte *s is examined */
++{
++	int			len;
++
++	if ((*s & 0x80) == 0)
++		len = 1;				/* 0xxxxxxx */
++	else if ((*s & 0xe0) == 0xc0)
++		len = 2;				/* 110xxxxx */
++	else if ((*s & 0xf0) == 0xe0)
++		len = 3;				/* 1110xxxx */
++	else if ((*s & 0xf8) == 0xf0)
++		len = 4;				/* 11110xxx */
++#ifdef NOT_USED
++	else if ((*s & 0xfc) == 0xf8)
++		len = 5;
++	else if ((*s & 0xfe) == 0xfc)
++		len = 6;
++#endif
++	else
++		len = 1;				/* illegal or unsupported lead byte */
++	return len;
++}
++
++/*
++ * This is an implementation of wcwidth() and wcswidth() as defined in
++ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
++ * <http://www.unix.org/online.html>
++ *
++ * Markus Kuhn -- 2001-09-08 -- public domain
++ *
++ * customised for PostgreSQL
++ *
++ * original available at : http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
++ */
++
++struct mbinterval				/* one closed range [first, last] of code points */
++{
++	unsigned short first;		/* lowest value in the range */
++	unsigned short last;		/* highest value in the range (inclusive, see mbbisearch) */
++};
++
++/* binary search in interval table; max = last valid index; returns 1 if ucs is inside an interval, else 0 */
++static int
++mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
++{
++	int			min = 0;
++	int			mid;
++
++	if (ucs < table[0].first || ucs > table[max].last)	/* quick reject outside the table's span */
++		return 0;
++	while (max >= min)
++	{
++		mid = (min + max) / 2;
++		if (ucs > table[mid].last)
++			min = mid + 1;
++		else if (ucs < table[mid].first)
++			max = mid - 1;
++		else
++			return 1;			/* first <= ucs <= last */
++	}
++
++	return 0;
++}
++
++
++/* The following functions define the column width of an ISO 10646
++ * character as follows:
++ *
++ *	  - The null character (U+0000) has a column width of 0.
++ *
++ *	  - Other C0/C1 control characters and DEL will lead to a return
++ *		value of -1.
++ *
++ *	  - Non-spacing and enclosing combining characters (general
++ *		category code Mn or Me in the Unicode database) have a
++ *		column width of 0.
++ *
++ *	  - Other format characters (general category code Cf in the Unicode
++ *		database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
++ *
++ *	  - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
++ *		have a column width of 0.
++ *
++ *	  - Spacing characters in the East Asian Wide (W) or East Asian
++ *		FullWidth (F) category as defined in Unicode Technical
++ *		Report #11 have a column width of 2.
++ *
++ *	  - All remaining characters (including all printable
++ *		ISO 8859-1 and WGL4 characters, Unicode control characters,
++ *		etc.) have a column width of 1.
++ *
++ * This implementation assumes that wchar_t characters are encoded
++ * in ISO 10646.
++ */
++
++static int
++ucs_wcwidth(pg_wchar ucs)
++{
++#include "common/unicode_combining_table.h"	/* presumably supplies the "combining" table used below */
++
++	/* the null character has width zero */
++	if (ucs == 0)
++		return 0;
++
++	if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)	/* control characters or out of range */
++		return -1;
++
++	/* binary search in table of non-spacing characters */
++	if (mbbisearch(ucs, combining,
++				   sizeof(combining) / sizeof(struct mbinterval) - 1))
++		return 0;
++
++	/*
++	 * if we arrive here, ucs is not a combining or C0/C1 control character
++	 */
++
++	return 1 +					/* the parenthesized test below adds 1 for wide characters */
++		(ucs >= 0x1100 &&
++		 (ucs <= 0x115f ||		/* Hangul Jamo init. consonants */
++		  (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
++		   ucs != 0x303f) ||	/* CJK ... Yi */
++		  (ucs >= 0xac00 && ucs <= 0xd7a3) ||	/* Hangul Syllables */
++		  (ucs >= 0xf900 && ucs <= 0xfaff) ||	/* CJK Compatibility
++												 * Ideographs */
++		  (ucs >= 0xfe30 && ucs <= 0xfe6f) ||	/* CJK Compatibility Forms */
++		  (ucs >= 0xff00 && ucs <= 0xff5f) ||	/* Fullwidth Forms */
++		  (ucs >= 0xffe0 && ucs <= 0xffe6) ||
++		  (ucs >= 0x20000 && ucs <= 0x2ffff)));
++}
++
++/*
++ * Convert a UTF-8 character to a Unicode code point.
++ * This is a one-character version of pg_utf2wchar_with_len.
++ *
++ * No error checks here, c must point to a long-enough string.
++ */
++pg_wchar
++utf8_to_unicode(const unsigned char *c)
++{
++	if ((*c & 0x80) == 0)		/* one byte */
++		return (pg_wchar) c[0];
++	else if ((*c & 0xe0) == 0xc0)	/* two bytes */
++		return (pg_wchar) (((c[0] & 0x1f) << 6) |
++						   (c[1] & 0x3f));
++	else if ((*c & 0xf0) == 0xe0)	/* three bytes */
++		return (pg_wchar) (((c[0] & 0x0f) << 12) |
++						   ((c[1] & 0x3f) << 6) |
++						   (c[2] & 0x3f));
++	else if ((*c & 0xf8) == 0xf0)	/* four bytes */
++		return (pg_wchar) (((c[0] & 0x07) << 18) |
++						   ((c[1] & 0x3f) << 12) |
++						   ((c[2] & 0x3f) << 6) |
++						   (c[3] & 0x3f));
++	else
++		/* that is an invalid code on purpose */
++		return 0xffffffff;
++}
++
++static int
++pg_utf_dsplen(const unsigned char *s)
++{
++	return ucs_wcwidth(utf8_to_unicode(s));	/* decode one character, then look up its width (can be -1 or 0) */
++}
++
++/*
++ * Convert mule internal code to pg_wchar; returns the number of pg_wchars.
++ * Caller should allocate enough space for "to", plus the trailing zero.
++ * len: length of "from" in bytes; a zero byte also ends the input.
++ * "from" is not necessarily null terminated.
++ */
++static int
++pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;
++
++	while (len > 0 && *from)
++	{
++		if (IS_LC1(*from) && len >= 2)	/* lead byte + one data byte */
++		{
++			*to = *from++ << 16;
++			*to |= *from++;
++			len -= 2;
++		}
++		else if (IS_LCPRV1(*from) && len >= 3)	/* prefix byte, lead byte, one data byte */
++		{
++			from++;				/* the prefix byte itself is not stored */
++			*to = *from++ << 16;
++			*to |= *from++;
++			len -= 3;
++		}
++		else if (IS_LC2(*from) && len >= 3)	/* lead byte + two data bytes */
++		{
++			*to = *from++ << 16;
++			*to |= *from++ << 8;
++			*to |= *from++;
++			len -= 3;
++		}
++		else if (IS_LCPRV2(*from) && len >= 4)	/* prefix byte, lead byte, two data bytes */
++		{
++			from++;				/* the prefix byte itself is not stored */
++			*to = *from++ << 16;
++			*to |= *from++ << 8;
++			*to |= *from++;
++			len -= 4;
++		}
++		else
++		{						/* assume ASCII */
++			*to = (unsigned char) *from++;
++			len--;
++		}
++		to++;
++		cnt++;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++/*
++ * Convert pg_wchar to mule internal code; returns the number of bytes.
++ * Caller should allocate enough space for "to", plus the trailing zero.
++ * len: length of "from" in pg_wchars; a zero pg_wchar also ends the input.
++ * "from" is not necessarily null terminated.
++ */
++static int
++pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
++{
++	int			cnt = 0;		/* bytes emitted, excluding the terminator */
++
++	while (len > 0 && *from)
++	{
++		unsigned char lb;
++
++		lb = (*from >> 16) & 0xff;	/* bits 16-23 hold the lead byte */
++		if (IS_LC1(lb))
++		{
++			*to++ = lb;
++			*to++ = *from & 0xff;
++			cnt += 2;
++		}
++		else if (IS_LC2(lb))
++		{
++			*to++ = lb;
++			*to++ = (*from >> 8) & 0xff;
++			*to++ = *from & 0xff;
++			cnt += 3;
++		}
++		else if (IS_LCPRV1_A_RANGE(lb))
++		{
++			*to++ = LCPRV1_A;	/* re-emit the prefix byte that is not kept in the pg_wchar */
++			*to++ = lb;
++			*to++ = *from & 0xff;
++			cnt += 3;
++		}
++		else if (IS_LCPRV1_B_RANGE(lb))
++		{
++			*to++ = LCPRV1_B;
++			*to++ = lb;
++			*to++ = *from & 0xff;
++			cnt += 3;
++		}
++		else if (IS_LCPRV2_A_RANGE(lb))
++		{
++			*to++ = LCPRV2_A;
++			*to++ = lb;
++			*to++ = (*from >> 8) & 0xff;
++			*to++ = *from & 0xff;
++			cnt += 4;
++		}
++		else if (IS_LCPRV2_B_RANGE(lb))
++		{
++			*to++ = LCPRV2_B;
++			*to++ = lb;
++			*to++ = (*from >> 8) & 0xff;
++			*to++ = *from & 0xff;
++			cnt += 4;
++		}
++		else
++		{
++			*to++ = *from & 0xff;	/* no recognized lead byte: emit the low byte only */
++			cnt += 1;
++		}
++		from++;
++		len--;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++/* exported for direct use by conv.c; length is decided by the first byte alone */
++int
++pg_mule_mblen(const unsigned char *s)
++{
++	int			len;
++
++	if (IS_LC1(*s))
++		len = 2;
++	else if (IS_LCPRV1(*s))
++		len = 3;
++	else if (IS_LC2(*s))
++		len = 3;
++	else if (IS_LCPRV2(*s))
++		len = 4;
++	else
++		len = 1;				/* assume ASCII */
++	return len;
++}
++
++static int
++pg_mule_dsplen(const unsigned char *s)
++{
++	int			len;
++
++	/*
++	 * Note: it's not really appropriate to assume that all multibyte charsets
++	 * are double-wide on screen.  But this seems an okay approximation for
++	 * the MULE charsets we currently support.
++	 */
++
++	if (IS_LC1(*s))
++		len = 1;				/* LC1 and LCPRV1 characters: width 1 */
++	else if (IS_LCPRV1(*s))
++		len = 1;
++	else if (IS_LC2(*s))
++		len = 2;				/* LC2 and LCPRV2 characters: width 2 */
++	else if (IS_LCPRV2(*s))
++		len = 2;
++	else
++		len = 1;				/* assume ASCII */
++
++	return len;
++}
++
++/*
++ * ISO8859-1
++ */
++static int
++pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
++{
++	int			cnt = 0;
++
++	while (len > 0 && *from)	/* stop at end of input or at a zero byte */
++	{
++		*to++ = *from++;		/* each byte becomes one pg_wchar of the same value */
++		len--;
++		cnt++;
++	}
++	*to = 0;					/* terminate the output; not counted in cnt */
++	return cnt;
++}
++
++/*
++ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
++ * high bits.  Returns the number of bytes written (terminator excluded).
++ * Caller should allocate enough space for "to", plus the trailing zero.
++ * len: length of "from" in pg_wchars; a zero pg_wchar also ends the input.
++ * "from" is not necessarily null terminated.
++ */
++static int
++pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
++{
++	int			cnt = 0;
++
++	while (len > 0 && *from)
++	{
++		*to++ = *from++;		/* assignment to unsigned char keeps only the low byte */
++		len--;
++		cnt++;
++	}
++	*to = 0;
++	return cnt;
++}
++
++static int
++pg_latin1_mblen(const unsigned char *s)
++{
++	return 1;					/* every character is one byte; s is not examined */
++}
++
++static int
++pg_latin1_dsplen(const unsigned char *s)
++{
++	return pg_ascii_dsplen(s);	/* same display-width rule as plain ASCII */
++}
++
++/*
++ * SJIS
++ */
++static int
++pg_sjis_mblen(const unsigned char *s)	/* length is decided by the first byte alone */
++{
++	int			len;
++
++	if (*s >= 0xa1 && *s <= 0xdf)
++		len = 1;				/* 1 byte kana? */
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* kanji? */
++	else
++		len = 1;				/* should be ASCII */
++	return len;
++}
++
++static int
++pg_sjis_dsplen(const unsigned char *s)	/* display width, decided by the first byte alone */
++{
++	int			len;
++
++	if (*s >= 0xa1 && *s <= 0xdf)
++		len = 1;				/* 1 byte kana? */
++	else if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* kanji? */
++	else
++		len = pg_ascii_dsplen(s);	/* should be ASCII */
++	return len;
++}
++
++/*
++ * Big5
++ */
++static int
++pg_big5_mblen(const unsigned char *s)	/* length is decided by the first byte alone */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* kanji? */
++	else
++		len = 1;				/* should be ASCII */
++	return len;
++}
++
++static int
++pg_big5_dsplen(const unsigned char *s)	/* display width, decided by the first byte alone */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* kanji? */
++	else
++		len = pg_ascii_dsplen(s);	/* should be ASCII */
++	return len;
++}
++
++/*
++ * GBK
++ */
++static int
++pg_gbk_mblen(const unsigned char *s)	/* length is decided by the first byte alone */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* kanji? */
++	else
++		len = 1;				/* should be ASCII */
++	return len;
++}
++
++static int
++pg_gbk_dsplen(const unsigned char *s)	/* display width, decided by the first byte alone */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* kanji? */
++	else
++		len = pg_ascii_dsplen(s);	/* should be ASCII */
++	return len;
++}
++
++/*
++ * UHC
++ */
++static int
++pg_uhc_mblen(const unsigned char *s)	/* length is decided by the first byte alone */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* 2byte? */
++	else
++		len = 1;				/* should be ASCII */
++	return len;
++}
++
++static int
++pg_uhc_dsplen(const unsigned char *s)	/* display width, decided by the first byte alone */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;				/* 2byte? */
++	else
++		len = pg_ascii_dsplen(s);	/* should be ASCII */
++	return len;
++}
++
++/*
++ * GB18030
++ *	Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
++ */
++
++/*
++ * Unlike all other mblen() functions, this also looks at the second byte of
++ * the input.  However, if you only pass the first byte of a multi-byte
++ * string, and \0 as the second byte, this still works in a predictable way:
++ * a 4-byte character will be reported as two 2-byte characters.  That's
++ * enough for all current uses, as a client-only encoding.  It works that
++ * way, because in any valid 4-byte GB18030-encoded character, the third and
++ * fourth byte look like a 2-byte encoded character, when looked at
++ * separately.
++ */
++static int
++pg_gb18030_mblen(const unsigned char *s)	/* reads s[0] and, for high-bit lead bytes, s[1] */
++{
++	int			len;
++
++	if (!IS_HIGHBIT_SET(*s))
++		len = 1;				/* ASCII */
++	else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
++		len = 4;				/* second byte in 0x30..0x39 marks a four-byte character */
++	else
++		len = 2;
++	return len;
++}
++
++static int
++pg_gb18030_dsplen(const unsigned char *s)	/* display width; two- and four-byte forms both give 2 */
++{
++	int			len;
++
++	if (IS_HIGHBIT_SET(*s))
++		len = 2;
++	else
++		len = pg_ascii_dsplen(s);	/* ASCII */
++	return len;
++}
++
++/*
++ *-------------------------------------------------------------------
++ * multibyte sequence validators
++ *
++ * These functions accept "s", a pointer to the first byte of a string,
++ * and "len", the remaining length of the string.  If there is a validly
++ * encoded character beginning at *s, return its length in bytes; else
++ * return -1.
++ *
++ * The functions can assume that len > 0 and that *s != '\0', but they must
++ * test for and reject zeroes in any additional bytes of a multibyte character.
++ *
++ * Note that this definition allows the function for a single-byte
++ * encoding to be just "return 1".
++ *-------------------------------------------------------------------
++ */
++
++static int
++pg_ascii_verifier(const unsigned char *s, int len)
++{
++	return 1;					/* any single byte is accepted; s and len are unused */
++}
++
++#define IS_EUC_RANGE_VALID(c)	((c) >= 0xa1 && (c) <= 0xfe)	/* true for 0xa1..0xfe; evaluates c twice */
++
++static int
++pg_eucjp_verifier(const unsigned char *s, int len)
++{
++	int			l;				/* length of the character being verified */
++	unsigned char c1,
++				c2;
++
++	c1 = *s++;					/* lead byte selects the code set */
++
++	switch (c1)
++	{
++		case SS2:				/* JIS X 0201 */
++			l = 2;
++			if (l > len)
++				return -1;		/* character is cut off by end of input */
++			c2 = *s++;
++			if (c2 < 0xa1 || c2 > 0xdf)
++				return -1;
++			break;
++
++		case SS3:				/* JIS X 0212 */
++			l = 3;
++			if (l > len)
++				return -1;
++			c2 = *s++;
++			if (!IS_EUC_RANGE_VALID(c2))
++				return -1;
++			c2 = *s++;			/* c2 is reused for the third byte */
++			if (!IS_EUC_RANGE_VALID(c2))
++				return -1;
++			break;
++
++		default:
++			if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
++			{
++				l = 2;
++				if (l > len)
++					return -1;
++				if (!IS_EUC_RANGE_VALID(c1))
++					return -1;
++				c2 = *s++;
++				if (!IS_EUC_RANGE_VALID(c2))
++					return -1;
++			}
++			else
++				/* must be ASCII */
++			{
++				l = 1;
++			}
++			break;
++	}
++
++	return l;					/* verified length in bytes */
++}
++
++static int
++pg_euckr_verifier(const unsigned char *s, int len)
++{
++	int			l;				/* length of the character being verified */
++	unsigned char c1,
++				c2;
++
++	c1 = *s++;
++
++	if (IS_HIGHBIT_SET(c1))
++	{
++		l = 2;
++		if (l > len)
++			return -1;			/* character is cut off by end of input */
++		if (!IS_EUC_RANGE_VALID(c1))
++			return -1;			/* also rejects SS2/SS3 lead bytes unless they lie in 0xa1..0xfe */
++		c2 = *s++;
++		if (!IS_EUC_RANGE_VALID(c2))
++			return -1;
++	}
++	else
++		/* must be ASCII */
++	{
++		l = 1;
++	}
++
++	return l;
++}
++
++/* EUC-CN byte sequences are exactly the same as EUC-KR, so the verifier is shared via this alias */
++#define pg_euccn_verifier	pg_euckr_verifier
++
++static int
++pg_euctw_verifier(const unsigned char *s, int len)
++{
++	int			l;				/* length of the character being verified */
++	unsigned char c1,
++				c2;
++
++	c1 = *s++;
++
++	switch (c1)
++	{
++		case SS2:				/* CNS 11643 Plane 1-7 */
++			l = 4;
++			if (l > len)
++				return -1;		/* character is cut off by end of input */
++			c2 = *s++;
++			if (c2 < 0xa1 || c2 > 0xa7)	/* second byte restricted to 0xa1..0xa7 */
++				return -1;
++			c2 = *s++;
++			if (!IS_EUC_RANGE_VALID(c2))
++				return -1;
++			c2 = *s++;
++			if (!IS_EUC_RANGE_VALID(c2))
++				return -1;
++			break;
++
++		case SS3:				/* unused */
++			return -1;
++
++		default:
++			if (IS_HIGHBIT_SET(c1)) /* CNS 11643 Plane 1 */
++			{
++				l = 2;
++				if (l > len)
++					return -1;
++				/* no further range check on c1? */
++				c2 = *s++;
++				if (!IS_EUC_RANGE_VALID(c2))
++					return -1;
++			}
++			else
++				/* must be ASCII */
++			{
++				l = 1;
++			}
++			break;
++	}
++	return l;
++}
++
++static int
++pg_johab_verifier(const unsigned char *s, int len)
++{
++	int			l,				/* countdown of bytes still to check */
++				mbl;			/* full character length, returned on success */
++	unsigned char c;
++
++	l = mbl = pg_johab_mblen(s);
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	if (!IS_HIGHBIT_SET(*s))
++		return mbl;				/* single byte: nothing more to check */
++
++	while (--l > 0)				/* visit every byte after the lead byte */
++	{
++		c = *++s;
++		if (!IS_EUC_RANGE_VALID(c))
++			return -1;
++	}
++	return mbl;
++}
++
++static int
++pg_mule_verifier(const unsigned char *s, int len)
++{
++	int			l,				/* countdown of bytes still to check */
++				mbl;			/* full character length, returned on success */
++	unsigned char c;
++
++	l = mbl = pg_mule_mblen(s);
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	while (--l > 0)				/* visit every byte after the lead byte */
++	{
++		c = *++s;
++		if (!IS_HIGHBIT_SET(c))	/* trailing bytes must have the high bit set */
++			return -1;
++	}
++	return mbl;
++}
++
++static int
++pg_latin1_verifier(const unsigned char *s, int len)
++{
++	return 1;					/* any single byte is accepted; s and len are unused */
++}
++
++static int
++pg_sjis_verifier(const unsigned char *s, int len)
++{
++	int			l,
++				mbl;			/* full character length, returned on success */
++	unsigned char c1,
++				c2;
++
++	l = mbl = pg_sjis_mblen(s);
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	if (l == 1)					/* pg_sjis_mblen already verified it */
++		return mbl;
++
++	c1 = *s++;					/* two-byte case: check head and tail bytes */
++	c2 = *s;
++	if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
++		return -1;
++	return mbl;
++}
++
++static int
++pg_big5_verifier(const unsigned char *s, int len)
++{
++	int			l,				/* countdown of bytes still to check */
++				mbl;			/* full character length, returned on success */
++
++	l = mbl = pg_big5_mblen(s);
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	if (l == 2 &&				/* reject the byte pair produced by pg_encoding_set_invalid() */
++		s[0] == NONUTF8_INVALID_BYTE0 &&
++		s[1] == NONUTF8_INVALID_BYTE1)
++		return -1;
++
++	while (--l > 0)
++	{
++		if (*++s == '\0')		/* a zero byte inside a multibyte character is invalid */
++			return -1;
++	}
++
++	return mbl;
++}
++
++static int
++pg_gbk_verifier(const unsigned char *s, int len)
++{
++	int			l,				/* countdown of bytes still to check */
++				mbl;			/* full character length, returned on success */
++
++	l = mbl = pg_gbk_mblen(s);
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	if (l == 2 &&				/* reject the byte pair produced by pg_encoding_set_invalid() */
++		s[0] == NONUTF8_INVALID_BYTE0 &&
++		s[1] == NONUTF8_INVALID_BYTE1)
++		return -1;
++
++	while (--l > 0)
++	{
++		if (*++s == '\0')		/* a zero byte inside a multibyte character is invalid */
++			return -1;
++	}
++
++	return mbl;
++}
++
++static int
++pg_uhc_verifier(const unsigned char *s, int len)
++{
++	int			l,				/* countdown of bytes still to check */
++				mbl;			/* full character length, returned on success */
++
++	l = mbl = pg_uhc_mblen(s);
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	if (l == 2 &&				/* reject the byte pair produced by pg_encoding_set_invalid() */
++		s[0] == NONUTF8_INVALID_BYTE0 &&
++		s[1] == NONUTF8_INVALID_BYTE1)
++		return -1;
++
++	while (--l > 0)
++	{
++		if (*++s == '\0')		/* a zero byte inside a multibyte character is invalid */
++			return -1;
++	}
++
++	return mbl;
++}
++
++static int
++pg_gb18030_verifier(const unsigned char *s, int len)
++{
++	int			l;				/* result: 1, 2 or 4 when valid, -1 otherwise */
++
++	if (!IS_HIGHBIT_SET(*s))
++		l = 1;					/* ASCII */
++	else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
++	{
++		/* Should be 4-byte, validate remaining bytes */
++		if (*s >= 0x81 && *s <= 0xfe &&
++			*(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
++			*(s + 3) >= 0x30 && *(s + 3) <= 0x39)
++			l = 4;
++		else
++			l = -1;
++	}
++	else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
++	{
++		/* Should be 2-byte, validate */
++		if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
++			(*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
++			l = 2;
++		else
++			l = -1;				/* includes a truncated 4-byte form: second byte 0x30..0x39 fails here */
++	}
++	else
++		l = -1;					/* bad lead byte, or too few bytes remain */
++	return l;
++}
++
++static int
++pg_utf8_verifier(const unsigned char *s, int len)
++{
++	int			l = pg_utf_mblen(s);	/* length claimed by the lead byte */
++
++	if (len < l)
++		return -1;				/* character is cut off by end of input */
++
++	if (!pg_utf8_islegal(s, l))	/* bounds were checked above, as pg_utf8_islegal requires */
++		return -1;
++
++	return l;
++}
++
++/*
++ * Check for validity of a single UTF-8 encoded character
++ *
++ * This directly implements the rules in RFC3629.  The bizarre-looking
++ * restrictions on the second byte are meant to ensure that there isn't
++ * more than one encoding of a given Unicode code point; that is,
++ * you may not use a longer-than-necessary byte sequence with high order
++ * zero bits to represent a character that would fit in fewer bytes.
++ * To do otherwise is to create security hazards (eg, create an apparent
++ * non-ASCII character that decodes to plain ASCII).
++ *
++ * length is assumed to have been obtained by pg_utf_mblen(), and the
++ * caller must have checked that that many bytes are present in the buffer.
++ */
++bool
++pg_utf8_islegal(const unsigned char *source, int length)
++{
++	unsigned char a;
++
++	switch (length)				/* cases fall through: bytes are checked last to first */
++	{
++		default:
++			/* reject lengths 5 and 6 for now */
++			return false;
++		case 4:
++			a = source[3];
++			if (a < 0x80 || a > 0xBF)	/* must be a continuation byte */
++				return false;
++			/* FALL THRU */
++		case 3:
++			a = source[2];
++			if (a < 0x80 || a > 0xBF)	/* must be a continuation byte */
++				return false;
++			/* FALL THRU */
++		case 2:
++			a = source[1];
++			switch (*source)	/* allowed range of byte 2 depends on the lead byte */
++			{
++				case 0xE0:		/* rules out overlong three-byte forms */
++					if (a < 0xA0 || a > 0xBF)
++						return false;
++					break;
++				case 0xED:		/* rules out the surrogate range U+D800..U+DFFF */
++					if (a < 0x80 || a > 0x9F)
++						return false;
++					break;
++				case 0xF0:		/* rules out overlong four-byte forms */
++					if (a < 0x90 || a > 0xBF)
++						return false;
++					break;
++				case 0xF4:		/* rules out code points above U+10FFFF */
++					if (a < 0x80 || a > 0x8F)
++						return false;
++					break;
++				default:
++					if (a < 0x80 || a > 0xBF)
++						return false;
++					break;
++			}
++			/* FALL THRU */
++		case 1:
++			a = *source;		/* finally, the lead byte itself */
++			if (a >= 0x80 && a < 0xC2)	/* stray continuation byte, or overlong lead 0xC0/0xC1 */
++				return false;
++			if (a > 0xF4)
++				return false;
++			break;
++	}
++	return true;
++}
++
++
++/*
++ * Fills the provided buffer with two bytes such that:
++ *   pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
++ */
++void
++pg_encoding_set_invalid(int encoding, char *dst)	/* writes exactly dst[0] and dst[1] */
++{
++	Assert(pg_encoding_max_length(encoding) > 1);	/* only meaningful for multibyte encodings */
++
++	dst[0] = (encoding == PG_UTF8 ? 0xc0 : NONUTF8_INVALID_BYTE0);	/* 0xc0: two-byte lead that pg_utf8_islegal() rejects */
++	dst[1] = NONUTF8_INVALID_BYTE1;
++}
++
++/*
++ *-------------------------------------------------------------------
++ * encoding info table, indexed by encoding id (see pg_encoding_mblen etc.)
++ * XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
++ *-------------------------------------------------------------------
++ */
++const pg_wchar_tbl pg_wchar_table[] = {	/* per row: to-wchar, from-wchar, mblen, dsplen, mbverify, then (presumably) max bytes per char */
++	{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
++	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},	/* PG_EUC_JP */
++	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2},	/* PG_EUC_CN */
++	{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3},	/* PG_EUC_KR */
++	{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4},	/* PG_EUC_TW */
++	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},	/* PG_EUC_JIS_2004 */
++	{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4},	/* PG_UTF8 */
++	{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4},	/* PG_MULE_INTERNAL */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
++	{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
++	{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
++	{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
++	{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2},	/* PG_GBK */
++	{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2},	/* PG_UHC */
++	{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4},	/* PG_GB18030 */
++	{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3},	/* PG_JOHAB */
++	{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}	/* PG_SHIFT_JIS_2004 */
++};
++
++/*
++ * Returns the byte length of a multibyte character.
++ *
++ * Caution: when dealing with text that is not certainly valid in the
++ * specified encoding, the result may exceed the actual remaining
++ * string length.  Callers that are not prepared to deal with that
++ * should use pg_encoding_mblen_bounded() instead.
++ */
++int
++pg_encoding_mblen(int encoding, const char *mbstr)
++{
++	return (PG_VALID_ENCODING(encoding) ?	/* an invalid encoding id falls back to the SQL_ASCII entry */
++			pg_wchar_table[encoding].mblen((const unsigned char *) mbstr) :
++			pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
++}
++
++/*
++ * Returns the byte length of a multibyte character; but not more than
++ * the distance to end of string.
++ */
++int
++pg_encoding_mblen_bounded(int encoding, const char *mbstr)
++{
++	return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr));	/* strnlen() caps the result at the first zero byte */
++}
++
++/*
++ * Returns the display length of a multibyte character.
++ */
++int
++pg_encoding_dsplen(int encoding, const char *mbstr)
++{
++	return (PG_VALID_ENCODING(encoding) ?
++			pg_wchar_table[encoding].dsplen((const unsigned char *) mbstr) :
++			pg_wchar_table[PG_SQL_ASCII].dsplen((const unsigned char *) mbstr));
++}
++
++/*
++ * Verify the first multibyte character of the given string.
++ * Return its byte length if good, -1 if bad.  (See comments above for
++ * full details of the mbverify API.)
++ */
++int
++pg_encoding_verifymb(int encoding, const char *mbstr, int len)
++{
++	return (PG_VALID_ENCODING(encoding) ?
++			pg_wchar_table[encoding].mbverify((const unsigned char *) mbstr, len) :
++			pg_wchar_table[PG_SQL_ASCII].mbverify((const unsigned char *) mbstr, len));
++}
++
++/* v14+ function name, for easier backpatching */
++int
++pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
++{
++	int ok_bytes = pg_encoding_verifymb(encoding, mbstr, len);
++
++	if (ok_bytes == 0)
++		return -1;
++	return ok_bytes;
++}
++
++/* replace v14+ function, adapted from pg_verify_mbstr_len */
++int
++pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
++{
++	mbverifier	mbverify;
++	int			ok_bytes;
++
++	Assert(PG_VALID_ENCODING(encoding));
++
++	/*
++	 * In single-byte encodings, we need only reject nulls (\0).
++	 */
++	if (pg_encoding_max_length(encoding) <= 1)
++	{
++		const char *nullpos = memchr(mbstr, 0, len);
++
++		if (nullpos == NULL)
++			return len;
++		return nullpos - mbstr;
++	}
++
++	/* fetch function pointer just once */
++	mbverify = pg_wchar_table[encoding].mbverify;
++
++	ok_bytes = 0;
++
++	while (len > 0)
++	{
++		int			l;
++
++		/* fast path for ASCII-subset characters */
++		if (!IS_HIGHBIT_SET(*mbstr))
++		{
++			if (*mbstr != '\0')
++			{
++				ok_bytes++;
++				mbstr++;
++				len--;
++				continue;
++			}
++			return ok_bytes;
++		}
++
++		l = (*mbverify) ((const unsigned char *) mbstr, len);
++
++		if (l < 0)
++			return ok_bytes;
++
++		mbstr += l;
++		len -= l;
++		ok_bytes += l;
++	}
++	return ok_bytes;
++}
++
++/*
++ * fetch maximum length of a given encoding
++ */
++int
++pg_encoding_max_length(int encoding)
++{
++	Assert(PG_VALID_ENCODING(encoding));
++
++	/*
++	 * Check for the encoding despite the assert, due to some mingw versions
++	 * otherwise issuing bogus warnings.
++	 */
++	return PG_VALID_ENCODING(encoding) ?
++		pg_wchar_table[encoding].maxmblen :
++		pg_wchar_table[PG_SQL_ASCII].maxmblen;
++}
+diff --git a/src/include/common/unicode_combining_table.h b/src/include/common/unicode_combining_table.h
+new file mode 100644
+index 00000000000..a9f10c31bc8
+--- /dev/null
++++ b/src/include/common/unicode_combining_table.h
+@@ -0,0 +1,196 @@
++/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */
++
++static const struct mbinterval combining[] = {
++	{0x0300, 0x036F},
++	{0x0483, 0x0489},
++	{0x0591, 0x05BD},
++	{0x05BF, 0x05BF},
++	{0x05C1, 0x05C2},
++	{0x05C4, 0x05C5},
++	{0x05C7, 0x05C7},
++	{0x0610, 0x061A},
++	{0x064B, 0x065F},
++	{0x0670, 0x0670},
++	{0x06D6, 0x06DC},
++	{0x06DF, 0x06E4},
++	{0x06E7, 0x06E8},
++	{0x06EA, 0x06ED},
++	{0x0711, 0x0711},
++	{0x0730, 0x074A},
++	{0x07A6, 0x07B0},
++	{0x07EB, 0x07F3},
++	{0x07FD, 0x07FD},
++	{0x0816, 0x0819},
++	{0x081B, 0x0823},
++	{0x0825, 0x0827},
++	{0x0829, 0x082D},
++	{0x0859, 0x085B},
++	{0x08D3, 0x08E1},
++	{0x08E3, 0x0902},
++	{0x093A, 0x093A},
++	{0x093C, 0x093C},
++	{0x0941, 0x0948},
++	{0x094D, 0x094D},
++	{0x0951, 0x0957},
++	{0x0962, 0x0963},
++	{0x0981, 0x0981},
++	{0x09BC, 0x09BC},
++	{0x09C1, 0x09C4},
++	{0x09CD, 0x09CD},
++	{0x09E2, 0x09E3},
++	{0x09FE, 0x0A02},
++	{0x0A3C, 0x0A3C},
++	{0x0A41, 0x0A51},
++	{0x0A70, 0x0A71},
++	{0x0A75, 0x0A75},
++	{0x0A81, 0x0A82},
++	{0x0ABC, 0x0ABC},
++	{0x0AC1, 0x0AC8},
++	{0x0ACD, 0x0ACD},
++	{0x0AE2, 0x0AE3},
++	{0x0AFA, 0x0B01},
++	{0x0B3C, 0x0B3C},
++	{0x0B3F, 0x0B3F},
++	{0x0B41, 0x0B44},
++	{0x0B4D, 0x0B56},
++	{0x0B62, 0x0B63},
++	{0x0B82, 0x0B82},
++	{0x0BC0, 0x0BC0},
++	{0x0BCD, 0x0BCD},
++	{0x0C00, 0x0C00},
++	{0x0C04, 0x0C04},
++	{0x0C3E, 0x0C40},
++	{0x0C46, 0x0C56},
++	{0x0C62, 0x0C63},
++	{0x0C81, 0x0C81},
++	{0x0CBC, 0x0CBC},
++	{0x0CBF, 0x0CBF},
++	{0x0CC6, 0x0CC6},
++	{0x0CCC, 0x0CCD},
++	{0x0CE2, 0x0CE3},
++	{0x0D00, 0x0D01},
++	{0x0D3B, 0x0D3C},
++	{0x0D41, 0x0D44},
++	{0x0D4D, 0x0D4D},
++	{0x0D62, 0x0D63},
++	{0x0D81, 0x0D81},
++	{0x0DCA, 0x0DCA},
++	{0x0DD2, 0x0DD6},
++	{0x0E31, 0x0E31},
++	{0x0E34, 0x0E3A},
++	{0x0E47, 0x0E4E},
++	{0x0EB1, 0x0EB1},
++	{0x0EB4, 0x0EBC},
++	{0x0EC8, 0x0ECD},
++	{0x0F18, 0x0F19},
++	{0x0F35, 0x0F35},
++	{0x0F37, 0x0F37},
++	{0x0F39, 0x0F39},
++	{0x0F71, 0x0F7E},
++	{0x0F80, 0x0F84},
++	{0x0F86, 0x0F87},
++	{0x0F8D, 0x0FBC},
++	{0x0FC6, 0x0FC6},
++	{0x102D, 0x1030},
++	{0x1032, 0x1037},
++	{0x1039, 0x103A},
++	{0x103D, 0x103E},
++	{0x1058, 0x1059},
++	{0x105E, 0x1060},
++	{0x1071, 0x1074},
++	{0x1082, 0x1082},
++	{0x1085, 0x1086},
++	{0x108D, 0x108D},
++	{0x109D, 0x109D},
++	{0x135D, 0x135F},
++	{0x1712, 0x1714},
++	{0x1732, 0x1734},
++	{0x1752, 0x1753},
++	{0x1772, 0x1773},
++	{0x17B4, 0x17B5},
++	{0x17B7, 0x17BD},
++	{0x17C6, 0x17C6},
++	{0x17C9, 0x17D3},
++	{0x17DD, 0x17DD},
++	{0x180B, 0x180D},
++	{0x1885, 0x1886},
++	{0x18A9, 0x18A9},
++	{0x1920, 0x1922},
++	{0x1927, 0x1928},
++	{0x1932, 0x1932},
++	{0x1939, 0x193B},
++	{0x1A17, 0x1A18},
++	{0x1A1B, 0x1A1B},
++	{0x1A56, 0x1A56},
++	{0x1A58, 0x1A60},
++	{0x1A62, 0x1A62},
++	{0x1A65, 0x1A6C},
++	{0x1A73, 0x1A7F},
++	{0x1AB0, 0x1B03},
++	{0x1B34, 0x1B34},
++	{0x1B36, 0x1B3A},
++	{0x1B3C, 0x1B3C},
++	{0x1B42, 0x1B42},
++	{0x1B6B, 0x1B73},
++	{0x1B80, 0x1B81},
++	{0x1BA2, 0x1BA5},
++	{0x1BA8, 0x1BA9},
++	{0x1BAB, 0x1BAD},
++	{0x1BE6, 0x1BE6},
++	{0x1BE8, 0x1BE9},
++	{0x1BED, 0x1BED},
++	{0x1BEF, 0x1BF1},
++	{0x1C2C, 0x1C33},
++	{0x1C36, 0x1C37},
++	{0x1CD0, 0x1CD2},
++	{0x1CD4, 0x1CE0},
++	{0x1CE2, 0x1CE8},
++	{0x1CED, 0x1CED},
++	{0x1CF4, 0x1CF4},
++	{0x1CF8, 0x1CF9},
++	{0x1DC0, 0x1DFF},
++	{0x20D0, 0x20F0},
++	{0x2CEF, 0x2CF1},
++	{0x2D7F, 0x2D7F},
++	{0x2DE0, 0x2DFF},
++	{0x302A, 0x302D},
++	{0x3099, 0x309A},
++	{0xA66F, 0xA672},
++	{0xA674, 0xA67D},
++	{0xA69E, 0xA69F},
++	{0xA6F0, 0xA6F1},
++	{0xA802, 0xA802},
++	{0xA806, 0xA806},
++	{0xA80B, 0xA80B},
++	{0xA825, 0xA826},
++	{0xA82C, 0xA82C},
++	{0xA8C4, 0xA8C5},
++	{0xA8E0, 0xA8F1},
++	{0xA8FF, 0xA8FF},
++	{0xA926, 0xA92D},
++	{0xA947, 0xA951},
++	{0xA980, 0xA982},
++	{0xA9B3, 0xA9B3},
++	{0xA9B6, 0xA9B9},
++	{0xA9BC, 0xA9BD},
++	{0xA9E5, 0xA9E5},
++	{0xAA29, 0xAA2E},
++	{0xAA31, 0xAA32},
++	{0xAA35, 0xAA36},
++	{0xAA43, 0xAA43},
++	{0xAA4C, 0xAA4C},
++	{0xAA7C, 0xAA7C},
++	{0xAAB0, 0xAAB0},
++	{0xAAB2, 0xAAB4},
++	{0xAAB7, 0xAAB8},
++	{0xAABE, 0xAABF},
++	{0xAAC1, 0xAAC1},
++	{0xAAEC, 0xAAED},
++	{0xAAF6, 0xAAF6},
++	{0xABE5, 0xABE5},
++	{0xABE8, 0xABE8},
++	{0xABED, 0xABED},
++	{0xFB1E, 0xFB1E},
++	{0xFE00, 0xFE0F},
++	{0xFE20, 0xFE2F},
++};
+diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
+index 07b316fae1d..2bbdf2e792f 100644
+--- a/src/include/mb/pg_wchar.h
++++ b/src/include/mb/pg_wchar.h
+@@ -521,6 +521,10 @@ extern int	pg_valid_server_encoding_id(int encoding);
+  * of them do exist inside libpq.
+  */
+ extern void pg_encoding_set_invalid(int encoding, char *dst);
++extern int	pg_encoding_verifymbchar(int encoding, const char *mbstr, int len);
++extern int	pg_encoding_verifymbstr(int encoding, const char *mbstr, int len);
++extern int	pg_encoding_verifymb(int encoding, const char *mbstr, int len);
++
+ extern int	pg_mb2wchar(const char *from, pg_wchar *to);
+ extern int	pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
+ extern int	pg_encoding_mb2wchar_with_len(int encoding,
+diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
+index a29d19a6268..dccee6a5597 100644
+--- a/src/interfaces/libpq/fe-exec.c
++++ b/src/interfaces/libpq/fe-exec.c
+@@ -132,6 +132,8 @@ static int	check_field_number(const PGresult *res, int field_num);
+ #define PGRESULT_SEP_ALLOC_THRESHOLD	(PGRESULT_DATA_BLOCKSIZE / 2)
+ 
+ 
++
++
+ /*
+  * PQmakeEmptyPGresult
+  *	 returns a newly allocated, initialized PGresult with given status.
+@@ -3403,9 +3405,10 @@ PQescapeStringInternal(PGconn *conn,
+ 			if (error)
+ 				*error = 1;
+ 			if (conn)
+-				libpq_append_conn_error(conn, "incomplete multibyte character");
++                printfPQExpBuffer(&conn->errorMessage,
++								  libpq_gettext("incomplete multibyte character\n"));
+ 
+-			pg_encoding_set_invalid(encoding, target);
++            pg_encoding_set_invalid(encoding, target);
+ 			target += 2;
+             source++;
+             remaining--;
+-- 
+2.39.5 (Apple Git-154)
+
+
+From 27827fe62777a809cc3f5a54742839bc031b02f6 Mon Sep 17 00:00:00 2001
+From: Filip Janus <fjanus@redhat.com>
+Date: Tue, 18 Mar 2025 10:11:09 +0100
+Subject: [PATCH 8/8] Fix failing dropdb.c
+
+---
+ src/bin/scripts/dropdb.c                  | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/src/bin/scripts/dropdb.c b/src/bin/scripts/dropdb.c
+index ed3a2c8c19a..140982717d9 100644
+--- a/src/bin/scripts/dropdb.c
++++ b/src/bin/scripts/dropdb.c
+@@ -122,10 +122,6 @@ main(int argc, char *argv[])
+ 			exit(0);
+ 	}
+ 
+-	initPQExpBuffer(&sql);
+-
+-	appendPQExpBuffer(&sql, "DROP DATABASE %s%s;",
+-					  (if_exists ? "IF EXISTS " : ""), fmtIdEnc(dbname, PQclientEncoding(conn)));
+ 
+ 	/* Avoid trying to drop postgres db while we are connected to it. */
+ 	if (maintenance_db == NULL && strcmp(dbname, "postgres") == 0)
+2.39.5 (Apple Git-154)
+
diff --git a/SOURCES/timezone-test-fix.patch b/SOURCES/timezone-test-fix.patch
new file mode 100644
index 0000000..d97920d
--- /dev/null
+++ b/SOURCES/timezone-test-fix.patch
@@ -0,0 +1,25 @@
+From 9d18b30ac7a17d70ee789b710865bd20b206023d Mon Sep 17 00:00:00 2001
+From: Filip Janus <fjanus@redhat.com>
+Date: Tue, 18 Mar 2025 10:11:09 +0100
+Subject: [PATCH] Fix failing test regardless of the CVE-2025-1094 fix
+
+---
+ src/test/regress/expected/timestamptz.out | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/test/regress/expected/timestamptz.out b/src/test/regress/expected/timestamptz.out
+index 55efd183868..5964b65bc6b 100644
+--- a/src/test/regress/expected/timestamptz.out
++++ b/src/test/regress/expected/timestamptz.out
+@@ -2060,7 +2060,7 @@ SELECT make_timestamptz(2014, 12, 10, 0, 0, 0, 'Europe/Prague') AT TIME ZONE 'UT
+ SELECT make_timestamptz(1846, 12, 10, 0, 0, 0, 'Asia/Manila') AT TIME ZONE 'UTC';
+          timezone         
+ --------------------------
+- Wed Dec 09 15:56:00 1846
++ Wed Dec 09 15:56:08 1846
+ (1 row)
+ 
+ SELECT make_timestamptz(1881, 12, 10, 0, 0, 0, 'Europe/Paris') AT TIME ZONE 'UTC';
+-- 
+2.39.5 (Apple Git-154)
+
diff --git a/SPECS/postgresql.spec b/SPECS/postgresql.spec
index 928adfc..b138d42 100644
--- a/SPECS/postgresql.spec
+++ b/SPECS/postgresql.spec
@@ -60,7 +60,7 @@ Summary: PostgreSQL client programs
 Name: postgresql
 %global majorversion 12
 Version: %{majorversion}.22
-Release: 1%{?dist}
+Release: 3%{?dist}
 
 # The PostgreSQL license is very similar to other MIT licenses, but the OSI
 # recognizes it as an independent license, so we do as well.
@@ -108,6 +108,8 @@ Patch6: postgresql-man.patch
 Patch8: postgresql-external-libpq.patch
 Patch9: postgresql-server-pg_config.patch
 Patch10: postgresql-12.5-contrib-dblink-expected-out.patch
+Patch11: backport-cve-2025-1094.patch
+Patch12: timezone-test-fix.patch
 
 BuildRequires: gcc
 BuildRequires: perl(ExtUtils::MakeMaker) glibc-devel bison flex gawk
@@ -369,6 +371,8 @@ benchmarks.
 %patch8 -p1
 %patch9 -p1
 %patch10 -p1
+%patch11 -p1
+%patch12 -p1
 
 # We used to run autoconf here, but there's no longer any real need to,
 # since Postgres ships with a reasonably modern configure script.
@@ -1224,6 +1228,12 @@ make -C postgresql-setup-%{setup_version} check
 
 
 %changelog
+* Tue Mar 18 2025 Filip Janus <fjanus@redhat.com> - 12.22-3
+- Fix backport for CVE-2025-1094
+
+* Tue Mar 18 2025 Filip Janus <fjanus@redhat.com> - 12.22-2
+- Backport fix for CVE-2025-1094
+
 * Thu Nov 21 2024 Lukas Javorsky <ljavorsk@redhat.com> - 12.22-1
 - Update to 12.22
 - Fixes: CVE-2024-10976 CVE-2024-10978