From bdde82de7756dd598fa6e77f663ac9b2e4e9bca6 Mon Sep 17 00:00:00 2001
From: AlmaLinux RelEng Bot <eabdullin@almalinux.org>
Date: Tue, 10 Mar 2026 15:55:58 -0400
Subject: [PATCH] import UBI postgresql-13.23-2.el9_7

---
 ...6-2004--CVE-2026-2005--CVE-2026-2006.patch | 6357 +++++++++++++++++
 SPECS/postgresql.spec                         |    7 +-
 2 files changed, 6363 insertions(+), 1 deletion(-)
 create mode 100644 SOURCES/CVE-2026-2004--CVE-2026-2005--CVE-2026-2006.patch

diff --git a/SOURCES/CVE-2026-2004--CVE-2026-2005--CVE-2026-2006.patch b/SOURCES/CVE-2026-2004--CVE-2026-2005--CVE-2026-2006.patch
new file mode 100644
index 0000000..ce70126
--- /dev/null
+++ b/SOURCES/CVE-2026-2004--CVE-2026-2005--CVE-2026-2006.patch
@@ -0,0 +1,6357 @@
+diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c
+index 2886c08b85e..9d93b3c775e 100644
+--- a/contrib/btree_gist/btree_utils_var.c
++++ b/contrib/btree_gist/btree_utils_var.c
+@@ -116,36 +116,47 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
+ 
+ /*
+  * returns the common prefix length of a node key
++ *
++ * If the underlying type is character data, the prefix length may point in
++ * the middle of a multibyte character.
+ */
+ static int32
+ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
+ {
+ 	GBT_VARKEY_R r = gbt_var_key_readable(node);
+ 	int32		i = 0;
+-	int32		l = 0;
++	int32		l_left_to_match = 0;
++	int32		l_total = 0;
+ 	int32		t1len = VARSIZE(r.lower) - VARHDRSZ;
+ 	int32		t2len = VARSIZE(r.upper) - VARHDRSZ;
+ 	int32		ml = Min(t1len, t2len);
+ 	char	   *p1 = VARDATA(r.lower);
+ 	char	   *p2 = VARDATA(r.upper);
++	const char *end1 = p1 + t1len;
++	const char *end2 = p2 + t2len;
+ 
+ 	if (ml == 0)
+ 		return 0;
+ 
+ 	while (i < ml)
+ 	{
+-		if (tinfo->eml > 1 && l == 0)
++		if (tinfo->eml > 1 && l_left_to_match == 0)
+ 		{
+-			if ((l = pg_mblen(p1)) != pg_mblen(p2))
++			l_total = pg_mblen_range(p1, end1);
++			if (l_total != pg_mblen_range(p2, end2))
+ 			{
+ 				return i;
+ 			}
++			l_left_to_match = l_total;
+ 		}
+ 		if (*p1 != *p2)
+ 		{
+ 			if (tinfo->eml > 1)
+ 			{
+-				return (i - l + 1);
++				int32		l_matched_subset = l_total - l_left_to_match;
++
++				/* end common prefix at final byte of last matching char */
++				return i - l_matched_subset;
+ 			}
+ 			else
+ 			{
+@@ -155,7 +166,7 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
+ 
+ 		p1++;
+ 		p2++;
+-		l--;
++		l_left_to_match--;
+ 		i++;
+ 	}
+ 	return ml;					/* lower == upper */
+diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c
+index 1065d64ccb0..9bca5a21b8d 100644
+--- a/contrib/dict_xsyn/dict_xsyn.c
++++ b/contrib/dict_xsyn/dict_xsyn.c
+@@ -48,15 +48,15 @@ find_word(char *in, char **end)
+ 	char	   *start;
+ 
+ 	*end = NULL;
+-	while (*in && t_isspace(in))
+-		in += pg_mblen(in);
++	while (*in && t_isspace_cstr(in))
++		in += pg_mblen_cstr(in);
+ 
+ 	if (!*in || *in == '#')
+ 		return NULL;
+ 	start = in;
+ 
+-	while (*in && !t_isspace(in))
+-		in += pg_mblen(in);
++	while (*in && !t_isspace_cstr(in))
++		in += pg_mblen_cstr(in);
+ 
+ 	*end = in;
+ 
+diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
+index f369c68187b..0b1e0581e84 100644
+--- a/contrib/hstore/hstore_io.c
++++ b/contrib/hstore/hstore_io.c
+@@ -81,7 +81,9 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
+ 			}
+ 			else if (*(state->ptr) == '=' && !ignoreeq)
+ 			{
+-				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
++				elog(ERROR, "Syntax error near \"%.*s\" at position %d",
++					 pg_mblen_cstr(state->ptr), state->ptr,
++					 (int32) (state->ptr - state->begin));
+ 			}
+ 			else if (*(state->ptr) == '\\')
+ 			{
+@@ -220,7 +222,9 @@ parse_hstore(HSParser *state)
+ 			}
+ 			else if (!scanner_isspace((unsigned char) *(state->ptr)))
+ 			{
+-				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
++				elog(ERROR, "Syntax error near \"%.*s\" at position %d",
++					 pg_mblen_cstr(state->ptr), state->ptr,
++					 (int32) (state->ptr - state->begin));
+ 			}
+ 		}
+ 		else if (st == WGT)
+@@ -235,7 +239,9 @@ parse_hstore(HSParser *state)
+ 			}
+ 			else
+ 			{
+-				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
++				elog(ERROR, "Syntax error near \"%.*s\" at position %d",
++					 pg_mblen_cstr(state->ptr), state->ptr,
++					 (int32) (state->ptr - state->begin));
+ 			}
+ 		}
+ 		else if (st == WVAL)
+@@ -268,7 +274,9 @@ parse_hstore(HSParser *state)
+ 			}
+ 			else if (!scanner_isspace((unsigned char) *(state->ptr)))
+ 			{
+-				elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
++				elog(ERROR, "Syntax error near \"%.*s\" at position %d",
++					 pg_mblen_cstr(state->ptr), state->ptr,
++					 (int32) (state->ptr - state->begin));
+ 			}
+ 		}
+ 		else
+diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c
+index bcb785b15b2..66ba874391d 100644
+--- a/contrib/intarray/_int_selfuncs.c
++++ b/contrib/intarray/_int_selfuncs.c
+@@ -19,6 +19,7 @@
+ #include "catalog/pg_operator.h"
+ #include "catalog/pg_statistic.h"
+ #include "catalog/pg_type.h"
++#include "commands/extension.h"
+ #include "miscadmin.h"
+ #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
+@@ -171,7 +172,18 @@ _int_matchsel(PG_FUNCTION_ARGS)
+ 		PG_RETURN_FLOAT8(0.0);
+ 	}
+ 
+-	/* The caller made sure the const is a query, so get it now */
++	/*
++	 * Verify that the Const is a query_int, else return a default estimate.
++	 * (This could only fail if someone attached this estimator to the wrong
++	 * operator.)
++	 */
++	if (((Const *) other)->consttype !=
++		get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int"))
++	{
++		ReleaseVariableStats(vardata);
++		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
++	}
++
+ 	query = DatumGetQueryTypeP(((Const *) other)->constvalue);
+ 
+ 	/* Empty query matches nothing */
+diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
+index ef86046fc4b..abccbb8927f 100644
+--- a/contrib/ltree/lquery_op.c
++++ b/contrib/ltree/lquery_op.c
+@@ -26,14 +26,14 @@ getlexeme(char *start, char *end, int *len)
+ 	char	   *ptr;
+ 	int			charlen;
+ 
+-	while (start < end && (charlen = pg_mblen(start)) == 1 && t_iseq(start, '_'))
++	while (start < end && (charlen = pg_mblen_range(start, end)) == 1 && t_iseq(start, '_'))
+ 		start += charlen;
+ 
+ 	ptr = start;
+ 	if (ptr >= end)
+ 		return NULL;
+ 
+-	while (ptr < end && !((charlen = pg_mblen(ptr)) == 1 && t_iseq(ptr, '_')))
++	while (ptr < end && !((charlen = pg_mblen_range(ptr, end)) == 1 && t_iseq(ptr, '_')))
+ 		ptr += charlen;
+ 
+ 	*len = ptr - start;
+diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
+index 83fc705ef86..852fc9f5f5e 100644
+--- a/contrib/ltree/ltree.h
++++ b/contrib/ltree/ltree.h
+@@ -113,7 +113,8 @@ typedef struct
+ 
+ #define LQUERY_HASNOT		0x01
+ 
+-#define ISALNUM(x)	( t_isalpha(x) || t_isdigit(x)	|| ( pg_mblen(x) == 1 && t_iseq((x), '_') ) )
++/* Caller has already called mblen, so we can use _unbounded variants safely. */
++#define ISALNUM(x)	( t_isalpha_unbounded(x) || t_isdigit_unbounded(x) || ( pg_mblen_unbounded(x) == 1 && t_iseq((x), '_') ) )
+ 
+ /* full text query */
+ 
+diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
+index 15115cb29f3..0a44a8c4691 100644
+--- a/contrib/ltree/ltree_io.c
++++ b/contrib/ltree/ltree_io.c
+@@ -54,7 +54,7 @@ parse_ltree(const char *buf)
+ 	ptr = buf;
+ 	while (*ptr)
+ 	{
+-		charlen = pg_mblen(ptr);
++		charlen = pg_mblen_cstr(ptr);
+ 		if (t_iseq(ptr, '.'))
+ 			num++;
+ 		ptr += charlen;
+@@ -69,7 +69,7 @@ parse_ltree(const char *buf)
+ 	ptr = buf;
+ 	while (*ptr)
+ 	{
+-		charlen = pg_mblen(ptr);
++		charlen = pg_mblen_cstr(ptr);
+ 
+ 		switch (state)
+ 		{
+@@ -285,7 +285,7 @@ parse_lquery(const char *buf)
+ 	ptr = buf;
+ 	while (*ptr)
+ 	{
+-		charlen = pg_mblen(ptr);
++		charlen = pg_mblen_cstr(ptr);
+ 
+ 		if (t_iseq(ptr, '.'))
+ 			num++;
+@@ -305,7 +305,7 @@ parse_lquery(const char *buf)
+ 	ptr = buf;
+ 	while (*ptr)
+ 	{
+-		charlen = pg_mblen(ptr);
++		charlen = pg_mblen_cstr(ptr);
+ 
+ 		switch (state)
+ 		{
+@@ -402,7 +402,7 @@ parse_lquery(const char *buf)
+ 			case LQPRS_WAITFNUM:
+ 				if (t_iseq(ptr, ','))
+ 					state = LQPRS_WAITSNUM;
+-				else if (t_isdigit(ptr))
++				else if (t_isdigit_cstr(ptr))
+ 				{
+ 					int			low = atoi(ptr);
+ 
+@@ -420,7 +420,7 @@ parse_lquery(const char *buf)
+ 					UNCHAR;
+ 				break;
+ 			case LQPRS_WAITSNUM:
+-				if (t_isdigit(ptr))
++				if (t_isdigit_cstr(ptr))
+ 				{
+ 					int			high = atoi(ptr);
+ 
+@@ -451,7 +451,7 @@ parse_lquery(const char *buf)
+ 			case LQPRS_WAITCLOSE:
+ 				if (t_iseq(ptr, '}'))
+ 					state = LQPRS_WAITEND;
+-				else if (!t_isdigit(ptr))
++				else if (!t_isdigit_cstr(ptr))
+ 					UNCHAR;
+ 				break;
+ 			case LQPRS_WAITND:
+@@ -462,7 +462,7 @@ parse_lquery(const char *buf)
+ 				}
+ 				else if (t_iseq(ptr, ','))
+ 					state = LQPRS_WAITSNUM;
+-				else if (!t_isdigit(ptr))
++				else if (!t_isdigit_cstr(ptr))
+ 					UNCHAR;
+ 				break;
+ 			case LQPRS_WAITEND:
+diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
+index d967f92110f..7f98bdedecb 100644
+--- a/contrib/ltree/ltxtquery_io.c
++++ b/contrib/ltree/ltxtquery_io.c
+@@ -59,7 +59,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
+ 
+ 	for (;;)
+ 	{
+-		charlen = pg_mblen(state->buf);
++		charlen = pg_mblen_cstr(state->buf);
+ 
+ 		switch (state->state)
+ 		{
+@@ -83,7 +83,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
+ 					*lenval = charlen;
+ 					*flag = 0;
+ 				}
+-				else if (!t_isspace(state->buf))
++				else if (!t_isspace_unbounded(state->buf))
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_SYNTAX_ERROR),
+ 							 errmsg("operand syntax error")));
+diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
+index 1e9df8dfcf6..189bcfa9cac 100644
+--- a/contrib/pageinspect/heapfuncs.c
++++ b/contrib/pageinspect/heapfuncs.c
+@@ -26,6 +26,7 @@
+ #include "postgres.h"
+ 
+ #include "access/htup_details.h"
++#include "mb/pg_wchar.h"
+ #include "access/relation.h"
+ #include "catalog/pg_am_d.h"
+ #include "catalog/pg_type.h"
+@@ -99,7 +100,8 @@ text_to_bits(char *str, int len)
+ 		else
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_DATA_CORRUPTED),
+-					 errmsg("illegal character '%c' in t_bits string", str[off])));
++					 errmsg("invalid character \"%.*s\" in t_bits string",
++							pg_mblen_cstr(str + off), str + off)));
+ 
+ 		if (off % 8 == 7)
+ 			bits[off / 8] = byte;
+diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile
+index d75e9ada2e4..970380adfb0 100644
+--- a/contrib/pg_trgm/Makefile
++++ b/contrib/pg_trgm/Makefile
+@@ -14,7 +14,7 @@ DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
+ 	pg_trgm--1.0--1.1.sql
+ PGFILEDESC = "pg_trgm - trigram matching"
+ 
+-REGRESS = pg_trgm pg_word_trgm pg_strict_word_trgm
++REGRESS = pg_trgm pg_utf8_trgm pg_word_trgm pg_strict_word_trgm
+ 
+ ifdef USE_PGXS
+ PG_CONFIG = pg_config
+diff --git a/contrib/pg_trgm/data/trgm_utf8.data b/contrib/pg_trgm/data/trgm_utf8.data
+new file mode 100644
+index 00000000000..713856e76a6
+--- /dev/null
++++ b/contrib/pg_trgm/data/trgm_utf8.data
+@@ -0,0 +1,50 @@
++Mathematics
++数学
++गणित
++Matemáticas
++رياضيات
++Mathématiques
++গণিত
++Matemática
++Математика
++ریاضی
++Matematika
++Mathematik
++数学
++Mathematics
++गणित
++గణితం
++Matematik
++கணிதம்
++數學
++Toán học
++Matematika
++数学
++수학
++ریاضی
++Lissafi
++Hisabati
++Matematika
++Matematica
++ریاضی
++ಗಣಿತ
++ગણિત
++คณิตศาสตร์
++ሂሳብ
++गणित
++ਗਣਿਤ
++數學
++数学
++Iṣiro
++數學
++သင်္ချာ
++Herrega
++رياضي
++गणित
++Математика
++Matematyka
++ഗണിതം
++Matematika
++رياضي
++Matematika
++Matematică
+diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm.out b/contrib/pg_trgm/expected/pg_utf8_trgm.out
+new file mode 100644
+index 00000000000..0768e7d6a83
+--- /dev/null
++++ b/contrib/pg_trgm/expected/pg_utf8_trgm.out
+@@ -0,0 +1,8 @@
++SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
++\if :skip_test
++\quit
++\endif
++-- Index 50 translations of the word "Mathematics"
++CREATE TEMP TABLE mb (s text);
++\copy mb from 'data/trgm_utf8.data'
++CREATE INDEX ON mb USING gist(s gist_trgm_ops);
+diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm_1.out b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out
+new file mode 100644
+index 00000000000..8505c4fa552
+--- /dev/null
++++ b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out
+@@ -0,0 +1,3 @@
++SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
++\if :skip_test
++\quit
+diff --git a/contrib/pg_trgm/sql/pg_utf8_trgm.sql b/contrib/pg_trgm/sql/pg_utf8_trgm.sql
+new file mode 100644
+index 00000000000..0dd962ced83
+--- /dev/null
++++ b/contrib/pg_trgm/sql/pg_utf8_trgm.sql
+@@ -0,0 +1,9 @@
++SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
++\if :skip_test
++\quit
++\endif
++
++-- Index 50 translations of the word "Mathematics"
++CREATE TEMP TABLE mb (s text);
++\copy mb from 'data/trgm_utf8.data'
++CREATE INDEX ON mb USING gist(s gist_trgm_ops);
+diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
+index b616953462e..76ee4bbdc68 100644
+--- a/contrib/pg_trgm/trgm.h
++++ b/contrib/pg_trgm/trgm.h
+@@ -51,10 +51,10 @@ typedef char trgm[3];
+ } while(0)
+ 
+ #ifdef KEEPONLYALNUM
+-#define ISWORDCHR(c)	(t_isalpha(c) || t_isdigit(c))
++#define ISWORDCHR(c, len)	(t_isalpha_with_len(c, len) || t_isdigit_with_len(c, len))
+ #define ISPRINTABLECHAR(a)	( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
+ #else
+-#define ISWORDCHR(c)	(!t_isspace(c))
++#define ISWORDCHR(c, len)	(!t_isspace_with_len(c, len))
+ #define ISPRINTABLECHAR(a)	( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
+ #endif
+ #define ISPRINTABLETRGM(t)	( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
+diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
+index fb38135f7a3..63895c3017d 100644
+--- a/contrib/pg_trgm/trgm_op.c
++++ b/contrib/pg_trgm/trgm_op.c
+@@ -171,18 +171,29 @@ static char *
+ find_word(char *str, int lenstr, char **endword, int *charlen)
+ {
+ 	char	   *beginword = str;
++	const char *endstr = str + lenstr;
+ 
+-	while (beginword - str < lenstr && !ISWORDCHR(beginword))
+-		beginword += pg_mblen(beginword);
++	while (beginword < endstr)
++	{
++		int			clen = pg_mblen_range(beginword, endstr);
+ 
+-	if (beginword - str >= lenstr)
++		if (ISWORDCHR(beginword, clen))
++			break;
++		beginword += clen;
++	}
++
++	if (beginword >= endstr)
+ 		return NULL;
+ 
+ 	*endword = beginword;
+ 	*charlen = 0;
+-	while (*endword - str < lenstr && ISWORDCHR(*endword))
++	while (*endword < endstr)
+ 	{
+-		*endword += pg_mblen(*endword);
++		int			clen = pg_mblen_range(*endword, endstr);
++
++		if (!ISWORDCHR(*endword, clen))
++			break;
++		*endword += clen;
+ 		(*charlen)++;
+ 	}
+ 
+@@ -230,9 +241,9 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
+ 	if (bytelen > charlen)
+ 	{
+ 		/* Find multibyte character boundaries and apply compact_trigram */
+-		int			lenfirst = pg_mblen(str),
+-					lenmiddle = pg_mblen(str + lenfirst),
+-					lenlast = pg_mblen(str + lenfirst + lenmiddle);
++		int			lenfirst = pg_mblen_unbounded(str),
++					lenmiddle = pg_mblen_unbounded(str + lenfirst),
++					lenlast = pg_mblen_unbounded(str + lenfirst + lenmiddle);
+ 
+ 		while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
+ 		{
+@@ -243,7 +254,7 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
+ 
+ 			lenfirst = lenmiddle;
+ 			lenmiddle = lenlast;
+-			lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
++			lenlast = pg_mblen_unbounded(ptr + lenfirst + lenmiddle);
+ 		}
+ 	}
+ 	else
+@@ -723,6 +734,7 @@ get_wildcard_part(const char *str, int lenstr,
+ {
+ 	const char *beginword = str;
+ 	const char *endword;
++	const char *endstr = str + lenstr;
+ 	char	   *s = buf;
+ 	bool		in_leading_wildcard_meta = false;
+ 	bool		in_trailing_wildcard_meta = false;
+@@ -735,11 +747,13 @@ get_wildcard_part(const char *str, int lenstr,
+ 	 * from this loop to the next one, since we may exit at a word character
+ 	 * that is in_escape.
+ 	 */
+-	while (beginword - str < lenstr)
++	while (beginword < endstr)
+ 	{
++		clen = pg_mblen_range(beginword, endstr);
++
+ 		if (in_escape)
+ 		{
+-			if (ISWORDCHR(beginword))
++			if (ISWORDCHR(beginword, clen))
+ 				break;
+ 			in_escape = false;
+ 			in_leading_wildcard_meta = false;
+@@ -750,12 +764,12 @@ get_wildcard_part(const char *str, int lenstr,
+ 				in_escape = true;
+ 			else if (ISWILDCARDCHAR(beginword))
+ 				in_leading_wildcard_meta = true;
+-			else if (ISWORDCHR(beginword))
++			else if (ISWORDCHR(beginword, clen))
+ 				break;
+ 			else
+ 				in_leading_wildcard_meta = false;
+ 		}
+-		beginword += pg_mblen(beginword);
++		beginword += clen;
+ 	}
+ 
+ 	/*
+@@ -788,12 +802,12 @@ get_wildcard_part(const char *str, int lenstr,
+ 	 * string boundary.  Strip escapes during copy.
+ 	 */
+ 	endword = beginword;
+-	while (endword - str < lenstr)
++	while (endword < endstr)
+ 	{
+-		clen = pg_mblen(endword);
++		clen = pg_mblen_range(endword, endstr);
+ 		if (in_escape)
+ 		{
+-			if (ISWORDCHR(endword))
++			if (ISWORDCHR(endword, clen))
+ 			{
+ 				memcpy(s, endword, clen);
+ 				(*charlen)++;
+@@ -821,7 +835,7 @@ get_wildcard_part(const char *str, int lenstr,
+ 				in_trailing_wildcard_meta = true;
+ 				break;
+ 			}
+-			else if (ISWORDCHR(endword))
++			else if (ISWORDCHR(endword, clen))
+ 			{
+ 				memcpy(s, endword, clen);
+ 				(*charlen)++;
+diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
+index 31192209065..7e235c66946 100644
+--- a/contrib/pg_trgm/trgm_regexp.c
++++ b/contrib/pg_trgm/trgm_regexp.c
+@@ -480,7 +480,7 @@ static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
+ static void RE_compile(regex_t *regex, text *text_re,
+ 					   int cflags, Oid collation);
+ static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
+-static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
++static int	convertPgWchar(pg_wchar c, trgm_mb_char *result);
+ static void transformGraph(TrgmNFA *trgmNFA);
+ static void processState(TrgmNFA *trgmNFA, TrgmState *state);
+ static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
+@@ -815,10 +815,11 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
+ 		for (j = 0; j < charsCount; j++)
+ 		{
+ 			trgm_mb_char c;
++			int			clen = convertPgWchar(chars[j], &c);
+ 
+-			if (!convertPgWchar(chars[j], &c))
++			if (!clen)
+ 				continue;		/* ok to ignore it altogether */
+-			if (ISWORDCHR(c.bytes))
++			if (ISWORDCHR(c.bytes, clen))
+ 				colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
+ 			else
+ 				colorInfo->containsNonWord = true;
+@@ -830,13 +831,15 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
+ 
+ /*
+  * Convert pg_wchar to multibyte format.
+- * Returns false if the character should be ignored completely.
++ * Returns 0 if the character should be ignored completely, else returns its
++ * byte length.
+  */
+-static bool
++static int
+ convertPgWchar(pg_wchar c, trgm_mb_char *result)
+ {
+ 	/* "s" has enough space for a multibyte character and a trailing NUL */
+ 	char		s[MAX_MULTIBYTE_CHAR_LEN + 1];
++	int			clen;
+ 
+ 	/*
+ 	 * We can ignore the NUL character, since it can never appear in a PG text
+@@ -844,11 +847,11 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
+ 	 * reconstructing trigrams.
+ 	 */
+ 	if (c == 0)
+-		return false;
++		return 0;
+ 
+ 	/* Do the conversion, making sure the result is NUL-terminated */
+ 	memset(s, 0, sizeof(s));
+-	pg_wchar2mb_with_len(&c, s, 1);
++	clen = pg_wchar2mb_with_len(&c, s, 1);
+ 
+ 	/*
+ 	 * In IGNORECASE mode, we can ignore uppercase characters.  We assume that
+@@ -870,7 +873,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
+ 		if (strcmp(lowerCased, s) != 0)
+ 		{
+ 			pfree(lowerCased);
+-			return false;
++			return 0;
+ 		}
+ 		pfree(lowerCased);
+ 	}
+@@ -878,7 +881,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
+ 
+ 	/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
+ 	memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
+-	return true;
++	return clen;
+ }
+ 
+ 
+diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile
+--- a/contrib/pgcrypto/Makefile
++++ b/contrib/pgcrypto/Makefile
+@@ -53,7 +53,8 @@
+ 	$(CF_TESTS) \
+ 	crypt-md5 \
+ 	pgp-armor pgp-decrypt pgp-encrypt $(CF_PGP_TESTS) \
+-	pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-info
++	pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-pubkey-session \
++	pgp-info
+ 
+ #REGRESS = init pgp-pubkey-decrypt pgp-decrypt \
+ EXTRA_CLEAN = gen-rtab
+ 
+diff --git a/contrib/pgcrypto/expected/pgp-decrypt.out b/contrib/pgcrypto/expected/pgp-decrypt.out
+index e8250b090ab..a1dd7586f7b 100644
+--- a/contrib/pgcrypto/expected/pgp-decrypt.out
++++ b/contrib/pgcrypto/expected/pgp-decrypt.out
+@@ -317,7 +317,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
+ (1 row)
+ 
+ -- expected: da39a3ee5e6b4b0d3255bfef95601890afd80709
+-select encode(digest(pgp_sym_decrypt(dearmor('
++select encode(digest(pgp_sym_decrypt_bytea(dearmor('
+ -----BEGIN PGP MESSAGE-----
+ Comment: dat3.aes.sha1.mdc.s2k3.z0
+ 
+@@ -393,6 +393,28 @@ ERROR:  Wrong key or corrupt data
+ select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
+ NOTICE:  dbg: parse_literal_data: data type=b
+ ERROR:  Not text data
++-- NUL byte in text decrypt.  Ciphertext source:
++-- printf 'a\x00\xc' | gpg --homedir /nonexistent \
++--      --personal-compress-preferences uncompressed --textmode \
++--      --personal-cipher-preferences aes --no-emit-version --batch \
++--      --symmetric --passphrase key --armor
++do $$
++begin
++  perform pgp_sym_decrypt(dearmor('
++-----BEGIN PGP MESSAGE-----
++
++jA0EBwMCXLc8pozB10Fg0jQBVUID59TLvWutJp0j6eh9ZgjqIRzdYaIymFB8y4XH
++vu0YlJP5D5BX7yqZ+Pry7TlDmiFO
++=rV7z
++-----END PGP MESSAGE-----
++'), 'key', 'debug=1');
++exception when others then
++  raise '%',
++    regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
++end
++$$;
++ERROR:  invalid byte sequence for encoding [REDACTED]: 0x00
++CONTEXT:  PL/pgSQL function inline_code_block line 12 at RAISE
+ -- Decryption with a certain incorrect key yields an apparent BZip2-compressed
+ -- plaintext.  Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
+ -- until the random prefix gave rise to that property.
+diff --git a/contrib/pgcrypto/expected/pgp-decrypt_1.out b/contrib/pgcrypto/expected/pgp-decrypt_1.out
+index 63d5ab98654..7bcf32ec541 100644
+--- a/contrib/pgcrypto/expected/pgp-decrypt_1.out
++++ b/contrib/pgcrypto/expected/pgp-decrypt_1.out
+@@ -313,7 +313,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
+ (1 row)
+ 
+ -- expected: da39a3ee5e6b4b0d3255bfef95601890afd80709
+-select encode(digest(pgp_sym_decrypt(dearmor('
++select encode(digest(pgp_sym_decrypt_bytea(dearmor('
+ -----BEGIN PGP MESSAGE-----
+ Comment: dat3.aes.sha1.mdc.s2k3.z0
+ 
+@@ -389,6 +389,28 @@ ERROR:  Wrong key or corrupt data
+ select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
+ NOTICE:  dbg: parse_literal_data: data type=b
+ ERROR:  Not text data
++-- NUL byte in text decrypt.  Ciphertext source:
++-- printf 'a\x00\xc' | gpg --homedir /nonexistent \
++--      --personal-compress-preferences uncompressed --textmode \
++--      --personal-cipher-preferences aes --no-emit-version --batch \
++--      --symmetric --passphrase key --armor
++do $$
++begin
++  perform pgp_sym_decrypt(dearmor('
++-----BEGIN PGP MESSAGE-----
++
++jA0EBwMCXLc8pozB10Fg0jQBVUID59TLvWutJp0j6eh9ZgjqIRzdYaIymFB8y4XH
++vu0YlJP5D5BX7yqZ+Pry7TlDmiFO
++=rV7z
++-----END PGP MESSAGE-----
++'), 'key', 'debug=1');
++exception when others then
++  raise '%',
++    regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
++end
++$$;
++ERROR:  invalid byte sequence for encoding [REDACTED]: 0x00
++CONTEXT:  PL/pgSQL function inline_code_block line 12 at RAISE
+ -- Decryption with a certain incorrect key yields an apparent BZip2-compressed
+ -- plaintext.  Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
+ -- until the random prefix gave rise to that property.
+diff --git a/contrib/pgcrypto/expected/pgp-pubkey-session.out b/contrib/pgcrypto/expected/pgp-pubkey-session.out
+new file mode 100644
+index 00000000000..f724d98eb24
+--- /dev/null
++++ b/contrib/pgcrypto/expected/pgp-pubkey-session.out
+@@ -0,0 +1,47 @@
++-- Test for overflow with session key at decrypt.
++-- Data automatically generated by scripts/pgp_session_data.py.
++-- See this file for details explaining how this data is generated.
++SELECT pgp_pub_decrypt_bytea(
++'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1
++da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30
++94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd
++0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616
++3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10
++a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7
++b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d
++8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc
++0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494
++57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599
++ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3
++67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5
++060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56
++2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175
++5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d
++135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea,
++'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad
++9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f
++f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12
++07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1
++23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709
++f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c
++138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4
++c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5
++18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847
++e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9
++de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0
++239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0
++ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9
++9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e
++74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c
++3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8
++58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549
++507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd
++183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302
++25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45
++3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103
++cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03
++ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8
++7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8
++487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75
++9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea);
++ERROR:  Public key too big
+diff --git a/contrib/pgcrypto/pgp-pgsql.c b/contrib/pgcrypto/pgp-pgsql.c
+index 62a2f351e43..ae634a05b72 100644
+--- a/contrib/pgcrypto/pgp-pgsql.c
++++ b/contrib/pgcrypto/pgp-pgsql.c
+@@ -643,6 +643,7 @@ pgp_sym_decrypt_text(PG_FUNCTION_ARGS)
+ 		arg = PG_GETARG_BYTEA_PP(2);
+ 
+ 	res = decrypt_internal(0, 1, data, key, NULL, arg);
++	pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false);
+ 
+ 	PG_FREE_IF_COPY(data, 0);
+ 	PG_FREE_IF_COPY(key, 1);
+@@ -744,6 +745,7 @@ pgp_pub_decrypt_text(PG_FUNCTION_ARGS)
+ 		arg = PG_GETARG_BYTEA_PP(3);
+ 
+ 	res = decrypt_internal(1, 1, data, key, psw, arg);
++	pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false);
+ 
+ 	PG_FREE_IF_COPY(data, 0);
+ 	PG_FREE_IF_COPY(key, 1);
+diff --git a/contrib/pgcrypto/pgp-pubdec.c b/contrib/pgcrypto/pgp-pubdec.c
+index a0a5738a40e..2a13aa3e6ad 100644
+--- a/contrib/pgcrypto/pgp-pubdec.c
++++ b/contrib/pgcrypto/pgp-pubdec.c
+@@ -157,6 +157,7 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt)
+ 	uint8	   *msg;
+ 	int			msglen;
+ 	PGP_MPI    *m;
++	unsigned	sess_key_len;
+ 
+ 	pk = ctx->pub_key;
+ 	if (pk == NULL)
+@@ -220,11 +221,19 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt)
+ 	if (res < 0)
+ 		goto out;
+ 
++	sess_key_len = msglen - 3;
++	if (sess_key_len > PGP_MAX_KEY)
++	{
++		px_debug("incorrect session key length=%u", sess_key_len);
++		res = PXE_PGP_KEY_TOO_BIG;
++		goto out;
++	}
++
+ 	/*
+ 	 * got sesskey
+ 	 */
+ 	ctx->cipher_algo = *msg;
+-	ctx->sess_key_len = msglen - 3;
++	ctx->sess_key_len = sess_key_len;
+ 	memcpy(ctx->sess_key, msg + 1, ctx->sess_key_len);
+ 
+ out:
+diff --git a/contrib/pgcrypto/px.c b/contrib/pgcrypto/px.c
+index e4fbfd75106..fbee6ad0d68 100644
+--- a/contrib/pgcrypto/px.c
++++ b/contrib/pgcrypto/px.c
+@@ -68,6 +68,7 @@ static const struct error_desc px_err_list[] = {
+ 	{PXE_PGP_UNEXPECTED_PKT, "Unexpected packet in key data"},
+ 	{PXE_PGP_MATH_FAILED, "Math operation failed"},
+ 	{PXE_PGP_SHORT_ELGAMAL_KEY, "Elgamal keys must be at least 1024 bits long"},
++	{PXE_PGP_KEY_TOO_BIG, "Public key too big"},
+ 	{PXE_PGP_UNKNOWN_PUBALGO, "Unknown public-key encryption algorithm"},
+ 	{PXE_PGP_WRONG_KEY, "Wrong key"},
+ 	{PXE_PGP_MULTIPLE_KEYS,
+diff --git a/contrib/pgcrypto/px.h b/contrib/pgcrypto/px.h
+index 0d4722a04a0..2a1725d9969 100644
+--- a/contrib/pgcrypto/px.h
++++ b/contrib/pgcrypto/px.h
+@@ -86,7 +86,7 @@ void		px_free(void *p);
+ /* -108 is unused */
+ #define PXE_PGP_MATH_FAILED			-109
+ #define PXE_PGP_SHORT_ELGAMAL_KEY	-110
+-/* -111 is unused */
++#define PXE_PGP_KEY_TOO_BIG			-111
+ #define PXE_PGP_UNKNOWN_PUBALGO		-112
+ #define PXE_PGP_WRONG_KEY			-113
+ #define PXE_PGP_MULTIPLE_KEYS		-114
+diff --git a/contrib/pgcrypto/scripts/pgp_session_data.py b/contrib/pgcrypto/scripts/pgp_session_data.py
+new file mode 100644
+index 00000000000..999350bb2bc
+--- /dev/null
++++ b/contrib/pgcrypto/scripts/pgp_session_data.py
+@@ -0,0 +1,491 @@
++#!/usr/bin/python
++# -*- coding: utf-8 -*-
++#
++# Generate PGP data to check the session key length of the input data provided
++# to pgp_pub_decrypt_bytea().
++#
++# First, the crafted data is generated from valid RSA data, freshly generated
++# by this script each time it is run, see generate_rsa_keypair().
++# Second, the crafted PGP data is built, see build_message_data() and
++# build_key_data().  Finally, the resulting SQL script is generated.
++#
++# This script generates in stdout the SQL file that is used in the regression
++# tests of pgcrypto.  The following command can be used to regenerate the file
++# which should never be manually manipulated:
++# python3 scripts/pgp_session_data.py > sql/pgp-pubkey-session.sql
++
++import os
++import re
++import struct
++import secrets
++import sys
++import time
++
++# pwn for binary manipulation (p32, p64)
++from pwn import *
++
++# Cryptographic libraries, to craft the PGP data.
++from Crypto.Cipher import AES
++from Crypto.PublicKey import RSA
++from Crypto.Util.number import inverse
++
++# AES key used for session key encryption (16 bytes for AES-128)
++AES_KEY = b'\x01' * 16
++
++def generate_rsa_keypair(key_size: int = 2048) -> dict:
++    """
++    Generate a fresh RSA key pair.
++
++    The generated key includes all components needed for PGP operations:
++    - n: public modulus (p * q)
++    - e: public exponent (typically 65537)
++    - d: private exponent (e^-1 mod phi(n))
++    - p, q: prime factors of n
++    - u: coefficient (p^-1 mod q) for CRT optimization
++
++    The caller can pass the wanted key size in input, in bits, for a
++    default of 2048 bits.  This function returns the RSA key components as
++    a dictionary, after performing some validation on them.
++
++    validate_rsa_key() raises ValueError if the components are inconsistent.
++    """
++
++    # Generate RSA key
++    key = RSA.generate(key_size)
++
++    # Extract all key components
++    rsa_components = {
++        'n': key.n,      # Public modulus (p * q)
++        'e': key.e,      # Public exponent (typically 65537)
++        'd': key.d,      # Private exponent (e^-1 mod phi(n))
++        'p': key.p,      # First prime factor
++        'q': key.q,      # Second prime factor
++        'u': inverse(key.p, key.q)  # Coefficient for CRT: p^-1 mod q
++    }
++
++    # Validate key components for correctness
++    validate_rsa_key(rsa_components)
++
++    return rsa_components
++
++def validate_rsa_key(rsa: dict) -> None:
++    """
++    Validate a generated RSA key.
++
++    This function performs basic validation to ensure the RSA key is properly
++    constructed and all components are consistent, at least mathematically.
++
++    Validations performed:
++    1. n = p * q (modulus is product of primes)
++    2. gcd(e, phi(n)) = 1 (public exponent is coprime to phi(n))
++    3. (d * e) mod(phi(n)) = 1 (private exponent is multiplicative inverse)
++    4. (u * p) (mod q) = 1 (coefficient is correct for CRT)
++    """
++
++    n, e, d, p, q, u = rsa['n'], rsa['e'], rsa['d'], rsa['p'], rsa['q'], rsa['u']
++
++    # Check that n = p * q
++    if n != p * q:
++        raise ValueError("RSA validation failed: n <> p * q")
++
++    # Check that p and q are different
++    if p == q:
++        raise ValueError("RSA validation failed: p = q (not allowed)")
++
++    # Calculate phi(n) = (p-1)(q-1)
++    phi_n = (p - 1) * (q - 1)
++
++    # Check that gcd(e, phi(n)) = 1
++    def gcd(a, b):
++        while b:
++            a, b = b, a % b
++        return a
++
++    if gcd(e, phi_n) != 1:
++        raise ValueError("RSA validation failed: gcd(e, phi(n)) <> 1")
++
++    # Check that (d * e) mod(phi(n)) = 1
++    if (d * e) % phi_n != 1:
++        raise ValueError("RSA validation failed: d * e <> 1 (mod phi(n))")
++
++    # Check that (u * p) (mod q) = 1
++    if (u * p) % q != 1:
++        raise ValueError("RSA validation failed: u * p <> 1 (mod q)")
++
++def mpi_encode(x: int) -> bytes:
++    """
++    Encode an integer as an OpenPGP Multi-Precision Integer (MPI).
++
++    Format (RFC 4880, Section 3.2):
++    - 2 bytes: bit length of the integer (big-endian)
++    - N bytes: the integer in big-endian format
++
++    This is used to encode RSA key components (n, e, d, p, q, u) in PGP
++    packets.
++
++    The integer to encode is given in input, returning an MPI-encoded
++    integer.
++
++    For example:
++        mpi_encode(65537) -> b'\x00\x11\x01\x00\x01'
++        (17 bits, value 0x010001)
++    """
++    if x < 0:
++        raise ValueError("MPI cannot encode negative integers")
++
++    if x == 0:
++        # Special case: zero has 0 bits and empty magnitude
++        bits = 0
++        mag = b""
++    else:
++        # Calculate bit length and convert to bytes
++        bits = x.bit_length()
++        mag = x.to_bytes((bits + 7) // 8, 'big')
++
++    # Pack: 2-byte bit length + magnitude bytes
++    return struct.pack('>H', bits) + mag
++
++def new_packet(tag: int, payload: bytes) -> bytes:
++    """
++    Create a new OpenPGP packet with a proper header.
++
++    OpenPGP packet format (RFC 4880, Section 4.2):
++    - New packet format: 0xC0 | tag
++    - Length encoding depends on payload size:
++      * 0-191: single byte
++      * 192-8383: two bytes (192 + ((length - 192) >> 8), (length - 192) & 0xFF)
++      * 8384+: five bytes (0xFF + 4-byte big-endian length)
++
++    The packet is built from a "tag" (1-63) and some "payload" data.  The
++    result generated is a complete OpenPGP packet.
++
++    For example:
++        new_packet(1, b'data') -> b'\xC1\x04data'
++        (Tag 1, length 4, payload 'data')
++    """
++    # New packet format: set bits 7 and 6, tag in the low six bits (0-5)
++    first = 0xC0 | (tag & 0x3F)
++    ln = len(payload)
++
++    # Encode length according to OpenPGP specification
++    if ln <= 191:
++        # Single byte length for small packets
++        llen = bytes([ln])
++    elif ln <= 8383:
++        # Two-byte length for medium packets
++        ln2 = ln - 192
++        llen = bytes([192 + (ln2 >> 8), ln2 & 0xFF])
++    else:
++        # Five-byte length for large packets
++        llen = bytes([255]) + struct.pack('>I', ln)
++
++    return bytes([first]) + llen + payload
++
++def build_key_data(rsa: dict) -> bytes:
++    """
++    Build the key data, containing an RSA private key.
++
++    The RSA contents should have been generated previously.
++
++    Format (see RFC 4880, Section 5.5.3):
++    - 1 byte: version (4)
++    - 4 bytes: creation time (current Unix timestamp)
++    - 1 byte: public key algorithm (2 = RSA encrypt)
++    - MPI: RSA public modulus n
++    - MPI: RSA public exponent e
++    - 1 byte: string-to-key usage (0 = no encryption)
++    - MPI: RSA private exponent d
++    - MPI: RSA prime p
++    - MPI: RSA prime q
++    - MPI: RSA coefficient u = p^-1 mod q
++    - 2 bytes: checksum of private key material
++
++    This function takes a set of RSA key components in input (n, e, d, p, q, u)
++    and returns a secret key packet.
++    """
++
++    # Public key portion
++    ver = bytes([4])                           # Version 4 key
++    ctime = struct.pack('>I', int(time.time())) # Current Unix timestamp
++    algo = bytes([2])                          # RSA encrypt algorithm
++    n_mpi = mpi_encode(rsa['n'])               # Public modulus
++    e_mpi = mpi_encode(rsa['e'])               # Public exponent
++    pub = ver + ctime + algo + n_mpi + e_mpi
++
++    # Private key portion
++    hide_type = bytes([0])              # No string-to-key encryption
++    d_mpi = mpi_encode(rsa['d'])        # Private exponent
++    p_mpi = mpi_encode(rsa['p'])        # Prime p
++    q_mpi = mpi_encode(rsa['q'])        # Prime q
++    u_mpi = mpi_encode(rsa['u'])        # Coefficient u = p^-1 mod q
++
++    # Calculate checksum of private key material (simple sum mod 65536)
++    private_data = d_mpi + p_mpi + q_mpi + u_mpi
++    cksum = sum(private_data) & 0xFFFF
++
++    secret = hide_type + private_data + struct.pack('>H', cksum)
++    payload = pub + secret
++
++    return new_packet(7, payload)
++
++def pgp_cfb_encrypt_resync(key, plaintext):
++    """
++    Implement OpenPGP CFB mode with resync.
++
++    OpenPGP CFB mode is a variant of standard CFB with a resync operation
++    after the first two blocks.
++
++    Algorithm (RFC 4880, Section 13.9):
++    1. Block 1: FR=zeros, encrypt full block_size bytes
++    2. Block 2: FR=block1, encrypt only 2 bytes
++    3. Resync: FR = block1[2:] + block2
++    4. Remaining blocks: standard CFB mode
++
++    This function uses the following arguments:
++    - key: AES encryption key (16 bytes for AES-128)
++    - plaintext: Data to encrypt
++    """
++    block_size = 16  # AES block size
++    cipher = AES.new(key[:16], AES.MODE_ECB)  # Use ECB for manual CFB
++    ciphertext = b''
++
++    # Block 1: FR=zeros, encrypt full 16 bytes
++    FR = b'\x00' * block_size
++    FRE = cipher.encrypt(FR)  # Encrypt the feedback register
++    block1 = bytes(a ^ b for a, b in zip(FRE, plaintext[0:16]))
++    ciphertext += block1
++
++    # Block 2: FR=block1, encrypt only 2 bytes
++    FR = block1
++    FRE = cipher.encrypt(FR)
++    block2 = bytes(a ^ b for a, b in zip(FRE[0:2], plaintext[16:18]))
++    ciphertext += block2
++
++    # Resync: FR = block1[2:16] + block2[0:2]
++    # This is the key difference from standard CFB mode
++    FR = block1[2:] + block2
++
++    # Block 3+: Continue with standard CFB mode
++    pos = 18
++    while pos < len(plaintext):
++        FRE = cipher.encrypt(FR)
++        chunk_len = min(block_size, len(plaintext) - pos)
++        chunk = plaintext[pos:pos+chunk_len]
++        enc_chunk = bytes(a ^ b for a, b in zip(FRE[:chunk_len], chunk))
++        ciphertext += enc_chunk
++
++        # Update feedback register for next iteration
++        if chunk_len == block_size:
++            FR = enc_chunk
++        else:
++            # Partial block: pad with old FR bytes
++            FR = enc_chunk + FR[chunk_len:]
++        pos += chunk_len
++
++    return ciphertext
++
++def build_literal_data_packet(data: bytes) -> bytes:
++    """
++    Build a literal data packet containing a message.
++
++    Format (RFC 4880, Section 5.9):
++    - 1 byte: data format ('b' = binary, 't' = text, 'u' = UTF-8 text)
++    - 1 byte: filename length (0 = no filename)
++    - N bytes: filename (empty in this case)
++    - 4 bytes: date (current Unix timestamp)
++    - M bytes: literal data
++
++    The data used to build the packet is given in input, with the generated
++    result returned.
++    """
++    body = bytes([
++        ord('b'),                              # Binary data format
++        0,                                     # Filename length (0 = no filename)
++    ]) + struct.pack('>I', int(time.time())) + data  # Current timestamp + data
++
++    return new_packet(11, body)
++
++def build_symenc_data_packet(sess_key: bytes, cipher_algo: int, payload: bytes) -> bytes:
++    """
++    Build a symmetrically-encrypted data packet using AES-128-CFB.
++
++    This packet contains encrypted data using the session key. The format
++    includes a random prefix, for security (see RFC 4880, Section 5.7).
++
++    Packet structure:
++    - Random prefix (block_size bytes)
++    - Prefix repeat (last 2 bytes of prefix repeated)
++    - Encrypted literal data packet
++
++    This function uses the following set of arguments:
++    - sess_key: Session key for encryption
++    - cipher_algo: Cipher algorithm identifier (7 = AES-128)
++    - payload: Data to encrypt (wrapped in literal data packet)
++    """
++    block_size = 16  # AES-128 block size
++    key = sess_key[:16]  # Use first 16 bytes for AES-128
++
++    # Create random prefix + repeat last 2 bytes (total 18 bytes)
++    # This is required by OpenPGP for integrity checking
++    prefix_random = secrets.token_bytes(block_size)
++    prefix = prefix_random + prefix_random[-2:]  # 18 bytes total
++
++    # Wrap payload in literal data packet
++    literal_pkt = build_literal_data_packet(payload)
++
++    # Plaintext = prefix + literal data packet
++    plaintext = prefix + literal_pkt
++
++    # Encrypt using OpenPGP CFB mode with resync
++    ciphertext = pgp_cfb_encrypt_resync(key, plaintext)
++
++    return new_packet(9, ciphertext)
++
++def build_tag1_packet(rsa: dict, sess_key: bytes) -> bytes:
++    """
++    Build a public-key encrypted key.
++
++    This is a very important function, as it is able to create the packet
++    triggering the overflow check.  This function can also be used to create
++    "legit" packet data.
++
++    Format (RFC 4880, Section 5.1):
++    - 1 byte: version (3)
++    - 8 bytes: key ID (0 = any key accepted)
++    - 1 byte: public key algorithm (2 = RSA encrypt)
++    - MPI: RSA-encrypted session key
++
++    This uses in arguments the generated RSA key pair, and the session key
++    to encrypt.  The latter is manipulated to trigger the overflow.
++
++    This function returns a complete packet holding the RSA-encrypted session key.
++    """
++
++    # Calculate RSA modulus size in bytes
++    n_bytes = (rsa['n'].bit_length() + 7) // 8
++
++    # Session key message format:
++    # - 1 byte: symmetric cipher algorithm (7 = AES-128)
++    # - N bytes: session key
++    # - 2 bytes: checksum (simple sum of session key bytes)
++    algo_byte = bytes([7])  # AES-128 algorithm identifier
++    cksum = sum(sess_key) & 0xFFFF  # 16-bit checksum
++    M = algo_byte + sess_key + struct.pack('>H', cksum)
++
++    # PKCS#1 v1.5 padding construction
++    # Format: 0x02 || PS || 0x00 || M
++    # Total padded message must be exactly n_bytes long.
++    total_len = n_bytes  # Total length must equal modulus size in bytes
++    ps_len = total_len - len(M) - 2  # Subtract 2 for 0x02 and 0x00 bytes
++
++    if ps_len < 8:
++        raise ValueError(f"Padding string too short ({ps_len} bytes); need at least 8 bytes. "
++                        f"Message length: {len(M)}, Modulus size: {n_bytes} bytes")
++
++    # Create padding string with *ALL* bytes being 0xFF (PS holds no zero byte!)
++    PS = bytes([0xFF]) * ps_len
++
++    # Construct the complete padded message
++    # Normal PKCS#1 v1.5 padding: 0x02 || PS || 0x00 || M
++    padded = bytes([0x02]) + PS + bytes([0x00]) + M
++
++    # Verify padding construction
++    if len(padded) != n_bytes:
++        raise ValueError(f"Padded message length ({len(padded)}) doesn't match RSA modulus size ({n_bytes})")
++
++    # Convert padded message to integer and encrypt with RSA
++    m_int = int.from_bytes(padded, 'big')
++
++    # Ensure message is smaller than modulus (required for RSA)
++    if m_int >= rsa['n']:
++        raise ValueError("Padded message is larger than RSA modulus")
++
++    # RSA encryption: c = m^e mod n
++    c_int = pow(m_int, rsa['e'], rsa['n'])
++
++    # Encode encrypted result as MPI
++    c_mpi = mpi_encode(c_int)
++
++    # Build complete packet
++    ver = bytes([3])           # Version 3 packet
++    key_id = b"\x00" * 8      # Key ID (0 = any key accepted)
++    algo = bytes([2])         # RSA encrypt algorithm
++    payload = ver + key_id + algo + c_mpi
++
++    return new_packet(1, payload)
++
++def build_message_data(rsa: dict) -> bytes:
++    """
++    This function creates a crafted message, with a long session key
++    length.
++
++    This takes in input the RSA key components generated previously,
++    returning a concatenated set of PGP packets crafted for the purpose
++    of this test.
++    """
++
++    # Base prefix for session key (AES key + padding + size).
++    # Note that the crafted size is the important part for this test.
++    prefix = AES_KEY + b"\x00" * 16 + p32(0x10)
++
++    # Build encrypted data packet, legit.
++    sedata = build_symenc_data_packet(AES_KEY, cipher_algo=7, payload=b"\x0a\x00")
++
++    # Build multiple packets
++    packets = [
++        # First packet, legit.
++        build_tag1_packet(rsa, prefix),
++
++        # Encrypted data packet, legit.
++        sedata,
++
++        # Second packet: information payload.
++        #
++        # This packet contains a longer-crafted session key, able to trigger
++        # the overflow check in pgcrypto.  This is the critical part, and
++        # you are right to pay a lot of attention here if you are
++        # reading this code.
++        build_tag1_packet(rsa, prefix)
++    ]
++
++    return b"".join(packets)
++
++def main():
++    # Default key size, in bits.
++    # This number can be set to a higher number if wanted, like 4096.  We
++    # just do not need to do that here.
++    key_size = 2048
++
++    # Generate fresh RSA key pair
++    rsa = generate_rsa_keypair(key_size)
++
++    # Generate the message data.
++    print("### Building message data", file=sys.stderr)
++    message_data = build_message_data(rsa)
++
++    # Build the key containing the RSA private key
++    print("### Building key data", file=sys.stderr)
++    key_data = build_key_data(rsa)
++
++    # Convert to hexadecimal, for the bytea used in the SQL file.
++    message_data = message_data.hex()
++    key_data = key_data.hex()
++
++    # Split each value into lines of 72 characters (flags given by keyword).
++    message_data = re.sub("(.{72})", "\\1\n", message_data, flags=re.DOTALL)
++    key_data = re.sub("(.{72})", "\\1\n", key_data, flags=re.DOTALL)
++
++    # Get the script filename for documentation
++    file_basename = os.path.basename(__file__)
++
++    # Output the SQL test case
++    print(f'''-- Test for overflow with session key at decrypt.
++-- Data automatically generated by scripts/{file_basename}.
++-- See this file for details explaining how this data is generated.
++SELECT pgp_pub_decrypt_bytea(
++'\\x{message_data}'::bytea,
++'\\x{key_data}'::bytea);''',
++          file=sys.stdout)
++
++if __name__ == "__main__":
++    main()
+diff --git a/contrib/pgcrypto/sql/pgp-decrypt.sql b/contrib/pgcrypto/sql/pgp-decrypt.sql
+index 557948d7c75..4901ab66539 100644
+--- a/contrib/pgcrypto/sql/pgp-decrypt.sql
++++ b/contrib/pgcrypto/sql/pgp-decrypt.sql
+@@ -230,7 +230,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
+ '), '0123456789abcdefghij'), 'sha1'), 'hex');
+ -- expected: da39a3ee5e6b4b0d3255bfef95601890afd80709
+ 
+-select encode(digest(pgp_sym_decrypt(dearmor('
++select encode(digest(pgp_sym_decrypt_bytea(dearmor('
+ -----BEGIN PGP MESSAGE-----
+ Comment: dat3.aes.sha1.mdc.s2k3.z0
+ 
+@@ -288,6 +288,27 @@ VsxxqLSPzNLAeIspJk5G
+ -- Routine text/binary mismatch.
+ select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
+ 
++-- NUL byte in text decrypt.  Ciphertext source:
++-- printf 'a\x00\xc' | gpg --homedir /nonexistent \
++--      --personal-compress-preferences uncompressed --textmode \
++--      --personal-cipher-preferences aes --no-emit-version --batch \
++--      --symmetric --passphrase key --armor
++do $$
++begin
++  perform pgp_sym_decrypt(dearmor('
++-----BEGIN PGP MESSAGE-----
++
++jA0EBwMCXLc8pozB10Fg0jQBVUID59TLvWutJp0j6eh9ZgjqIRzdYaIymFB8y4XH
++vu0YlJP5D5BX7yqZ+Pry7TlDmiFO
++=rV7z
++-----END PGP MESSAGE-----
++'), 'key', 'debug=1');
++exception when others then
++  raise '%',
++    regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
++end
++$$;
++
+ -- Decryption with a certain incorrect key yields an apparent BZip2-compressed
+ -- plaintext.  Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
+ -- until the random prefix gave rise to that property.
+diff --git a/contrib/pgcrypto/sql/pgp-pubkey-session.sql b/contrib/pgcrypto/sql/pgp-pubkey-session.sql
+new file mode 100644
+index 00000000000..51792f1f4d8
+--- /dev/null
++++ b/contrib/pgcrypto/sql/pgp-pubkey-session.sql
+@@ -0,0 +1,46 @@
++-- Test for overflow with session key at decrypt.
++-- Data automatically generated by scripts/pgp_session_data.py.
++-- See this file for details explaining how this data is generated.
++SELECT pgp_pub_decrypt_bytea(
++'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1
++da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30
++94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd
++0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616
++3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10
++a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7
++b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d
++8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc
++0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494
++57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599
++ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3
++67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5
++060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56
++2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175
++5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d
++135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea,
++'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad
++9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f
++f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12
++07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1
++23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709
++f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c
++138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4
++c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5
++18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847
++e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9
++de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0
++239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0
++ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9
++9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e
++74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c
++3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8
++58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549
++507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd
++183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302
++25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45
++3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103
++cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03
++ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8
++7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8
++487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75
++9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea);
+diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
+index 0047efc075f..33c3a94874d 100644
+--- a/contrib/unaccent/unaccent.c
++++ b/contrib/unaccent/unaccent.c
+@@ -149,9 +149,9 @@ initTrie(const char *filename)
+ 				state = 0;
+ 				for (ptr = line; *ptr; ptr += ptrlen)
+ 				{
+-					ptrlen = pg_mblen(ptr);
++					ptrlen = pg_mblen_cstr(ptr);
+ 					/* ignore whitespace, but end src or trg */
+-					if (t_isspace(ptr))
++					if (t_isspace_cstr(ptr))
+ 					{
+ 						if (state == 1)
+ 							state = 2;
+@@ -315,6 +315,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
+ 	char	   *srcchar = (char *) PG_GETARG_POINTER(1);
+ 	int32		len = PG_GETARG_INT32(2);
+ 	char	   *srcstart = srcchar;
++	const char *srcend = srcstart + len;
+ 	TSLexeme   *res;
+ 	StringInfoData buf;
+ 
+@@ -342,7 +343,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
+ 		}
+ 		else
+ 		{
+-			matchlen = pg_mblen(srcchar);
++			matchlen = pg_mblen_range(srcchar, srcend);
+ 			if (buf.data != NULL)
+ 				appendBinaryStringInfo(&buf, srcchar, matchlen);
+ 		}
+diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
+index a676966b877..6aa1b34d66e 100644
+--- a/src/backend/access/transam/multixact.c
++++ b/src/backend/access/transam/multixact.c
+@@ -3306,7 +3306,6 @@ multixact_redo(XLogReaderState *record)
+ 	else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
+ 	{
+ 		xl_multixact_truncate xlrec;
+-		int			pageno;
+ 
+ 		memcpy(&xlrec, XLogRecGetData(record),
+ 			   SizeOfMultiXactTruncate);
+@@ -3331,14 +3330,6 @@ multixact_redo(XLogReaderState *record)
+ 		SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
+ 
+ 		PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
+-
+-		/*
+-		 * During XLOG replay, latest_page_number isn't necessarily set up
+-		 * yet; insert a suitable value to bypass the sanity test in
+-		 * SimpleLruTruncate.
+-		 */
+-		pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
+-		MultiXactOffsetCtl->shared->latest_page_number = pageno;
+ 		PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
+ 
+ 		LWLockRelease(MultiXactTruncationLock);
+diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c
+index 38615647253..fcd3860f7db 100644
+--- a/src/backend/catalog/pg_depend.c
++++ b/src/backend/catalog/pg_depend.c
+@@ -22,11 +22,13 @@
+ #include "catalog/pg_constraint.h"
+ #include "catalog/pg_depend.h"
+ #include "catalog/pg_extension.h"
++#include "catalog/pg_type.h"
+ #include "commands/extension.h"
+ #include "miscadmin.h"
+ #include "utils/fmgroids.h"
+ #include "utils/lsyscache.h"
+ #include "utils/rel.h"
++#include "utils/syscache.h"
+ 
+ 
+ static bool isObjectPinned(const ObjectAddress *object, Relation rel);
+@@ -804,6 +806,77 @@ getAutoExtensionsOfObject(Oid classId, Oid objectId)
+ 	return result;
+ }
+ 
++/*
++ * Look up a type belonging to an extension.
++ *
++ * Returns the type's OID, or InvalidOid if not found.
++ *
++ * Notice that the type is specified by name only, without a schema.
++ * That's because this will typically be used by relocatable extensions
++ * which can't make a-priori assumptions about which schema their objects
++ * are in.  As long as the extension only defines one type of this name,
++ * the answer is unique anyway.
++ *
++ * We might later add the ability to look up functions, operators, etc.
++ */
++Oid
++getExtensionType(Oid extensionOid, const char *typname)
++{
++	Oid			result = InvalidOid;
++	Relation	depRel;
++	ScanKeyData key[3];
++	SysScanDesc scan;
++	HeapTuple	tup;
++
++	depRel = table_open(DependRelationId, AccessShareLock);
++
++	ScanKeyInit(&key[0],
++				Anum_pg_depend_refclassid,
++				BTEqualStrategyNumber, F_OIDEQ,
++				ObjectIdGetDatum(ExtensionRelationId));
++	ScanKeyInit(&key[1],
++				Anum_pg_depend_refobjid,
++				BTEqualStrategyNumber, F_OIDEQ,
++				ObjectIdGetDatum(extensionOid));
++	ScanKeyInit(&key[2],
++				Anum_pg_depend_refobjsubid,
++				BTEqualStrategyNumber, F_INT4EQ,
++				Int32GetDatum(0));
++
++	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
++							  NULL, 3, key);
++
++	while (HeapTupleIsValid(tup = systable_getnext(scan)))
++	{
++		Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
++
++		if (depform->classid == TypeRelationId &&
++			depform->deptype == DEPENDENCY_EXTENSION)
++		{
++			Oid			typoid = depform->objid;
++			HeapTuple	typtup;
++
++			typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typoid));
++			if (!HeapTupleIsValid(typtup))
++				continue;		/* should we throw an error? */
++			if (strcmp(NameStr(((Form_pg_type) GETSTRUCT(typtup))->typname),
++					   typname) == 0)
++			{
++				result = typoid;
++				ReleaseSysCache(typtup);
++				break;			/* no need to keep searching */
++			}
++			ReleaseSysCache(typtup);
++		}
++	}
++
++	systable_endscan(scan);
++
++	table_close(depRel, AccessShareLock);
++
++	return result;
++}
++
+ /*
+  * Detect whether a sequence is marked as "owned" by a column
+  *
+diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
+index 0d754874c1e..1818906b54a 100644
+--- a/src/backend/catalog/pg_proc.c
++++ b/src/backend/catalog/pg_proc.c
+@@ -1129,7 +1129,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal,
+ 			if (cursorpos > 0)
+ 				newcp++;
+ 		}
+-		chlen = pg_mblen(prosrc);
++		chlen = pg_mblen_cstr(prosrc);
+ 		if (strncmp(prosrc, literal, chlen) != 0)
+ 			goto fail;
+ 		prosrc += chlen;
+diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
+index ccffec91132..a00cdcf6435 100644
+--- a/src/backend/commands/extension.c
++++ b/src/backend/commands/extension.c
+@@ -45,6 +45,7 @@
+ #include "catalog/pg_depend.h"
+ #include "catalog/pg_extension.h"
+ #include "catalog/pg_namespace.h"
++#include "catalog/pg_proc.h"
+ #include "catalog/pg_type.h"
+ #include "commands/alter.h"
+ #include "commands/comment.h"
+@@ -60,10 +61,12 @@
+ #include "utils/acl.h"
+ #include "utils/builtins.h"
+ #include "utils/fmgroids.h"
++#include "utils/inval.h"
+ #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ #include "utils/rel.h"
+ #include "utils/snapmgr.h"
++#include "utils/syscache.h"
+ #include "utils/varlena.h"
+ 
+ 
+@@ -104,7 +107,26 @@ typedef struct ExtensionVersionInfo
+ 	struct ExtensionVersionInfo *previous;	/* current best predecessor */
+ } ExtensionVersionInfo;
+ 
++/*
++ * Cache structure for get_function_sibling_type (and maybe later,
++ * allied lookup functions).
++ */
++typedef struct ExtensionSiblingCache
++{
++	struct ExtensionSiblingCache *next; /* list link */
++	/* lookup key: requesting function's OID and type name */
++	Oid			reqfuncoid;
++	const char *typname;
++	bool		valid;			/* is entry currently valid? */
++	uint32		exthash;		/* cache hash of owning extension's OID */
++	Oid			typeoid;		/* OID associated with typname */
++} ExtensionSiblingCache;
++
++/* Head of linked list of ExtensionSiblingCache structs */
++static ExtensionSiblingCache *ext_sibling_list = NULL;
++
+ /* Local functions */
++static void ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue);
+ static List *find_update_path(List *evi_list,
+ 							  ExtensionVersionInfo *evi_start,
+ 							  ExtensionVersionInfo *evi_target,
+@@ -254,6 +276,114 @@ get_extension_schema(Oid ext_oid)
+ 	return result;
+ }
+ 
++/*
++ * get_function_sibling_type - find a type belonging to same extension as func
++ *
++ * Returns the type's OID, or InvalidOid if not found.
++ *
++ * This is useful in extensions, which won't have fixed object OIDs.
++ * We work from the calling function's own OID, which it can get from its
++ * FunctionCallInfo parameter, and look up the owning extension and thence
++ * a type belonging to the same extension.
++ *
++ * Notice that the type is specified by name only, without a schema.
++ * That's because this will typically be used by relocatable extensions
++ * which can't make a-priori assumptions about which schema their objects
++ * are in.  As long as the extension only defines one type of this name,
++ * the answer is unique anyway.
++ *
++ * We might later add the ability to look up functions, operators, etc.
++ *
++ * This code is simply a frontend for some pg_depend lookups.  Those lookups
++ * are fairly expensive, so we provide a simple cache facility.  We assume
++ * that the passed typname is actually a C constant, or at least permanently
++ * allocated, so that we need not copy that string.
++ */
++Oid
++get_function_sibling_type(Oid funcoid, const char *typname)
++{
++	ExtensionSiblingCache *cache_entry;
++	Oid			extoid;
++	Oid			typeoid;
++
++	/*
++	 * See if we have the answer cached.  Someday there may be enough callers
++	 * to justify a hash table, but for now, a simple linked list is fine.
++	 */
++	for (cache_entry = ext_sibling_list; cache_entry != NULL;
++		 cache_entry = cache_entry->next)
++	{
++		if (funcoid == cache_entry->reqfuncoid &&
++			strcmp(typname, cache_entry->typname) == 0)
++			break;				/* matched on (funcoid, typname) */
++	}
++	if (cache_entry && cache_entry->valid)
++		return cache_entry->typeoid;
++
++	/*
++	 * Nope, so do the expensive lookups.  We do not expect failures, so we do
++	 * not cache negative results.
++	 */
++	extoid = getExtensionOfObject(ProcedureRelationId, funcoid);
++	if (!OidIsValid(extoid))
++		return InvalidOid;		/* lookup yielded no extension OID */
++	typeoid = getExtensionType(extoid, typname);
++	if (!OidIsValid(typeoid))
++		return InvalidOid;		/* lookup yielded no type OID */
++
++	/*
++	 * Build a new cache entry, or refill a stale one the loop above found.
++	 */
++	if (cache_entry == NULL)
++	{
++		/* Register invalidation hook if this is first entry */
++		if (ext_sibling_list == NULL)
++			CacheRegisterSyscacheCallback(EXTENSIONOID,
++										  ext_sibling_callback,
++										  (Datum) 0);
++
++		/* Momentarily zero the space to ensure valid flag is false */
++		cache_entry = (ExtensionSiblingCache *)
++			MemoryContextAllocZero(CacheMemoryContext,
++								   sizeof(ExtensionSiblingCache));
++		cache_entry->next = ext_sibling_list;
++		ext_sibling_list = cache_entry;
++	}
++
++	cache_entry->reqfuncoid = funcoid;
++	cache_entry->typname = typname; /* pointer kept, string not copied */
++	cache_entry->exthash = GetSysCacheHashValue1(EXTENSIONOID,
++												 ObjectIdGetDatum(extoid));
++	cache_entry->typeoid = typeoid;
++	/* Mark it valid only once it's fully populated */
++	cache_entry->valid = true;
++
++	return typeoid;
++}
++
++/*
++ * ext_sibling_callback
++ *		Syscache inval callback function for EXTENSIONOID cache
++ *
++ * It seems sufficient to invalidate ExtensionSiblingCache entries when
++ * the owning extension's pg_extension entry is modified or deleted.
++ * Neither a requesting function's OID, nor the OID of the object it's
++ * looking for, could change without an extension update or drop/recreate.
++ */
++static void
++ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue)
++{
++	ExtensionSiblingCache *cache_entry;
++
++	for (cache_entry = ext_sibling_list; cache_entry != NULL;
++		 cache_entry = cache_entry->next)
++	{
++		if (hashvalue == 0 ||	/* zero hash: every entry is invalidated */
++			cache_entry->exthash == hashvalue)
++			cache_entry->valid = false; /* entry stays linked in the list */
++	}
++}
++
+ /*
+  * Utility functions to check validity of extension and version names
+  */
+diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c
+index 640b22ad936..10a30d235dc 100644
+--- a/src/backend/commands/operatorcmds.c
++++ b/src/backend/commands/operatorcmds.c
+@@ -262,7 +262,6 @@ ValidateRestrictionEstimator(List *restrictionName)
+ {
+ 	Oid			typeId[4];
+ 	Oid			restrictionOid;
+-	AclResult	aclresult;
+ 
+ 	typeId[0] = INTERNALOID;	/* PlannerInfo */
+ 	typeId[1] = OIDOID;			/* operator OID */
+@@ -278,11 +277,32 @@ ValidateRestrictionEstimator(List *restrictionName)
+ 				 errmsg("restriction estimator function %s must return type %s",
+ 						NameListToString(restrictionName), "float8")));
+ 
+-	/* Require EXECUTE rights for the estimator */
+-	aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
+-	if (aclresult != ACLCHECK_OK)
+-		aclcheck_error(aclresult, OBJECT_FUNCTION,
+-					   NameListToString(restrictionName));
++	/*
++	 * If the estimator is not a built-in function, require superuser
++	 * privilege to install it.  This protects against using something that is
++	 * not a restriction estimator or has hard-wired assumptions about what
++	 * data types it is working with.  (Built-in estimators are required to
++	 * defend themselves adequately against unexpected data type choices, but
++	 * it seems impractical to expect that of extensions' estimators.)
++	 *
++	 * If it is built-in, only require EXECUTE rights.
++	 */
++	if (restrictionOid >= FirstGenbkiObjectId)
++	{
++		if (!superuser())
++			ereport(ERROR,
++					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
++					 errmsg("must be superuser to specify a non-built-in restriction estimator function")));
++	}
++	else
++	{
++		AclResult	aclresult;
++
++		aclresult = pg_proc_aclcheck(restrictionOid, GetUserId(), ACL_EXECUTE);
++		if (aclresult != ACLCHECK_OK)
++			aclcheck_error(aclresult, OBJECT_FUNCTION,
++						   NameListToString(restrictionName));
++	}
+ 
+ 	return restrictionOid;
+ }
+@@ -298,7 +318,6 @@ ValidateJoinEstimator(List *joinName)
+ 	Oid			typeId[5];
+ 	Oid			joinOid;
+ 	Oid			joinOid2;
+-	AclResult	aclresult;
+ 
+ 	typeId[0] = INTERNALOID;	/* PlannerInfo */
+ 	typeId[1] = OIDOID;			/* operator OID */
+@@ -336,11 +355,23 @@ ValidateJoinEstimator(List *joinName)
+ 				 errmsg("join estimator function %s must return type %s",
+ 						NameListToString(joinName), "float8")));
+ 
+-	/* Require EXECUTE rights for the estimator */
+-	aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
+-	if (aclresult != ACLCHECK_OK)
+-		aclcheck_error(aclresult, OBJECT_FUNCTION,
+-					   NameListToString(joinName));
++	/* privilege checks are the same as in ValidateRestrictionEstimator */
++	if (joinOid >= FirstGenbkiObjectId)
++	{
++		if (!superuser())
++			ereport(ERROR,
++					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
++					 errmsg("must be superuser to specify a non-built-in join estimator function")));
++	}
++	else
++	{
++		AclResult	aclresult;
++
++		aclresult = pg_proc_aclcheck(joinOid, GetUserId(), ACL_EXECUTE);
++		if (aclresult != ACLCHECK_OK)
++			aclcheck_error(aclresult, OBJECT_FUNCTION,
++						   NameListToString(joinName));
++	}
+ 
+ 	return joinOid;
+ }
+diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
+index e732e66dace..d798610a8a9 100644
+--- a/src/backend/tsearch/dict_synonym.c
++++ b/src/backend/tsearch/dict_synonym.c
+@@ -47,8 +47,8 @@ findwrd(char *in, char **end, uint16 *flags)
+ 	char	   *lastchar;
+ 
+ 	/* Skip leading spaces */
+-	while (*in && t_isspace(in))
+-		in += pg_mblen(in);
++	while (*in && t_isspace_cstr(in))
++		in += pg_mblen_cstr(in);
+ 
+ 	/* Return NULL on empty lines */
+ 	if (*in == '\0')
+@@ -60,10 +60,10 @@ findwrd(char *in, char **end, uint16 *flags)
+ 	lastchar = start = in;
+ 
+ 	/* Find end of word */
+-	while (*in && !t_isspace(in))
++	while (*in && !t_isspace_cstr(in))
+ 	{
+ 		lastchar = in;
+-		in += pg_mblen(in);
++		in += pg_mblen_cstr(in);
+ 	}
+ 
+ 	if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
+diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
+index cb0835982d8..9a29b22d7e9 100644
+--- a/src/backend/tsearch/dict_thesaurus.c
++++ b/src/backend/tsearch/dict_thesaurus.c
+@@ -190,8 +190,8 @@ thesaurusRead(const char *filename, DictThesaurus *d)
+ 		ptr = line;
+ 
+ 		/* is it a comment? */
+-		while (*ptr && t_isspace(ptr))
+-			ptr += pg_mblen(ptr);
++		while (*ptr && t_isspace_cstr(ptr))
++			ptr += pg_mblen_cstr(ptr);
+ 
+ 		if (t_iseq(ptr, '#') || *ptr == '\0' ||
+ 			t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
+@@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
+ 								 errmsg("unexpected delimiter")));
+ 					state = TR_WAITSUBS;
+ 				}
+-				else if (!t_isspace(ptr))
++				else if (!t_isspace_cstr(ptr))
+ 				{
+ 					beginwrd = ptr;
+ 					state = TR_INLEX;
+@@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
+ 					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
+ 					state = TR_WAITSUBS;
+ 				}
+-				else if (t_isspace(ptr))
++				else if (t_isspace_cstr(ptr))
+ 				{
+ 					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
+ 					state = TR_WAITLEX;
+@@ -237,15 +237,15 @@ thesaurusRead(const char *filename, DictThesaurus *d)
+ 				{
+ 					useasis = true;
+ 					state = TR_INSUBS;
+-					beginwrd = ptr + pg_mblen(ptr);
++					beginwrd = ptr + pg_mblen_cstr(ptr);
+ 				}
+ 				else if (t_iseq(ptr, '\\'))
+ 				{
+ 					useasis = false;
+ 					state = TR_INSUBS;
+-					beginwrd = ptr + pg_mblen(ptr);
++					beginwrd = ptr + pg_mblen_cstr(ptr);
+ 				}
+-				else if (!t_isspace(ptr))
++				else if (!t_isspace_cstr(ptr))
+ 				{
+ 					useasis = false;
+ 					beginwrd = ptr;
+@@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
+ 			}
+ 			else if (state == TR_INSUBS)
+ 			{
+-				if (t_isspace(ptr))
++				if (t_isspace_cstr(ptr))
+ 				{
+ 					if (ptr == beginwrd)
+ 						ereport(ERROR,
+@@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
+ 			else
+ 				elog(ERROR, "unrecognized thesaurus state: %d", state);
+ 
+-			ptr += pg_mblen(ptr);
++			ptr += pg_mblen_cstr(ptr);
+ 		}
+ 
+ 		if (state == TR_INSUBS)
+diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c
+index 2edd4faa8ec..a9b64fccd6e 100644
+--- a/src/backend/tsearch/regis.c
++++ b/src/backend/tsearch/regis.c
+@@ -37,7 +37,7 @@ RS_isRegis(const char *str)
+ 	{
+ 		if (state == RS_IN_WAIT)
+ 		{
+-			if (t_isalpha(c))
++			if (t_isalpha_cstr(c))
+ 				 /* okay */ ;
+ 			else if (t_iseq(c, '['))
+ 				state = RS_IN_ONEOF;
+@@ -48,14 +48,14 @@ RS_isRegis(const char *str)
+ 		{
+ 			if (t_iseq(c, '^'))
+ 				state = RS_IN_NONEOF;
+-			else if (t_isalpha(c))
++			else if (t_isalpha_cstr(c))
+ 				state = RS_IN_ONEOF_IN;
+ 			else
+ 				return false;
+ 		}
+ 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
+ 		{
+-			if (t_isalpha(c))
++			if (t_isalpha_cstr(c))
+ 				 /* okay */ ;
+ 			else if (t_iseq(c, ']'))
+ 				state = RS_IN_WAIT;
+@@ -64,7 +64,7 @@ RS_isRegis(const char *str)
+ 		}
+ 		else
+ 			elog(ERROR, "internal error in RS_isRegis: state %d", state);
+-		c += pg_mblen(c);
++		c += pg_mblen_cstr(c);
+ 	}
+ 
+ 	return (state == RS_IN_WAIT);
+@@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str)
+ 	{
+ 		if (state == RS_IN_WAIT)
+ 		{
+-			if (t_isalpha(c))
++			if (t_isalpha_cstr(c))
+ 			{
+ 				if (ptr)
+ 					ptr = newRegisNode(ptr, len);
+ 				else
+ 					ptr = r->node = newRegisNode(NULL, len);
+-				COPYCHAR(ptr->data, c);
+ 				ptr->type = RSF_ONEOF;
+-				ptr->len = pg_mblen(c);
++				ptr->len = ts_copychar_cstr(ptr->data, c);
+ 			}
+ 			else if (t_iseq(c, '['))
+ 			{
+@@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str)
+ 				ptr->type = RSF_NONEOF;
+ 				state = RS_IN_NONEOF;
+ 			}
+-			else if (t_isalpha(c))
++			else if (t_isalpha_cstr(c))
+ 			{
+-				COPYCHAR(ptr->data, c);
+-				ptr->len = pg_mblen(c);
++				ptr->len = ts_copychar_cstr(ptr->data, c);
+ 				state = RS_IN_ONEOF_IN;
+ 			}
+ 			else				/* shouldn't get here */
+@@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str)
+ 		}
+ 		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
+ 		{
+-			if (t_isalpha(c))
+-			{
+-				COPYCHAR(ptr->data + ptr->len, c);
+-				ptr->len += pg_mblen(c);
+-			}
++			if (t_isalpha_cstr(c))
++				ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c);
+ 			else if (t_iseq(c, ']'))
+ 				state = RS_IN_WAIT;
+ 			else				/* shouldn't get here */
+@@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str)
+ 		}
+ 		else
+ 			elog(ERROR, "internal error in RS_compile: state %d", state);
+-		c += pg_mblen(c);
++		c += pg_mblen_cstr(c);
+ 	}
+ 
+ 	if (state != RS_IN_WAIT)	/* shouldn't get here */
+@@ -187,10 +182,10 @@ mb_strchr(char *str, char *c)
+ 	char	   *ptr = str;
+ 	bool		res = false;
+ 
+-	clen = pg_mblen(c);
++	clen = pg_mblen_cstr(c);
+ 	while (*ptr && !res)
+ 	{
+-		plen = pg_mblen(ptr);
++		plen = pg_mblen_cstr(ptr);
+ 		if (plen == clen)
+ 		{
+ 			i = plen;
+@@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str)
+ 	while (*c)
+ 	{
+ 		len++;
+-		c += pg_mblen(c);
++		c += pg_mblen_cstr(c);
+ 	}
+ 
+ 	if (len < r->nchar)
+@@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str)
+ 	{
+ 		len -= r->nchar;
+ 		while (len-- > 0)
+-			c += pg_mblen(c);
++			c += pg_mblen_cstr(c);
+ 	}
+ 
+ 
+@@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str)
+ 				elog(ERROR, "unrecognized regis node type: %d", ptr->type);
+ 		}
+ 		ptr = ptr->next;
+-		c += pg_mblen(c);
++		c += pg_mblen_cstr(c);
+ 	}
+ 
+ 	return true;
+diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
+index 7c45e3206d4..b2d564ad5e1 100644
+--- a/src/backend/tsearch/spell.c
++++ b/src/backend/tsearch/spell.c
+@@ -232,7 +232,7 @@ findchar(char *str, int c)
+ 	{
+ 		if (t_iseq(str, c))
+ 			return str;
+-		str += pg_mblen(str);
++		str += pg_mblen_cstr(str);
+ 	}
+ 
+ 	return NULL;
+@@ -245,7 +245,7 @@ findchar2(char *str, int c1, int c2)
+ 	{
+ 		if (t_iseq(str, c1) || t_iseq(str, c2))
+ 			return str;
+-		str += pg_mblen(str);
++		str += pg_mblen_cstr(str);
+ 	}
+ 
+ 	return NULL;
+@@ -352,6 +352,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+ 	char	   *next,
+ 			   *sbuf = *sflagset;
+ 	int			maxstep;
++	int			clen;
+ 	bool		stop = false;
+ 	bool		met_comma = false;
+ 
+@@ -363,11 +364,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+ 		{
+ 			case FM_LONG:
+ 			case FM_CHAR:
+-				COPYCHAR(sflag, *sflagset);
+-				sflag += pg_mblen(*sflagset);
++				clen = ts_copychar_cstr(sflag, *sflagset);
++				sflag += clen;
+ 
+ 				/* Go to start of the next flag */
+-				*sflagset += pg_mblen(*sflagset);
++				*sflagset += clen;
+ 
+ 				/* Check if we get all characters of flag */
+ 				maxstep--;
+@@ -391,7 +392,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+ 				*sflagset = next;
+ 				while (**sflagset)
+ 				{
+-					if (t_isdigit(*sflagset))
++					if (t_isdigit_cstr(*sflagset))
+ 					{
+ 						if (!met_comma)
+ 							ereport(ERROR,
+@@ -409,7 +410,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+ 											*sflagset)));
+ 						met_comma = true;
+ 					}
+-					else if (!t_isspace(*sflagset))
++					else if (!t_isspace_cstr(*sflagset))
+ 					{
+ 						ereport(ERROR,
+ 								(errcode(ERRCODE_CONFIG_FILE_ERROR),
+@@ -417,7 +418,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag)
+ 										*sflagset)));
+ 					}
+ 
+-					*sflagset += pg_mblen(*sflagset);
++					*sflagset += pg_mblen_cstr(*sflagset);
+ 				}
+ 				stop = true;
+ 				break;
+@@ -543,7 +544,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
+ 			while (*s)
+ 			{
+ 				/* we allow only single encoded flags for faster works */
+-				if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
++				if (pg_mblen_cstr(s) == 1 && t_isprint_unbounded(s) && !t_isspace_unbounded(s))
+ 					s++;
+ 				else
+ 				{
+@@ -559,12 +560,12 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
+ 		s = line;
+ 		while (*s)
+ 		{
+-			if (t_isspace(s))
++			if (t_isspace_cstr(s))
+ 			{
+ 				*s = '\0';
+ 				break;
+ 			}
+-			s += pg_mblen(s);
++			s += pg_mblen_cstr(s);
+ 		}
+ 		pstr = lowerstr_ctx(Conf, line);
+ 
+@@ -816,17 +817,17 @@ get_nextfield(char **str, char *next)
+ 
+ 	while (**str)
+ 	{
++		int			clen = pg_mblen_cstr(*str);
++
+ 		if (state == PAE_WAIT_MASK)
+ 		{
+ 			if (t_iseq(*str, '#'))
+ 				return false;
+-			else if (!t_isspace(*str))
++			else if (!t_isspace_cstr(*str))
+ 			{
+-				int			clen = pg_mblen(*str);
+-
+ 				if (clen < avail)
+ 				{
+-					COPYCHAR(next, *str);
++					ts_copychar_with_len(next, *str, clen);
+ 					next += clen;
+ 					avail -= clen;
+ 				}
+@@ -835,24 +836,22 @@ get_nextfield(char **str, char *next)
+ 		}
+ 		else					/* state == PAE_INMASK */
+ 		{
+-			if (t_isspace(*str))
++			if (t_isspace_cstr(*str))
+ 			{
+ 				*next = '\0';
+ 				return true;
+ 			}
+ 			else
+ 			{
+-				int			clen = pg_mblen(*str);
+-
+ 				if (clen < avail)
+ 				{
+-					COPYCHAR(next, *str);
++					ts_copychar_with_len(next, *str, clen);
+ 					next += clen;
+ 					avail -= clen;
+ 				}
+ 			}
+ 		}
+-		*str += pg_mblen(*str);
++		*str += clen;
+ 	}
+ 
+ 	*next = '\0';
+@@ -942,14 +941,15 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 
+ 	while (*str)
+ 	{
++		int			clen = pg_mblen_cstr(str);
++
+ 		if (state == PAE_WAIT_MASK)
+ 		{
+ 			if (t_iseq(str, '#'))
+ 				return false;
+-			else if (!t_isspace(str))
++			else if (!t_isspace_cstr(str))
+ 			{
+-				COPYCHAR(pmask, str);
+-				pmask += pg_mblen(str);
++				pmask += ts_copychar_with_len(pmask, str, clen);
+ 				state = PAE_INMASK;
+ 			}
+ 		}
+@@ -960,10 +960,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 				*pmask = '\0';
+ 				state = PAE_WAIT_FIND;
+ 			}
+-			else if (!t_isspace(str))
++			else if (!t_isspace_cstr(str))
+ 			{
+-				COPYCHAR(pmask, str);
+-				pmask += pg_mblen(str);
++				pmask += ts_copychar_with_len(pmask, str, clen);
+ 			}
+ 		}
+ 		else if (state == PAE_WAIT_FIND)
+@@ -972,13 +971,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 			{
+ 				state = PAE_INFIND;
+ 			}
+-			else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
++			else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
+ 			{
+-				COPYCHAR(prepl, str);
+-				prepl += pg_mblen(str);
++				prepl += ts_copychar_with_len(prepl, str, clen);
+ 				state = PAE_INREPL;
+ 			}
+-			else if (!t_isspace(str))
++			else if (!t_isspace_cstr(str))
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("syntax error")));
+@@ -990,12 +988,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 				*pfind = '\0';
+ 				state = PAE_WAIT_REPL;
+ 			}
+-			else if (t_isalpha(str))
++			else if (t_isalpha_cstr(str))
+ 			{
+-				COPYCHAR(pfind, str);
+-				pfind += pg_mblen(str);
++				pfind += ts_copychar_with_len(pfind, str, clen);
+ 			}
+-			else if (!t_isspace(str))
++			else if (!t_isspace_cstr(str))
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("syntax error")));
+@@ -1006,13 +1003,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 			{
+ 				break;			/* void repl */
+ 			}
+-			else if (t_isalpha(str))
++			else if (t_isalpha_cstr(str))
+ 			{
+-				COPYCHAR(prepl, str);
+-				prepl += pg_mblen(str);
++				prepl += ts_copychar_with_len(prepl, str, clen);
+ 				state = PAE_INREPL;
+ 			}
+-			else if (!t_isspace(str))
++			else if (!t_isspace_cstr(str))
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("syntax error")));
+@@ -1024,12 +1020,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 				*prepl = '\0';
+ 				break;
+ 			}
+-			else if (t_isalpha(str))
++			else if (t_isalpha_cstr(str))
+ 			{
+-				COPYCHAR(prepl, str);
+-				prepl += pg_mblen(str);
++				prepl += ts_copychar_with_len(prepl, str, clen);
+ 			}
+-			else if (!t_isspace(str))
++			else if (!t_isspace_cstr(str))
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("syntax error")));
+@@ -1037,7 +1032,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
+ 		else
+ 			elog(ERROR, "unrecognized state in parse_affentry: %d", state);
+ 
+-		str += pg_mblen(str);
++		str += clen;
+ 	}
+ 
+ 	*pmask = *pfind = *prepl = '\0';
+@@ -1090,10 +1085,9 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
+ 	CompoundAffixFlag *newValue;
+ 	char		sbuf[BUFSIZ];
+ 	char	   *sflag;
+-	int			clen;
+ 
+-	while (*s && t_isspace(s))
+-		s += pg_mblen(s);
++	while (*s && t_isspace_cstr(s))
++		s += pg_mblen_cstr(s);
+ 
+ 	if (!*s)
+ 		ereport(ERROR,
+@@ -1102,10 +1096,10 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
+ 
+ 	/* Get flag without \n */
+ 	sflag = sbuf;
+-	while (*s && !t_isspace(s) && *s != '\n')
++	while (*s && !t_isspace_cstr(s) && *s != '\n')
+ 	{
+-		clen = pg_mblen(s);
+-		COPYCHAR(sflag, s);
++		int			clen = ts_copychar_cstr(sflag, s);
++
+ 		sflag += clen;
+ 		s += clen;
+ 	}
+@@ -1248,7 +1242,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
+ 
+ 	while ((recoded = tsearch_readline(&trst)) != NULL)
+ 	{
+-		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
++		if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
+ 		{
+ 			pfree(recoded);
+ 			continue;
+@@ -1285,8 +1279,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
+ 		{
+ 			char	   *s = recoded + strlen("FLAG");
+ 
+-			while (*s && t_isspace(s))
+-				s += pg_mblen(s);
++			while (*s && t_isspace_cstr(s))
++				s += pg_mblen_cstr(s);
+ 
+ 			if (*s)
+ 			{
+@@ -1321,7 +1315,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
+ 	{
+ 		int			fields_read;
+ 
+-		if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
++		if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#'))
+ 			goto nextline;
+ 
+ 		fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
+@@ -1484,12 +1478,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
+ 			s = findchar2(recoded, 'l', 'L');
+ 			if (s)
+ 			{
+-				while (*s && !t_isspace(s))
+-					s += pg_mblen(s);
+-				while (*s && t_isspace(s))
+-					s += pg_mblen(s);
++				while (*s && !t_isspace_cstr(s))
++					s += pg_mblen_cstr(s);
++				while (*s && t_isspace_cstr(s))
++					s += pg_mblen_cstr(s);
+ 
+-				if (*s && pg_mblen(s) == 1)
++				if (*s && pg_mblen_cstr(s) == 1)
+ 				{
+ 					addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
+ 					Conf->usecompound = true;
+@@ -1517,8 +1511,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
+ 			s = recoded + 4;	/* we need non-lowercased string */
+ 			flagflags = 0;
+ 
+-			while (*s && t_isspace(s))
+-				s += pg_mblen(s);
++			while (*s && t_isspace_cstr(s))
++				s += pg_mblen_cstr(s);
+ 
+ 			if (*s == '*')
+ 			{
+@@ -1539,14 +1533,13 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
+ 			 * be followed by EOL, whitespace, or ':'.  Otherwise this is a
+ 			 * new-format flag command.
+ 			 */
+-			if (*s && pg_mblen(s) == 1)
++			if (*s && pg_mblen_cstr(s) == 1)
+ 			{
+-				COPYCHAR(flag, s);
++				flag[0] = *s++;
+ 				flag[1] = '\0';
+ 
+-				s++;
+ 				if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
+-					t_isspace(s))
++					t_isspace_cstr(s))
+ 				{
+ 					oldformat = true;
+ 					goto nextline;
+@@ -1769,7 +1762,7 @@ NISortDictionary(IspellDict *Conf)
+ 							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 							 errmsg("invalid affix alias \"%s\"",
+ 									Conf->Spell[i]->p.flag)));
+-				if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
++				if (*end != '\0' && !t_isdigit_cstr(end) && !t_isspace_cstr(end))
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 							 errmsg("invalid affix alias \"%s\"",
+diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
+index 9b199d0ac18..3b189fc68d1 100644
+--- a/src/backend/tsearch/ts_locale.c
++++ b/src/backend/tsearch/ts_locale.c
+@@ -32,70 +32,43 @@ static void tsearch_readline_callback(void *arg);
+  */
+ #define WC_BUF_LEN  3
+ 
+-int
+-t_isdigit(const char *ptr)
+-{
+-	int			clen = pg_mblen(ptr);
+-	wchar_t		character[WC_BUF_LEN];
+-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
+-	pg_locale_t mylocale = 0;	/* TODO */
+-
+-	if (clen == 1 || lc_ctype_is_c(collation))
+-		return isdigit(TOUCHAR(ptr));
+-
+-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
+-
+-	return iswdigit((wint_t) character[0]);
+-}
+-
+-int
+-t_isspace(const char *ptr)
+-{
+-	int			clen = pg_mblen(ptr);
+-	wchar_t		character[WC_BUF_LEN];
+-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
+-	pg_locale_t mylocale = 0;	/* TODO */
+-
+-	if (clen == 1 || lc_ctype_is_c(collation))
+-		return isspace(TOUCHAR(ptr));
+-
+-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
+-
+-	return iswspace((wint_t) character[0]);
+-}
+-
+-int
+-t_isalpha(const char *ptr)
+-{
+-	int			clen = pg_mblen(ptr);
+-	wchar_t		character[WC_BUF_LEN];
+-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
+-	pg_locale_t mylocale = 0;	/* TODO */
+-
+-	if (clen == 1 || lc_ctype_is_c(collation))
+-		return isalpha(TOUCHAR(ptr));
+-
+-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
+-
+-	return iswalpha((wint_t) character[0]);
+-}
+-
+-int
+-t_isprint(const char *ptr)
+-{
+-	int			clen = pg_mblen(ptr);
+-	wchar_t		character[WC_BUF_LEN];
+-	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
+-	pg_locale_t mylocale = 0;	/* TODO */
+-
+-	if (clen == 1 || lc_ctype_is_c(collation))
+-		return isprint(TOUCHAR(ptr));
+-
+-	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
+-
+-	return iswprint((wint_t) character[0]);
++#define GENERATE_T_ISCLASS_DEF(character_class) \
++/* mblen shall be that of the first character */ \
++int \
++t_is##character_class##_with_len(const char *ptr, int mblen) \
++{ \
++	int			clen = pg_mblen_with_len(ptr, mblen); \
++	wchar_t		character[WC_BUF_LEN]; \
++	pg_locale_t mylocale = 0;	/* TODO */ \
++	if (clen == 1 || lc_ctype_is_c(DEFAULT_COLLATION_OID)) \
++		return is##character_class(TOUCHAR(ptr)); \
++	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); \
++	return isw##character_class((wint_t) character[0]); \
++} \
++\
++/* ptr shall point to a NUL-terminated string */ \
++int \
++t_is##character_class##_cstr(const char *ptr) \
++{ \
++	return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \
++} \
++/* ptr shall point to a string with pre-validated encoding */ \
++int \
++t_is##character_class##_unbounded(const char *ptr) \
++{ \
++	return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \
++} \
++/* historical name for _unbounded */ \
++int \
++t_is##character_class(const char *ptr) \
++{ \
++	return t_is##character_class##_unbounded(ptr); \
+ }
+ 
++GENERATE_T_ISCLASS_DEF(alpha)
++GENERATE_T_ISCLASS_DEF(digit)
++GENERATE_T_ISCLASS_DEF(print)
++GENERATE_T_ISCLASS_DEF(space)
+ 
+ /*
+  * Set up to read a file using tsearch_readline().  This facility is
+diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
+index e74b85a6900..d1c4ca959b0 100644
+--- a/src/backend/tsearch/ts_selfuncs.c
++++ b/src/backend/tsearch/ts_selfuncs.c
+@@ -109,12 +109,14 @@ tsmatchsel(PG_FUNCTION_ARGS)
+ 	 * OK, there's a Var and a Const we're dealing with here.  We need the
+ 	 * Const to be a TSQuery, else we can't do anything useful.  We have to
+ 	 * check this because the Var might be the TSQuery not the TSVector.
++	 *
++	 * Also check that the Var really is a TSVector, in case this estimator is
++	 * mistakenly attached to some other operator.
+ 	 */
+-	if (((Const *) other)->consttype == TSQUERYOID)
++	if (((Const *) other)->consttype == TSQUERYOID &&
++		vardata.vartype == TSVECTOROID)
+ 	{
+ 		/* tsvector @@ tsquery or the other way around */
+-		Assert(vardata.vartype == TSVECTOROID);
+-
+ 		selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
+ 	}
+ 	else
+diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
+index 3bc6b32095f..f6c367ea6a4 100644
+--- a/src/backend/tsearch/ts_utils.c
++++ b/src/backend/tsearch/ts_utils.c
+@@ -88,8 +88,8 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
+ 			char	   *pbuf = line;
+ 
+ 			/* Trim trailing space */
+-			while (*pbuf && !t_isspace(pbuf))
+-				pbuf += pg_mblen(pbuf);
++			while (*pbuf && !t_isspace_cstr(pbuf))
++				pbuf += pg_mblen_cstr(pbuf);
+ 			*pbuf = '\0';
+ 
+ 			/* Skip empty lines */
+diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
+index af97b5b4213..99c5648024f 100644
+--- a/src/backend/tsearch/wparser_def.c
++++ b/src/backend/tsearch/wparser_def.c
+@@ -1735,7 +1735,8 @@ TParserGet(TParser *prs)
+ 			prs->state->charlen = 0;
+ 		else
+ 			prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
+-				pg_mblen(prs->str + prs->state->posbyte);
++				pg_mblen_range(prs->str + prs->state->posbyte,
++							   prs->str + prs->lenstr);
+ 
+ 		Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
+ 		Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
+diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
+index 5e9e6809785..bd8defcbf42 100644
+--- a/src/backend/utils/adt/arrayfuncs.c
++++ b/src/backend/utils/adt/arrayfuncs.c
+@@ -3357,6 +3357,92 @@ construct_array(Datum *elems, int nelems,
+ 							  elmtype, elmlen, elmbyval, elmalign);
+ }
+ 
++/*
++ * Like construct_array(), but elmlen/elmbyval/elmalign are looked up from
++ * hardcoded data; elmtype must be one of the built-in types in the switch
++ * below.  Often useful when manipulating arrays from/for system catalogs.
++ */
++ArrayType *
++construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
++{
++	int			elmlen;
++	bool		elmbyval;
++	char		elmalign;
++
++	switch (elmtype)
++	{
++		case CHAROID:
++			elmlen = 1;
++			elmbyval = true;
++			elmalign = TYPALIGN_CHAR;
++			break;
++
++		case CSTRINGOID:
++			elmlen = -2;
++			elmbyval = false;
++			elmalign = TYPALIGN_CHAR;
++			break;
++
++		case FLOAT4OID:
++			elmlen = sizeof(float4);
++			elmbyval = true;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case INT2OID:
++			elmlen = sizeof(int16);
++			elmbyval = true;
++			elmalign = TYPALIGN_SHORT;
++			break;
++
++		case INT4OID:
++			elmlen = sizeof(int32);
++			elmbyval = true;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case INT8OID:
++			elmlen = sizeof(int64);
++			elmbyval = FLOAT8PASSBYVAL;
++			elmalign = TYPALIGN_DOUBLE;
++			break;
++
++		case NAMEOID:
++			elmlen = NAMEDATALEN;
++			elmbyval = false;
++			elmalign = TYPALIGN_CHAR;
++			break;
++
++		case OIDOID:
++		case REGTYPEOID:		/* shares OIDOID's settings */
++			elmlen = sizeof(Oid);
++			elmbyval = true;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case TEXTOID:
++			elmlen = -1;
++			elmbyval = false;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case TIDOID:
++			elmlen = sizeof(ItemPointerData);
++			elmbyval = false;
++			elmalign = TYPALIGN_SHORT;
++			break;
++
++		default:				/* e.g. FLOAT8OID is not handled here */
++			elog(ERROR, "type %u not supported by construct_array_builtin()", elmtype);
++			/* keep compiler quiet */
++			elmlen = 0;
++			elmbyval = false;
++			elmalign = 0;
++	}
++
++	return construct_array(elems, nelems, elmtype, elmlen, elmbyval, elmalign);
++}
++
+ /*
+  * construct_md_array	--- simple method for constructing an array object
+  *							with arbitrary dimensions and possible NULLs
+@@ -3575,6 +3661,81 @@ deconstruct_array(ArrayType *array,
+ 	}
+ }
+ 
++/*
++ * Like deconstruct_array(), but elmlen/elmbyval/elmalign are looked up from
++ * hardcoded data; elmtype must be one of the built-in types in the switch
++ * below.  Often useful when manipulating arrays from/for system catalogs.
++ */
++void
++deconstruct_array_builtin(ArrayType *array,
++						  Oid elmtype,
++						  Datum **elemsp, bool **nullsp, int *nelemsp)
++{
++	int			elmlen;
++	bool		elmbyval;
++	char		elmalign;
++
++	switch (elmtype)
++	{
++		case CHAROID:
++			elmlen = 1;
++			elmbyval = true;
++			elmalign = TYPALIGN_CHAR;
++			break;
++
++		case CSTRINGOID:
++			elmlen = -2;
++			elmbyval = false;
++			elmalign = TYPALIGN_CHAR;
++			break;
++
++		case FLOAT8OID:
++			elmlen = sizeof(float8);
++			elmbyval = FLOAT8PASSBYVAL;
++			elmalign = TYPALIGN_DOUBLE;
++			break;
++
++		case INT2OID:
++			elmlen = sizeof(int16);
++			elmbyval = true;
++			elmalign = TYPALIGN_SHORT;
++			break;
++
++		case INT4OID:
++			elmlen = sizeof(int32);
++			elmbyval = true;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case OIDOID:
++			elmlen = sizeof(Oid);
++			elmbyval = true;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case TEXTOID:
++			elmlen = -1;
++			elmbyval = false;
++			elmalign = TYPALIGN_INT;
++			break;
++
++		case TIDOID:
++			elmlen = sizeof(ItemPointerData);
++			elmbyval = false;
++			elmalign = TYPALIGN_SHORT;
++			break;
++
++		default:				/* e.g. FLOAT4OID, INT8OID, NAMEOID land here */
++			elog(ERROR, "type %u not supported by deconstruct_array_builtin()", elmtype);
++			/* keep compiler quiet */
++			elmlen = 0;
++			elmbyval = false;
++			elmalign = 0;
++	}
++
++	deconstruct_array(array, elmtype, elmlen, elmbyval, elmalign, elemsp, nullsp, nelemsp);
++}
++
+ /*
+  * array_contains_nulls --- detect whether an array has any null elements
+  *
+diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
+index 61d318d93ca..767efb87dd7 100644
+--- a/src/backend/utils/adt/encode.c
++++ b/src/backend/utils/adt/encode.c
+@@ -15,6 +15,7 @@
+ 
+ #include <ctype.h>
+ 
++#include "mb/pg_wchar.h"
+ #include "utils/builtins.h"
+ #include "utils/memutils.h"
+ 
+@@ -170,18 +171,42 @@ hex_encode(const char *src, size_t len, char *dst)
+ 	return (uint64) len * 2;
+ }
+ 
++/*
++ * compat get_hex(): no end pointer, so the error cannot echo the bad character
++ */
++static inline char
++get_hex(const char *cp)
++{
++	unsigned char c = (unsigned char) *cp;
++	int			res = -1;
++
++	if (c < 127)				/* bytes >= 127 are never looked up */
++		res = hexlookup[(unsigned char) c];
++
++	if (res < 0)				/* not looked up, or table entry negative */
++		ereport(ERROR,
++				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
++				 errmsg("invalid hexadecimal digit")));
++
++	return (char) res;			/* hexlookup entry for the byte at *cp */
++}
++/*
++ * backported two-argument form; "end" bounds the character quoted in the error
++ */
+ static inline char
+-get_hex(char c)
++get_hex_new(const char *cp, const char *end)
+ {
++    unsigned char c = (unsigned char) *cp;
+ 	int			res = -1;
+ 
+-	if (c > 0 && c < 127)
++	if (c < 127)
+ 		res = hexlookup[(unsigned char) c];
+ 
+ 	if (res < 0)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+-				 errmsg("invalid hexadecimal digit: \"%c\"", c)));
++				 errmsg("invalid hexadecimal digit: \"%.*s\"",
++						pg_mblen_range(cp, end), cp)));
+ 
+ 	return (char) res;
+ }
+@@ -205,13 +230,15 @@ hex_decode(const char *src, size_t len, char *dst)
+ 			s++;
+ 			continue;
+ 		}
+-		v1 = get_hex(*s++) << 4;
++		v1 = get_hex_new(s, srcend) << 4;
++		s++;
+ 		if (s >= srcend)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 					 errmsg("invalid hexadecimal data: odd number of digits")));
+ 
+-		v2 = get_hex(*s++);
++		v2 = get_hex_new(s, srcend);
++		s++;
+ 		*p++ = v1 | v2;
+ 	}
+ 
+@@ -338,7 +365,8 @@ pg_base64_decode(const char *src, size_t len, char *dst)
+ 			if (b < 0)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+-						 errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
++						 errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
++								pg_mblen_range(s - 1, srcend), s - 1)));
+ 		}
+ 		/* add it to buffer */
+ 		buf = (buf << 6) + b;
+diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
+index 47bef085fa3..d4f6e3e21ef 100644
+--- a/src/backend/utils/adt/formatting.c
++++ b/src/backend/utils/adt/formatting.c
+@@ -1392,7 +1392,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ 							 errmsg("invalid datetime format separator: \"%s\"",
+-									pnstrdup(str, pg_mblen(str)))));
++									pnstrdup(str, pg_mblen_cstr(str)))));
+ 
+ 				if (*str == ' ')
+ 					n->type = NODE_TYPE_SPACE;
+@@ -1422,7 +1422,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
+ 					/* backslash quotes the next character, if any */
+ 					if (*str == '\\' && *(str + 1))
+ 						str++;
+-					chlen = pg_mblen(str);
++					chlen = pg_mblen_cstr(str);
+ 					n->type = NODE_TYPE_CHAR;
+ 					memcpy(n->character, str, chlen);
+ 					n->character[chlen] = '\0';
+@@ -1440,7 +1440,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
+ 				 */
+ 				if (*str == '\\' && *(str + 1) == '"')
+ 					str++;
+-				chlen = pg_mblen(str);
++				chlen = pg_mblen_cstr(str);
+ 
+ 				if ((flags & DCH_FLAG) && is_separator_char(str))
+ 					n->type = NODE_TYPE_SEPARATOR;
+@@ -2152,8 +2152,8 @@ asc_toupper_z(const char *buff)
+ 	do { \
+ 		if (S_THth(_suf)) \
+ 		{ \
+-			if (*(ptr)) (ptr) += pg_mblen(ptr); \
+-			if (*(ptr)) (ptr) += pg_mblen(ptr); \
++			if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
++			if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
+ 		} \
+ 	} while (0)
+ 
+@@ -3366,7 +3366,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
+ 				 * insist that the consumed character match the format's
+ 				 * character.
+ 				 */
+-				s += pg_mblen(s);
++				s += pg_mblen_cstr(s);
+ 			}
+ 			continue;
+ 		}
+@@ -3388,11 +3388,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
+ 				if (extra_skip > 0)
+ 					extra_skip--;
+ 				else
+-					s += pg_mblen(s);
++					s += pg_mblen_cstr(s);
+ 			}
+ 			else
+ 			{
+-				int			chlen = pg_mblen(s);
++				int			chlen = pg_mblen_cstr(s);
+ 
+ 				/*
+ 				 * Standard mode requires strict match of format characters.
+@@ -5564,13 +5564,15 @@ NUM_numpart_to_char(NUMProc *Np, int id)
+ static void
+ NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
+ {
++	const char *end = Np->inout + input_len;
++
+ 	while (n-- > 0)
+ 	{
+ 		if (OVERLOAD_TEST)
+ 			break;				/* end of input */
+ 		if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
+ 			break;				/* it's a data character */
+-		Np->inout_p += pg_mblen(Np->inout_p);
++		Np->inout_p += pg_mblen_range(Np->inout_p, end);
+ 	}
+ }
+ 
+@@ -6027,7 +6029,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
+ 			}
+ 			else
+ 			{
+-				Np->inout_p += pg_mblen(Np->inout_p);
++				Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len);
+ 			}
+ 			continue;
+ 		}
+diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
+index 69e8dea1a19..13c9b0e9e96 100644
+--- a/src/backend/utils/adt/jsonfuncs.c
++++ b/src/backend/utils/adt/jsonfuncs.c
+@@ -663,7 +663,7 @@ report_json_context(JsonLexContext *lex)
+ 			break;
+ 		/* Advance to next multibyte character */
+ 		if (IS_HIGHBIT_SET(*context_start))
+-			context_start += pg_mblen(context_start);
++			context_start += pg_mblen_range(context_start, context_end);
+ 		else
+ 			context_start++;
+ 	}
+diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
+index 287d497477c..362027ab80e 100644
+--- a/src/backend/utils/adt/jsonpath_gram.y
++++ b/src/backend/utils/adt/jsonpath_gram.y
+@@ -526,8 +526,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_SYNTAX_ERROR),
+ 						 errmsg("invalid input syntax for type %s", "jsonpath"),
+-						 errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
+-								   flags->val[i])));
++						 errdetail("unrecognized flag character \"%.*s\" in LIKE_REGEX predicate",
++								   pg_mblen_range(flags->val + i, flags->val + flags->len), flags->val + i)));
+ 				break;
+ 		}
+ 	}
+diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
+index d11278c505b..4d656f0af2c 100644
+--- a/src/backend/utils/adt/levenshtein.c
++++ b/src/backend/utils/adt/levenshtein.c
+@@ -84,6 +84,8 @@ varstr_levenshtein(const char *source, int slen,
+ 	int			i,
+ 				j;
+ 	const char *y;
++	const char *send = source + slen;
++	const char *tend = target + tlen;
+ 
+ 	/*
+ 	 * For varstr_levenshtein_less_equal, we have real variables called
+@@ -184,10 +186,10 @@ varstr_levenshtein(const char *source, int slen,
+ #endif
+ 
+ 	/*
+-	 * In order to avoid calling pg_mblen() repeatedly on each character in s,
+-	 * we cache all the lengths before starting the main loop -- but if all
+-	 * the characters in both strings are single byte, then we skip this and
+-	 * use a fast-path in the main loop.  If only one string contains
++	 * In order to avoid calling pg_mblen_range() repeatedly on each character
++	 * in s, we cache all the lengths before starting the main loop -- but if
++	 * all the characters in both strings are single byte, then we skip this
++	 * and use a fast-path in the main loop.  If only one string contains
+ 	 * multi-byte characters, we still build the array, so that the fast-path
+ 	 * needn't deal with the case where the array hasn't been initialized.
+ 	 */
+@@ -199,7 +201,7 @@ varstr_levenshtein(const char *source, int slen,
+ 		s_char_len = (int *) palloc((m + 1) * sizeof(int));
+ 		for (i = 0; i < m; ++i)
+ 		{
+-			s_char_len[i] = pg_mblen(cp);
++			s_char_len[i] = pg_mblen_range(cp, send);
+ 			cp += s_char_len[i];
+ 		}
+ 		s_char_len[i] = 0;
+@@ -225,7 +227,7 @@ varstr_levenshtein(const char *source, int slen,
+ 	{
+ 		int		   *temp;
+ 		const char *x = source;
+-		int			y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;
++		int			y_char_len = n != tlen + 1 ? pg_mblen_range(y, tend) : 1;
+ 
+ #ifdef LEVENSHTEIN_LESS_EQUAL
+ 
+diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
+index 5bf94628c30..dde8ec2097d 100644
+--- a/src/backend/utils/adt/like.c
++++ b/src/backend/utils/adt/like.c
+@@ -54,20 +54,20 @@ static int	Generic_Text_IC_like(text *str, text *pat, Oid collation);
+  *--------------------
+  */
+ static inline int
+-wchareq(const char *p1, const char *p2)
++wchareq(const char *p1, int p1len, const char *p2, int p2len)
+ {
+-	int			p1_len;
++	int			p1clen;
+ 
+ 	/* Optimization:  quickly compare the first byte. */
+ 	if (*p1 != *p2)
+ 		return 0;
+ 
+-	p1_len = pg_mblen(p1);
+-	if (pg_mblen(p2) != p1_len)
++	p1clen = pg_mblen_with_len(p1, p1len);
++	if (pg_mblen_with_len(p2, p2len) != p1clen)
+ 		return 0;
+ 
+ 	/* They are the same length */
+-	while (p1_len--)
++	while (p1clen--)
+ 	{
+ 		if (*p1++ != *p2++)
+ 			return 0;
+@@ -106,11 +106,11 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+ #define NextByte(p, plen)	((p)++, (plen)--)
+ 
+ /* Set up to compile like_match.c for multibyte characters */
+-#define CHAREQ(p1, p2) wchareq((p1), (p2))
++#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len))
+ #define NextChar(p, plen) \
+-	do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
++	do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0)
+ #define CopyAdvChar(dst, src, srclen) \
+-	do { int __l = pg_mblen(src); \
++	do { int __l = pg_mblen_with_len((src), (srclen)); \
+ 		 (srclen) -= __l; \
+ 		 while (__l-- > 0) \
+ 			 *(dst)++ = *(src)++; \
+@@ -122,7 +122,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+ #include "like_match.c"
+ 
+ /* Set up to compile like_match.c for single-byte characters */
+-#define CHAREQ(p1, p2) (*(p1) == *(p2))
++#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2))
+ #define NextChar(p, plen) NextByte((p), (plen))
+ #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
+ 
+diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
+index ee30170fbb4..9a93744aff6 100644
+--- a/src/backend/utils/adt/like_match.c
++++ b/src/backend/utils/adt/like_match.c
+@@ -294,6 +294,7 @@ do_like_escape(text *pat, text *esc)
+ 					 errhint("Escape string must be empty or one character.")));
+ 
+ 		e = VARDATA_ANY(esc);
++		elen = VARSIZE_ANY_EXHDR(esc);
+ 
+ 		/*
+ 		 * If specified escape is '\', just copy the pattern as-is.
+@@ -312,7 +313,7 @@ do_like_escape(text *pat, text *esc)
+ 		afterescape = false;
+ 		while (plen > 0)
+ 		{
+-			if (CHAREQ(p, e) && !afterescape)
++			if (CHAREQ(p, plen, e, elen) && !afterescape)
+ 			{
+ 				*r++ = '\\';
+ 				NextChar(p, plen);
+diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
+index 955e0ee87f8..9a08dea351d 100644
+--- a/src/backend/utils/adt/network_selfuncs.c
++++ b/src/backend/utils/adt/network_selfuncs.c
+@@ -43,9 +43,9 @@
+ /* Maximum number of items to consider in join selectivity calculations */
+ #define MAX_CONSIDERED_ELEMS 1024
+ 
+-static Selectivity networkjoinsel_inner(Oid operator,
++static Selectivity networkjoinsel_inner(Oid operator, int opr_codenum,
+ 										VariableStatData *vardata1, VariableStatData *vardata2);
+-static Selectivity networkjoinsel_semi(Oid operator,
++static Selectivity networkjoinsel_semi(Oid operator, int opr_codenum,
+ 									   VariableStatData *vardata1, VariableStatData *vardata2);
+ static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
+ static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
+@@ -82,6 +82,7 @@ networksel(PG_FUNCTION_ARGS)
+ 	Oid			operator = PG_GETARG_OID(1);
+ 	List	   *args = (List *) PG_GETARG_POINTER(2);
+ 	int			varRelid = PG_GETARG_INT32(3);
++	int			opr_codenum;
+ 	VariableStatData vardata;
+ 	Node	   *other;
+ 	bool		varonleft;
+@@ -95,6 +96,14 @@ networksel(PG_FUNCTION_ARGS)
+ 				nullfrac;
+ 	FmgrInfo	proc;
+ 
++	/*
++	 * Before all else, verify that the operator is one of the ones supported
++	 * by this function, which in turn proves that the input datatypes are
++	 * what we expect.  Otherwise, attaching this selectivity function to some
++	 * unexpected operator could cause trouble.
++	 */
++	opr_codenum = inet_opr_codenum(operator);
++
+ 	/*
+ 	 * If expression is not (variable op something) or (something op
+ 	 * variable), then punt and return a default estimate.
+@@ -150,13 +159,12 @@ networksel(PG_FUNCTION_ARGS)
+ 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ 						 ATTSTATSSLOT_VALUES))
+ 	{
+-		int			opr_codenum = inet_opr_codenum(operator);
++		int			h_codenum;
+ 
+ 		/* Commute if needed, so we can consider histogram to be on the left */
+-		if (!varonleft)
+-			opr_codenum = -opr_codenum;
++		h_codenum = varonleft ? opr_codenum : -opr_codenum;
+ 		non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
+-											constvalue, opr_codenum);
++											constvalue, h_codenum);
+ 
+ 		free_attstatsslot(&hslot);
+ 	}
+@@ -203,10 +211,19 @@ networkjoinsel(PG_FUNCTION_ARGS)
+ #endif
+ 	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ 	double		selec;
++	int			opr_codenum;
+ 	VariableStatData vardata1;
+ 	VariableStatData vardata2;
+ 	bool		join_is_reversed;
+ 
++	/*
++	 * Before all else, verify that the operator is one of the ones supported
++	 * by this function, which in turn proves that the input datatypes are
++	 * what we expect.  Otherwise, attaching this selectivity function to some
++	 * unexpected operator could cause trouble.
++	 */
++	opr_codenum = inet_opr_codenum(operator);
++
+ 	get_join_variables(root, args, sjinfo,
+ 					   &vardata1, &vardata2, &join_is_reversed);
+ 
+@@ -220,15 +237,18 @@ networkjoinsel(PG_FUNCTION_ARGS)
+ 			 * Selectivity for left/full join is not exactly the same as inner
+ 			 * join, but we neglect the difference, as eqjoinsel does.
+ 			 */
+-			selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
++			selec = networkjoinsel_inner(operator, opr_codenum,
++										 &vardata1, &vardata2);
+ 			break;
+ 		case JOIN_SEMI:
+ 		case JOIN_ANTI:
+ 			/* Here, it's important that we pass the outer var on the left. */
+ 			if (!join_is_reversed)
+-				selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
++				selec = networkjoinsel_semi(operator, opr_codenum,
++											&vardata1, &vardata2);
+ 			else
+ 				selec = networkjoinsel_semi(get_commutator(operator),
++											-opr_codenum,
+ 											&vardata2, &vardata1);
+ 			break;
+ 		default:
+@@ -260,7 +280,7 @@ networkjoinsel(PG_FUNCTION_ARGS)
+  * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
+  */
+ static Selectivity
+-networkjoinsel_inner(Oid operator,
++networkjoinsel_inner(Oid operator, int opr_codenum,
+ 					 VariableStatData *vardata1, VariableStatData *vardata2)
+ {
+ 	Form_pg_statistic stats;
+@@ -273,7 +293,6 @@ networkjoinsel_inner(Oid operator,
+ 				mcv2_exists = false,
+ 				hist1_exists = false,
+ 				hist2_exists = false;
+-	int			opr_codenum;
+ 	int			mcv1_length = 0,
+ 				mcv2_length = 0;
+ 	AttStatsSlot mcv1_slot;
+@@ -325,8 +344,6 @@ networkjoinsel_inner(Oid operator,
+ 		memset(&hist2_slot, 0, sizeof(hist2_slot));
+ 	}
+ 
+-	opr_codenum = inet_opr_codenum(operator);
+-
+ 	/*
+ 	 * Calculate selectivity for MCV vs MCV matches.
+ 	 */
+@@ -387,7 +404,7 @@ networkjoinsel_inner(Oid operator,
+  * histogram selectivity for semi/anti join cases.
+  */
+ static Selectivity
+-networkjoinsel_semi(Oid operator,
++networkjoinsel_semi(Oid operator, int opr_codenum,
+ 					VariableStatData *vardata1, VariableStatData *vardata2)
+ {
+ 	Form_pg_statistic stats;
+@@ -401,7 +418,6 @@ networkjoinsel_semi(Oid operator,
+ 				mcv2_exists = false,
+ 				hist1_exists = false,
+ 				hist2_exists = false;
+-	int			opr_codenum;
+ 	FmgrInfo	proc;
+ 	int			i,
+ 				mcv1_length = 0,
+@@ -455,7 +471,6 @@ networkjoinsel_semi(Oid operator,
+ 		memset(&hist2_slot, 0, sizeof(hist2_slot));
+ 	}
+ 
+-	opr_codenum = inet_opr_codenum(operator);
+ 	fmgr_info(get_opcode(operator), &proc);
+ 
+ 	/* Estimate number of input rows represented by RHS histogram. */
+@@ -827,6 +842,9 @@ inet_semi_join_sel(Datum lhs_value,
+ /*
+  * Assign useful code numbers for the subnet inclusion/overlap operators
+  *
++ * This will throw an error if the operator is not one of the ones we
++ * support in networksel() and networkjoinsel().
++ *
+  * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
+  * on the exact codes assigned here; but many other places in this file
+  * know that they can negate a code to obtain the code for the commutator
+diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
+index 60d2d4cc768..4427a9a0829 100644
+--- a/src/backend/utils/adt/oracle_compat.c
++++ b/src/backend/utils/adt/oracle_compat.c
+@@ -148,8 +148,8 @@ lpad(PG_FUNCTION_ARGS)
+ 	char	   *ptr1,
+ 			   *ptr2,
+ 			   *ptr2start,
+-			   *ptr2end,
+ 			   *ptr_ret;
++	const char *ptr2end;
+ 	int			m,
+ 				s1len,
+ 				s2len;
+@@ -194,7 +194,7 @@ lpad(PG_FUNCTION_ARGS)
+ 
+ 	while (m--)
+ 	{
+-		int			mlen = pg_mblen(ptr2);
++		int			mlen = pg_mblen_range(ptr2, ptr2end);
+ 
+ 		memcpy(ptr_ret, ptr2, mlen);
+ 		ptr_ret += mlen;
+@@ -207,7 +207,7 @@ lpad(PG_FUNCTION_ARGS)
+ 
+ 	while (s1len--)
+ 	{
+-		int			mlen = pg_mblen(ptr1);
++		int			mlen = pg_mblen_unbounded(ptr1);
+ 
+ 		memcpy(ptr_ret, ptr1, mlen);
+ 		ptr_ret += mlen;
+@@ -246,8 +246,8 @@ rpad(PG_FUNCTION_ARGS)
+ 	char	   *ptr1,
+ 			   *ptr2,
+ 			   *ptr2start,
+-			   *ptr2end,
+ 			   *ptr_ret;
++	const char *ptr2end;
+ 	int			m,
+ 				s1len,
+ 				s2len;
+@@ -286,11 +286,12 @@ rpad(PG_FUNCTION_ARGS)
+ 	m = len - s1len;
+ 
+ 	ptr1 = VARDATA_ANY(string1);
++
+ 	ptr_ret = VARDATA(ret);
+ 
+ 	while (s1len--)
+ 	{
+-		int			mlen = pg_mblen(ptr1);
++		int			mlen = pg_mblen_unbounded(ptr1);
+ 
+ 		memcpy(ptr_ret, ptr1, mlen);
+ 		ptr_ret += mlen;
+@@ -302,7 +303,7 @@ rpad(PG_FUNCTION_ARGS)
+ 
+ 	while (m--)
+ 	{
+-		int			mlen = pg_mblen(ptr2);
++		int			mlen = pg_mblen_range(ptr2, ptr2end);
+ 
+ 		memcpy(ptr_ret, ptr2, mlen);
+ 		ptr_ret += mlen;
+@@ -387,6 +388,7 @@ dotrim(const char *string, int stringlen,
+ 			 */
+ 			const char **stringchars;
+ 			const char **setchars;
++			const char *setend;
+ 			int		   *stringmblen;
+ 			int		   *setmblen;
+ 			int			stringnchars;
+@@ -394,6 +396,7 @@ dotrim(const char *string, int stringlen,
+ 			int			resultndx;
+ 			int			resultnchars;
+ 			const char *p;
++			const char *pend;
+ 			int			len;
+ 			int			mblen;
+ 			const char *str_pos;
+@@ -404,10 +407,11 @@ dotrim(const char *string, int stringlen,
+ 			stringnchars = 0;
+ 			p = string;
+ 			len = stringlen;
++			pend = p + len;
+ 			while (len > 0)
+ 			{
+ 				stringchars[stringnchars] = p;
+-				stringmblen[stringnchars] = mblen = pg_mblen(p);
++				stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend);
+ 				stringnchars++;
+ 				p += mblen;
+ 				len -= mblen;
+@@ -418,10 +422,11 @@ dotrim(const char *string, int stringlen,
+ 			setnchars = 0;
+ 			p = set;
+ 			len = setlen;
++			setend = set + setlen;
+ 			while (len > 0)
+ 			{
+ 				setchars[setnchars] = p;
+-				setmblen[setnchars] = mblen = pg_mblen(p);
++				setmblen[setnchars] = mblen = pg_mblen_range(p, setend);
+ 				setnchars++;
+ 				p += mblen;
+ 				len -= mblen;
+@@ -727,6 +732,8 @@ translate(PG_FUNCTION_ARGS)
+ 			   *to_end;
+ 	char	   *source,
+ 			   *target;
++	const char *source_end;
++	const char *from_end;
+ 	int			m,
+ 				fromlen,
+ 				tolen,
+@@ -741,9 +748,11 @@ translate(PG_FUNCTION_ARGS)
+ 	if (m <= 0)
+ 		PG_RETURN_TEXT_P(string);
+ 	source = VARDATA_ANY(string);
++	source_end = source + m;
+ 
+ 	fromlen = VARSIZE_ANY_EXHDR(from);
+ 	from_ptr = VARDATA_ANY(from);
++	from_end = from_ptr + fromlen;
+ 	tolen = VARSIZE_ANY_EXHDR(to);
+ 	to_ptr = VARDATA_ANY(to);
+ 	to_end = to_ptr + tolen;
+@@ -766,12 +775,12 @@ translate(PG_FUNCTION_ARGS)
+ 
+ 	while (m > 0)
+ 	{
+-		source_len = pg_mblen(source);
++		source_len = pg_mblen_range(source, source_end);
+ 		from_index = 0;
+ 
+ 		for (i = 0; i < fromlen; i += len)
+ 		{
+-			len = pg_mblen(&from_ptr[i]);
++			len = pg_mblen_range(&from_ptr[i], from_end);
+ 			if (len == source_len &&
+ 				memcmp(source, &from_ptr[i], len) == 0)
+ 				break;
+@@ -787,11 +796,11 @@ translate(PG_FUNCTION_ARGS)
+ 			{
+ 				if (p >= to_end)
+ 					break;
+-				p += pg_mblen(p);
++				p += pg_mblen_range(p, to_end);
+ 			}
+ 			if (p < to_end)
+ 			{
+-				len = pg_mblen(p);
++				len = pg_mblen_range(p, to_end);
+ 				memcpy(target, p, len);
+ 				target += len;
+ 				retlen += len;
+diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
+index 1b2a0434ae2..fc567ba14d4 100644
+--- a/src/backend/utils/adt/regexp.c
++++ b/src/backend/utils/adt/regexp.c
+@@ -423,8 +423,8 @@ parse_re_flags(pg_re_flags *flags, text *opts)
+ 				default:
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+-							 errmsg("invalid regular expression option: \"%c\"",
+-									opt_p[i])));
++							 errmsg("invalid regular expression option: \"%.*s\"",
++									pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i)));
+ 					break;
+ 			}
+ 		}
+@@ -672,6 +672,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
+ 			   *r;
+ 	int			plen,
+ 				elen;
++	const char *pend;
+ 	bool		afterescape = false;
+ 	int			nquotes = 0;
+ 	int			bracket_depth = 0;	/* square bracket nesting level */
+@@ -679,6 +680,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
+ 
+ 	p = VARDATA_ANY(pat_text);
+ 	plen = VARSIZE_ANY_EXHDR(pat_text);
++	pend = p + plen;
+ 	if (esc_text == NULL)
+ 	{
+ 		/* No ESCAPE clause provided; default to backslash as escape */
+@@ -778,7 +780,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
+ 
+ 		if (elen > 1)
+ 		{
+-			int			mblen = pg_mblen(p);
++			int			mblen = pg_mblen_range(p, pend);
+ 
+ 			if (mblen > 1)
+ 			{
+diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
+index dc373cee860..c5e16f5f96b 100644
+--- a/src/backend/utils/adt/tsquery.c
++++ b/src/backend/utils/adt/tsquery.c
+@@ -110,7 +110,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
+ 		return buf;
+ 
+ 	buf++;
+-	while (*buf && pg_mblen(buf) == 1)
++	while (*buf && pg_mblen_cstr(buf) == 1)
+ 	{
+ 		switch (*buf)
+ 		{
+@@ -187,7 +187,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
+ 					continue;
+ 				}
+ 
+-				if (!t_isdigit(ptr))
++				if (!t_isdigit_cstr(ptr))
+ 					return false;
+ 
+ 				errno = 0;
+@@ -252,12 +252,12 @@ parse_or_operator(TSQueryParserState pstate)
+ 		return false;
+ 
+ 	/* it shouldn't be a part of any word */
+-	if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
++	if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha_cstr(ptr) || t_isdigit_cstr(ptr))
+ 		return false;
+ 
+ 	for (;;)
+ 	{
+-		ptr += pg_mblen(ptr);
++		ptr += pg_mblen_cstr(ptr);
+ 
+ 		if (*ptr == '\0')		/* got end of string without operand */
+ 			return false;
+@@ -267,7 +267,7 @@ parse_or_operator(TSQueryParserState pstate)
+ 		 * So we still treat OR literal as operation with possibly incorrect
+ 		 * operand and  will not search it as lexeme
+ 		 */
+-		if (!t_isspace(ptr))
++		if (!t_isspace_cstr(ptr))
+ 			break;
+ 	}
+ 
+@@ -310,7 +310,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
+ 							 errmsg("syntax error in tsquery: \"%s\"",
+ 									state->buffer)));
+ 				}
+-				else if (!t_isspace(state->buf))
++				else if (!t_isspace_cstr(state->buf))
+ 				{
+ 					/*
+ 					 * We rely on the tsvector parser to parse the value for
+@@ -368,14 +368,14 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
+ 				{
+ 					return (state->count) ? PT_ERR : PT_END;
+ 				}
+-				else if (!t_isspace(state->buf))
++				else if (!t_isspace_cstr(state->buf))
+ 				{
+ 					return PT_ERR;
+ 				}
+ 				break;
+ 		}
+ 
+-		state->buf += pg_mblen(state->buf);
++		state->buf += pg_mblen_cstr(state->buf);
+ 	}
+ }
+ 
+@@ -438,7 +438,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+ 					state->state = WAITOPERAND;
+ 					continue;
+ 				}
+-				else if (!t_isspace(state->buf))
++				else if (!t_isspace_cstr(state->buf))
+ 				{
+ 					/*
+ 					 * We rely on the tsvector parser to parse the value for
+@@ -497,13 +497,13 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+ 					/* ignore other operators in this state too */
+ 					state->buf++;
+ 					continue;
+-				}
++				} /* backport starts here */
+ 				else if (*state->buf == '\0')
+ 				{
+ 					return PT_END;
+ 				}
+-				else if (!t_isspace(state->buf))
+-				{
++				else if (!t_isspace_cstr(state->buf))
++				{/* backport end */
+ 					if (state->in_quotes)
+ 					{
+ 						/* put implicit <-> after an operand */
+@@ -522,7 +522,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+ 				break;
+ 		}
+ 
+-		state->buf += pg_mblen(state->buf);
++		state->buf += pg_mblen_cstr(state->buf);
+ 	}
+ }
+ 
+@@ -1006,9 +1006,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp)
+ 				*(in->cur) = '\\';
+ 				in->cur++;
+ 			}
+-			COPYCHAR(in->cur, op);
+ 
+-			clen = pg_mblen(op);
++			clen = ts_copychar_cstr(in->cur, op);
+ 			op += clen;
+ 			in->cur += clen;
+ 		}
+diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
+index 8972f419abc..8b8f4e160eb 100644
+--- a/src/backend/utils/adt/tsvector.c
++++ b/src/backend/utils/adt/tsvector.c
+@@ -313,9 +313,9 @@ tsvectorout(PG_FUNCTION_ARGS)
+ 				lenbuf = 0,
+ 				pp;
+ 	WordEntry  *ptr = ARRPTR(out);
+-	char	   *curbegin,
+-			   *curin,
++	char	   *curin,
+ 			   *curout;
++	const char *curend;
+ 
+ 	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
+ 	for (i = 0; i < out->size; i++)
+@@ -328,13 +328,14 @@ tsvectorout(PG_FUNCTION_ARGS)
+ 	curout = outbuf = (char *) palloc(lenbuf);
+ 	for (i = 0; i < out->size; i++)
+ 	{
+-		curbegin = curin = STRPTR(out) + ptr->pos;
++		curin = STRPTR(out) + ptr->pos;
++		curend = curin + ptr->len;
+ 		if (i != 0)
+ 			*curout++ = ' ';
+ 		*curout++ = '\'';
+-		while (curin - curbegin < ptr->len)
++		while (curin < curend)
+ 		{
+-			int			len = pg_mblen(curin);
++			int			len = pg_mblen_range(curin, curend);
+ 
+ 			if (t_iseq(curin, '\''))
+ 				*curout++ = '\'';
+diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
+index cc3e3c15054..b559916f84c 100644
+--- a/src/backend/utils/adt/tsvector_op.c
++++ b/src/backend/utils/adt/tsvector_op.c
+@@ -2434,11 +2434,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
+ 	if (ws)
+ 	{
+ 		char	   *buf;
++		const char *end;
+ 
+ 		buf = VARDATA_ANY(ws);
+-		while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
++		end = buf + VARSIZE_ANY_EXHDR(ws);
++		while (buf < end)
+ 		{
+-			if (pg_mblen(buf) == 1)
++			int			len = pg_mblen_range(buf, end);
++
++			if (len == 1)
+ 			{
+ 				switch (*buf)
+ 				{
+@@ -2462,7 +2466,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
+ 						stat->weight |= 0;
+ 				}
+ 			}
+-			buf += pg_mblen(buf);
++			buf += len;
+ 		}
+ 	}
+ 
+diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
+index cfc181c20df..3567f0f9f43 100644
+--- a/src/backend/utils/adt/tsvector_parser.c
++++ b/src/backend/utils/adt/tsvector_parser.c
+@@ -185,10 +185,9 @@ gettoken_tsvector(TSVectorParseState state,
+ 			else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ 					 (state->is_web && t_iseq(state->prsbuf, '"')))
+ 				PRSSYNTAXERROR;
+-			else if (!t_isspace(state->prsbuf))
++			else if (!t_isspace_cstr(state->prsbuf))
+ 			{
+-				COPYCHAR(curpos, state->prsbuf);
+-				curpos += pg_mblen(state->prsbuf);
++				curpos += ts_copychar_cstr(curpos, state->prsbuf);
+ 				statecode = WAITENDWORD;
+ 			}
+ 		}
+@@ -202,8 +201,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 			else
+ 			{
+ 				RESIZEPRSBUF;
+-				COPYCHAR(curpos, state->prsbuf);
+-				curpos += pg_mblen(state->prsbuf);
++				curpos += ts_copychar_cstr(curpos, state->prsbuf);
+ 				Assert(oldstate != 0);
+ 				statecode = oldstate;
+ 			}
+@@ -215,7 +213,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 				statecode = WAITNEXTCHAR;
+ 				oldstate = WAITENDWORD;
+ 			}
+-			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
++			else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0' ||
+ 					 (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ 					 (state->is_web && t_iseq(state->prsbuf, '"')))
+ 			{
+@@ -238,8 +236,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 			else
+ 			{
+ 				RESIZEPRSBUF;
+-				COPYCHAR(curpos, state->prsbuf);
+-				curpos += pg_mblen(state->prsbuf);
++				curpos += ts_copychar_cstr(curpos, state->prsbuf);
+ 			}
+ 		}
+ 		else if (statecode == WAITENDCMPLX)
+@@ -258,8 +255,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 			else
+ 			{
+ 				RESIZEPRSBUF;
+-				COPYCHAR(curpos, state->prsbuf);
+-				curpos += pg_mblen(state->prsbuf);
++				curpos += ts_copychar_cstr(curpos, state->prsbuf);
+ 			}
+ 		}
+ 		else if (statecode == WAITCHARCMPLX)
+@@ -267,8 +263,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 			if (!state->is_web && t_iseq(state->prsbuf, '\''))
+ 			{
+ 				RESIZEPRSBUF;
+-				COPYCHAR(curpos, state->prsbuf);
+-				curpos += pg_mblen(state->prsbuf);
++				curpos += ts_copychar_cstr(curpos, state->prsbuf);
+ 				statecode = WAITENDCMPLX;
+ 			}
+ 			else
+@@ -279,7 +274,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 					PRSSYNTAXERROR;
+ 				if (state->oprisdelim)
+ 				{
+-					/* state->prsbuf+=pg_mblen(state->prsbuf); */
++					/* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */
+ 					RETURN_TOKEN;
+ 				}
+ 				else
+@@ -296,7 +291,7 @@ gettoken_tsvector(TSVectorParseState state,
+ 		}
+ 		else if (statecode == INPOSINFO)
+ 		{
+-			if (t_isdigit(state->prsbuf))
++			if (t_isdigit_cstr(state->prsbuf))
+ 			{
+ 				if (posalen == 0)
+ 				{
+@@ -351,10 +346,10 @@ gettoken_tsvector(TSVectorParseState state,
+ 					PRSSYNTAXERROR;
+ 				WEP_SETWEIGHT(pos[npos - 1], 0);
+ 			}
+-			else if (t_isspace(state->prsbuf) ||
++			else if (t_isspace_cstr(state->prsbuf) ||
+ 					 *(state->prsbuf) == '\0')
+ 				RETURN_TOKEN;
+-			else if (!t_isdigit(state->prsbuf))
++			else if (!t_isdigit_cstr(state->prsbuf))
+ 				PRSSYNTAXERROR;
+ 		}
+ 		else					/* internal error */
+@@ -362,6 +357,6 @@ gettoken_tsvector(TSVectorParseState state,
+ 				 statecode);
+ 
+ 		/* get next char */
+-		state->prsbuf += pg_mblen(state->prsbuf);
++		state->prsbuf += pg_mblen_cstr(state->prsbuf);
+ 	}
+ }
+diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
+index de3852045b1..78e4615501b 100644
+--- a/src/backend/utils/adt/varbit.c
++++ b/src/backend/utils/adt/varbit.c
+@@ -230,8 +230,8 @@ bit_in(PG_FUNCTION_ARGS)
+ 			else if (*sp != '0')
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+-						 errmsg("\"%c\" is not a valid binary digit",
+-								*sp)));
++						 errmsg("\"%.*s\" is not a valid binary digit",
++								pg_mblen_cstr(sp), sp)));
+ 
+ 			x >>= 1;
+ 			if (x == 0)
+@@ -255,8 +255,8 @@ bit_in(PG_FUNCTION_ARGS)
+ 			else
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+-						 errmsg("\"%c\" is not a valid hexadecimal digit",
+-								*sp)));
++						 errmsg("\"%.*s\" is not a valid hexadecimal digit",
++								pg_mblen_cstr(sp), sp)));
+ 
+ 			if (bc)
+ 			{
+@@ -531,8 +531,8 @@ varbit_in(PG_FUNCTION_ARGS)
+ 			else if (*sp != '0')
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+-						 errmsg("\"%c\" is not a valid binary digit",
+-								*sp)));
++						 errmsg("\"%.*s\" is not a valid binary digit",
++								pg_mblen_cstr(sp), sp)));
+ 
+ 			x >>= 1;
+ 			if (x == 0)
+@@ -556,8 +556,8 @@ varbit_in(PG_FUNCTION_ARGS)
+ 			else
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+-						 errmsg("\"%c\" is not a valid hexadecimal digit",
+-								*sp)));
++						 errmsg("\"%.*s\" is not a valid hexadecimal digit",
++								pg_mblen_cstr(sp), sp)));
+ 
+ 			if (bc)
+ 			{
+diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
+index 9dea2a5494d..437d84c9241 100644
+--- a/src/backend/utils/adt/varlena.c
++++ b/src/backend/utils/adt/varlena.c
+@@ -122,6 +122,7 @@ static text *text_substring(Datum str,
+ 							int32 start,
+ 							int32 length,
+ 							bool length_not_specified);
++static int	pg_mbcharcliplen_chars(const char *mbstr, int len, int limit);
+ static text *text_overlay(text *t1, text *t2, int sp, int sl);
+ static int	text_position(text *t1, text *t2, Oid collid);
+ static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
+@@ -762,8 +763,11 @@ text_catenate(text *t1, text *t2)
+  * charlen_to_bytelen()
+  *	Compute the number of bytes occupied by n characters starting at *p
+  *
+- * It is caller's responsibility that there actually are n characters;
+- * the string need not be null-terminated.
++ * The caller shall ensure there are n complete characters.  Callers achieve
++ * this by deriving "n" from regmatch_t findings from searching a wchar array.
++ * pg_mb2wchar_with_len() skips any trailing incomplete character, so regex
++ * matches will end no later than the last complete character.  (The string
++ * need not be null-terminated.)
+  */
+ static int
+ charlen_to_bytelen(const char *p, int n)
+@@ -778,7 +782,7 @@ charlen_to_bytelen(const char *p, int n)
+ 		const char *s;
+ 
+ 		for (s = p; n > 0; n--)
+-			s += pg_mblen(s);
++			s += pg_mblen_unbounded(s); /* caller verified encoding */
+ 
+ 		return s - p;
+ 	}
+@@ -851,7 +855,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 	int32		S = start;		/* start position */
+ 	int32		S1;				/* adjusted start position */
+ 	int32		L1;				/* adjusted substring length */
+-	int32		E;				/* end position */
++	int32		E;				/* end position, exclusive */
+ 
+ 	/*
+ 	 * SQL99 says S can be zero or negative, but we still must fetch from the
+@@ -911,6 +915,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 		int32		slice_start;
+ 		int32		slice_size;
+ 		int32		slice_strlen;
++		int32		slice_len;
+ 		text	   *slice;
+ 		int32		E1;
+ 		int32		i;
+@@ -947,11 +952,11 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 		else
+ 		{
+ 			/*
+-			 * A zero or negative value for the end position can happen if the
+-			 * start was negative or one. SQL99 says to return a zero-length
+-			 * string.
++			 * Ending at position 1, exclusive, obviously yields an empty
++			 * string.  A zero or negative value can happen if the start was
++			 * negative or one. SQL99 says to return a zero-length string.
+ 			 */
+-			if (E < 1)
++			if (E <= 1)
+ 				return cstring_to_text("");
+ 
+ 			/*
+@@ -961,11 +966,11 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 			L1 = E - S1;
+ 
+ 			/*
+-			 * Total slice size in bytes can't be any longer than the start
+-			 * position plus substring length times the encoding max length.
+-			 * If that overflows, we can just use -1.
++			 * Total slice size in bytes can't be any longer than the
++			 * inclusive end position times the encoding max length.  If that
++			 * overflows, we can just use -1.
+ 			 */
+-			if (pg_mul_s32_overflow(E, eml, &slice_size))
++			if (pg_mul_s32_overflow(E - 1, eml, &slice_size))
+ 				slice_size = -1;
+ 		}
+ 
+@@ -980,16 +985,25 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 			slice = (text *) DatumGetPointer(str);
+ 
+ 		/* see if we got back an empty string */
+-		if (VARSIZE_ANY_EXHDR(slice) == 0)
++		slice_len = VARSIZE_ANY_EXHDR(slice);
++		if (slice_len == 0)
+ 		{
+ 			if (slice != (text *) DatumGetPointer(str))
+ 				pfree(slice);
+ 			return cstring_to_text("");
+ 		}
+ 
+-		/* Now we can get the actual length of the slice in MB characters */
+-		slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
+-											VARSIZE_ANY_EXHDR(slice));
++		/*
++		 * Now we can get the actual length of the slice in MB characters,
++		 * stopping at the end of the substring.  Continuing beyond the
++		 * substring end could find an incomplete character attributable
++		 * solely to DatumGetTextPSlice() chopping in the middle of a
++		 * character, and it would be superfluous work at best.
++		 */
++		slice_strlen =
++			(slice_size == -1 ?
++			 pg_mbstrlen_with_len(VARDATA_ANY(slice), slice_len) :
++			 pg_mbcharcliplen_chars(VARDATA_ANY(slice), slice_len, E - 1));
+ 
+ 		/*
+ 		 * Check that the start position wasn't > slice_strlen. If so, SQL99
+@@ -1016,7 +1030,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 		 */
+ 		p = VARDATA_ANY(slice);
+ 		for (i = 0; i < S1 - 1; i++)
+-			p += pg_mblen(p);
++			p += pg_mblen_unbounded(p);
+ 
+ 		/* hang onto a pointer to our start position */
+ 		s = p;
+@@ -1026,7 +1040,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 		 * length.
+ 		 */
+ 		for (i = S1; i < E1; i++)
+-			p += pg_mblen(p);
++			p += pg_mblen_unbounded(p);
+ 
+ 		ret = (text *) palloc(VARHDRSZ + (p - s));
+ 		SET_VARSIZE(ret, VARHDRSZ + (p - s));
+@@ -1044,6 +1058,35 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+ 	return NULL;
+ }
+ 
++/*
++ * pg_mbcharcliplen_chars -
++ *	Mirror pg_mbcharcliplen(), except return value unit is chars, not bytes.
++ *
++ *	This mirrors all the dubious historical behavior, so it's static to
++ *	discourage proliferation.  The assertions are specific to the one caller.
++ */
++static int
++pg_mbcharcliplen_chars(const char *mbstr, int len, int limit)
++{
++	int			nch = 0;
++	int			l;
++
++	Assert(len > 0);
++	Assert(limit > 0);
++	Assert(pg_database_encoding_max_length() > 1);
++
++	while (len > 0 && *mbstr)
++	{
++		l = pg_mblen_with_len(mbstr, len);
++		nch++;
++		if (nch == limit)
++			break;
++		len -= l;
++		mbstr += l;
++	}
++	return nch;
++}
++
+ /*
+  * textoverlay
+  *	Replace specified substring of first string with second
+@@ -1333,6 +1376,8 @@ retry:
+ 	 */
+ 	if (state->is_multibyte_char_in_char)
+ 	{
++		const char *haystack_end = state->str1 + state->len1;
++
+ 		/* Walk one character at a time, until we reach the match. */
+ 
+ 		/* the search should never move backwards. */
+@@ -1341,7 +1386,7 @@ retry:
+ 		while (state->refpoint < matchptr)
+ 		{
+ 			/* step to next character. */
+-			state->refpoint += pg_mblen(state->refpoint);
++			state->refpoint += pg_mblen_range(state->refpoint, haystack_end);
+ 			state->refpos++;
+ 
+ 			/*
+@@ -1457,7 +1502,8 @@ text_position_get_match_pos(TextPositionState *state)
+ 		/* Convert the byte position to char position. */
+ 		while (state->refpoint < state->last_match)
+ 		{
+-			state->refpoint += pg_mblen(state->refpoint);
++			state->refpoint += pg_mblen_range(state->refpoint,
++											  state->last_match);
+ 			state->refpos++;
+ 		}
+ 		Assert(state->refpoint == state->last_match);
+@@ -4326,7 +4372,7 @@ check_replace_text_has_escape_char(const text *replace_text)
+ 	}
+ 	else
+ 	{
+-		for (; p < p_end; p += pg_mblen(p))
++		for (; p < p_end; p += pg_mblen_range(p, p_end))
+ 		{
+ 			if (*p == '\\')
+ 				return true;
+@@ -4366,7 +4412,7 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
+ 		}
+ 		else
+ 		{
+-			for (; p < p_end && *p != '\\'; p += pg_mblen(p))
++			for (; p < p_end && *p != '\\'; p += pg_mblen_range(p, p_end))
+ 				 /* nothing */ ;
+ 		}
+ 
+@@ -4834,6 +4880,8 @@ text_to_array_internal(PG_FUNCTION_ARGS)
+ 	}
+ 	else
+ 	{
++		const char *end_ptr;
++
+ 		/*
+ 		 * When fldsep is NULL, each character in the inputstring becomes an
+ 		 * element in the result array.  The separator is effectively the
+@@ -4846,10 +4894,11 @@ text_to_array_internal(PG_FUNCTION_ARGS)
+ 			PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
+ 
+ 		start_ptr = VARDATA_ANY(inputstring);
++		end_ptr = start_ptr + inputstring_len;
+ 
+ 		while (inputstring_len > 0)
+ 		{
+-			int			chunk_len = pg_mblen(start_ptr);
++			int			chunk_len = pg_mblen_range(start_ptr, end_ptr);
+ 
+ 			CHECK_FOR_INTERRUPTS();
+ 
+@@ -5440,7 +5489,7 @@ text_reverse(PG_FUNCTION_ARGS)
+ 		{
+ 			int			sz;
+ 
+-			sz = pg_mblen(p);
++			sz = pg_mblen_range(p, endp);
+ 			dst -= sz;
+ 			memcpy(dst, p, sz);
+ 			p += sz;
+@@ -5600,8 +5649,8 @@ text_format(PG_FUNCTION_ARGS)
+ 		if (strchr("sIL", *cp) == NULL)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+-					 errmsg("unrecognized format() type specifier \"%c\"",
+-							*cp),
++					 errmsg("unrecognized format() type specifier \"%.*s\"",
++							pg_mblen_range(cp, end_ptr), cp),
+ 					 errhint("For a single \"%%\" use \"%%%%\".")));
+ 
+ 		/* If indirect width was specified, get its value */
+@@ -5721,8 +5770,8 @@ text_format(PG_FUNCTION_ARGS)
+ 				/* should not get here, because of previous check */
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+-						 errmsg("unrecognized format() type specifier \"%c\"",
+-								*cp),
++						 errmsg("unrecognized format() type specifier \"%.*s\"",
++								pg_mblen_range(cp, end_ptr), cp),
+ 						 errhint("For a single \"%%\" use \"%%%%\".")));
+ 				break;
+ 		}
+diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
+index 4cb1d5d9d6c..e0d8351652a 100644
+--- a/src/backend/utils/adt/xml.c
++++ b/src/backend/utils/adt/xml.c
+@@ -2037,8 +2037,7 @@ sqlchar_to_unicode(const char *s)
+ 	char	   *utf8string;
+ 	pg_wchar	ret[2];			/* need space for trailing zero */
+ 
+-	/* note we're not assuming s is null-terminated */
+-	utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
++	utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8);
+ 
+ 	pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
+ 								  pg_encoding_mblen(PG_UTF8, utf8string));
+@@ -2091,7 +2090,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
+ 
+ 	initStringInfo(&buf);
+ 
+-	for (p = ident; *p; p += pg_mblen(p))
++	for (p = ident; *p; p += pg_mblen_cstr(p))
+ 	{
+ 		if (*p == ':' && (p == ident || fully_escaped))
+ 			appendStringInfoString(&buf, "_x003A_");
+@@ -2116,7 +2115,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
+ 				: !is_valid_xml_namechar(u))
+ 				appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
+ 			else
+-				appendBinaryStringInfo(&buf, p, pg_mblen(p));
++				appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
+ 		}
+ 	}
+ 
+@@ -2139,7 +2138,7 @@ map_xml_name_to_sql_identifier(const char *name)
+ 
+ 	initStringInfo(&buf);
+ 
+-	for (p = name; *p; p += pg_mblen(p))
++	for (p = name; *p; p += pg_mblen_cstr(p))
+ 	{
+ 		if (*p == '_' && *(p + 1) == 'x'
+ 			&& isxdigit((unsigned char) *(p + 2))
+@@ -2157,7 +2156,7 @@ map_xml_name_to_sql_identifier(const char *name)
+ 			p += 6;
+ 		}
+ 		else
+-			appendBinaryStringInfo(&buf, p, pg_mblen(p));
++			appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
+ 	}
+ 
+ 	return buf.data;
+diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
+index 8856ae1508f..cec0b83fcfe 100644
+--- a/src/backend/utils/cache/syscache.c
++++ b/src/backend/utils/cache/syscache.c
+@@ -40,6 +40,7 @@
+ #include "catalog/pg_description.h"
+ #include "catalog/pg_enum.h"
+ #include "catalog/pg_event_trigger.h"
++#include "catalog/pg_extension.h"
+ #include "catalog/pg_foreign_data_wrapper.h"
+ #include "catalog/pg_foreign_server.h"
+ #include "catalog/pg_foreign_table.h"
+@@ -983,6 +984,18 @@ static const struct cachedesc cacheinfo[] = {
+ 			0
+ 		},
+ 		2
++	},
++	/* intentionally out of alphabetical order, to avoid an ABI break: */
++	{ExtensionRelationId,		/* EXTENSIONOID */
++		ExtensionOidIndexId,
++		1,
++		{
++			Anum_pg_extension_oid,
++			0,
++			0,
++			0
++		},
++		2
+ 	}
+ };
+ 
+diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
+index 831f38b6758..82b39470662 100644
+--- a/src/backend/utils/mb/mbutils.c
++++ b/src/backend/utils/mb/mbutils.c
+@@ -38,6 +38,7 @@
+ #include "catalog/namespace.h"
+ #include "mb/pg_wchar.h"
+ #include "utils/builtins.h"
++#include "utils/memdebug.h"
+ #include "utils/memutils.h"
+ #include "utils/syscache.h"
+ 
+@@ -96,6 +97,13 @@ static char *perform_default_encoding_conversion(const char *src,
+ 												 int len, bool is_client_to_server);
+ static int	cliplen(const char *str, int len, int limit);
+ 
++pg_attribute_noreturn()
++static void report_invalid_encoding_int(int encoding, const char *mbstr,
++										int mblen, int len);
++
++pg_attribute_noreturn()
++static void report_invalid_encoding_db(const char *mbstr, int mblen, int len);
++
+ 
+ /*
+  * Prepare for a future call to SetClientEncoding.  Success should mean
+@@ -902,11 +910,128 @@ pg_encoding_wchar2mb_with_len(int encoding,
+ 	return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
+ }
+ 
+-/* returns the byte length of a multibyte character */
++/*
++ * Returns the byte length of a multibyte character sequence in a
++ * null-terminated string.  Raises an illegal byte sequence error if the
++ * sequence would hit a null terminator.
++ *
++ * The caller is expected to have checked for a terminator at *mbstr == 0
++ * before calling, but some callers want 1 in that case, so this function
++ * continues that tradition.
++ *
++ * This must only be used for strings that have a null-terminator to enable
++ * bounds detection.
++ */
++int
++pg_mblen_cstr(const char *mbstr)
++{
++	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
++
++	/*
++	 * The .mblen functions return 1 when given a pointer to a terminator.
++	 * Some callers depend on that, so we tolerate it for now.  Well-behaved
++	 * callers check the leading byte for a terminator *before* calling.
++	 */
++	for (int i = 1; i < length; ++i)
++		if (unlikely(mbstr[i] == 0))
++			report_invalid_encoding_db(mbstr, length, i);
++
++	/*
++	 * String should be NUL-terminated, but checking that would make typical
++	 * callers O(N^2), tripling Valgrind check-world time.  Unless
++	 * VALGRIND_EXPENSIVE, check 1 byte after each actual character.  (If we
++	 * found a character, not a terminator, the next byte must be a terminator
++	 * or the start of the next character.)  If the caller iterates the whole
++	 * string, the last call will diagnose a missing terminator.
++	 */
++	if (mbstr[0] != '\0')
++	{
++#ifdef VALGRIND_EXPENSIVE
++		VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr));
++#else
++		VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + length, 1);
++#endif
++	}
++
++	return length;
++}
++
++/*
++ * Returns the byte length of a multibyte character sequence bounded by a range
++ * [mbstr, end) of at least one byte in size.  Raises an illegal byte sequence
++ * error if the sequence would exceed the range.
++ */
++int
++pg_mblen_range(const char *mbstr, const char *end)
++{
++	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
++
++	Assert(end > mbstr);
++
++	if (unlikely(mbstr + length > end))
++		report_invalid_encoding_db(mbstr, length, end - mbstr);
++
++#ifdef VALGRIND_EXPENSIVE
++	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr);
++#else
++	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
++#endif
++
++	return length;
++}
++
++/*
++ * Returns the byte length of a multibyte character sequence bounded by a range
++ * extending for 'limit' bytes, which must be at least one.  Raises an illegal
++ * byte sequence error if the sequence would exceed the range.
++ */
++int
++pg_mblen_with_len(const char *mbstr, int limit)
++{
++	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
++
++	Assert(limit >= 1);
++
++	if (unlikely(length > limit))
++		report_invalid_encoding_db(mbstr, length, limit);
++
++#ifdef VALGRIND_EXPENSIVE
++	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit);
++#else
++	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
++#endif
++
++	return length;
++}
++
++
++/*
++ * Returns the length of a multibyte character sequence, without any
++ * validation of bounds.
++ *
++ * PLEASE NOTE:  This function can only be used safely if the caller has
++ * already verified the input string, since otherwise there is a risk of
++ * overrunning the buffer if the string is invalid.  A prior call to a
++ * pg_mbstrlen* function suffices.
++ */
++int
++pg_mblen_unbounded(const char *mbstr)
++{
++	int			length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
++
++	VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
++
++	return length;
++}
++
++/*
++ * Historical name for pg_mblen_unbounded().  Should not be used and will be
++ * removed in a later version.
++ */
+ int
+ pg_mblen(const char *mbstr)
+ {
+-	return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
++	return pg_mblen_unbounded(mbstr);
+ }
+ 
+ /* returns the display length of a multibyte character */
+@@ -928,14 +1053,14 @@ pg_mbstrlen(const char *mbstr)
+ 
+ 	while (*mbstr)
+ 	{
+-		mbstr += pg_mblen(mbstr);
++		mbstr += pg_mblen_cstr(mbstr);
+ 		len++;
+ 	}
+ 	return len;
+ }
+ 
+ /* returns the length (counted in wchars) of a multibyte string
+- * (not necessarily NULL terminated)
++ * (stops at the first of "limit" or a NUL)
+  */
+ int
+ pg_mbstrlen_with_len(const char *mbstr, int limit)
+@@ -948,7 +1073,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
+ 
+ 	while (limit > 0 && *mbstr)
+ 	{
+-		int			l = pg_mblen(mbstr);
++		int			l = pg_mblen_with_len(mbstr, limit);
+ 
+ 		limit -= l;
+ 		mbstr += l;
+@@ -1018,7 +1143,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
+ 
+ 	while (len > 0 && *mbstr)
+ 	{
+-		l = pg_mblen(mbstr);
++		l = pg_mblen_with_len(mbstr, len);
+ 		nch++;
+ 		if (nch > limit)
+ 			break;
+@@ -1575,12 +1700,19 @@ void
+ report_invalid_encoding(int encoding, const char *mbstr, int len)
+ {
+ 	int			l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
++
++	report_invalid_encoding_int(encoding, mbstr, l, len);
++}
++
++static void
++report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len)
++{
+ 	char		buf[8 * 5 + 1];
+ 	char	   *p = buf;
+ 	int			j,
+ 				jlimit;
+ 
+-	jlimit = Min(l, len);
++	jlimit = Min(mblen, len);
+ 	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
+ 
+ 	for (j = 0; j < jlimit; j++)
+@@ -1597,6 +1729,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
+ 					buf)));
+ }
+ 
++static void
++report_invalid_encoding_db(const char *mbstr, int mblen, int len)
++{
++	report_invalid_encoding_int(GetDatabaseEncoding(), mbstr, mblen, len);
++}
++
+ /*
+  * report_untranslatable_char: complain about untranslatable character
+  *
+diff --git a/src/common/wchar.c b/src/common/wchar.c
+index 78c60eeef97..e0c3a7c89d5 100644
+--- a/src/common/wchar.c
++++ b/src/common/wchar.c
+@@ -266,12 +266,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+ 	return cnt;
+ }
+ 
++/*
++ * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for
++ * EUC_CN), but mb2wchar_with_len does.  Tell a coherent story for code that
++ * relies on agreement between mb2wchar_with_len and mblen.  Invalid text
++ * datums (e.g. from shared catalogs) reach this.
++ */
+ static int
+ pg_euccn_mblen(const unsigned char *s)
+ {
+ 	int			len;
+ 
+-	if (IS_HIGHBIT_SET(*s))
++	if (*s == SS2)
++		len = 3;
++	else if (*s == SS3)
++		len = 3;
++	else if (IS_HIGHBIT_SET(*s))
+ 		len = 2;
+ 	else
+ 		len = 1;
+@@ -1554,7 +1564,7 @@ pg_encoding_set_invalid(int encoding, char *dst)
+ const pg_wchar_tbl pg_wchar_table[] = {
+ 	{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
+ 	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},	/* PG_EUC_JP */
+-	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2},	/* PG_EUC_CN */
++	{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 3},	/* PG_EUC_CN */
+ 	{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3},	/* PG_EUC_KR */
+ 	{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4},	/* PG_EUC_TW */
+ 	{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},	/* PG_EUC_JIS_2004 */
+diff --git a/src/include/access/slru.h b/src/include/access/slru.h
+index 19982f6e226..c1e481b3647 100644
+--- a/src/include/access/slru.h
++++ b/src/include/access/slru.h
+@@ -92,7 +92,9 @@ typedef struct SlruSharedData
+ 	/*
+ 	 * latest_page_number is the page number of the current end of the log;
+ 	 * this is not critical data, since we use it only to avoid swapping out
+-	 * the latest page.
++	 * the latest page.  (An exception: an accurate latest_page_number is
++	 * needed on pg_multixact/offsets to replay WAL generated with older minor
++	 * versions correctly.  See RecordNewMultiXact().)
+ 	 */
+ 	int			latest_page_number;
+ 
+diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
+index be235ed6d04..6c042320de1 100644
+--- a/src/include/catalog/dependency.h
++++ b/src/include/catalog/dependency.h
+@@ -226,6 +226,8 @@ extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId,
+ extern Oid	getExtensionOfObject(Oid classId, Oid objectId);
+ extern List *getAutoExtensionsOfObject(Oid classId, Oid objectId);
+ 
++extern Oid	getExtensionType(Oid extensionOid, const char *typname);
++
+ extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId);
+ extern List *getOwnedSequences(Oid relid);
+ extern Oid	getIdentitySequence(Oid relid, AttrNumber attnum, bool missing_ok);
+diff --git a/src/include/commands/extension.h b/src/include/commands/extension.h
+index 8b06df02a72..09ad68450a7 100644
+--- a/src/include/commands/extension.h
++++ b/src/include/commands/extension.h
+@@ -49,6 +49,8 @@ extern Oid	get_extension_oid(const char *extname, bool missing_ok);
+ extern char *get_extension_name(Oid ext_oid);
+ extern bool extension_file_exists(const char *extensionName);
+ 
++extern Oid	get_function_sibling_type(Oid funcoid, const char *typname);
++
+ extern ObjectAddress AlterExtensionNamespace(const char *extensionName, const char *newschema,
+ 											 Oid *oldschema);
+ 
+diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
+index c970e51b41b..d3b38814825 100644
+--- a/src/include/mb/pg_wchar.h
++++ b/src/include/mb/pg_wchar.h
+@@ -588,7 +588,14 @@ extern int	pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
+ extern int	pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
+ extern int	pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
+ extern size_t pg_wchar_strlen(const pg_wchar *wstr);
++extern int	pg_mblen_cstr(const char *mbstr);
++extern int	pg_mblen_range(const char *mbstr, const char *end);
++extern int	pg_mblen_with_len(const char *mbstr, int limit);
++extern int	pg_mblen_unbounded(const char *mbstr);
++
++/* deprecated */
+ extern int	pg_mblen(const char *mbstr);
++
+ extern int	pg_dsplen(const char *mbstr);
+ extern int	pg_mbstrlen(const char *mbstr);
+ extern int	pg_mbstrlen_with_len(const char *mbstr, int len);
+diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
+index cc4bd9ab20d..b54345659e7 100644
+--- a/src/include/tsearch/ts_locale.h
++++ b/src/include/tsearch/ts_locale.h
+@@ -42,12 +42,36 @@ typedef struct
+ /* The second argument of t_iseq() must be a plain ASCII character */
+ #define t_iseq(x,c)		(TOUCHAR(x) == (unsigned char) (c))
+ 
+-#define COPYCHAR(d,s)	memcpy(d, s, pg_mblen(s))
++/* Copy multibyte character of known byte length, return byte length. */
++static inline int
++ts_copychar_with_len(void *dest, const void *src, int length)
++{
++	memcpy(dest, src, length);
++	return length;
++}
++
++/* Copy multibyte character from null-terminated string, return byte length. */
++static inline int
++ts_copychar_cstr(void *dest, const void *src)
++{
++	return ts_copychar_with_len(dest, src, pg_mblen_cstr((const char *) src));
++}
++
++/* Historical macro for the above. */
++#define COPYCHAR ts_copychar_cstr
++
++#define GENERATE_T_ISCLASS_DECL(character_class) \
++extern int	t_is##character_class##_with_len(const char *ptr, int len); \
++extern int	t_is##character_class##_cstr(const char *ptr); \
++extern int	t_is##character_class##_unbounded(const char *ptr); \
++\
++/* deprecated */ \
++extern int	t_is##character_class(const char *ptr);
+ 
+-extern int	t_isdigit(const char *ptr);
+-extern int	t_isspace(const char *ptr);
+-extern int	t_isalpha(const char *ptr);
+-extern int	t_isprint(const char *ptr);
++GENERATE_T_ISCLASS_DECL(alpha);
++GENERATE_T_ISCLASS_DECL(digit);
++GENERATE_T_ISCLASS_DECL(print);
++GENERATE_T_ISCLASS_DECL(space);
+ 
+ extern char *lowerstr(const char *str);
+ extern char *lowerstr_with_len(const char *str, int len);
+diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
+index a9b6f596585..0fad51b7e06 100644
+--- a/src/include/tsearch/ts_utils.h
++++ b/src/include/tsearch/ts_utils.h
+@@ -38,14 +38,12 @@ extern bool gettoken_tsvector(TSVectorParseState state,
+ extern void close_tsvector_parser(TSVectorParseState state);
+ 
+ /* phrase operator begins with '<' */
+-#define ISOPERATOR(x) \
+-	( pg_mblen(x) == 1 && ( *(x) == '!' ||	\
+-							*(x) == '&' ||	\
+-							*(x) == '|' ||	\
+-							*(x) == '(' ||	\
+-							*(x) == ')' ||	\
+-							*(x) == '<'		\
+-						  ) )
++#define ISOPERATOR(x)		(*(x) == '!' ||	\
++							 *(x) == '&' ||	\
++							 *(x) == '|' ||	\
++							 *(x) == '(' ||	\
++							 *(x) == ')' ||	\
++							 *(x) == '<')
+ 
+ /* parse_tsquery */
+ 
+diff --git a/src/include/utils/array.h b/src/include/utils/array.h
+index 3a3fc0f910d..78952a34e46 100644
+--- a/src/include/utils/array.h
++++ b/src/include/utils/array.h
+@@ -389,6 +389,7 @@ extern void array_bitmap_copy(bits8 *destbitmap, int destoffset,
+ extern ArrayType *construct_array(Datum *elems, int nelems,
+ 								  Oid elmtype,
+ 								  int elmlen, bool elmbyval, char elmalign);
++extern ArrayType *construct_array_builtin(Datum *elems, int nelems, Oid elmtype);
+ extern ArrayType *construct_md_array(Datum *elems,
+ 									 bool *nulls,
+ 									 int ndims,
+@@ -403,6 +404,9 @@ extern void deconstruct_array(ArrayType *array,
+ 							  Oid elmtype,
+ 							  int elmlen, bool elmbyval, char elmalign,
+ 							  Datum **elemsp, bool **nullsp, int *nelemsp);
++extern void deconstruct_array_builtin(ArrayType *array,
++									  Oid elmtype,
++									  Datum **elemsp, bool **nullsp, int *nelemsp);
+ extern bool array_contains_nulls(ArrayType *array);
+ 
+ extern ArrayBuildState *initArrayResult(Oid element_type,
+diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
+index 067ac0928a2..1433cbe8754 100644
+--- a/src/include/utils/syscache.h
++++ b/src/include/utils/syscache.h
+@@ -108,9 +108,11 @@ enum SysCacheIdentifier
+ 	TYPENAMENSP,
+ 	TYPEOID,
+ 	USERMAPPINGOID,
+-	USERMAPPINGUSERSERVER
++	USERMAPPINGUSERSERVER,
++	/* intentionally out of alphabetical order, to avoid an ABI break: */
++	EXTENSIONOID
+ 
+-#define SysCacheSize (USERMAPPINGUSERSERVER + 1)
++#define SysCacheSize (EXTENSIONOID + 1)
+ };
+ 
+ extern void InitCatalogCache(void);
+diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c
+new file mode 100644
+index 00000000000..b8ff535c8f3
+--- /dev/null
++++ b/src/test/modules/test_regex/test_regex.c
+@@ -0,0 +1,774 @@
++/*--------------------------------------------------------------------------
++ *
++ * test_regex.c
++ *		Test harness for the regular expression package.
++ *
++ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
++ * Portions Copyright (c) 1994, Regents of the University of California
++ *
++ * IDENTIFICATION
++ *		src/test/modules/test_regex/test_regex.c
++ *
++ * -------------------------------------------------------------------------
++ */
++
++#include "postgres.h"
++
++#include "funcapi.h"
++#include "miscadmin.h"
++#include "regex/regex.h"
++#include "utils/array.h"
++#include "utils/builtins.h"
++
++PG_MODULE_MAGIC;
++
++
++/* all the options of interest for regex functions */
++typedef struct test_re_flags
++{
++	int			cflags;			/* compile flags for Spencer's regex code */
++	int			eflags;			/* execute flags for Spencer's regex code */
++	long		info;			/* expected re_info bits */
++	bool		glob;			/* do it globally (for each occurrence) */
++	bool		indices;		/* report indices not actual strings */
++	bool		partial;		/* expect partial match */
++} test_re_flags;
++
++/* cross-call state for test_regex() */
++typedef struct test_regex_ctx
++{
++	test_re_flags re_flags;		/* flags */
++	rm_detail_t details;		/* "details" from execution */
++	text	   *orig_str;		/* data string in original TEXT form */
++	int			nmatches;		/* number of places where pattern matched */
++	int			npatterns;		/* number of capturing subpatterns */
++	/* We store start char index and end+1 char index for each match */
++	/* so the number of entries in match_locs is nmatches * npatterns * 2 */
++	int		   *match_locs;		/* 0-based character indexes */
++	int			next_match;		/* 0-based index of next match to process */
++	/* workspace for build_test_match_result() */
++	Datum	   *elems;			/* has npatterns+1 elements */
++	bool	   *nulls;			/* has npatterns+1 elements */
++	pg_wchar   *wide_str;		/* wide-char version of original string */
++	char	   *conv_buf;		/* conversion buffer, if needed */
++	int			conv_bufsiz;	/* size thereof */
++} test_regex_ctx;
++
++/* Local functions */
++static void test_re_compile(text *text_re, int cflags, Oid collation,
++							regex_t *result_re);
++static void parse_test_flags(test_re_flags *flags, text *opts);
++static test_regex_ctx *setup_test_matches(text *orig_str,
++										  regex_t *cpattern,
++										  test_re_flags *flags,
++										  Oid collation,
++										  bool use_subpatterns);
++static ArrayType *build_test_info_result(regex_t *cpattern,
++										 test_re_flags *flags);
++static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
++
++
++/*
++ * test_regex(pattern text, string text, flags text) returns setof text[]
++ *
++ * This is largely based on regexp.c's regexp_matches, with additions
++ * for debugging purposes.
++ */
++PG_FUNCTION_INFO_V1(test_regex);
++
++Datum
++test_regex(PG_FUNCTION_ARGS)
++{
++	FuncCallContext *funcctx;
++	test_regex_ctx *matchctx;
++	ArrayType  *result_ary;
++
++	if (SRF_IS_FIRSTCALL())
++	{
++		text	   *pattern = PG_GETARG_TEXT_PP(0);
++		text	   *flags = PG_GETARG_TEXT_PP(2);
++		Oid			collation = PG_GET_COLLATION();
++		test_re_flags re_flags;
++		regex_t		cpattern;
++		MemoryContext oldcontext;
++
++		funcctx = SRF_FIRSTCALL_INIT();
++		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
++
++		/* Decode the option characters into cflags/eflags/info and booleans */
++		parse_test_flags(&re_flags, flags);
++
++		/* Compile the pattern; this raises an error if it is invalid */
++		test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
++
++		/* be sure to copy the input string into the multi-call ctx */
++		matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
++									  &re_flags,
++									  collation,
++									  true);
++
++		/* Workspace for build_test_match_result (+1 slot for "details") */
++		matchctx->elems = (Datum *) palloc(sizeof(Datum) *
++										   (matchctx->npatterns + 1));
++		matchctx->nulls = (bool *) palloc(sizeof(bool) *
++										  (matchctx->npatterns + 1));
++
++		MemoryContextSwitchTo(oldcontext);
++		funcctx->user_fctx = (void *) matchctx;
++
++		/*
++		 * Return the first result row, which is info equivalent to Tcl's
++		 * "regexp -about" output
++		 */
++		result_ary = build_test_info_result(&cpattern, &re_flags);
++
++		pg_regfree(&cpattern);	/* all matching was already done above */
++
++		SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
++	}
++	else
++	{
++		/* Each subsequent row describes one match saved in matchctx */
++		funcctx = SRF_PERCALL_SETUP();
++		matchctx = (test_regex_ctx *) funcctx->user_fctx;
++
++		if (matchctx->next_match < matchctx->nmatches)
++		{
++			result_ary = build_test_match_result(matchctx);
++			matchctx->next_match++;
++			SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
++		}
++	}
++
++	SRF_RETURN_DONE(funcctx);	/* all saved matches have been returned */
++}
++
++
++/*
++ * test_re_compile - compile a RE
++ *
++ *	text_re --- the pattern, expressed as a TEXT object
++ *	cflags --- compile options for the pattern
++ *	collation --- collation to use for LC_CTYPE-dependent behavior
++ *	result_re --- output, compiled RE is stored here
++ *
++ * Pattern is given in the database encoding.  We internally convert to
++ * an array of pg_wchar, which is what Spencer's regex package wants.
++ * Raises ERROR (ERRCODE_INVALID_REGULAR_EXPRESSION) if compilation fails.
++ * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
++ */
++static void
++test_re_compile(text *text_re, int cflags, Oid collation,
++				regex_t *result_re)
++{
++	int			text_re_len = VARSIZE_ANY_EXHDR(text_re);
++	char	   *text_re_val = VARDATA_ANY(text_re);
++	pg_wchar   *pattern;
++	int			pattern_len;
++	int			regcomp_result;
++	char		errMsg[100];
++
++	/* Convert pattern string to wide characters (buffer: byte length + 1) */
++	pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
++	pattern_len = pg_mb2wchar_with_len(text_re_val,
++									   pattern,
++									   text_re_len);
++
++	regcomp_result = pg_regcomp(result_re,
++								pattern,
++								pattern_len,
++								cflags,
++								collation);
++
++	pfree(pattern);
++
++	if (regcomp_result != REG_OKAY)
++	{
++		/* RE didn't compile (in that case no pg_regfree is needed) */
++
++		/*
++		 * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
++		 * before reporting a regex error.  This is so that if the regex
++		 * library aborts and returns REG_CANCEL, we don't print an error
++		 * message that implies the regex was invalid.
++		 */
++		CHECK_FOR_INTERRUPTS();
++
++		pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
++		ereport(ERROR,
++				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
++				 errmsg("invalid regular expression: %s", errMsg)));
++	}
++}
++
++/*
++ * test_re_execute - execute a RE on pg_wchar data
++ *
++ * Returns true on match, false on no match; any other result raises ERROR.
++ * Arguments are as for pg_regexec; match locations are preset to -1 here.
++ */
++static bool
++test_re_execute(regex_t *re, pg_wchar *data, int data_len,
++				int start_search,
++				rm_detail_t *details,
++				int nmatch, regmatch_t *pmatch,
++				int eflags)
++{
++	int			regexec_result;
++	char		errMsg[100];
++
++	/* Initialize match locations in case engine doesn't */
++	details->rm_extend.rm_so = -1;
++	details->rm_extend.rm_eo = -1;
++	for (int i = 0; i < nmatch; i++)
++	{
++		pmatch[i].rm_so = -1;
++		pmatch[i].rm_eo = -1;
++	}
++
++	/* Perform RE match and return result */
++	regexec_result = pg_regexec(re,
++								data,
++								data_len,
++								start_search,
++								details,
++								nmatch,
++								pmatch,
++								eflags);
++
++	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
++	{
++		/* neither match nor no-match: report the engine's error */
++		CHECK_FOR_INTERRUPTS();
++		pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
++		ereport(ERROR,
++				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
++				 errmsg("regular expression failed: %s", errMsg)));
++	}
++
++	return (regexec_result == REG_OKAY);
++}
++
++
++/*
++ * parse_test_flags - parse the flags argument
++ * (each byte of opts is one option; an unrecognized one raises ERROR)
++ *	flags --- output argument, filled with desired options
++ *	opts --- TEXT object, or NULL for defaults
++ */
++static void
++parse_test_flags(test_re_flags *flags, text *opts)
++{
++	/* these defaults must match Tcl's */
++	int			cflags = REG_ADVANCED;
++	int			eflags = 0;
++	long		info = 0;
++
++	flags->glob = false;
++	flags->indices = false;
++	flags->partial = false;
++
++	if (opts)
++	{
++		char	   *opt_p = VARDATA_ANY(opts);
++		int			opt_len = VARSIZE_ANY_EXHDR(opts);
++		int			i;
++
++		for (i = 0; i < opt_len; i++)
++		{
++			switch (opt_p[i])
++			{
++				case '-':
++					/* allowed, no-op */
++					break;
++				case '!':		/* with indices, emit details even if no match */
++					flags->partial = true;
++					break;
++				case '*':
++					/* test requires Unicode --- ignored here */
++					break;
++				case '0':		/* report match locations, not substrings */
++					flags->indices = true;
++					break;
++
++					/* These flags correspond to user-exposed RE options: */
++				case 'g':		/* global match */
++					flags->glob = true;
++					break;
++				case 'i':		/* case insensitive */
++					cflags |= REG_ICASE;
++					break;
++				case 'n':		/* \n affects ^ $ . [^ */
++					cflags |= REG_NEWLINE;
++					break;
++				case 'p':		/* ~Perl, \n affects . [^ */
++					cflags |= REG_NLSTOP;
++					cflags &= ~REG_NLANCH;
++					break;
++				case 'w':		/* weird, \n affects ^ $ only */
++					cflags &= ~REG_NLSTOP;
++					cflags |= REG_NLANCH;
++					break;
++				case 'x':		/* expanded syntax */
++					cflags |= REG_EXPANDED;
++					break;
++
++					/* These flags correspond to Tcl's -xflags options: */
++				case 'a':
++					cflags |= REG_ADVF;
++					break;
++				case 'b':
++					cflags &= ~REG_ADVANCED;
++					break;
++				case 'c':
++
++					/*
++					 * Tcl calls this TCL_REG_CANMATCH, but it's really
++					 * REG_EXPECT.  In this implementation we must also set
++					 * the partial and indices flags, so that
++					 * setup_test_matches and build_test_match_result will
++					 * emit the desired data.  (They'll emit more fields than
++					 * Tcl would, but that's fine.)
++					 */
++					cflags |= REG_EXPECT;
++					flags->partial = true;
++					flags->indices = true;
++					break;
++				case 'e':
++					cflags &= ~REG_ADVANCED;
++					cflags |= REG_EXTENDED;
++					break;
++				case 'q':
++					cflags &= ~REG_ADVANCED;
++					cflags |= REG_QUOTE;
++					break;
++				case 'o':		/* o for opaque */
++					cflags |= REG_NOSUB;
++					break;
++				case 's':		/* s for start */
++					cflags |= REG_BOSONLY;
++					break;
++				case '+':
++					cflags |= REG_FAKE;
++					break;
++				case ',':
++					cflags |= REG_PROGRESS;
++					break;
++				case '.':
++					cflags |= REG_DUMP;
++					break;
++				case ':':
++					eflags |= REG_MTRACE;
++					break;
++				case ';':
++					eflags |= REG_FTRACE;
++					break;
++				case '^':
++					eflags |= REG_NOTBOL;
++					break;
++				case '$':
++					eflags |= REG_NOTEOL;
++					break;
++				case 't':		/* REG_EXPECT as for 'c', minus partial/indices */
++					cflags |= REG_EXPECT;
++					break;
++				case '%':
++					eflags |= REG_SMALL;
++					break;
++
++					/* These flags define expected info bits: */
++				case 'A':
++					info |= REG_UBSALNUM;
++					break;
++				case 'B':
++					info |= REG_UBRACES;
++					break;
++				case 'E':
++					info |= REG_UBBS;
++					break;
++				case 'H':
++					info |= REG_ULOOKAROUND;
++					break;
++				case 'I':
++					info |= REG_UIMPOSSIBLE;
++					break;
++				case 'L':
++					info |= REG_ULOCALE;
++					break;
++				case 'M':
++					info |= REG_UUNPORT;
++					break;
++				case 'N':
++					info |= REG_UEMPTYMATCH;
++					break;
++				case 'P':
++					info |= REG_UNONPOSIX;
++					break;
++				case 'Q':
++					info |= REG_UBOUNDS;
++					break;
++				case 'R':
++					info |= REG_UBACKREF;
++					break;
++				case 'S':
++					info |= REG_UUNSPEC;
++					break;
++				case 'T':
++					info |= REG_USHORTEST;
++					break;
++				case 'U':
++					info |= REG_UPBOTCH;
++					break;
++
++				default:
++					ereport(ERROR,
++							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
++							 errmsg("invalid regular expression test option: \"%.*s\"",
++									pg_mblen_range(opt_p + i, opt_p + opt_len),
++									opt_p + i)));
++					break;
++			}
++		}
++	}
++	flags->cflags = cflags;
++	flags->eflags = eflags;
++	flags->info = info;
++}
++
++/*
++ * setup_test_matches --- do the initial matching
++ *
++ * To simplify memory management, we do all the matching in one swoop.
++ * The returned test_regex_ctx's match_locs[] holds npatterns (start, end)
++ * pairs per match, as offsets into the pg_wchar string (-1 if not set).
++ */
++static test_regex_ctx *
++setup_test_matches(text *orig_str,
++				   regex_t *cpattern, test_re_flags *re_flags,
++				   Oid collation,
++				   bool use_subpatterns)
++{
++	test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx));
++	int			eml = pg_database_encoding_max_length();
++	int			orig_len;
++	pg_wchar   *wide_str;
++	int			wide_len;
++	regmatch_t *pmatch;
++	int			pmatch_len;
++	int			array_len;
++	int			array_idx;
++	int			prev_match_end;
++	int			start_search;
++	int			maxlen = 0;		/* largest fetch length in characters */
++
++	/* save flags */
++	matchctx->re_flags = *re_flags;
++
++	/* save original string --- we'll extract result substrings from it */
++	matchctx->orig_str = orig_str;
++
++	/* convert string to pg_wchar form for matching */
++	orig_len = VARSIZE_ANY_EXHDR(orig_str);
++	wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
++	wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
++
++	/* do we want to remember subpatterns? */
++	if (use_subpatterns && cpattern->re_nsub > 0)
++	{
++		matchctx->npatterns = cpattern->re_nsub + 1;
++		pmatch_len = cpattern->re_nsub + 1;
++	}
++	else
++	{
++		use_subpatterns = false;
++		matchctx->npatterns = 1;
++		pmatch_len = 1;
++	}
++
++	/* temporary output space for RE package */
++	pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
++
++	/*
++	 * the real output space (grown dynamically if needed)
++	 *
++	 * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
++	 * than at 2^27
++	 */
++	array_len = re_flags->glob ? 255 : 31;
++	matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
++	array_idx = 0;
++
++	/* search for the pattern, perhaps repeatedly */
++	prev_match_end = 0;
++	start_search = 0;
++	while (test_re_execute(cpattern, wide_str, wide_len,
++						   start_search,
++						   &matchctx->details,
++						   pmatch_len, pmatch,
++						   re_flags->eflags))
++	{
++		/* enlarge output space if needed */
++		while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
++		{
++			array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
++			if (array_len > MaxAllocSize / sizeof(int))
++				ereport(ERROR,
++						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
++						 errmsg("too many regular expression matches")));
++			matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
++													sizeof(int) * array_len);
++		}
++
++		/* save this match's locations, tracking the longest substring */
++		for (int i = 0; i < matchctx->npatterns; i++)
++		{
++			int			so = pmatch[i].rm_so;
++			int			eo = pmatch[i].rm_eo;
++
++			matchctx->match_locs[array_idx++] = so;
++			matchctx->match_locs[array_idx++] = eo;
++			if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
++				maxlen = (eo - so);
++		}
++		matchctx->nmatches++;
++		prev_match_end = pmatch[0].rm_eo;
++
++		/* if not glob, stop after one match */
++		if (!re_flags->glob)
++			break;
++
++		/*
++		 * Advance search position.  Normally we start the next search at the
++		 * end of the previous match; but if the match was of zero length, we
++		 * have to advance by one character, or we'd just find the same match
++		 * again.
++		 */
++		start_search = prev_match_end;
++		if (pmatch[0].rm_so == pmatch[0].rm_eo)
++			start_search++;
++		if (start_search > wide_len)	/* moved past end of string */
++			break;
++	}
++
++	/*
++	 * If we had no match, but "partial" and "indices" are set, emit the
++	 * details.
++	 */
++	if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
++	{
++		/* enlarge output space if needed */
++		while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
++		{
++			array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
++			if (array_len > MaxAllocSize / sizeof(int))
++				ereport(ERROR,
++						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
++						 errmsg("too many regular expression matches")));
++			matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
++													sizeof(int) * array_len);
++		}
++
++		matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
++		matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
++		/* we don't have pmatch data, so emit -1 */
++		for (int i = 1; i < matchctx->npatterns; i++)
++		{
++			matchctx->match_locs[array_idx++] = -1;
++			matchctx->match_locs[array_idx++] = -1;
++		}
++		matchctx->nmatches++;
++	}
++
++	Assert(array_idx <= array_len);
++
++	if (eml > 1)
++	{
++		int64		maxsiz = eml * (int64) maxlen;
++		int			conv_bufsiz;
++
++		/*
++		 * Make the conversion buffer large enough for any substring of
++		 * interest.
++		 *
++		 * Worst case: assume we need the maximum size (maxlen*eml), but take
++		 * advantage of the fact that the original string length in bytes is
++		 * an upper bound on the byte length of any fetched substring (and we
++		 * know that len+1 is safe to allocate because the varlena header is
++		 * longer than 1 byte).
++		 */
++		if (maxsiz > orig_len)
++			conv_bufsiz = orig_len + 1;
++		else
++			conv_bufsiz = maxsiz + 1;	/* safe since maxsiz < 2^30 */
++
++		matchctx->conv_buf = palloc(conv_bufsiz);
++		matchctx->conv_bufsiz = conv_bufsiz;
++		matchctx->wide_str = wide_str;
++	}
++	else
++	{
++		/* No need to keep the wide string if we're in a single-byte charset. */
++		pfree(wide_str);
++		matchctx->wide_str = NULL;
++		matchctx->conv_buf = NULL;
++		matchctx->conv_bufsiz = 0;
++	}
++
++	/* Clean up temp storage */
++	pfree(pmatch);
++
++	return matchctx;
++}
++
++/*
++ * build_test_info_result - build output array describing compiled regexp
++ * (re_nsub first, then an entry per re_info bit that is set or was expected)
++ * This borrows some code from Tcl's TclRegAbout().
++ */
++static ArrayType *
++build_test_info_result(regex_t *cpattern, test_re_flags *flags)
++{
++	/* Translation data for flag bits in regex_t.re_info */
++	struct infoname
++	{
++		int			bit;
++		const char *text;
++	};
++	static const struct infoname infonames[] = {
++		{REG_UBACKREF, "REG_UBACKREF"},
++		{REG_ULOOKAROUND, "REG_ULOOKAROUND"},
++		{REG_UBOUNDS, "REG_UBOUNDS"},
++		{REG_UBRACES, "REG_UBRACES"},
++		{REG_UBSALNUM, "REG_UBSALNUM"},
++		{REG_UPBOTCH, "REG_UPBOTCH"},
++		{REG_UBBS, "REG_UBBS"},
++		{REG_UNONPOSIX, "REG_UNONPOSIX"},
++		{REG_UUNSPEC, "REG_UUNSPEC"},
++		{REG_UUNPORT, "REG_UUNPORT"},
++		{REG_ULOCALE, "REG_ULOCALE"},
++		{REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
++		{REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
++		{REG_USHORTEST, "REG_USHORTEST"},
++		{0, NULL}
++	};
++	const struct infoname *inf;
++	Datum		elems[lengthof(infonames) + 1];	/* re_nsub + every bit */
++	int			nresults = 0;
++	char		buf[80];
++	int			dims[1];
++	int			lbs[1];
++
++	/* Set up results: first, the number of subexpressions */
++	snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
++	elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
++
++	/* Report info bits: expected, "unexpected X!", or "missing X!" */
++	for (inf = infonames; inf->bit != 0; inf++)
++	{
++		if (cpattern->re_info & inf->bit)
++		{
++			if (flags->info & inf->bit)
++				elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
++			else
++			{
++				snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
++				elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
++			}
++		}
++		else
++		{
++			if (flags->info & inf->bit)
++			{
++				snprintf(buf, sizeof(buf), "missing %s!", inf->text);
++				elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
++			}
++		}
++	}
++
++	/* And form an array */
++	dims[0] = nresults;
++	lbs[0] = 1;
++	/* XXX: this hardcodes assumptions about the text type */
++	return construct_md_array(elems, NULL, 1, dims, lbs,
++							  TEXTOID, -1, false, TYPALIGN_INT);
++}
++
++/*
++ * build_test_match_result - build output array for current match
++ * (the match selected by matchctx->next_match; caller advances that counter)
++ * Note that if the indices flag is set, we don't need any strings,
++ * just the location data.
++ */
++static ArrayType *
++build_test_match_result(test_regex_ctx *matchctx)
++{
++	char	   *buf = matchctx->conv_buf;
++	Datum	   *elems = matchctx->elems;
++	bool	   *nulls = matchctx->nulls;
++	bool		indices = matchctx->re_flags.indices;
++	char		bufstr[80];
++	int			dims[1];
++	int			lbs[1];
++	int			loc;
++	int			i;
++
++	/* Extract matching substrings from the original string */
++	loc = matchctx->next_match * matchctx->npatterns * 2;
++	for (i = 0; i < matchctx->npatterns; i++)
++	{
++		int			so = matchctx->match_locs[loc++];
++		int			eo = matchctx->match_locs[loc++];
++
++		if (indices)
++		{
++			/* Report eo as last matched index (eo - 1), for consistency with Tcl */
++			snprintf(bufstr, sizeof(bufstr), "%d %d",
++					 so, so < 0 ? eo : eo - 1);
++			elems[i] = PointerGetDatum(cstring_to_text(bufstr));
++			nulls[i] = false;
++		}
++		else if (so < 0 || eo < 0)	/* no location recorded: emit NULL */
++		{
++			elems[i] = (Datum) 0;
++			nulls[i] = true;
++		}
++		else if (buf)			/* conv_buf exists only for multibyte encodings */
++		{
++			int			len = pg_wchar2mb_with_len(matchctx->wide_str + so,
++												   buf,
++												   eo - so);
++
++			Assert(len < matchctx->conv_bufsiz);
++			elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
++			nulls[i] = false;
++		}
++		else					/* single-byte encoding: slice orig_str directly */
++		{
++			elems[i] = DirectFunctionCall3(text_substr,
++										   PointerGetDatum(matchctx->orig_str),
++										   Int32GetDatum(so + 1),
++										   Int32GetDatum(eo - so));
++			nulls[i] = false;
++		}
++	}
++
++	/* In EXPECT indices mode, also report the "details" as one extra element */
++	if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
++	{
++		int			so = matchctx->details.rm_extend.rm_so;
++		int			eo = matchctx->details.rm_extend.rm_eo;
++
++		snprintf(bufstr, sizeof(bufstr), "%d %d",
++				 so, so < 0 ? eo : eo - 1);
++		elems[i] = PointerGetDatum(cstring_to_text(bufstr));
++		nulls[i] = false;
++		i++;
++	}
++
++	/* And form an array */
++	dims[0] = i;
++	lbs[0] = 1;
++	/* XXX: this hardcodes assumptions about the text type */
++	return construct_md_array(elems, nulls, 1, dims, lbs,
++							  TEXTOID, -1, false, TYPALIGN_INT);
++}
+diff --git a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore
+index 93c56c85a09..398292afad5 100644
+--- a/src/test/regress/expected/.gitignore
++++ b/src/test/regress/expected/.gitignore
+@@ -2,6 +2,8 @@
+ /copy.out
+ /create_function_1.out
+ /create_function_2.out
++/encoding.out
++/encoding_1.out
+ /largeobject.out
+ /largeobject_1.out
+ /misc.out
+diff --git a/src/test/regress/expected/euc_kr.out b/src/test/regress/expected/euc_kr.out
+new file mode 100644
+index 00000000000..7a61c89a43a
+--- /dev/null
++++ b/src/test/regress/expected/euc_kr.out
+@@ -0,0 +1,16 @@
++-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
++-- non-UTF8, multibyte encoding as of 2026-01.  Since UTF8 can represent all
++-- of EUC_KR, also run the test in UTF8.
++SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
++\if :skip_test
++\quit
++\endif
++-- Exercise is_multibyte_char_in_char (non-UTF8) slow path.
++SELECT POSITION(
++	convert_from('\xbcf6c7d0', 'EUC_KR') IN
++	convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR'));
++ position 
++----------
++        5
++(1 row)
++
+diff --git a/src/test/regress/expected/euc_kr_1.out b/src/test/regress/expected/euc_kr_1.out
+new file mode 100644
+index 00000000000..faaac5d6355
+--- /dev/null
++++ b/src/test/regress/expected/euc_kr_1.out
+@@ -0,0 +1,6 @@
++-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
++-- non-UTF8, multibyte encoding as of 2026-01.  Since UTF8 can represent all
++-- of EUC_KR, also run the test in UTF8.
++SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
++\if :skip_test
++\quit
+diff --git a/src/test/regress/input/encoding.source b/src/test/regress/input/encoding.source
+new file mode 100644
+index 00000000000..efdfecd3c05
+--- /dev/null
++++ b/src/test/regress/input/encoding.source
+@@ -0,0 +1,240 @@
++/* skip test if not UTF8 server encoding */
++SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
++\if :skip_test
++\quit
++\endif
++
++CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[]
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++
++
++CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text);
++INSERT INTO regress_encoding
++VALUES ('café',
++        'caf' || test_bytea_to_text('\xc3'),
++        'café' || test_bytea_to_text('\x00') || 'dcba',
++        'caf' || test_bytea_to_text('\xc300') || 'dcba');
++
++SELECT good, truncated, with_nul FROM regress_encoding;
++
++SELECT length(good) FROM regress_encoding;
++SELECT substring(good, 3, 1) FROM regress_encoding;
++SELECT substring(good, 4, 1) FROM regress_encoding;
++SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding;
++SELECT reverse(good) FROM regress_encoding;
++
++-- invalid short mb character = error
++SELECT length(truncated) FROM regress_encoding;
++SELECT substring(truncated, 1, 3) FROM regress_encoding;
++SELECT substring(truncated, 1, 4) FROM regress_encoding;
++SELECT reverse(truncated) FROM regress_encoding;
++-- invalid short mb character = silently dropped
++SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding;
++
++-- PostgreSQL doesn't allow strings to contain NUL.  If a corrupted string
++-- contains NUL at a character boundary position, some functions treat it as a
++-- character while others treat it as a terminator, as implementation details.
++
++-- NUL = terminator
++SELECT length(with_nul) FROM regress_encoding;
++SELECT substring(with_nul, 3, 1) FROM regress_encoding;
++SELECT substring(with_nul, 4, 1) FROM regress_encoding;
++SELECT substring(with_nul, 5, 1) FROM regress_encoding;
++SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding;
++SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding;
++-- NUL = character
++SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding;
++
++-- If a corrupted string contains NUL in the tail bytes of a multibyte
++-- character (invalid in all encodings), it is considered part of the
++-- character for length purposes.  An error will only be raised in code paths
++-- that convert or verify encodings.
++
++SELECT length(truncated_with_nul) FROM regress_encoding;
++SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding;
++SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding;
++SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding;
++SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding;
++SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding;
++SELECT reverse(truncated_with_nul) FROM regress_encoding;
++
++-- unbounded: sequence would overrun the string!
++SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3)
++FROM regress_encoding;
++
++-- condition detected when using the length/range variants
++SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3)
++FROM regress_encoding;
++SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3)
++FROM regress_encoding;
++
++-- unbounded: sequence would overrun the string, if the terminator were really
++-- the end of it
++SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3)
++FROM regress_encoding;
++SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3)
++FROM regress_encoding;
++
++-- condition detected when using the cstr variants
++SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3)
++FROM regress_encoding;
++
++DROP TABLE regress_encoding;
++
++-- mb<->wchar conversions
++CREATE FUNCTION test_encoding(encoding text, description text, input bytea)
++RETURNS VOID LANGUAGE plpgsql AS
++$$
++DECLARE
++	prefix text;
++	len int;
++	wchars int[];
++	round_trip bytea;
++	result text;
++BEGIN
++	prefix := rpad(encoding || ' ' || description || ':', 28);
++
++	-- XXX could also test validation, length functions and include client
++	-- only encodings with these test cases
++
++	IF test_valid_server_encoding(encoding) THEN
++		wchars := test_text_to_wchars(encoding, test_bytea_to_text(input));
++		round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars));
++		if input = round_trip then
++			result := 'OK';
++		elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then
++			result := 'truncated';
++		else
++			result := 'failed';
++		end if;
++		RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result;
++	END IF;
++END;
++$$;
++-- No validation is done on the encoding itself, just the length to avoid
++-- overruns, so some of the byte sequences below are bogus.  They cover
++-- all code branches, server encodings only for now.
++CREATE TABLE encoding_tests (encoding text, description text, input bytea);
++INSERT INTO encoding_tests VALUES
++	-- LATIN1, other single-byte encodings
++	('LATIN1', 'ASCII',    'a'),
++	('LATIN1', 'extended', '\xe9'),
++	-- EUC_JP, EUC_JIS_2004, EUR_KR (for the purposes of wchar conversion):
++	-- 2 8e (CS2, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
++	-- 3 8f (CS3, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
++	-- 2 80..ff (CS1)
++	('EUC_JP', 'ASCII',      'a'),
++	('EUC_JP', 'CS1, short', '\x80'),
++	('EUC_JP', 'CS1',        '\x8002'),
++	('EUC_JP', 'CS2, short', '\x8e'),
++	('EUC_JP', 'CS2',        '\x8e02'),
++	('EUC_JP', 'CS3, short', '\x8f'),
++	('EUC_JP', 'CS3, short', '\x8f02'),
++	('EUC_JP', 'CS3',        '\x8f0203'),
++	-- EUC_CN
++	-- 3 8e (CS2, not used but arbitrarily considered to have length 3)
++	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
++	-- 2 80..ff (CS1)
++	('EUC_CN', 'ASCII',      'a'),
++	('EUC_CN', 'CS1, short', '\x80'),
++	('EUC_CN', 'CS1',        '\x8002'),
++	('EUC_CN', 'CS2, short', '\x8e'),
++	('EUC_CN', 'CS2, short', '\x8e02'),
++	('EUC_CN', 'CS2',        '\x8e0203'),
++	('EUC_CN', 'CS3, short', '\x8f'),
++	('EUC_CN', 'CS3, short', '\x8f02'),
++	('EUC_CN', 'CS3',        '\x8f0203'),
++	-- EUC_TW:
++	-- 4 8e (CS2)
++	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
++	-- 2 80..ff (CS1)
++	('EUC_TW', 'ASCII',      'a'),
++	('EUC_TW', 'CS1, short', '\x80'),
++	('EUC_TW', 'CS1',        '\x8002'),
++	('EUC_TW', 'CS2, short', '\x8e'),
++	('EUC_TW', 'CS2, short', '\x8e02'),
++	('EUC_TW', 'CS2, short', '\x8e0203'),
++	('EUC_TW', 'CS2',        '\x8e020304'),
++	('EUC_TW', 'CS3, short', '\x8f'),
++	('EUC_TW', 'CS3, short', '\x8f02'),
++	('EUC_TW', 'CS3',        '\x8f0203'),
++	-- UTF8
++	-- 2 c0..df
++	-- 3 e0..ef
++	-- 4 f0..f7 (but maximum real codepoint U+10ffff has f4)
++	-- 5 f8..fb (not supported)
++	-- 6 fc..fd (not supported)
++	('UTF8',   'ASCII',               'a'),
++	('UTF8',   '2 byte, short',       '\xdf'),
++	('UTF8',   '2 byte',              '\xdf82'),
++	('UTF8',   '3 byte, short',       '\xef'),
++	('UTF8',   '3 byte, short',       '\xef82'),
++	('UTF8',   '3 byte',              '\xef8283'),
++	('UTF8',   '4 byte, short',       '\xf7'),
++	('UTF8',   '4 byte, short',       '\xf782'),
++	('UTF8',   '4 byte, short',       '\xf78283'),
++	('UTF8',   '4 byte',              '\xf7828384'),
++	('UTF8',   '5 byte, unsupported', '\xfb'),
++	('UTF8',   '5 byte, unsupported', '\xfb82'),
++	('UTF8',   '5 byte, unsupported', '\xfb8283'),
++	('UTF8',   '5 byte, unsupported', '\xfb828384'),
++	('UTF8',   '5 byte, unsupported', '\xfb82838485'),
++	('UTF8',   '6 byte, unsupported', '\xfd'),
++	('UTF8',   '6 byte, unsupported', '\xfd82'),
++	('UTF8',   '6 byte, unsupported', '\xfd8283'),
++	('UTF8',   '6 byte, unsupported', '\xfd828384'),
++	('UTF8',   '6 byte, unsupported', '\xfd82838485'),
++	('UTF8',   '6 byte, unsupported', '\xfd8283848586'),
++	-- MULE_INTERNAL
++	-- 2 81..8d LC1
++	-- 3 90..99 LC2
++	('MULE_INTERNAL', 'ASCII',         'a'),
++	('MULE_INTERNAL', 'LC1, short',    '\x81'),
++	('MULE_INTERNAL', 'LC1',           '\x8182'),
++	('MULE_INTERNAL', 'LC2, short',    '\x90'),
++	('MULE_INTERNAL', 'LC2, short',    '\x9082'),
++	('MULE_INTERNAL', 'LC2',           '\x908283');
++
++SELECT COUNT(test_encoding(encoding, description, input)) > 0
++FROM encoding_tests;
++
++-- substring fetches a slice of a toasted value; unused tail of that slice is
++-- an incomplete char (bug #19406)
++CREATE TABLE toast_3b_utf8 (c text);
++INSERT INTO toast_3b_utf8 VALUES (repeat(U&'\2026', 4000));
++SELECT SUBSTRING(c FROM 1 FOR 1) FROM toast_3b_utf8;
++SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
++-- diagnose incomplete char iff within the substring
++UPDATE toast_3b_utf8 SET c = c || test_bytea_to_text('\xe280');
++SELECT SUBSTRING(c FROM 4000 FOR 1) FROM toast_3b_utf8;
++SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
++-- substring needing last byte of its slice_size
++ALTER TABLE toast_3b_utf8 RENAME TO toast_4b_utf8;
++UPDATE toast_4b_utf8 SET c = repeat(U&'\+01F680', 3000);
++SELECT SUBSTRING(c FROM 3000 FOR 1) FROM toast_4b_utf8;
++
++DROP TABLE encoding_tests;
++DROP TABLE toast_4b_utf8;
++DROP FUNCTION test_encoding;
++DROP FUNCTION test_text_to_wchars;
++DROP FUNCTION test_mblen_func;
++DROP FUNCTION test_bytea_to_text;
++DROP FUNCTION test_text_to_bytea;
++
++
++-- substring slow path: multi-byte escape char vs. multi-byte pattern char.
++SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
++-- Levenshtein distance metric: exercise character length cache.
++SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
++-- JSON errcontext: truncate long data.
++SELECT repeat(U&'\00A7', 30)::json;
+diff --git a/src/test/regress/output/encoding.source b/src/test/regress/output/encoding.source
+new file mode 100644
+index 00000000000..63c785f4d1b
+--- /dev/null
++++ b/src/test/regress/output/encoding.source
+@@ -0,0 +1,438 @@
++/* skip test if not UTF8 server encoding */
++SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
++\if :skip_test
++\quit
++\endif
++CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[]
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean
++    AS '@libdir@/regress@DLSUFFIX@' LANGUAGE C STRICT;
++CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text);
++INSERT INTO regress_encoding
++VALUES ('café',
++        'caf' || test_bytea_to_text('\xc3'),
++        'café' || test_bytea_to_text('\x00') || 'dcba',
++        'caf' || test_bytea_to_text('\xc300') || 'dcba');
++SELECT good, truncated, with_nul FROM regress_encoding;
++ good | truncated | with_nul 
++------+-----------+----------
++ café | caf       | café
++(1 row)
++
++SELECT length(good) FROM regress_encoding;
++ length 
++--------
++      4
++(1 row)
++
++SELECT substring(good, 3, 1) FROM regress_encoding;
++ substring 
++-----------
++ f
++(1 row)
++
++SELECT substring(good, 4, 1) FROM regress_encoding;
++ substring 
++-----------
++ é
++(1 row)
++
++SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding;
++ regexp_replace 
++----------------
++ é
++(1 row)
++
++SELECT reverse(good) FROM regress_encoding;
++ reverse 
++---------
++ éfac
++(1 row)
++
++-- invalid short mb character = error
++SELECT length(truncated) FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
++SELECT substring(truncated, 1, 3) FROM regress_encoding;
++ substring 
++-----------
++ caf
++(1 row)
++
++SELECT substring(truncated, 1, 4) FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
++SELECT reverse(truncated) FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
++-- invalid short mb character = silently dropped
++SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding;
++ regexp_replace 
++----------------
++ caf
++(1 row)
++
++-- PostgreSQL doesn't allow strings to contain NUL.  If a corrupted string
++-- contains NUL at a character boundary position, some functions treat it as a
++-- character while others treat it as a terminator, as implementation details.
++-- NUL = terminator
++SELECT length(with_nul) FROM regress_encoding;
++ length 
++--------
++      4
++(1 row)
++
++SELECT substring(with_nul, 3, 1) FROM regress_encoding;
++ substring 
++-----------
++ f
++(1 row)
++
++SELECT substring(with_nul, 4, 1) FROM regress_encoding;
++ substring 
++-----------
++ é
++(1 row)
++
++SELECT substring(with_nul, 5, 1) FROM regress_encoding;
++ substring 
++-----------
++ 
++(1 row)
++
++SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding;
++ convert_to 
++------------
++ \x
++(1 row)
++
++SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding;
++ regexp_replace 
++----------------
++ é
++(1 row)
++
++-- NUL = character
++SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding;
++ with_nul | reverse | reverse 
++----------+---------+---------
++ café     | abcd    | café
++(1 row)
++
++-- If a corrupted string contains NUL in the tail bytes of a multibyte
++-- character (invalid in all encodings), it is considered part of the
++-- character for length purposes.  An error will only be raised in code paths
++-- that convert or verify encodings.
++SELECT length(truncated_with_nul) FROM regress_encoding;
++ length 
++--------
++      8
++(1 row)
++
++SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding;
++ substring 
++-----------
++ f
++(1 row)
++
++SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding;
++ substring 
++-----------
++ 
++(1 row)
++
++SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3 0x00
++SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding;
++ substring 
++-----------
++ d
++(1 row)
++
++SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding;
++ ?column? 
++----------
++ t
++(1 row)
++
++SELECT reverse(truncated_with_nul) FROM regress_encoding;
++ reverse 
++---------
++ abcd
++(1 row)
++
++-- unbounded: sequence would overrun the string!
++SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3)
++FROM regress_encoding;
++ test_mblen_func 
++-----------------
++               2
++(1 row)
++
++-- condition detected when using the length/range variants
++SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3)
++FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
++SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3)
++FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
++-- unbounded: sequence would overrun the string, if the terminator were really
++-- the end of it
++SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3)
++FROM regress_encoding;
++ test_mblen_func 
++-----------------
++               2
++(1 row)
++
++SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3)
++FROM regress_encoding;
++ test_mblen_func 
++-----------------
++               2
++(1 row)
++
++-- condition detected when using the cstr variants
++SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3)
++FROM regress_encoding;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xc3
++DROP TABLE regress_encoding;
++-- mb<->wchar conversions
++CREATE FUNCTION test_encoding(encoding text, description text, input bytea)
++RETURNS VOID LANGUAGE plpgsql AS
++$$
++DECLARE
++	prefix text;
++	len int;
++	wchars int[];
++	round_trip bytea;
++	result text;
++BEGIN
++	prefix := rpad(encoding || ' ' || description || ':', 28);
++
++	-- XXX could also test validation, length functions and include client
++	-- only encodings with these test cases
++
++	IF test_valid_server_encoding(encoding) THEN
++		wchars := test_text_to_wchars(encoding, test_bytea_to_text(input));
++		round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars));
++		if input = round_trip then
++			result := 'OK';
++		elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then
++			result := 'truncated';
++		else
++			result := 'failed';
++		end if;
++		RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result;
++	END IF;
++END;
++$$;
++-- No validation is done on the encoding itself, just the length to avoid
++-- overruns, so some of the byte sequences below are bogus.  They cover
++-- all code branches, server encodings only for now.
++CREATE TABLE encoding_tests (encoding text, description text, input bytea);
++INSERT INTO encoding_tests VALUES
++	-- LATIN1, other single-byte encodings
++	('LATIN1', 'ASCII',    'a'),
++	('LATIN1', 'extended', '\xe9'),
++	-- EUC_JP, EUC_JIS_2004, EUR_KR (for the purposes of wchar conversion):
++	-- 2 8e (CS2, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
++	-- 3 8f (CS3, not used by EUR_KR but arbitrarily considered to have EUC_JP length)
++	-- 2 80..ff (CS1)
++	('EUC_JP', 'ASCII',      'a'),
++	('EUC_JP', 'CS1, short', '\x80'),
++	('EUC_JP', 'CS1',        '\x8002'),
++	('EUC_JP', 'CS2, short', '\x8e'),
++	('EUC_JP', 'CS2',        '\x8e02'),
++	('EUC_JP', 'CS3, short', '\x8f'),
++	('EUC_JP', 'CS3, short', '\x8f02'),
++	('EUC_JP', 'CS3',        '\x8f0203'),
++	-- EUC_CN
++	-- 3 8e (CS2, not used but arbitrarily considered to have length 3)
++	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
++	-- 2 80..ff (CS1)
++	('EUC_CN', 'ASCII',      'a'),
++	('EUC_CN', 'CS1, short', '\x80'),
++	('EUC_CN', 'CS1',        '\x8002'),
++	('EUC_CN', 'CS2, short', '\x8e'),
++	('EUC_CN', 'CS2, short', '\x8e02'),
++	('EUC_CN', 'CS2',        '\x8e0203'),
++	('EUC_CN', 'CS3, short', '\x8f'),
++	('EUC_CN', 'CS3, short', '\x8f02'),
++	('EUC_CN', 'CS3',        '\x8f0203'),
++	-- EUC_TW:
++	-- 4 8e (CS2)
++	-- 3 8f (CS3, not used but arbitrarily considered to have length 3)
++	-- 2 80..ff (CS1)
++	('EUC_TW', 'ASCII',      'a'),
++	('EUC_TW', 'CS1, short', '\x80'),
++	('EUC_TW', 'CS1',        '\x8002'),
++	('EUC_TW', 'CS2, short', '\x8e'),
++	('EUC_TW', 'CS2, short', '\x8e02'),
++	('EUC_TW', 'CS2, short', '\x8e0203'),
++	('EUC_TW', 'CS2',        '\x8e020304'),
++	('EUC_TW', 'CS3, short', '\x8f'),
++	('EUC_TW', 'CS3, short', '\x8f02'),
++	('EUC_TW', 'CS3',        '\x8f0203'),
++	-- UTF8
++	-- 2 c0..df
++	-- 3 e0..ef
++	-- 4 f0..f7 (but maximum real codepoint U+10ffff has f4)
++	-- 5 f8..fb (not supported)
++	-- 6 fc..fd (not supported)
++	('UTF8',   'ASCII',               'a'),
++	('UTF8',   '2 byte, short',       '\xdf'),
++	('UTF8',   '2 byte',              '\xdf82'),
++	('UTF8',   '3 byte, short',       '\xef'),
++	('UTF8',   '3 byte, short',       '\xef82'),
++	('UTF8',   '3 byte',              '\xef8283'),
++	('UTF8',   '4 byte, short',       '\xf7'),
++	('UTF8',   '4 byte, short',       '\xf782'),
++	('UTF8',   '4 byte, short',       '\xf78283'),
++	('UTF8',   '4 byte',              '\xf7828384'),
++	('UTF8',   '5 byte, unsupported', '\xfb'),
++	('UTF8',   '5 byte, unsupported', '\xfb82'),
++	('UTF8',   '5 byte, unsupported', '\xfb8283'),
++	('UTF8',   '5 byte, unsupported', '\xfb828384'),
++	('UTF8',   '5 byte, unsupported', '\xfb82838485'),
++	('UTF8',   '6 byte, unsupported', '\xfd'),
++	('UTF8',   '6 byte, unsupported', '\xfd82'),
++	('UTF8',   '6 byte, unsupported', '\xfd8283'),
++	('UTF8',   '6 byte, unsupported', '\xfd828384'),
++	('UTF8',   '6 byte, unsupported', '\xfd82838485'),
++	('UTF8',   '6 byte, unsupported', '\xfd8283848586'),
++	-- MULE_INTERNAL
++	-- 2 81..8d LC1
++	-- 3 90..99 LC2
++	('MULE_INTERNAL', 'ASCII',         'a'),
++	('MULE_INTERNAL', 'LC1, short',    '\x81'),
++	('MULE_INTERNAL', 'LC1',           '\x8182'),
++	('MULE_INTERNAL', 'LC2, short',    '\x90'),
++	('MULE_INTERNAL', 'LC2, short',    '\x9082'),
++	('MULE_INTERNAL', 'LC2',           '\x908283');
++SELECT COUNT(test_encoding(encoding, description, input)) > 0
++FROM encoding_tests;
++NOTICE:  LATIN1 ASCII:                \x61 -> {97} -> \x61 = OK
++NOTICE:  LATIN1 extended:             \xe9 -> {233} -> \xe9 = OK
++NOTICE:  EUC_JP ASCII:                \x61 -> {97} -> \x61 = OK
++NOTICE:  EUC_JP CS1, short:           \x80 -> {128} -> \x80 = OK
++NOTICE:  EUC_JP CS1:                  \x8002 -> {32770} -> \x8002 = OK
++NOTICE:  EUC_JP CS2, short:           \x8e -> {142} -> \x8e = OK
++NOTICE:  EUC_JP CS2:                  \x8e02 -> {36354} -> \x8e02 = OK
++NOTICE:  EUC_JP CS3, short:           \x8f -> {143} -> \x8f = OK
++NOTICE:  EUC_JP CS3, short:           \x8f02 -> {36610} -> \x8f02 = OK
++NOTICE:  EUC_JP CS3:                  \x8f0203 -> {9372163} -> \x8f0203 = OK
++NOTICE:  EUC_CN ASCII:                \x61 -> {97} -> \x61 = OK
++NOTICE:  EUC_CN CS1, short:           \x80 -> {128} -> \x80 = OK
++NOTICE:  EUC_CN CS1:                  \x8002 -> {32770} -> \x8002 = OK
++NOTICE:  EUC_CN CS2, short:           \x8e -> {142} -> \x8e = OK
++NOTICE:  EUC_CN CS2, short:           \x8e02 -> {36354} -> \x8e02 = OK
++NOTICE:  EUC_CN CS2:                  \x8e0203 -> {9306627} -> \x8e0203 = OK
++NOTICE:  EUC_CN CS3, short:           \x8f -> {143} -> \x8f = OK
++NOTICE:  EUC_CN CS3, short:           \x8f02 -> {36610} -> \x8f02 = OK
++NOTICE:  EUC_CN CS3:                  \x8f0203 -> {9372163} -> \x8f0203 = OK
++NOTICE:  EUC_TW ASCII:                \x61 -> {97} -> \x61 = OK
++NOTICE:  EUC_TW CS1, short:           \x80 -> {128} -> \x80 = OK
++NOTICE:  EUC_TW CS1:                  \x8002 -> {32770} -> \x8002 = OK
++NOTICE:  EUC_TW CS2, short:           \x8e -> {142} -> \x8e = OK
++NOTICE:  EUC_TW CS2, short:           \x8e02 -> {36354} -> \x8e02 = OK
++NOTICE:  EUC_TW CS2, short:           \x8e0203 -> {36354,3} -> \x8e0203 = OK
++NOTICE:  EUC_TW CS2:                  \x8e020304 -> {-1912470780} -> \x8e020304 = OK
++NOTICE:  EUC_TW CS3, short:           \x8f -> {143} -> \x8f = OK
++NOTICE:  EUC_TW CS3, short:           \x8f02 -> {36610} -> \x8f02 = OK
++NOTICE:  EUC_TW CS3:                  \x8f0203 -> {9372163} -> \x8f0203 = OK
++NOTICE:  UTF8 ASCII:                  \x61 -> {97} -> \x61 = OK
++NOTICE:  UTF8 2 byte, short:          \xdf -> {} -> \x = truncated
++NOTICE:  UTF8 2 byte:                 \xdf82 -> {1986} -> \xdf82 = OK
++NOTICE:  UTF8 3 byte, short:          \xef -> {} -> \x = truncated
++NOTICE:  UTF8 3 byte, short:          \xef82 -> {} -> \x = truncated
++NOTICE:  UTF8 3 byte:                 \xef8283 -> {61571} -> \xef8283 = OK
++NOTICE:  UTF8 4 byte, short:          \xf7 -> {} -> \x = truncated
++NOTICE:  UTF8 4 byte, short:          \xf782 -> {} -> \x = truncated
++NOTICE:  UTF8 4 byte, short:          \xf78283 -> {} -> \x = truncated
++NOTICE:  UTF8 4 byte:                 \xf7828384 -> {1843396} -> \xf7828384 = OK
++NOTICE:  UTF8 5 byte, unsupported:    \xfb -> {251} -> \xc3bb = failed
++NOTICE:  UTF8 5 byte, unsupported:    \xfb82 -> {251,130} -> \xc3bbc282 = failed
++NOTICE:  UTF8 5 byte, unsupported:    \xfb8283 -> {251,130,131} -> \xc3bbc282c283 = failed
++NOTICE:  UTF8 5 byte, unsupported:    \xfb828384 -> {251,130,131,132} -> \xc3bbc282c283c284 = failed
++NOTICE:  UTF8 5 byte, unsupported:    \xfb82838485 -> {251,130,131,132,133} -> \xc3bbc282c283c284c285 = failed
++NOTICE:  UTF8 6 byte, unsupported:    \xfd -> {253} -> \xc3bd = failed
++NOTICE:  UTF8 6 byte, unsupported:    \xfd82 -> {253,130} -> \xc3bdc282 = failed
++NOTICE:  UTF8 6 byte, unsupported:    \xfd8283 -> {253,130,131} -> \xc3bdc282c283 = failed
++NOTICE:  UTF8 6 byte, unsupported:    \xfd828384 -> {253,130,131,132} -> \xc3bdc282c283c284 = failed
++NOTICE:  UTF8 6 byte, unsupported:    \xfd82838485 -> {253,130,131,132,133} -> \xc3bdc282c283c284c285 = failed
++NOTICE:  UTF8 6 byte, unsupported:    \xfd8283848586 -> {253,130,131,132,133,134} -> \xc3bdc282c283c284c285c286 = failed
++NOTICE:  MULE_INTERNAL ASCII:         \x61 -> {97} -> \x61 = OK
++NOTICE:  MULE_INTERNAL LC1, short:    \x81 -> {129} -> \x81 = OK
++NOTICE:  MULE_INTERNAL LC1:           \x8182 -> {8454274} -> \x8182 = OK
++NOTICE:  MULE_INTERNAL LC2, short:    \x90 -> {144} -> \x90 = OK
++NOTICE:  MULE_INTERNAL LC2, short:    \x9082 -> {144,130} -> \x9082 = OK
++NOTICE:  MULE_INTERNAL LC2:           \x908283 -> {9470595} -> \x908283 = OK
++ ?column? 
++----------
++ t
++(1 row)
++
++-- substring fetches a slice of a toasted value; unused tail of that slice is
++-- an incomplete char (bug #19406)
++CREATE TABLE toast_3b_utf8 (c text);
++INSERT INTO toast_3b_utf8 VALUES (repeat(U&'\2026', 4000));
++SELECT SUBSTRING(c FROM 1 FOR 1) FROM toast_3b_utf8;
++ substring 
++-----------
++ …
++(1 row)
++
++SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
++ substring 
++-----------
++ 
++(1 row)
++
++-- diagnose incomplete char iff within the substring
++UPDATE toast_3b_utf8 SET c = c || test_bytea_to_text('\xe280');
++SELECT SUBSTRING(c FROM 4000 FOR 1) FROM toast_3b_utf8;
++ substring 
++-----------
++ …
++(1 row)
++
++SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
++ERROR:  invalid byte sequence for encoding "UTF8": 0xe2 0x80
++-- substring needing last byte of its slice_size
++ALTER TABLE toast_3b_utf8 RENAME TO toast_4b_utf8;
++UPDATE toast_4b_utf8 SET c = repeat(U&'\+01F680', 3000);
++SELECT SUBSTRING(c FROM 3000 FOR 1) FROM toast_4b_utf8;
++ substring 
++-----------
++ 🚀
++(1 row)
++
++DROP TABLE encoding_tests;
++DROP TABLE toast_4b_utf8;
++DROP FUNCTION test_encoding;
++DROP FUNCTION test_text_to_wchars;
++DROP FUNCTION test_mblen_func;
++DROP FUNCTION test_bytea_to_text;
++DROP FUNCTION test_text_to_bytea;
++-- substring slow path: multi-byte escape char vs. multi-byte pattern char.
++SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
++ERROR:  syntax error at or near "U&'\00AC'"
++LINE 1: SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
++                                     ^
++-- Levenshtein distance metric: exercise character length cache.
++SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
++ERROR:  column "real§_name" does not exist
++LINE 1: SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
++               ^
++HINT:  Perhaps you meant to reference the column "x.real_name".
++-- JSON errcontext: truncate long data.
++SELECT repeat(U&'\00A7', 30)::json;
++ERROR:  invalid input syntax for type json
++DETAIL:  Token "§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§" is invalid.
++CONTEXT:  JSON data, line 1: ...§§§§§§§§§§§§§§§§§§§§§§§§
+diff --git a/src/test/regress/output/encoding_1.source b/src/test/regress/output/encoding_1.source
+new file mode 100644
+index 00000000000..a5b02090901
+--- /dev/null
++++ b/src/test/regress/output/encoding_1.source
+@@ -0,0 +1,4 @@
++/* skip test if not UTF8 server encoding */
++SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
++\if :skip_test
++\quit
+diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
+index e146b24bef3..bb4019c8039 100644
+--- a/src/test/regress/parallel_schedule
++++ b/src/test/regress/parallel_schedule
+@@ -27,7 +27,7 @@ test: strings numerology point lseg line box path polygon circle date time timet
+ # geometry depends on point, lseg, box, path, polygon and circle
+ # horology depends on interval, timetz, timestamp, timestamptz
+ # ----------
+-test: geometry horology regex oidjoins type_sanity opr_sanity misc_sanity comments expressions unicode database
++test: geometry horology regex oidjoins type_sanity opr_sanity misc_sanity comments expressions unicode database encoding euc_kr
+ 
+ # ----------
+ # These four each depend on the previous one
+diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
+index ed75c410ffe..bcda74e9c37 100644
+--- a/src/test/regress/regress.c
++++ b/src/test/regress/regress.c
+@@ -1141,3 +1141,143 @@ test_enc_setup(PG_FUNCTION_ARGS)
+ 
+ 	PG_RETURN_VOID();
+ }
++
++/* Relabel a bytea as text with no encoding validation, so SQL tests can build corrupt strings. */
++PG_FUNCTION_INFO_V1(test_bytea_to_text);
++Datum
++test_bytea_to_text(PG_FUNCTION_ARGS)
++{
++	PG_RETURN_TEXT_P(PG_GETARG_BYTEA_PP(0));	/* argument handed back as-is */
++}
++
++/* The reverse: relabel text as bytea so SQL tests can look at the raw bytes. */
++PG_FUNCTION_INFO_V1(test_text_to_bytea);
++Datum
++test_text_to_bytea(PG_FUNCTION_ARGS)
++{
++	PG_RETURN_BYTEA_P(PG_GETARG_TEXT_PP(0));	/* argument handed back as-is */
++}
++
++/* Corruption tests in C: call the mblen variant named by "func" at a byte offset into a string. */
++PG_FUNCTION_INFO_V1(test_mblen_func);
++Datum
++test_mblen_func(PG_FUNCTION_ARGS)
++{
++	const char *func = text_to_cstring(PG_GETARG_BYTEA_PP(0));
++	const char *encoding = text_to_cstring(PG_GETARG_BYTEA_PP(1));
++	text	   *string = PG_GETARG_BYTEA_PP(2);
++	int			offset = PG_GETARG_INT32(3);	/* byte offset; not range-checked here */
++	const char *data = VARDATA_ANY(string);
++	size_t		size = VARSIZE_ANY_EXHDR(string);
++	int			result = 0;
++
++	if (strcmp(func, "pg_mblen_unbounded") == 0)
++		result = pg_mblen_unbounded(data + offset);
++	else if (strcmp(func, "pg_mblen_cstr") == 0)
++		result = pg_mblen_cstr(data + offset);
++	else if (strcmp(func, "pg_mblen_with_len") == 0)
++		result = pg_mblen_with_len(data + offset, size - offset);	/* bounded by remaining bytes */
++	else if (strcmp(func, "pg_mblen_range") == 0)
++		result = pg_mblen_range(data + offset, data + size);	/* bounded by end pointer */
++	else if (strcmp(func, "pg_encoding_mblen") == 0)
++		result = pg_encoding_mblen(pg_char_to_encoding(encoding), data + offset);	/* sole user of "encoding" */
++	else
++		elog(ERROR, "unknown function");
++
++	PG_RETURN_INT32(result);
++}
++
++/* Convert text to an int4[] of pg_wchar values, decoding with the named encoding. */
++PG_FUNCTION_INFO_V1(test_text_to_wchars);
++Datum
++test_text_to_wchars(PG_FUNCTION_ARGS)
++{
++	const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0));
++	text	   *string = PG_GETARG_TEXT_PP(1);
++	const char *data = VARDATA_ANY(string);
++	size_t		size = VARSIZE_ANY_EXHDR(string);
++	pg_wchar   *wchars = palloc(sizeof(pg_wchar) * (size + 1));	/* +1: room for terminating zero */
++	Datum	   *datums;
++	int			wlen;
++	int			encoding;
++
++	encoding = pg_char_to_encoding(encoding_name);
++	if (encoding < 0)
++		elog(ERROR, "unknown encoding name: %s", encoding_name);
++
++	if (size > 0)
++	{
++		datums = palloc(sizeof(Datum) * size);
++		wlen = pg_encoding_mb2wchar_with_len(encoding, data, wchars, size);
++		Assert(wlen >= 0);
++		Assert(wlen <= size);
++		Assert(wchars[wlen] == 0);
++
++		for (int i = 0; i < wlen; ++i)
++			datums[i] = UInt32GetDatum(wchars[i]);
++	}
++	else
++	{
++		datums = NULL;
++		wlen = 0;
++	}
++
++	/* construct_array_builtin() only exists from PostgreSQL 16 on, so pass int4's
++	 * element length, by-value flag and alignment to construct_array() explicitly. */
++	PG_RETURN_ARRAYTYPE_P(construct_array(datums, wlen, INT4OID, sizeof(int32), true, TYPALIGN_INT));
++}
++
++/* Convert an int4[] of pg_wchar values to text, encoding with the named encoding. */
++PG_FUNCTION_INFO_V1(test_wchars_to_text);
++Datum
++test_wchars_to_text(PG_FUNCTION_ARGS)
++{
++	const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0));
++	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(1);
++	Datum	   *datums;
++	bool	   *nulls;
++	char	   *mb;
++	int			wlen;
++	int			bytes;
++	int			encoding;
++
++	encoding = pg_char_to_encoding(encoding_name);
++	if (encoding < 0)
++		elog(ERROR, "unknown encoding name: %s", encoding_name);
++
++	/* deconstruct_array_builtin() only exists from PostgreSQL 16 on, so pass int4's
++	 * element length, by-value flag and alignment to deconstruct_array() explicitly. */
++	deconstruct_array(array, INT4OID, sizeof(int32), true, TYPALIGN_INT, &datums, &nulls, &wlen);
++
++	if (wlen > 0)
++	{
++		pg_wchar   *wchars = palloc(sizeof(pg_wchar) * wlen);
++
++		for (int i = 0; i < wlen; ++i)
++		{
++			if (nulls[i])
++				elog(ERROR, "unexpected NULL in array");
++			wchars[i] = DatumGetInt32(datums[i]);
++		}
++
++		/* Worst case: every wchar needs the encoding's maximum length, plus one spare byte. */
++		mb = palloc(pg_encoding_max_length(encoding) * wlen + 1);
++		bytes = pg_encoding_wchar2mb_with_len(encoding, wchars, mb, wlen);
++	}
++	else
++	{
++		mb = "";
++		bytes = 0;
++	}
++
++	/* Build the result from exactly "bytes" bytes; the output is not validated here. */
++	PG_RETURN_TEXT_P(cstring_to_text_with_len(mb, bytes));
++}
++
++/* SQL-callable: true iff the name is a valid server encoding (the lookup yields an ID, or -1). */
++PG_FUNCTION_INFO_V1(test_valid_server_encoding);
++Datum
++test_valid_server_encoding(PG_FUNCTION_ARGS)
++{
++	PG_RETURN_BOOL(pg_valid_server_encoding(text_to_cstring(PG_GETARG_TEXT_PP(0))) >= 0);
++}
+diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore
+index 46c8112094c..f5a279c2ee4 100644
+--- a/src/test/regress/sql/.gitignore
++++ b/src/test/regress/sql/.gitignore
+@@ -2,6 +2,7 @@
+ /copy.sql
+ /create_function_1.sql
+ /create_function_2.sql
++/encoding.sql
+ /largeobject.sql
+ /misc.sql
+ /security_label.sql
+diff --git a/src/test/regress/sql/euc_kr.sql b/src/test/regress/sql/euc_kr.sql
+new file mode 100644
+index 00000000000..1851b2a8c14
+--- /dev/null
++++ b/src/test/regress/sql/euc_kr.sql
+@@ -0,0 +1,12 @@
++-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
++-- non-UTF8, multibyte encoding as of 2026-01.  Since UTF8 can represent all
++-- of EUC_KR, also run the test in UTF8.
++SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
++\if :skip_test
++\quit
++\endif
++
++-- Exercise is_multibyte_char_in_char (non-UTF8) slow path.
++SELECT POSITION(
++	convert_from('\xbcf6c7d0', 'EUC_KR') IN
++	convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR'));
+diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
+index 4824097ead2..b8e20d0c4fe 100644
+--- a/src/tools/pgindent/typedefs.list
++++ b/src/tools/pgindent/typedefs.list
+@@ -653,6 +653,7 @@ ExtensibleNodeMethods
+ ExtensionControlFile
+ ExtensionInfo
+ ExtensionMemberId
++ExtensionSiblingCache
+ ExtensionVersionInfo
+ FDWCollateState
+ FD_SET
diff --git a/SPECS/postgresql.spec b/SPECS/postgresql.spec
index 36cf789..4a2fda4 100644
--- a/SPECS/postgresql.spec
+++ b/SPECS/postgresql.spec
@@ -63,7 +63,7 @@ Summary: PostgreSQL client programs
 Name: postgresql
 %global majorversion 13
 Version: %{majorversion}.23
-Release: 1%{?dist}
+Release: 2%{?dist}
 
 # The PostgreSQL license is very similar to other MIT licenses, but the OSI
 # recognizes it as an independent license, so we do as well.
@@ -111,6 +111,7 @@ Patch8: postgresql-external-libpq.patch
 Patch9: postgresql-server-pg_config.patch
 Patch12: postgresql-no-libecpg.patch
 Patch14: postgresql-pgcrypto-openssl3-tests.patch
+Patch15: CVE-2026-2004--CVE-2026-2005--CVE-2026-2006.patch
 
 BuildRequires: make
 BuildRequires: gcc
@@ -426,6 +427,7 @@ goal of accelerating analytics queries.
 %endif
 %patch -P 9 -p1
 %patch -P 14 -p1
+%patch -P 15 -p1
 
 # We used to run autoconf here, but there's no longer any real need to,
 # since Postgres ships with a reasonably modern configure script.
@@ -1230,6 +1232,9 @@ make -C postgresql-setup-%{setup_version} check
 
 
 %changelog
+* Wed Feb 25 2026 Filip Janus <fjanus@redhat.com> - 13.23-2
+- fix CVE-2026-2004 CVE-2026-2005 CVE-2026-2006
+
 * Fri Dec 05 2025 Filip Janus <fjanus@redhat.com> - 13.23-1
 - Update to 13.23
 - Resolves: RHEL-128812 (CVE-2025-12818)