diff --git a/glibc-RHEL-162901-1.patch b/glibc-RHEL-162901-1.patch new file mode 100644 index 0000000..39624a7 --- /dev/null +++ b/glibc-RHEL-162901-1.patch @@ -0,0 +1,369 @@ +commit 5729e0e9af590807df66a3db688008f9547bce9f +Author: Adhemerval Zanella +Date: Fri Feb 10 14:09:10 2023 -0300 + + iconv: Remove _STRING_ARCH_unaligned usage for get/set macros + + And use a packed structure instead. The compiler generates optimized + unaligned code if the architecture supports it. + + Checked on x86_64-linux-gnu and i686-linux-gnu. + + Reviewed-by: Wilco Dijkstra + +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index 499bc6bfa54fe37d..360924cc0394d86b 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -27,6 +27,34 @@ + + __BEGIN_DECLS + ++/* We have to provide support for machines which are not able to handled ++ unaligned memory accesses. Some of the character encodings have ++ representations with a fixed width of 2 or 4 bytes. */ ++#define get16(addr) \ ++({ \ ++ const struct { uint16_t r; } __attribute__ ((__packed__)) *__ptr \ ++ = (__typeof(__ptr))(addr); \ ++ __ptr->r; \ ++}) ++#define get32(addr) \ ++({ \ ++ const struct { uint32_t r; } __attribute__ ((__packed__)) *__ptr \ ++ = (__typeof(__ptr))(addr); \ ++ __ptr->r; \ ++}) ++ ++#define put16(addr, val) \ ++do { \ ++ struct { uint16_t r; } __attribute__ ((__packed__)) *__ptr \ ++ = (__typeof(__ptr))(addr); \ ++ __ptr->r = val; \ ++} while (0) ++#define put32(addr, val) \ ++do { \ ++ struct { uint32_t r; } __attribute__ ((__packed__)) *__ptr \ ++ = (__typeof(__ptr))(addr); \ ++ __ptr->r = val; \ ++} while (0) + + /* Structure for alias definition. Simply two strings. */ + struct gconv_alias +diff --git a/iconv/loop.c b/iconv/loop.c +index 7193e8f20104bf84..851caa9d42d45a47 100644 +--- a/iconv/loop.c ++++ b/iconv/loop.c +@@ -58,75 +58,10 @@ + #include + #include + +-/* We have to provide support for machines which are not able to handled +- unaligned memory accesses. 
Some of the character encodings have +- representations with a fixed width of 2 or 4 bytes. But if we cannot +- access unaligned memory we still have to read byte-wise. */ + #undef FCTNAME2 + #if _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED +-/* We can handle unaligned memory access. */ +-# define get16(addr) *((const uint16_t *) (addr)) +-# define get32(addr) *((const uint32_t *) (addr)) +- +-/* We need no special support for writing values either. */ +-# define put16(addr, val) *((uint16_t *) (addr)) = (val) +-# define put32(addr, val) *((uint32_t *) (addr)) = (val) +- + # define FCTNAME2(name) name + #else +-/* Distinguish between big endian and little endian. */ +-# if __BYTE_ORDER == __LITTLE_ENDIAN +-# define get16(addr) \ +- (((const unsigned char *) (addr))[1] << 8 \ +- | ((const unsigned char *) (addr))[0]) +-# define get32(addr) \ +- (((((const unsigned char *) (addr))[3] << 8 \ +- | ((const unsigned char *) (addr))[2]) << 8 \ +- | ((const unsigned char *) (addr))[1]) << 8 \ +- | ((const unsigned char *) (addr))[0]) +- +-# define put16(addr, val) \ +- ({ uint16_t __val = (val); \ +- ((unsigned char *) (addr))[0] = __val; \ +- ((unsigned char *) (addr))[1] = __val >> 8; \ +- (void) 0; }) +-# define put32(addr, val) \ +- ({ uint32_t __val = (val); \ +- ((unsigned char *) (addr))[0] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[1] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[2] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[3] = __val; \ +- (void) 0; }) +-# else +-# define get16(addr) \ +- (((const unsigned char *) (addr))[0] << 8 \ +- | ((const unsigned char *) (addr))[1]) +-# define get32(addr) \ +- (((((const unsigned char *) (addr))[0] << 8 \ +- | ((const unsigned char *) (addr))[1]) << 8 \ +- | ((const unsigned char *) (addr))[2]) << 8 \ +- | ((const unsigned char *) (addr))[3]) +- +-# define put16(addr, val) \ +- ({ uint16_t __val = (val); \ +- ((unsigned char *) (addr))[1] = __val; \ +- ((unsigned char *) 
(addr))[0] = __val >> 8; \ +- (void) 0; }) +-# define put32(addr, val) \ +- ({ uint32_t __val = (val); \ +- ((unsigned char *) (addr))[3] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[2] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[1] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[0] = __val; \ +- (void) 0; }) +-# endif +- + # define FCTNAME2(name) name##_unaligned + #endif + #define FCTNAME(name) FCTNAME2(name) +@@ -352,10 +287,6 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step, + #if !defined DEFINE_UNALIGNED && !_STRING_ARCH_unaligned \ + && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \ + && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0 +-# undef get16 +-# undef get32 +-# undef put16 +-# undef put32 + # undef unaligned + + # define DEFINE_UNALIGNED +@@ -537,8 +468,4 @@ gconv_btowc (struct __gconv_step *step, unsigned char c) + #undef LOOP_NEED_STATE + #undef LOOP_NEED_FLAGS + #undef LOOP_NEED_DATA +-#undef get16 +-#undef get32 +-#undef put16 +-#undef put32 + #undef unaligned +diff --git a/iconv/skeleton.c b/iconv/skeleton.c +index 0ddc16ad1d7e0184..6992701808079ec2 100644 +--- a/iconv/skeleton.c ++++ b/iconv/skeleton.c +@@ -205,73 +205,6 @@ + #endif + + +-/* Define macros which can access unaligned buffers. These macros are +- supposed to be used only in code outside the inner loops. For the inner +- loops we have other definitions which allow optimized access. */ +-#if _STRING_ARCH_unaligned +-/* We can handle unaligned memory access. */ +-# define get16u(addr) *((const uint16_t *) (addr)) +-# define get32u(addr) *((const uint32_t *) (addr)) +- +-/* We need no special support for writing values either. */ +-# define put16u(addr, val) *((uint16_t *) (addr)) = (val) +-# define put32u(addr, val) *((uint32_t *) (addr)) = (val) +-#else +-/* Distinguish between big endian and little endian. 
*/ +-# if __BYTE_ORDER == __LITTLE_ENDIAN +-# define get16u(addr) \ +- (((const unsigned char *) (addr))[1] << 8 \ +- | ((const unsigned char *) (addr))[0]) +-# define get32u(addr) \ +- (((((const unsigned char *) (addr))[3] << 8 \ +- | ((const unsigned char *) (addr))[2]) << 8 \ +- | ((const unsigned char *) (addr))[1]) << 8 \ +- | ((const unsigned char *) (addr))[0]) +- +-# define put16u(addr, val) \ +- ({ uint16_t __val = (val); \ +- ((unsigned char *) (addr))[0] = __val; \ +- ((unsigned char *) (addr))[1] = __val >> 8; \ +- (void) 0; }) +-# define put32u(addr, val) \ +- ({ uint32_t __val = (val); \ +- ((unsigned char *) (addr))[0] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[1] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[2] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[3] = __val; \ +- (void) 0; }) +-# else +-# define get16u(addr) \ +- (((const unsigned char *) (addr))[0] << 8 \ +- | ((const unsigned char *) (addr))[1]) +-# define get32u(addr) \ +- (((((const unsigned char *) (addr))[0] << 8 \ +- | ((const unsigned char *) (addr))[1]) << 8 \ +- | ((const unsigned char *) (addr))[2]) << 8 \ +- | ((const unsigned char *) (addr))[3]) +- +-# define put16u(addr, val) \ +- ({ uint16_t __val = (val); \ +- ((unsigned char *) (addr))[1] = __val; \ +- ((unsigned char *) (addr))[0] = __val >> 8; \ +- (void) 0; }) +-# define put32u(addr, val) \ +- ({ uint32_t __val = (val); \ +- ((unsigned char *) (addr))[3] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[2] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[1] = __val; \ +- __val >>= 8; \ +- ((unsigned char *) (addr))[0] = __val; \ +- (void) 0; }) +-# endif +-#endif +- +- + /* For conversions from a fixed width character set to another fixed width + character set we can define RESET_INPUT_BUFFER in a very fast way. 
*/ + #if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE +diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c +index 220d8d1cf174fdd7..97e735c755ab8093 100644 +--- a/iconvdata/iso-2022-jp-3.c ++++ b/iconvdata/iso-2022-jp-3.c +@@ -93,7 +93,7 @@ enum + if (__glibc_likely (outbuf + 4 <= outend)) \ + { \ + /* Write out the last character. */ \ +- put32u (outbuf, ch); \ ++ put32 (outbuf, ch); \ + outbuf += 4; \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ +diff --git a/iconvdata/unicode.c b/iconvdata/unicode.c +index 525abe48b94118e5..ee19b2041c7d4fc5 100644 +--- a/iconvdata/unicode.c ++++ b/iconvdata/unicode.c +@@ -52,10 +52,10 @@ + return (inptr == inend \ + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); \ + \ +- if (get16u (inptr) == BOM) \ ++ if (get16 (inptr) == BOM) \ + /* Simply ignore the BOM character. */ \ + *inptrp = inptr += 2; \ +- else if (get16u (inptr) == BOM_OE) \ ++ else if (get16 (inptr) == BOM_OE) \ + { \ + data->__flags |= __GCONV_SWAP; \ + *inptrp = inptr += 2; \ +@@ -68,7 +68,7 @@ + if (__glibc_unlikely (outbuf + 2 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put16u (outbuf, BOM); \ ++ put16 (outbuf, BOM); \ + outbuf += 2; \ + } \ + swap = data->__flags & __GCONV_SWAP; +diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c +index 63cf43a09b4abf5d..87c59e9ecf1602ac 100644 +--- a/iconvdata/utf-16.c ++++ b/iconvdata/utf-16.c +@@ -56,10 +56,10 @@ + return (inptr == inend \ + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); \ + \ +- if (get16u (inptr) == BOM) \ ++ if (get16 (inptr) == BOM) \ + /* Simply ignore the BOM character. 
*/ \ + *inptrp = inptr += 2; \ +- else if (get16u (inptr) == BOM_OE) \ ++ else if (get16 (inptr) == BOM_OE) \ + { \ + data->__flags |= __GCONV_SWAP; \ + *inptrp = inptr += 2; \ +@@ -71,7 +71,7 @@ + if (__glibc_unlikely (outbuf + 2 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put16u (outbuf, BOM); \ ++ put16 (outbuf, BOM); \ + outbuf += 2; \ + } \ + } \ +diff --git a/iconvdata/utf-32.c b/iconvdata/utf-32.c +index 4fbd7bc18fa2d0c9..060f77230b0101fe 100644 +--- a/iconvdata/utf-32.c ++++ b/iconvdata/utf-32.c +@@ -52,10 +52,10 @@ + return (inptr == inend \ + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); \ + \ +- if (get32u (inptr) == BOM) \ ++ if (get32 (inptr) == BOM) \ + /* Simply ignore the BOM character. */ \ + *inptrp = inptr += 4; \ +- else if (get32u (inptr) == BOM_OE) \ ++ else if (get32 (inptr) == BOM_OE) \ + { \ + data->__flags |= __GCONV_SWAP; \ + *inptrp = inptr += 4; \ +@@ -69,7 +69,7 @@ + if (__glibc_unlikely (outbuf + 4 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put32u (outbuf, BOM); \ ++ put32 (outbuf, BOM); \ + outbuf += 4; \ + } \ + else if (__builtin_expect (data->__invocation_counter == 0, 0) \ +diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c +index 93895f9db8004f72..7d3de99f440a8ab4 100644 +--- a/sysdeps/s390/utf16-utf32-z9.c ++++ b/sysdeps/s390/utf16-utf32-z9.c +@@ -177,7 +177,7 @@ gconv_end (struct __gconv_step *data) + if (__glibc_unlikely (outbuf + 2 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put16u (outbuf, BOM_UTF16); \ ++ put16 (outbuf, BOM_UTF16); \ + outbuf += 2; \ + } \ + else \ +@@ -186,7 +186,7 @@ gconv_end (struct __gconv_step *data) + if (__glibc_unlikely (outbuf + 4 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put32u (outbuf, BOM_UTF32); \ ++ put32 (outbuf, BOM_UTF32); \ + outbuf += 4; \ + } \ + } +diff --git a/sysdeps/s390/utf8-utf16-z9.c b/sysdeps/s390/utf8-utf16-z9.c +index 1ff16b5bec4c10fd..615ebaac13a4c0a7 100644 +--- a/sysdeps/s390/utf8-utf16-z9.c ++++ 
b/sysdeps/s390/utf8-utf16-z9.c +@@ -217,7 +217,7 @@ gconv_end (struct __gconv_step *data) + if (__glibc_unlikely (outbuf + 2 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put16u (outbuf, BOM_UTF16); \ ++ put16 (outbuf, BOM_UTF16); \ + outbuf += 2; \ + } + +diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c +index f1f4d3c89e6469c1..364558c0e2bed517 100644 +--- a/sysdeps/s390/utf8-utf32-z9.c ++++ b/sysdeps/s390/utf8-utf32-z9.c +@@ -217,7 +217,7 @@ gconv_end (struct __gconv_step *data) + if (__glibc_unlikely (outbuf + 4 > outend)) \ + return __GCONV_FULL_OUTPUT; \ + \ +- put32u (outbuf, BOM); \ ++ put32 (outbuf, BOM); \ + outbuf += 4; \ + } + diff --git a/glibc-RHEL-162901-2.patch b/glibc-RHEL-162901-2.patch new file mode 100644 index 0000000..b31887c --- /dev/null +++ b/glibc-RHEL-162901-2.patch @@ -0,0 +1,694 @@ +commit 3e20ddade31d9c392d8ccf7ec902172f4bb01c2b +Author: Adhemerval Zanella +Date: Fri Feb 10 16:37:36 2023 -0300 + + iconv: Remove _STRING_ARCH_unaligned usage + + Use put/get macros __builtin_bswap32 instead. It allows to remove + the unaligned routines, the compiler will generate unaligned access + if the ABI allows it. + + Checked on x86_64-linux-gnu and i686-linux-gnu. + + Reviewed-by: Wilco Dijkstra + +diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c +index 882d3642e445b107..d368284411c79282 100644 +--- a/iconv/gconv_simple.c ++++ b/iconv/gconv_simple.c +@@ -87,69 +87,22 @@ internal_ucs4_loop (struct __gconv_step *step, + #if __BYTE_ORDER == __LITTLE_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; +- uint32_t *outptr32 = (uint32_t *) outptr; +- +- for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) +- *outptr32++ = bswap_32 (*(const uint32_t *) inptr); +- +- *inptrp = inptr; +- *outptrp = (unsigned char *) outptr32; +-#elif __BYTE_ORDER == __BIG_ENDIAN +- /* Simply copy the data. 
*/ +- *inptrp = inptr + n_convert * 4; +- *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +-#else +-# error "This endianness is not supported." +-#endif +- +- /* Determine the status. */ +- if (*inptrp == inend) +- result = __GCONV_EMPTY_INPUT; +- else if (*outptrp + 4 > outend) +- result = __GCONV_FULL_OUTPUT; +- else +- result = __GCONV_INCOMPLETE_INPUT; +- +- return result; +-} +- +-#if !_STRING_ARCH_unaligned +-static inline int +-__attribute ((always_inline)) +-internal_ucs4_loop_unaligned (struct __gconv_step *step, +- struct __gconv_step_data *step_data, +- const unsigned char **inptrp, +- const unsigned char *inend, +- unsigned char **outptrp, +- const unsigned char *outend, +- size_t *irreversible) +-{ +- const unsigned char *inptr = *inptrp; +- unsigned char *outptr = *outptrp; +- size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; +- int result; +- +-# if __BYTE_ORDER == __LITTLE_ENDIAN +- /* Sigh, we have to do some real work. */ +- size_t cnt; + + for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) + { +- outptr[0] = inptr[3]; +- outptr[1] = inptr[2]; +- outptr[2] = inptr[1]; +- outptr[3] = inptr[0]; ++ uint32_t val = get32 (inptr); ++ put32 (outptr, __builtin_bswap32 (val)); + } + + *inptrp = inptr; + *outptrp = outptr; +-# elif __BYTE_ORDER == __BIG_ENDIAN ++#elif __BYTE_ORDER == __BIG_ENDIAN + /* Simply copy the data. */ + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +-# else +-# error "This endianess is not supported." +-# endif ++#else ++# error "This endianness is not supported." ++#endif + + /* Determine the status. 
*/ + if (*inptrp == inend) +@@ -161,7 +114,6 @@ internal_ucs4_loop_unaligned (struct __gconv_step *step, + + return result; + } +-#endif + + + static inline int +@@ -243,12 +195,9 @@ ucs4_internal_loop (struct __gconv_step *step, + + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) + { +- uint32_t inval; +- ++ uint32_t inval = get32 (inptr); + #if __BYTE_ORDER == __LITTLE_ENDIAN +- inval = bswap_32 (*(const uint32_t *) inptr); +-#else +- inval = *(const uint32_t *) inptr; ++ inval = __builtin_bswap32 (inval); + #endif + + if (__glibc_unlikely (inval > 0x7fffffff)) +@@ -273,7 +222,7 @@ ucs4_internal_loop (struct __gconv_step *step, + return __gconv_mark_illegal_input (step_data); + } + +- *((uint32_t *) outptr) = inval; ++ put32 (outptr, inval); + outptr += sizeof (uint32_t); + } + +@@ -291,75 +240,6 @@ ucs4_internal_loop (struct __gconv_step *step, + return result; + } + +-#if !_STRING_ARCH_unaligned +-static inline int +-__attribute ((always_inline)) +-ucs4_internal_loop_unaligned (struct __gconv_step *step, +- struct __gconv_step_data *step_data, +- const unsigned char **inptrp, +- const unsigned char *inend, +- unsigned char **outptrp, +- const unsigned char *outend, +- size_t *irreversible) +-{ +- int flags = step_data->__flags; +- const unsigned char *inptr = *inptrp; +- unsigned char *outptr = *outptrp; +- int result; +- +- for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) +- { +- if (__glibc_unlikely (inptr[0] > 0x80)) +- { +- /* The value is too large. We don't try transliteration here since +- this is not an error because of the lack of possibilities to +- represent the result. This is a genuine bug in the input since +- UCS4 does not allow such values. */ +- if (irreversible == NULL) +- /* We are transliterating, don't try to correct anything. */ +- return __GCONV_ILLEGAL_INPUT; +- +- if (flags & __GCONV_IGNORE_ERRORS) +- { +- /* Just ignore this character. 
*/ +- ++*irreversible; +- continue; +- } +- +- *inptrp = inptr; +- *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; +- } +- +-# if __BYTE_ORDER == __LITTLE_ENDIAN +- outptr[3] = inptr[0]; +- outptr[2] = inptr[1]; +- outptr[1] = inptr[2]; +- outptr[0] = inptr[3]; +-# else +- outptr[0] = inptr[0]; +- outptr[1] = inptr[1]; +- outptr[2] = inptr[2]; +- outptr[3] = inptr[3]; +-# endif +- outptr += 4; +- } +- +- *inptrp = inptr; +- *outptrp = outptr; +- +- /* Determine the status. */ +- if (*inptrp == inend) +- result = __GCONV_EMPTY_INPUT; +- else if (*outptrp + 4 > outend) +- result = __GCONV_FULL_OUTPUT; +- else +- result = __GCONV_INCOMPLETE_INPUT; +- +- return result; +-} +-#endif +- + + static inline int + __attribute ((always_inline)) +@@ -454,11 +334,12 @@ internal_ucs4le_loop (struct __gconv_step *step, + #if __BYTE_ORDER == __BIG_ENDIAN + /* Sigh, we have to do some real work. */ + size_t cnt; +- uint32_t *outptr32 = (uint32_t *) outptr; + +- for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) +- *outptr32++ = bswap_32 (*(const uint32_t *) inptr); +- outptr = (unsigned char *) outptr32; ++ for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) ++ { ++ uint32_t val = get32 (inptr); ++ put32 (outptr, __builtin_bswap32 (val)); ++ } + + *inptrp = inptr; + *outptrp = outptr; +@@ -481,59 +362,6 @@ internal_ucs4le_loop (struct __gconv_step *step, + return result; + } + +-#if !_STRING_ARCH_unaligned +-static inline int +-__attribute ((always_inline)) +-internal_ucs4le_loop_unaligned (struct __gconv_step *step, +- struct __gconv_step_data *step_data, +- const unsigned char **inptrp, +- const unsigned char *inend, +- unsigned char **outptrp, +- const unsigned char *outend, +- size_t *irreversible) +-{ +- const unsigned char *inptr = *inptrp; +- unsigned char *outptr = *outptrp; +- size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; +- int result; +- +-# if __BYTE_ORDER == __BIG_ENDIAN +- /* Sigh, we have to do some real work. 
*/ +- size_t cnt; +- +- for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4) +- { +- outptr[0] = inptr[3]; +- outptr[1] = inptr[2]; +- outptr[2] = inptr[1]; +- outptr[3] = inptr[0]; +- } +- +- *inptrp = inptr; +- *outptrp = outptr; +-# elif __BYTE_ORDER == __LITTLE_ENDIAN +- /* Simply copy the data. */ +- *inptrp = inptr + n_convert * 4; +- *outptrp = __mempcpy (outptr, inptr, n_convert * 4); +-# else +-# error "This endianess is not supported." +-# endif +- +- /* Determine the status. */ +- if (*inptrp == inend) +- result = __GCONV_EMPTY_INPUT; +- else if (*inptrp + 4 > inend) +- result = __GCONV_INCOMPLETE_INPUT; +- else +- { +- assert (*outptrp + 4 > outend); +- result = __GCONV_FULL_OUTPUT; +- } +- +- return result; +-} +-#endif +- + + static inline int + __attribute ((always_inline)) +@@ -613,12 +441,9 @@ ucs4le_internal_loop (struct __gconv_step *step, + + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) + { +- uint32_t inval; +- ++ uint32_t inval = get32 (inptr); + #if __BYTE_ORDER == __BIG_ENDIAN +- inval = bswap_32 (*(const uint32_t *) inptr); +-#else +- inval = *(const uint32_t *) inptr; ++ inval = __builtin_bswap32 (inval); + #endif + + if (__glibc_unlikely (inval > 0x7fffffff)) +@@ -643,7 +468,7 @@ ucs4le_internal_loop (struct __gconv_step *step, + return __gconv_mark_illegal_input (step_data); + } + +- *((uint32_t *) outptr) = inval; ++ put32 (outptr, inval); + outptr += sizeof (uint32_t); + } + +@@ -664,79 +489,6 @@ ucs4le_internal_loop (struct __gconv_step *step, + return result; + } + +-#if !_STRING_ARCH_unaligned +-static inline int +-__attribute ((always_inline)) +-ucs4le_internal_loop_unaligned (struct __gconv_step *step, +- struct __gconv_step_data *step_data, +- const unsigned char **inptrp, +- const unsigned char *inend, +- unsigned char **outptrp, +- const unsigned char *outend, +- size_t *irreversible) +-{ +- int flags = step_data->__flags; +- const unsigned char *inptr = *inptrp; +- unsigned char *outptr = *outptrp; 
+- int result; +- +- for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) +- { +- if (__glibc_unlikely (inptr[3] > 0x80)) +- { +- /* The value is too large. We don't try transliteration here since +- this is not an error because of the lack of possibilities to +- represent the result. This is a genuine bug in the input since +- UCS4 does not allow such values. */ +- if (irreversible == NULL) +- /* We are transliterating, don't try to correct anything. */ +- return __GCONV_ILLEGAL_INPUT; +- +- if (flags & __GCONV_IGNORE_ERRORS) +- { +- /* Just ignore this character. */ +- ++*irreversible; +- continue; +- } +- +- *inptrp = inptr; +- *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; +- } +- +-# if __BYTE_ORDER == __BIG_ENDIAN +- outptr[3] = inptr[0]; +- outptr[2] = inptr[1]; +- outptr[1] = inptr[2]; +- outptr[0] = inptr[3]; +-# else +- outptr[0] = inptr[0]; +- outptr[1] = inptr[1]; +- outptr[2] = inptr[2]; +- outptr[3] = inptr[3]; +-# endif +- +- outptr += 4; +- } +- +- *inptrp = inptr; +- *outptrp = outptr; +- +- /* Determine the status. */ +- if (*inptrp == inend) +- result = __GCONV_EMPTY_INPUT; +- else if (*inptrp + 4 > inend) +- result = __GCONV_INCOMPLETE_INPUT; +- else +- { +- assert (*outptrp + 4 > outend); +- result = __GCONV_FULL_OUTPUT; +- } +- +- return result; +-} +-#endif +- + + static inline int + __attribute ((always_inline)) +diff --git a/iconv/loop.c b/iconv/loop.c +index 851caa9d42d45a47..696caa212aaec4bd 100644 +--- a/iconv/loop.c ++++ b/iconv/loop.c +@@ -59,12 +59,7 @@ + #include + + #undef FCTNAME2 +-#if _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED +-# define FCTNAME2(name) name +-#else +-# define FCTNAME2(name) name##_unaligned +-#endif +-#define FCTNAME(name) FCTNAME2(name) ++#define FCTNAME(name) name + + + /* We need at least one byte for the next round. */ +@@ -282,20 +277,9 @@ FCTNAME (LOOPFCT) (struct __gconv_step *step, + } + + +-/* Include the file a second time to define the function to handle +- unaligned access. 
*/ +-#if !defined DEFINE_UNALIGNED && !_STRING_ARCH_unaligned \ +- && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \ +- && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0 +-# undef unaligned +- +-# define DEFINE_UNALIGNED +-# include "loop.c" +-# undef DEFINE_UNALIGNED +-#else +-# if MAX_NEEDED_INPUT > 1 +-# define SINGLE(fct) SINGLE2 (fct) +-# define SINGLE2(fct) fct##_single ++#if MAX_NEEDED_INPUT > 1 ++# define SINGLE(fct) SINGLE2 (fct) ++# define SINGLE2(fct) fct##_single + static inline int + __attribute ((always_inline)) + SINGLE(LOOPFCT) (struct __gconv_step *step, +@@ -305,37 +289,37 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, + size_t *irreversible EXTRA_LOOP_DECLS) + { + mbstate_t *state = step_data->__statep; +-# ifdef LOOP_NEED_FLAGS ++# ifdef LOOP_NEED_FLAGS + int flags = step_data->__flags; +-# endif +-# ifdef LOOP_NEED_DATA ++# endif ++# ifdef LOOP_NEED_DATA + void *data = step->__data; +-# endif ++# endif + int result = __GCONV_OK; + unsigned char bytebuf[MAX_NEEDED_INPUT]; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + size_t inlen; + +-# ifdef INIT_PARAMS ++# ifdef INIT_PARAMS + INIT_PARAMS; +-# endif ++# endif + +-# ifdef UNPACK_BYTES ++# ifdef UNPACK_BYTES + UNPACK_BYTES +-# else ++# else + /* Add the bytes from the state to the input buffer. */ + assert ((state->__count & 7) <= sizeof (state->__value)); + for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen) + bytebuf[inlen] = state->__value.__wchb[inlen]; +-# endif ++# endif + + /* Are there enough bytes in the input buffer? */ + if (MIN_NEEDED_INPUT > 1 + && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0)) + { + *inptrp = inend; +-# ifdef STORE_REST ++# ifdef STORE_REST + + /* Building with -O3 GCC emits a `array subscript is above array + bounds' warning. GCC BZ #64739 has been opened for this. 
*/ +@@ -350,14 +334,14 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, + inend = &bytebuf[inlen]; + + STORE_REST +-# else ++# else + /* We don't have enough input for another complete input + character. */ + size_t inlen_after = inlen + (inend - inptr); + assert (inlen_after <= sizeof (state->__value.__wchb)); + for (; inlen < inlen_after; inlen++) + state->__value.__wchb[inlen] = *inptr++; +-# endif ++# endif + + return __GCONV_INCOMPLETE_INPUT; + } +@@ -403,11 +387,11 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, + result = __GCONV_OK; + + /* Clear the state buffer. */ +-# ifdef CLEAR_STATE ++# ifdef CLEAR_STATE + CLEAR_STATE; +-# else ++# else + state->__count &= ~7; +-# endif ++# endif + } + else if (result == __GCONV_INCOMPLETE_INPUT) + { +@@ -416,11 +400,11 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, + assert (inend != &bytebuf[MAX_NEEDED_INPUT]); + + *inptrp += inend - bytebuf - (state->__count & 7); +-# ifdef STORE_REST ++# ifdef STORE_REST + inptrp = &inptr; + + STORE_REST +-# else ++# else + /* We don't have enough input for another complete input + character. */ + assert (inend - inptr > (state->__count & ~7)); +@@ -429,14 +413,13 @@ SINGLE(LOOPFCT) (struct __gconv_step *step, + for (inlen = 0; inlen < inend - inptr; inlen++) + state->__value.__wchb[inlen] = inptr[inlen]; + inptr = inend; +-# endif ++# endif + } + + return result; + } +-# undef SINGLE +-# undef SINGLE2 +-# endif ++# undef SINGLE ++# undef SINGLE2 + + + # ifdef ONEBYTE_BODY +@@ -468,4 +451,3 @@ gconv_btowc (struct __gconv_step *step, unsigned char c) + #undef LOOP_NEED_STATE + #undef LOOP_NEED_FLAGS + #undef LOOP_NEED_DATA +-#undef unaligned +diff --git a/iconv/skeleton.c b/iconv/skeleton.c +index 6992701808079ec2..e7933ae333a5eae5 100644 +--- a/iconv/skeleton.c ++++ b/iconv/skeleton.c +@@ -451,33 +451,6 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + size_t lirreversible = 0; + size_t *lirreversiblep = irreversible ? 
&lirreversible : NULL; + +- /* The following assumes that encodings, which have a variable length +- what might unalign a buffer even though it is an aligned in the +- beginning, either don't have the minimal number of bytes as a divisor +- of the maximum length or have a minimum length of 1. This is true +- for all known and supported encodings. +- We use && instead of || to combine the subexpression for the FROM +- encoding and for the TO encoding, because usually one of them is +- INTERNAL, for which the subexpression evaluates to 1, but INTERNAL +- buffers are always aligned correctly. */ +-#define POSSIBLY_UNALIGNED \ +- (!_STRING_ARCH_unaligned \ +- && (((FROM_LOOP_MIN_NEEDED_FROM != 1 \ +- && FROM_LOOP_MAX_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_FROM == 0) \ +- && (FROM_LOOP_MIN_NEEDED_TO != 1 \ +- && FROM_LOOP_MAX_NEEDED_TO % FROM_LOOP_MIN_NEEDED_TO == 0)) \ +- || ((TO_LOOP_MIN_NEEDED_FROM != 1 \ +- && TO_LOOP_MAX_NEEDED_FROM % TO_LOOP_MIN_NEEDED_FROM == 0) \ +- && (TO_LOOP_MIN_NEEDED_TO != 1 \ +- && TO_LOOP_MAX_NEEDED_TO % TO_LOOP_MIN_NEEDED_TO == 0)))) +-#if POSSIBLY_UNALIGNED +- int unaligned; +-# define GEN_unaligned(name) GEN_unaligned2 (name) +-# define GEN_unaligned2(name) name##_unaligned +-#else +-# define unaligned 0 +-#endif +- + #ifdef PREPARE_LOOP + PREPARE_LOOP + #endif +@@ -517,18 +490,6 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + } + #endif + +-#if POSSIBLY_UNALIGNED +- unaligned = +- ((FROM_DIRECTION +- && ((uintptr_t) inptr % FROM_LOOP_MIN_NEEDED_FROM != 0 +- || ((data->__flags & __GCONV_IS_LAST) +- && (uintptr_t) outbuf % FROM_LOOP_MIN_NEEDED_TO != 0))) +- || (!FROM_DIRECTION +- && (((data->__flags & __GCONV_IS_LAST) +- && (uintptr_t) outbuf % TO_LOOP_MIN_NEEDED_TO != 0) +- || (uintptr_t) inptr % TO_LOOP_MIN_NEEDED_FROM != 0))); +-#endif +- + while (1) + { + /* Remember the start value for this round. 
*/ +@@ -546,34 +507,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + SAVE_RESET_STATE (1); + #endif + +- if (__glibc_likely (!unaligned)) +- { +- if (FROM_DIRECTION) +- /* Run the conversion loop. */ +- status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, +- lirreversiblep EXTRA_LOOP_ARGS); +- else +- /* Run the conversion loop. */ +- status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, +- lirreversiblep EXTRA_LOOP_ARGS); +- } +-#if POSSIBLY_UNALIGNED ++ if (FROM_DIRECTION) ++ /* Run the conversion loop. */ ++ status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, ++ lirreversiblep EXTRA_LOOP_ARGS); + else +- { +- if (FROM_DIRECTION) +- /* Run the conversion loop. */ +- status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend, +- &outbuf, outend, +- lirreversiblep +- EXTRA_LOOP_ARGS); +- else +- /* Run the conversion loop. */ +- status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend, +- &outbuf, outend, +- lirreversiblep +- EXTRA_LOOP_ARGS); +- } +-#endif ++ /* Run the conversion loop. */ ++ status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, ++ lirreversiblep EXTRA_LOOP_ARGS); + + /* If we were called as part of an error handling module we + don't do anything else here. */ +@@ -638,41 +579,18 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, + SAVE_RESET_STATE (0); + #endif + +- if (__glibc_likely (!unaligned)) +- { +- if (FROM_DIRECTION) +- /* Run the conversion loop. */ +- nstatus = FROM_LOOP (step, data, inptrp, inend, +- &outbuf, outerr, +- lirreversiblep +- EXTRA_LOOP_ARGS); +- else +- /* Run the conversion loop. */ +- nstatus = TO_LOOP (step, data, inptrp, inend, +- &outbuf, outerr, +- lirreversiblep +- EXTRA_LOOP_ARGS); +- } +-#if POSSIBLY_UNALIGNED ++ if (FROM_DIRECTION) ++ /* Run the conversion loop. 
*/ ++ nstatus = FROM_LOOP (step, data, inptrp, inend, ++ &outbuf, outerr, ++ lirreversiblep ++ EXTRA_LOOP_ARGS); + else +- { +- if (FROM_DIRECTION) +- /* Run the conversion loop. */ +- nstatus = GEN_unaligned (FROM_LOOP) (step, data, +- inptrp, inend, +- &outbuf, +- outerr, +- lirreversiblep +- EXTRA_LOOP_ARGS); +- else +- /* Run the conversion loop. */ +- nstatus = GEN_unaligned (TO_LOOP) (step, data, +- inptrp, inend, +- &outbuf, outerr, +- lirreversiblep +- EXTRA_LOOP_ARGS); +- } +-#endif ++ /* Run the conversion loop. */ ++ nstatus = TO_LOOP (step, data, inptrp, inend, ++ &outbuf, outerr, ++ lirreversiblep ++ EXTRA_LOOP_ARGS); + + /* We must run out of output buffer space in this + rerun. */ diff --git a/glibc-RHEL-162901-3.patch b/glibc-RHEL-162901-3.patch new file mode 100644 index 0000000..9294f79 --- /dev/null +++ b/glibc-RHEL-162901-3.patch @@ -0,0 +1,326 @@ +commit d6f08d1cf027f4eb2ba289a6cc66853722d4badc +Author: Florian Weimer +Date: Thu Apr 16 19:13:43 2026 +0200 + + Use pending character state in IBM1390, IBM1399 character sets (CVE-2026-4046) + + Follow the example in iso-2022-jp-3.c and use the __count state + variable to store the pending character. This avoids restarting + the conversion if the output buffer ends between two 4-byte UCS-4 + code points, so that the assert reported in the bug can no longer + happen. + + Even though the fix is applied to ibm1364.c, the change is only + effective for the two HAS_COMBINED codecs for IBM1390, IBM1399. + + The test case was mostly auto-generated using + claude-4.6-opus-high-thinking, and composer-2-fast shows up in the + log as well. During review, gpt-5.4-xhigh flagged that the original + version of the test case was not exercising the new character + flush logic. + + This fixes bug 33980. 
+ + Assisted-by: LLM + Reviewed-by: Carlos O'Donell + +Conflicts: + iconvdata/Makefile + (fixup context) + +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index 25bd004e7f92a994..798f9be86d376aeb 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ + bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ +- bug-iconv13 bug-iconv14 bug-iconv15 tst-iconv-iso-2022-cn-ext ++ bug-iconv13 bug-iconv14 bug-iconv15 \ ++ tst-iconv-iso-2022-cn-ext tst-bug33980 + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -332,6 +333,8 @@ $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) + $(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) ++$(objpfx)tst-bug33980.out: $(addprefix $(objpfx), $(gconv-modules)) \ ++ $(addprefix $(objpfx),$(modules.so)) + + $(objpfx)iconv-test.out: run-iconv-test.sh \ + $(addprefix $(objpfx), $(gconv-modules)) \ +diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +index 18f1aa4f6da4b021..c3e9b7c881eaa1d9 100644 +--- a/iconvdata/ibm1364.c ++++ b/iconvdata/ibm1364.c +@@ -68,12 +68,29 @@ + + /* Since this is a stateful encoding we have to provide code which resets + the output state to the initial state. This has to be done during the +- flushing. */ ++ flushing. For the to-internal direction (FROM_DIRECTION is true), ++ there may be a pending character that needs flushing. 
*/ + #define EMIT_SHIFT_TO_INIT \ + if ((data->__statep->__count & ~7) != sb) \ + { \ + if (FROM_DIRECTION) \ +- data->__statep->__count &= 7; \ ++ { \ ++ uint32_t ch = data->__statep->__count >> 7; \ ++ if (__glibc_unlikely (ch != 0)) \ ++ { \ ++ if (__glibc_unlikely (outend - outbuf < 4)) \ ++ status = __GCONV_FULL_OUTPUT; \ ++ else \ ++ { \ ++ put32 (outbuf, ch); \ ++ outbuf += 4; \ ++ /* Clear character and db bit. */ \ ++ data->__statep->__count &= 7; \ ++ } \ ++ } \ ++ else \ ++ data->__statep->__count &= 7; \ ++ } \ + else \ + { \ + /* We are not in the initial state. To switch back we have \ +@@ -100,11 +117,13 @@ + *curcsp = save_curcs + + +-/* Current codeset type. */ ++/* Current codeset type. The bit is stored in the __count variable of ++ the conversion state. If the db bit is set, bit 7 and above store ++ a pending UCS-4 code point if non-zero. */ + enum + { +- sb = 0, +- db = 64 ++ sb = 0, /* Single byte mode. */ ++ db = 64 /* Double byte mode. */ + }; + + +@@ -120,21 +139,29 @@ enum + } \ + else \ + { \ +- /* This is a combined character. Make sure we have room. */ \ +- if (__glibc_unlikely (outptr + 8 > outend)) \ +- { \ +- result = __GCONV_FULL_OUTPUT; \ +- break; \ +- } \ +- \ + const struct divide *cmbp \ + = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN]; \ + assert (cmbp->res1 != 0 && cmbp->res2 != 0); \ + \ + put32 (outptr, cmbp->res1); \ + outptr += 4; \ +- put32 (outptr, cmbp->res2); \ +- outptr += 4; \ ++ \ ++ /* See whether we have room for the second character. */ \ ++ if (outend - outptr >= 4) \ ++ { \ ++ put32 (outptr, cmbp->res2); \ ++ outptr += 4; \ ++ } \ ++ else \ ++ { \ ++ /* Otherwise store only the first character now, and \ ++ put the second one into the queue. */ \ ++ curcs |= cmbp->res2 << 7; \ ++ inptr += 2; \ ++ /* Tell the caller why we terminate the loop. 
*/ \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ + } \ + } + #else +@@ -154,7 +181,20 @@ enum + #define LOOPFCT FROM_LOOP + #define BODY \ + { \ +- uint32_t ch = *inptr; \ ++ uint32_t ch; \ ++ \ ++ ch = curcs >> 7; \ ++ if (__glibc_unlikely (ch != 0)) \ ++ { \ ++ put32 (outptr, ch); \ ++ outptr += 4; \ ++ /* Remove the pending character, but preserve state bits. */ \ ++ curcs &= (1 << 7) - 1; \ ++ continue; \ ++ } \ ++ \ ++ /* Otherwise read the next input byte. */ \ ++ ch = *inptr; \ + \ + if (__builtin_expect (ch, 0) == SO) \ + { \ +diff --git a/iconvdata/tst-bug33980.c b/iconvdata/tst-bug33980.c +new file mode 100644 +index 0000000000000000..c9693e0efebe4eae +--- /dev/null ++++ b/iconvdata/tst-bug33980.c +@@ -0,0 +1,153 @@ ++/* Test for bug 33980: combining characters in IBM1390/IBM1399. ++ Copyright (C) 2026 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Run iconv in a loop with a small output buffer of OUTBUFSIZE bytes ++ starting at OUTBUF. OUTBUF should be right before an unmapped page ++ so that writing past the end will fault. Skip SHIFT bytes at the ++ start of the input and output, to exercise different buffer ++ alignment. 
TRUNCATE indicates skipped bytes at the end of ++ input (0 and 1 are valid). */ ++static void ++test_one (const char *encoding, unsigned int shift, unsigned int truncate, ++ char *outbuf, size_t outbufsize) ++{ ++ /* In IBM1390 and IBM1399, the DBCS code 0xECB5 expands to two ++ Unicode code points when translated. */ ++ static char input[] = ++ { ++ /* 8 letters X. */ ++ 0xe7, 0xe7, 0xe7, 0xe7, 0xe7, 0xe7, 0xe7, 0xe7, ++ /* SO, 0xECB5, SI: shift to DBCS, special character, shift back. */ ++ 0x0e, 0xec, 0xb5, 0x0f ++ }; ++ ++ /* Expected output after UTF-8 conversion. */ ++ static char expected[] = ++ { ++ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', ++ /* U+304B (HIRAGANA LETTER KA). */ ++ 0xe3, 0x81, 0x8b, ++ /* U+309A (COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK). */ ++ 0xe3, 0x82, 0x9a ++ }; ++ ++ iconv_t cd = iconv_open ("UTF-8", encoding); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ char result_storage[64]; ++ struct alloc_buffer result_buf ++ = alloc_buffer_create (result_storage, sizeof (result_storage)); ++ ++ char *inptr = &input[shift]; ++ size_t inleft = sizeof (input) - shift - truncate; ++ ++ while (inleft > 0) ++ { ++ char *outptr = outbuf; ++ size_t outleft = outbufsize; ++ size_t inleft_before = inleft; ++ ++ size_t ret = iconv (cd, &inptr, &inleft, &outptr, &outleft); ++ size_t produced = outptr - outbuf; ++ alloc_buffer_copy_bytes (&result_buf, outbuf, produced); ++ ++ if (ret == (size_t) -1 && errno == E2BIG) ++ { ++ if (produced == 0 && inleft == inleft_before) ++ { ++ /* Output buffer too small to make progress. This is ++ expected for very small output buffer sizes. */ ++ TEST_VERIFY_EXIT (outbufsize < 3); ++ break; ++ } ++ continue; ++ } ++ if (ret == (size_t) -1) ++ FAIL_EXIT1 ("%s (outbufsize %zu): iconv: %m", encoding, outbufsize); ++ break; ++ } ++ ++ /* Flush any pending state (e.g. a buffered combined character).
++ With outbufsize < 3, we could not store the first character, so ++ the second character did not become pending, and there is nothing ++ to flush. */ ++ { ++ char *outptr = outbuf; ++ size_t outleft = outbufsize; ++ ++ size_t ret = iconv (cd, NULL, NULL, &outptr, &outleft); ++ TEST_VERIFY_EXIT (ret == 0); ++ size_t produced = outptr - outbuf; ++ alloc_buffer_copy_bytes (&result_buf, outbuf, produced); ++ ++ /* Second flush does not provide more data. */ ++ outptr = outbuf; ++ outleft = outbufsize; ++ ret = iconv (cd, NULL, NULL, &outptr, &outleft); ++ TEST_VERIFY_EXIT (ret == 0); ++ TEST_VERIFY (outptr == outbuf); ++ } ++ ++ TEST_VERIFY_EXIT (!alloc_buffer_has_failed (&result_buf)); ++ size_t result_used ++ = sizeof (result_storage) - alloc_buffer_size (&result_buf); ++ ++ if (outbufsize >= 3) ++ { ++ TEST_COMPARE (inleft, 0); ++ TEST_COMPARE (result_used, sizeof (expected) - shift); ++ TEST_COMPARE_BLOB (result_storage, result_used, ++ &expected[shift], sizeof (expected) - shift); ++ } ++ else ++ /* If the buffer is too small, only the leading X could be converted. */ ++ TEST_COMPARE (result_used, 8 - shift); ++ ++ TEST_VERIFY_EXIT (iconv_close (cd) == 0); ++} ++ ++static int ++do_test (void) ++{ ++ struct support_next_to_fault ntf ++ = support_next_to_fault_allocate (8); ++ ++ for (int shift = 0; shift <= 8; ++shift) ++ for (int truncate = 0; truncate < 2; ++truncate) ++ for (size_t outbufsize = 1; outbufsize <= 8; outbufsize++) ++ { ++ char *outbuf = ntf.buffer + ntf.length - outbufsize; ++ test_one ("IBM1390", shift, truncate, outbuf, outbufsize); ++ test_one ("IBM1399", shift, truncate, outbuf, outbufsize); ++ } ++ ++ support_next_to_fault_free (&ntf); ++ return 0; ++} ++ ++#include