From 1ae6242e8b57e0d4458d2ad8b03e78270964bdfb Mon Sep 17 00:00:00 2001 From: Patsy Griffin Date: Wed, 20 Nov 2024 21:09:16 -0500 Subject: [PATCH] iconv: Support in-place conversions (RHEL-1915) Resolves: RHEL-1915 --- glibc-RHEL-1915-1.patch | 337 +++++++++++++ glibc-RHEL-1915-2.patch | 218 ++++++++ glibc-RHEL-1915-3.patch | 91 ++++ glibc-RHEL-1915-4.patch | 62 +++ glibc-RHEL-1915-5.patch | 1064 +++++++++++++++++++++++++++++++++++++++ glibc-RHEL-1915-6.patch | 715 ++++++++++++++++++++++++++ glibc-RHEL-1915-7.patch | 41 ++ glibc-RHEL-1915-8.patch | 323 ++++++++++++ glibc-RHEL-1915-9.patch | 37 ++ glibc.spec | 14 +- 10 files changed, 2901 insertions(+), 1 deletion(-) create mode 100644 glibc-RHEL-1915-1.patch create mode 100644 glibc-RHEL-1915-2.patch create mode 100644 glibc-RHEL-1915-3.patch create mode 100644 glibc-RHEL-1915-4.patch create mode 100644 glibc-RHEL-1915-5.patch create mode 100644 glibc-RHEL-1915-6.patch create mode 100644 glibc-RHEL-1915-7.patch create mode 100644 glibc-RHEL-1915-8.patch create mode 100644 glibc-RHEL-1915-9.patch diff --git a/glibc-RHEL-1915-1.patch b/glibc-RHEL-1915-1.patch new file mode 100644 index 0000000..8a5b994 --- /dev/null +++ b/glibc-RHEL-1915-1.patch @@ -0,0 +1,337 @@ +commit f58a8c1c15d8b5d8a08e8553f82867202b88a5cc +Author: Paul Pluzhnikov +Date: Sat May 27 06:48:33 2023 +0000 + + Fix misspellings in iconv/ and iconvdata/ -- BZ 25337 + + All the changes are in comments or '#error' messages. + Applying this commit results in bit-identical rebuild of iconvdata/*.so + + Reviewed-by: Florian Weimer + +diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c +index 5696058298..0cf3226be6 100644 +--- a/iconv/gconv_charset.c ++++ b/iconv/gconv_charset.c +@@ -181,10 +181,10 @@ __gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, + implementation has always handled them. Only suffixes in the tocode are + processed and handled. 
The reality is that invalid input in the input + character set should only be ignored if the fromcode specifies IGNORE. +- The current implementation ignores invalid intput in the input character ++ The current implementation ignores invalid input in the input character + set if the tocode contains IGNORE. We preserve this behavior for + backwards compatibility. In the future we may split the handling of +- IGNORE to allow a finer grained specification of ignorning invalid input ++ IGNORE to allow a finer grained specification of ignoring invalid input + and/or ignoring invalid output. */ + conv_spec->translit = ptc.translit; + conv_spec->ignore = ptc.ignore; +diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h +index 00744aad56..07815b0eee 100644 +--- a/iconv/gconv_charset.h ++++ b/iconv/gconv_charset.h +@@ -48,7 +48,7 @@ + + + /* This function copies in-order, characters from the source 's' that are +- either alpha-numeric or one in one of these: "_-.,:/" - into the destination ++ either alphanumeric or one in one of these: "_-.,:/" - into the destination + 'wp' while dropping all other characters. In the process, it converts all + alphabetical characters to upper case. It then appends up to two '/' + characters so that the total number of '/'es in the destination is 2. */ +diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c +index c76011d6bc..ee9e97e1bd 100644 +--- a/iconv/gconv_conf.c ++++ b/iconv/gconv_conf.c +@@ -153,7 +153,7 @@ static void + add_alias (char *rp) + { + /* We now expect two more string. The strings are normalized +- (converted to UPPER case) and strored in the alias database. */ ++ (converted to UPPER case) and stored in the alias database. 
*/ + char *from, *to, *wp; + + while (__isspace_l (*rp, _nl_C_locobj_ptr)) +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index 4b247a815f..19d042faff 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -172,7 +172,7 @@ __libc_lock_define (extern, __gconv_lock attribute_hidden) + }) + + +-/* Return in *HANDLE, a decriptor for the transformation. The function expects ++/* Return in *HANDLE, a descriptor for the transformation. The function expects + the specification of the transformation in the structure pointed to by + CONV_SPEC. It only reads *CONV_SPEC and does not take ownership of it. */ + extern int __gconv_open (struct gconv_spec *conv_spec, +diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c +index c60cffad4c..e936e171d7 100644 +--- a/iconv/gconv_simple.c ++++ b/iconv/gconv_simple.c +@@ -56,7 +56,7 @@ __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c) + + /* Transform from the internal, UCS4-like format, to UCS4. The + difference between the internal ucs4 format and the real UCS4 +- format is, if any, the endianess. The Unicode/ISO 10646 says that ++ format is, if any, the endianness. The Unicode/ISO 10646 says that + unless some higher protocol specifies it differently, the byte + order is big endian.*/ + #define DEFINE_INIT 0 +@@ -100,7 +100,7 @@ internal_ucs4_loop (struct __gconv_step *step, + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); + #else +-# error "This endianess is not supported." ++# error "This endianness is not supported." + #endif + + /* Determine the status. */ +@@ -153,7 +153,7 @@ internal_ucs4_loop_single (struct __gconv_step *step, + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; + #else +-# error "This endianess is not supported." ++# error "This endianness is not supported." 
+ #endif + *outptrp += 4; + +@@ -347,7 +347,7 @@ internal_ucs4le_loop (struct __gconv_step *step, + *inptrp = inptr + n_convert * 4; + *outptrp = __mempcpy (outptr, inptr, n_convert * 4); + #else +-# error "This endianess is not supported." ++# error "This endianness is not supported." + #endif + + /* Determine the status. */ +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 1d90938d71..bee898c63c 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -537,7 +537,7 @@ incomplete character or shift sequence at end of buffer")); + static int + process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) + { +- /* we have a problem with reading from a desriptor since we must not ++ /* we have a problem with reading from a descriptor since we must not + provide the iconv() function an incomplete character or shift + sequence at the end of the buffer. Since we have to deal with + arbitrary encodings we must read the whole text in a buffer and +diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c +index a319e2f762..f3f4baa4e7 100644 +--- a/iconv/iconvconfig.c ++++ b/iconv/iconvconfig.c +@@ -445,7 +445,7 @@ static void + add_alias (char *rp) + { + /* We now expect two more string. The strings are normalized +- (converted to UPPER case) and strored in the alias database. */ ++ (converted to UPPER case) and stored in the alias database. */ + char *from; + char *to; + char *wp; +diff --git a/iconvdata/bug-iconv8.c b/iconvdata/bug-iconv8.c +index e32d891b5c..1ebb674c91 100644 +--- a/iconvdata/bug-iconv8.c ++++ b/iconvdata/bug-iconv8.c +@@ -29,7 +29,7 @@ do_test (void) + /* + * result: -1 84 0 0 (84=EILSEQ) + * +- * Error is returnd but inbuf is consumed. ++ * Error is returned but inbuf is consumed. + * + * \x83\xd9 is valid shift-jis sequence but no character is assigned + * to it. 
+diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +index 4d6ec71139..5203f30e79 100644 +--- a/iconvdata/ibm1364.c ++++ b/iconvdata/ibm1364.c +@@ -91,7 +91,7 @@ + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_curcs = *curcsp; \ +diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c +index 2939d4d29e..fe2fe1f15b 100644 +--- a/iconvdata/ibm930.c ++++ b/iconvdata/ibm930.c +@@ -80,7 +80,7 @@ + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_curcs = *curcsp; \ +diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c +index 95935b8b36..4db0699a68 100644 +--- a/iconvdata/ibm933.c ++++ b/iconvdata/ibm933.c +@@ -79,7 +79,7 @@ + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_curcs = *curcsp; \ +diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c +index 1d8240a758..3c3d697a24 100644 +--- a/iconvdata/ibm935.c ++++ b/iconvdata/ibm935.c +@@ -80,7 +80,7 @@ + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_curcs = *curcsp; \ +diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c +index 9e02aba122..1586036c1e 100644 +--- a/iconvdata/ibm937.c ++++ b/iconvdata/ibm937.c +@@ -80,7 +80,7 @@ + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. 
*/ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_curcs = *curcsp; \ +diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c +index ce719cb29f..9b053c696e 100644 +--- a/iconvdata/ibm939.c ++++ b/iconvdata/ibm939.c +@@ -80,7 +80,7 @@ + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_curcs = *curcsp; \ +diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c +index d0c3ca4f03..36727f0865 100644 +--- a/iconvdata/iso-2022-cn-ext.c ++++ b/iconvdata/iso-2022-cn-ext.c +@@ -154,7 +154,7 @@ enum + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_set = *setp; \ +diff --git a/iconvdata/iso-2022-cn.c b/iconvdata/iso-2022-cn.c +index 73eb5e77c6..5660ead668 100644 +--- a/iconvdata/iso-2022-cn.c ++++ b/iconvdata/iso-2022-cn.c +@@ -102,7 +102,7 @@ enum + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_set = *setp; \ +diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c +index d341a14f51..c722bdbfc3 100644 +--- a/iconvdata/iso-2022-jp-3.c ++++ b/iconvdata/iso-2022-jp-3.c +@@ -156,7 +156,7 @@ enum + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + saved_state = *statep; \ +diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c +index f31dfb92e6..b023d3cf8e 100644 +--- a/iconvdata/iso-2022-jp.c ++++ b/iconvdata/iso-2022-jp.c +@@ -249,7 +249,7 @@ gconv_end (struct __gconv_step *data) + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. 
*/ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_set = *setp; \ +@@ -679,7 +679,7 @@ static const cvlist_t conversion_lists[4] = + the character is unknown. \ + The CJK character sets partially overlap when seen as subsets \ + of ISO 10646; therefore there is no single correct result. \ +- We use a preferrence order which depends on the language tag. */ \ ++ We use a preference order which depends on the language tag. */ \ + \ + if (ch <= 0x7f) \ + { \ +diff --git a/iconvdata/iso-2022-kr.c b/iconvdata/iso-2022-kr.c +index e71198aee9..fd785fd8f9 100644 +--- a/iconvdata/iso-2022-kr.c ++++ b/iconvdata/iso-2022-kr.c +@@ -100,7 +100,7 @@ enum + + + /* Since we might have to reset input pointer we must be able to save +- and retore the state. */ ++ and restore the state. */ + #define SAVE_RESET_STATE(Save) \ + if (Save) \ + save_set = *setp; \ +diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c +index f7111a3759..1800dc8fdb 100644 +--- a/iconvdata/iso646.c ++++ b/iconvdata/iso646.c +@@ -21,7 +21,7 @@ + zillions of ISO 646 derivates and supporting them all in a separate + module is overkill since these coded character sets are hardly ever + used anymore (except ANSI_X3.4-1968 == ASCII, which is compatible +- with ISO 8859-1). The European variants are superceded by the ++ with ISO 8859-1). The European variants are superseded by the + various ISO 8859-? standards and the Asian variants are embedded in + larger character sets. Therefore this implementation is simply + here to make it possible to do the conversion if it is necessary. +diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c +index 93c28db13e..5ab821bbff 100644 +--- a/iconvdata/sjis.c ++++ b/iconvdata/sjis.c +@@ -2001,7 +2001,7 @@ static const char from_ucs4_greek[193][2] = + + + /* The mapping of the Kanji is horrible. The glyphs covered by Shift JIS +- are spreaded all around the Unicode CJK area. We use one big table ++ are spread all around the Unicode CJK area. 
We use one big table + since using the gaps will not buy us much. + + The following table can be generated using +diff --git a/iconvdata/tst-table.sh b/iconvdata/tst-table.sh +index d5b1f3c87d..bc6f542b24 100755 +--- a/iconvdata/tst-table.sh ++++ b/iconvdata/tst-table.sh +@@ -44,7 +44,7 @@ if test ${charset} = GB18030; then + mv ${objpfx}tst-${charset}.truncated.table ${objpfx}tst-${charset}.charmap.table + fi + +-# Precomputed expexted differences between the charmap and iconv forward. ++# Precomputed expected differences between the charmap and iconv forward. + precomposed=${charset}.precomposed + + # Precompute expected differences between the charmap and iconv backward. diff --git a/glibc-RHEL-1915-2.patch b/glibc-RHEL-1915-2.patch new file mode 100644 index 0000000..1b24e63 --- /dev/null +++ b/glibc-RHEL-1915-2.patch @@ -0,0 +1,218 @@ +commit 422ed8ede312f786369e4850e47b8d32beaae4e4 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Base tests for buffer management + + Reviewed-by: DJ Delorie + Conflicts: + iconv/Makefile - tests-special list differences + +diff --git a/iconv/Makefile b/iconv/Makefile +index 65b4a44ab8..b0fa550141 100644 +--- a/iconv/Makefile 2024-11-18 12:41:42.539981355 -0500 ++++ b/iconv/Makefile 2024-11-18 12:40:54.861651890 -0500 +@@ -72,7 +72,10 @@ include $(patsubst %,$(..)libof-iterator + + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out +-tests-special += $(objpfx)tst-iconv_prog.out ++tests-special += \ ++ $(objpfx)tst-iconv_prog-buffer.out \ ++ $(objpfx)tst-iconv_prog.out \ ++ # tests-special + endif + + # Make a copy of the file because gconv module names are constructed +@@ -125,3 +128,8 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_p + $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ + '$(run-program-env)' > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-iconv_prog-buffer.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' > 
$@; \ ++ $(evaluate-test) +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +new file mode 100644 +index 0000000000..a27107f02b +--- /dev/null ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -0,0 +1,177 @@ ++#!/bin/bash ++# Test for iconv (the program) buffer management. ++# Copyright (C) 2024 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# <https://www.gnu.org/licenses/>. ++ ++exec 2>&1 ++set -e ++ ++exec {logfd}>&1 ++ ++codir=$1 ++test_program_prefix="$2" ++ ++# Use internal converters to avoid issues with module loading. ++iconv_args="-f ASCII -t UTF-8" ++ ++failure=false ++ ++tmp=`mktemp -d` ++trap 'rm -rf "$tmp"' 0 ++echo ABC > "$tmp/abc" ++echo DEF > "$tmp/def" ++echo GGG > "$tmp/ggg" ++echo HH > "$tmp/hh" ++echo XY > "$tmp/xy" ++echo ZT > "$tmp/zt" ++echo OUT > "$tmp/out-template" ++printf '\xff' > "$tmp/0xff" ++cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped" ++ ++run_iconv () { ++ local c=0 ++ if test "${FUNCNAME[2]}" = main; then ++ c=1 ++ fi ++ echo "${BASH_SOURCE[$c]}:${BASH_LINENO[$c]}: iconv $iconv_args $@" >&$logfd ++ $test_program_prefix $codir/iconv/iconv_prog $iconv_args "$@" ++} ++ ++check_out_expected () { ++ if ! 
cmp -s "$tmp/out" "$tmp/expected" ; then ++ echo "error: iconv output difference" >&$logfd ++ echo "*** expected ***" >&$logfd ++ cat "$tmp/expected" >&$logfd ++ echo "*** actual ***" >&$logfd ++ cat "$tmp/out" >&$logfd ++ failure=true ++ fi ++} ++ ++expect_files () { ++ local f ++ ! test -z "$1" ++ cp "$tmp/$1" "$tmp/expected" ++ shift ++ for f in "$@" ; do ++ cat "$tmp/$f" >> "$tmp/expected" ++ done ++ check_out_expected ++} ++ ++check_out () { ++ cat > "$tmp/expected" ++ check_out_expected ++} ++ ++expect_exit () { ++ local expected=$1 ++ shift ++ # Prevent failure for stopping the script. ++ if "$@" ; then ++ actual=$? ++ else ++ actual=$? ++ fi ++ if test "$actual" -ne "$expected"; then ++ echo "error: expected exit status $expected, not $actual" >&$logfd ++ exit 1 ++ fi ++} ++ ++ignore_failure () { ++ set +e ++ "$@" ++ status=$? ++ set -e ++} ++ ++# Concatentation test. ++run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/def" ++expect_files abc def ++ ++# Single-file in-place conversion. ++run_iconv -o "$tmp/out" "$tmp/out" ++expect_files abc def ++ ++# Multiple input files with in-place conversion. ++ ++run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc" ++expect_files abc def abc ++ ++# But not if we are writing to standard output. ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv >"$tmp/out" ++expect_files out-template ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv - >"$tmp/out" ++expect_files out-template ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv /dev/null >>"$tmp/out" ++expect_files out-template ++ ++# Conversion errors should avoid clobbering an existing file if ++# it is also an input file. 
++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out" ++expect_files 0xff ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out" ++expect_files 0xff ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out" ++expect_files 0xff ++ ++# If errors are ignored, the file should be overwritten. ++ ++cp "$tmp/out-template" "$tmp/out" ++expect_exit 1 \ ++ run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" 2>"$tmp/err" ++! test -s "$tmp/err" ++expect_files abc def ++ ++# FIXME: This is not correct, -c should not change the exit status. ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -c -o "$tmp/out" \ ++ "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err" ++! test -s "$tmp/err" ++expect_files abc xy zt def ++ ++# If the file does not exist yet, it should not be created on error. ++ ++rm "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/0xff" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/0xff" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" ++! test -e "$tmp/out" ++ ++if $failure ; then ++ exit 1 ++fi diff --git a/glibc-RHEL-1915-3.patch b/glibc-RHEL-1915-3.patch new file mode 100644 index 0000000..db6e942 --- /dev/null +++ b/glibc-RHEL-1915-3.patch @@ -0,0 +1,91 @@ +commit 0cb64617a6f691b611406427c8e24b7f04c4983f +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Do not use mmap in iconv (the program) (bug 17703) + + On current systems, very large files are needed before + mmap becomes beneficial. Simplify the implementation. + + This exposed that inptr was not initialized correctly in + process_fd. Handling multiple input files resulted in + EFAULT in read because a null pointer was passed. 
This + could be observed previously if an input file was not + mappable and was reported as bug 17703. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index a765b1af21..88a928557e 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -31,9 +31,6 @@ + #include + #include + #include +-#ifdef _POSIX_MAPPED_FILES +-# include +-#endif + #include + #include + #include "iconv_prog.h" +@@ -253,10 +250,6 @@ conversions from `%s' and to `%s' are not supported"), + else + do + { +-#ifdef _POSIX_MAPPED_FILES +- struct stat64 st; +- char *addr; +-#endif + int fd, ret; + + if (verbose) +@@ -276,39 +269,6 @@ conversions from `%s' and to `%s' are not supported"), + } + } + +-#ifdef _POSIX_MAPPED_FILES +- /* We have possibilities for reading the input file. First try +- to mmap() it since this will provide the fastest solution. */ +- if (fstat64 (fd, &st) == 0 +- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, +- fd, 0)) != MAP_FAILED)) +- { +- /* Yes, we can use mmap(). The descriptor is not needed +- anymore. */ +- if (close (fd) != 0) +- error (EXIT_FAILURE, errno, +- _("error while closing input `%s'"), +- argv[remaining]); +- +- ret = process_block (cd, addr, st.st_size, &output, +- output_file); +- +- /* We don't need the input data anymore. */ +- munmap ((void *) addr, st.st_size); +- +- if (ret != 0) +- { +- status = EXIT_FAILURE; +- +- if (ret < 0) +- /* We cannot go on with producing output since it might +- lead to problem because the last output might leave +- the output stream in an undefined state. */ +- break; +- } +- } +- else +-#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + ret = process_fd (cd, fd, &output, output_file); +@@ -544,7 +504,7 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) + process it in one step. 
*/ + static char *inbuf = NULL; + static size_t maxlen = 0; +- char *inptr = NULL; ++ char *inptr = inbuf; + size_t actlen = 0; + + while (actlen < maxlen) diff --git a/glibc-RHEL-1915-4.patch b/glibc-RHEL-1915-4.patch new file mode 100644 index 0000000..711b6db --- /dev/null +++ b/glibc-RHEL-1915-4.patch @@ -0,0 +1,62 @@ +commit 00ba299787c2ea9e5c4986301e2f4965dffbfded +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + manual: __is_last is no longer part of iconv internals + + The __is_last field was replaced with a bitmask in + commit 85830c4c4688b30d3d76111aa9a26745c7b141d6 in 2000, + and multiple bits are in use today. + + Reviewed-by: DJ Delorie + +diff --git a/manual/charset.texi b/manual/charset.texi +index 427db3bc80..3aaa62d088 100644 +--- a/manual/charset.texi ++++ b/manual/charset.texi +@@ -2422,11 +2422,11 @@ written into the buffer to signal how much output is available. If this + conversion step is not the last one, the element must not be modified. + The @code{__outbufend} element must not be modified. + +-@item int __is_last +-This element is nonzero if this conversion step is the last one. This +-information is necessary for the recursion. See the description of the +-conversion function internals below. This element must never be +-modified. ++@item int __flags ++This field is a set of flags. The @code{__GCONV_IS_LAST} bit is set if ++this conversion step is the last one. This information is necessary for ++the recursion. See the description of the conversion function internals ++below. This element must never be modified. + + @item int __invocation_counter + The conversion function can use this element to see how many calls of +@@ -2731,8 +2731,8 @@ Otherwise the function has to emit a byte sequence to bring the state + object into the initial state. Once this all happened the other + conversion modules in the chain of conversions have to get the same + chance. 
Whether another step follows can be determined from the +-@code{__is_last} element of the step data structure to which the first +-parameter points. ++@code{__GCONV_IS_LAST} flag in the @code{__flags} field of the step ++data structure to which the first parameter points. + + The more interesting mode is when actual text has to be converted. The + first step in this case is to convert as much text as possible from the +@@ -2866,7 +2866,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data, + + /* @r{Call the steps down the chain if there are any but only} + @r{if we successfully emitted the escape sequence.} */ +- if (status == __GCONV_OK && ! data->__is_last) ++ if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST)) + status = fct (next_step, next_data, NULL, NULL, + written, 1); + @} +@@ -2892,7 +2892,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data, + + /* @r{If this is the last step, leave the loop. There is} + @r{nothing we can do.} */ +- if (data->__is_last) ++ if (data->__flags & __GCONV_IS_LAST) + @{ + /* @r{Store information about how many bytes are} + @r{available.} */ diff --git a/glibc-RHEL-1915-5.patch b/glibc-RHEL-1915-5.patch new file mode 100644 index 0000000..ee6b9e0 --- /dev/null +++ b/glibc-RHEL-1915-5.patch @@ -0,0 +1,1064 @@ +commit 6cbf845fcdc76131d0e674cee454fe738b69c69d +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Preserve iconv -c error exit on invalid inputs (bug 32046) + + In several converters, a __GCONV_ILLEGAL_INPUT result gets overwritten + with __GCONV_FULL_OUTPUT. As a result, iconv (the function) returns + E2BIG instead of EILSEQ. The iconv program does not see the original + EILSEQ failure, does not recognize the invalid input, and may + incorrectly exit successfully. + + To address this, a new __flags bit is used to indicate a sticky input + error state. 
All __GCONV_ILLEGAL_INPUT results are replaced with a + function call that sets this new __GCONV_ENCOUNTERED_ILLEGAL_INPUT and + returns __GCONV_ILLEGAL_INPUT. The iconv program checks for + __GCONV_ENCOUNTERED_ILLEGAL_INPUT and overrides the exit status. + + The converter changes introducing __gconv_mark_illegal_input are + mostly mechanical, except for the res variable initialization in + iconvdata/iso-2022-jp.c: this error gets overwritten with __GCONV_OK + and other results in the following code. If res == + __GCONV_ILLEGAL_INPUT afterwards, STANDARD_TO_LOOP_ERR_HANDLER below + will handle it. + + The __gconv_mark_illegal_input changes do not alter the errno value + set by the iconv function. This is simpler to implement than + reviewing each __GCONV_FULL_OUTPUT result and adjust it not to + override a previous __GCONV_ILLEGAL_INPUT result. Doing it that way + would also change some E2BIG errors in to EILSEQ errors, so it had to + be done conditionally (under a flag set by the iconv program only), to + avoid confusing buffer management in other applications. + + Reviewed-by: DJ Delorie + +diff -Nrup a/iconv/Makefile b/iconv/Makefile +--- a/iconv/Makefile 2024-11-20 10:56:00.793668915 -0500 ++++ b/iconv/Makefile 2024-11-20 11:21:08.986113631 -0500 +@@ -57,6 +57,10 @@ tests = \ + tst-iconv-opt \ + # tests + ++tests-internal = \ ++ tst-iconv-sticky-input-error \ ++ # tests-internal ++ + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv + install-sbin = iconvconfig +diff -Nrup a/iconv/gconv_int.h b/iconv/gconv_int.h +--- a/iconv/gconv_int.h 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconv/gconv_int.h 2024-11-20 11:11:29.577105363 -0500 +@@ -304,4 +304,34 @@ extern wint_t __gconv_btwoc_ascii (struc + + __END_DECLS + ++/* Internal extensions for <gconv.h>. */ ++ ++/* Internal flags for __flags in struct __gconv_step_data. Overlaps ++ with flags for __gconv_open. 
*/ ++enum ++ { ++ /* The conversion encountered an illegal input character at one ++ point. */ ++ __GCONV_ENCOUNTERED_ILLEGAL_INPUT = 1U << 30, ++ }; ++ ++/* Mark *STEP_DATA as having seen illegal input, and return ++ __GCONV_ILLEGAL_INPUT. */ ++static inline int ++__gconv_mark_illegal_input (struct __gconv_step_data *step_data) ++{ ++ step_data->__flags |= __GCONV_ENCOUNTERED_ILLEGAL_INPUT; ++ return __GCONV_ILLEGAL_INPUT; ++} ++ ++/* Returns true if any of the conversion steps encountered illegal input. */ ++static _Bool __attribute__ ((unused)) ++__gconv_has_illegal_input (__gconv_t cd) ++{ ++ for (size_t i = 0; i < cd->__nsteps; ++i) ++ if (cd->__data[i].__flags & __GCONV_ENCOUNTERED_ILLEGAL_INPUT) ++ return true; ++ return false; ++} ++ + #endif /* gconv_int.h */ +diff -Nrup a/iconv/gconv_simple.c b/iconv/gconv_simple.c +--- a/iconv/gconv_simple.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconv/gconv_simple.c 2024-11-20 11:18:28.273006314 -0500 +@@ -259,7 +259,7 @@ ucs4_internal_loop (struct __gconv_step + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + + if (flags & __GCONV_IGNORE_ERRORS) + { +@@ -270,7 +270,7 @@ ucs4_internal_loop (struct __gconv_step + + *inptrp = inptr; + *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + + *((uint32_t *) outptr) = inval; +@@ -397,7 +397,7 @@ ucs4_internal_loop_single (struct __gcon + if (!(flags & __GCONV_IGNORE_ERRORS)) + { + *inptrp -= cnt - (state->__count & 7); +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + } + else +@@ -629,7 +629,7 @@ ucs4le_internal_loop (struct __gconv_ste + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. 
*/ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + + if (flags & __GCONV_IGNORE_ERRORS) + { +@@ -640,7 +640,7 @@ ucs4le_internal_loop (struct __gconv_ste + + *inptrp = inptr; + *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + + *((uint32_t *) outptr) = inval; +@@ -772,7 +772,7 @@ ucs4le_internal_loop_single (struct __gc + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + else + { +@@ -1218,7 +1218,7 @@ ucs4le_internal_loop_single (struct __gc + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +@@ -1261,7 +1261,7 @@ ucs4le_internal_loop_single (struct __gc + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + inptr += 2; \ +@@ -1310,7 +1310,7 @@ ucs4le_internal_loop_single (struct __gc + character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + inptr += 4; \ +diff -Nrup a/iconv/gconv_trans.c b/iconv/gconv_trans.c +--- a/iconv/gconv_trans.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconv/gconv_trans.c 2024-11-20 11:11:29.578105370 -0500 +@@ -234,6 +234,6 @@ __gconv_transliterate (struct __gconv_st + } + + /* Haven't found a match. 
*/ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + libc_hidden_def (__gconv_transliterate) +diff -Nrup a/iconv/iconv_prog.c b/iconv/iconv_prog.c +--- a/iconv/iconv_prog.c 2024-11-20 10:56:00.795668928 -0500 ++++ b/iconv/iconv_prog.c 2024-11-20 11:11:29.578105370 -0500 +@@ -292,6 +292,11 @@ conversions from `%s' and to `%s' are no + } + while (++remaining < argc); + ++ /* Ensure that iconv -c still exits with failure if iconv (the ++ function) has failed with E2BIG instead of EILSEQ. */ ++ if (__gconv_has_illegal_input (cd)) ++ status = EXIT_FAILURE; ++ + /* Close the output file now. */ + if (output != NULL && fclose (output)) + error (EXIT_FAILURE, errno, _("error while closing output file")); +diff -Nrup a/iconv/loop.c b/iconv/loop.c +--- a/iconv/loop.c 2024-11-20 10:56:00.242665095 -0500 ++++ b/iconv/loop.c 2024-11-20 11:11:29.578105370 -0500 +@@ -194,8 +194,7 @@ + `continue' must reach certain points. */ + #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +@@ -213,7 +212,7 @@ + points. */ + #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + \ + if (irreversible == NULL) \ + /* This means we are in call from __gconv_transliterate. In this \ +diff -Nrup a/iconv/tst-iconv-sticky-input-error.c b/iconv/tst-iconv-sticky-input-error.c +--- a/iconv/tst-iconv-sticky-input-error.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/iconv/tst-iconv-sticky-input-error.c 2024-11-20 11:11:29.578105370 -0500 +@@ -0,0 +1,135 @@ ++/* Test __GCONV_ENCOUNTERED_ILLEGAL_INPUT, as used by iconv -c (bug 32046). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* FROM is the input character set, TO the output character set. If ++ IGNORE is true, the iconv descriptor is set up in the same way as ++ iconv -c would. INPUT is the input string, EXPECTED_OUTPUT the ++ output. OUTPUT_LIMIT is a byte count, specifying how many bytes of ++ output buffer are offered to the iconv function on each invocation.
*/ ++static void ++one_direction (const char *from, const char *to, bool ignore, ++ const char *input, const char *expected_output, ++ size_t output_limit) ++{ ++ if (test_verbose) ++ { ++ char *quoted_input = support_quote_string (input); ++ char *quoted_output = support_quote_string (expected_output); ++ printf ("info: testing from=\"%s\" to=\"%s\" ignore=%d input=\"%s\"" ++ " expected_output=\"%s\" output_limit=%zu\n", ++ from, to, (int) ignore, quoted_input, ++ quoted_output, output_limit); ++ free (quoted_output); ++ free (quoted_input); ++ } ++ ++ __gconv_t cd; ++ if (ignore) ++ { ++ struct gconv_spec conv_spec; ++ TEST_VERIFY_EXIT (__gconv_create_spec (&conv_spec, from, to) ++ == &conv_spec); ++ conv_spec.ignore = true; ++ cd = (iconv_t) -1; ++ TEST_COMPARE (__gconv_open (&conv_spec, &cd, 0), __GCONV_OK); ++ __gconv_destroy_spec (&conv_spec); ++ } ++ else ++ cd = iconv_open (to, from); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ char *input_ptr = (char *) input; ++ size_t input_len = strlen (input); ++ char output_buf[20]; ++ char *output_ptr = output_buf; ++ size_t output_len; ++ do ++ { ++ output_len = array_end (output_buf) - output_ptr; ++ if (output_len > output_limit) ++ /* Limit the buffer size as requested by the caller. */ ++ output_len = output_limit; ++ TEST_VERIFY_EXIT (output_len > 0); ++ if (input_len == 0) ++ /* Trigger final flush. */ ++ input_ptr = NULL; ++ char *old_input_ptr = input_ptr; ++ size_t ret = iconv (cd, &input_ptr, &input_len, ++ &output_ptr, &output_len); ++ if (ret == (size_t) -1) ++ { ++ if (errno != EILSEQ) ++ TEST_COMPARE (errno, E2BIG); ++ } ++ ++ if (input_ptr == old_input_ptr) ++ /* Avoid endless loop if stuck on an invalid input character. */ ++ break; ++ } ++ while (input_ptr != NULL); ++ ++ /* Test the sticky illegal input bit. 
*/ ++ TEST_VERIFY (__gconv_has_illegal_input (cd)); ++ ++ TEST_COMPARE_BLOB (expected_output, strlen (expected_output), ++ output_buf, output_ptr - output_buf); ++ ++ TEST_COMPARE (iconv_close (cd), 0); ++} ++ ++static int ++do_test (void) ++{ ++ static const char charsets[][14] = ++ { ++ "ASCII", ++ "ASCII//IGNORE", ++ "UTF-8", ++ "UTF-8//IGNORE", ++ }; ++ ++ for (size_t from_idx = 0; from_idx < array_length (charsets); ++from_idx) ++ for (size_t to_idx = 0; to_idx < array_length (charsets); ++to_idx) ++ for (int do_ignore = 0; do_ignore < 2; ++do_ignore) ++ for (int limit = 1; limit < 5; ++limit) ++ for (int skip = 0; skip < 3; ++skip) ++ { ++ const char *expected_output; ++ if (do_ignore || strstr (charsets[to_idx], "//IGNORE") != NULL) ++ expected_output = "ABXY" + skip; ++ else ++ expected_output = "AB" + skip; ++ one_direction (charsets[from_idx], charsets[to_idx], do_ignore, ++ "AB\xffXY" + skip, expected_output, limit); ++ } ++ ++ return 0; ++} ++ ++#include +diff -Nrup a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +--- a/iconv/tst-iconv_prog-buffer.sh 2024-11-20 10:56:00.793668915 -0500 ++++ b/iconv/tst-iconv_prog-buffer.sh 2024-11-20 11:11:29.578105370 -0500 +@@ -150,9 +150,8 @@ expect_exit 1 \ + ! test -s "$tmp/err" + expect_files abc def + +-# FIXME: This is not correct, -c should not change the exit status. + cp "$tmp/out-template" "$tmp/out" +-run_iconv -c -o "$tmp/out" \ ++expect_exit 1 run_iconv -c -o "$tmp/out" \ + "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err" + ! test -s "$tmp/err" + expect_files abc xy zt def +diff -Nrup a/iconvdata/cp932.c b/iconvdata/cp932.c +--- a/iconvdata/cp932.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/cp932.c 2024-11-20 11:11:29.579105377 -0500 +@@ -4560,7 +4560,7 @@ static const char from_ucs4_extra[229][2 + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4600,7 +4600,7 @@ static const char from_ucs4_extra[229][2 + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4635,7 +4635,7 @@ static const char from_ucs4_extra[229][2 + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +diff -Nrup a/iconvdata/euc-jp-ms.c b/iconvdata/euc-jp-ms.c +--- a/iconvdata/euc-jp-ms.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/euc-jp-ms.c 2024-11-20 11:11:29.580105384 -0500 +@@ -4660,7 +4660,7 @@ static const unsigned char from_ucs4_ext + /* This is illegal. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4690,7 +4690,7 @@ static const unsigned char from_ucs4_ext + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4710,7 +4710,7 @@ static const unsigned char from_ucs4_ext + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + } \ +@@ -4821,7 +4821,7 @@ static const unsigned char from_ucs4_ext + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +diff -Nrup a/iconvdata/gbbig5.c b/iconvdata/gbbig5.c +--- a/iconvdata/gbbig5.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/gbbig5.c 2024-11-20 11:11:29.581105391 -0500 +@@ -4832,7 +4832,7 @@ const char __from_big5_to_gb2312 [13973] + { \ + /* We do not have a mapping for this character. \ + If ignore errors, map it to 0xa1bc - big5 box character */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +@@ -4923,7 +4923,7 @@ const char __from_big5_to_gb2312 [13973] + { \ + /* We do not have a mapping for this character. \ + If ignore errors, map it to 0xa1f5 - gb box character */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +diff -Nrup a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +--- a/iconvdata/ibm1364.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/ibm1364.c 2024-11-20 11:11:29.581105391 -0500 +@@ -180,7 +180,7 @@ enum + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -220,7 +220,7 @@ enum + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -301,7 +301,7 @@ enum + \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -333,7 +333,7 @@ enum + /* This is an illegal character. */ \ + if (! 
ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +diff -Nrup a/iconvdata/iso646.c b/iconvdata/iso646.c +--- a/iconvdata/iso646.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/iso646.c 2024-11-20 11:11:29.581105391 -0500 +@@ -314,7 +314,7 @@ gconv_end (struct __gconv_step *data) + ch = 0xf9; \ + else if (var == JP_OCR_B) \ + /* Illegal character. */ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == YU) \ + ch = 0x17e; \ + else if (var == HU) \ +@@ -388,7 +388,7 @@ gconv_end (struct __gconv_step *data) + ch = 0xec; \ + else if (var == JP_OCR_B) \ + /* Illegal character. */ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == YU) \ + ch = 0x10d; \ + else if (var == HU) \ +@@ -404,7 +404,7 @@ gconv_end (struct __gconv_step *data) + break; \ + case 0x80 ... 0xff: \ + /* Illegal character. 
*/ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -441,17 +441,17 @@ gconv_end (struct __gconv_step *data) + case 0x23: \ + if (var == GB || var == ES || var == IT || var == FR || var == FR1 \ + || var == NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x24: \ + if (var == CN || var == HU || var == CU || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x40: \ + if (var == CA || var == CA2 || var == DE || var == ES || var == ES2 \ + || var == IT || var == YU || var == HU || var == FR || var == FR1 \ + || var == PT || var == PT2 || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5b: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -459,7 +459,7 @@ gconv_end (struct __gconv_step *data) + || var == HU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == CU) \ + ch = 0x7d; \ + break; \ +@@ -469,7 +469,7 @@ gconv_end (struct __gconv_step *data) + || var == YU || var == KR || var == HU || var == CU || var == FR \ + || var == FR1 || var == NO || var == NO2 || var == PT \ + || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5d: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -477,17 +477,17 @@ gconv_end (struct __gconv_step *data) + || var == HU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5e: \ + if (var == CA || var == CA2 || var == ES2 || var == YU || var == CU \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x60: \ + if (var == CA || var == CA2 || var == IT || var == JP_OCR_B \ + || var == YU || var == HU || var == FR || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7b: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -495,14 +495,14 @@ gconv_end (struct __gconv_step *data) + || var == CU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7c: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ + || var == ES2 || var == IT || var == YU || var == HU || var == CU \ + || var == FR || var == FR1 || var == NO || var == PT \ + || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == NO2) \ + ch = 0x7e; \ + break; \ +@@ -511,7 +511,7 @@ gconv_end (struct __gconv_step *data) + || var == ES2 || var == IT || var == YU || var == HU || var == CU \ + || var == FR || var == FR1 || var == NO || var == NO2 \ + || var == PT || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7e: \ + if (var == GB || var == CA || var == CA2 || var == DE || var == ES2 \ +@@ -519,21 +519,21 @@ gconv_end (struct __gconv_step *data) + || var == YU || var == HU || var == CU || var == FR || var == FR1 \ + || var == NO || var == NO2 || var == PT || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xa3: \ + if (var != GB && var != ES && var != IT && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x23; \ + break; \ + case 0xa4: \ + if (var != HU && var != CU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x24; \ + break; \ + case 0xa5: \ +@@ -542,7 +542,7 @@ gconv_end (struct __gconv_step *data) + else if (var == JP || var == JP_OCR_B) \ + ch = 0x5c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa7: \ + if (var == DE || var == ES || var == IT || var == PT) \ +@@ -552,11 +552,11 @@ gconv_end (struct __gconv_step *data) + else if (var == NO2) \ + ch = 0x23; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa8: \ + if (var != ES2 && var != CU && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xb0: \ +@@ -567,7 +567,7 @@ gconv_end (struct __gconv_step *data) + else if (var == PT) \ + ch = 0x7e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xb4: \ + if (var == ES2 || var == CU) \ +@@ -575,11 +575,11 @@ gconv_end (struct __gconv_step *data) + else if (var == PT2) \ + ch = 0x40; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xb5: \ + if (var != FR) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ 
+ case 0xbf: \ +@@ -588,31 +588,31 @@ gconv_end (struct __gconv_step *data) + else if (var == ES2 || var == CU) \ + ch = 0x5e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xc1: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0xc3: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc4: \ + if (var != DE && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc5: \ + if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xc6: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc7: \ +@@ -621,7 +621,7 @@ gconv_end (struct __gconv_step *data) + else if (var == PT || var == PT2) \ + ch = 0x5c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xc9: \ + if (var == CA2) \ +@@ -631,26 +631,26 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x40; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xd1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xd5: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xd6: \ + if (var != DE && 
var != HU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xd8: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xdc: \ +@@ -659,11 +659,11 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x5e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xdf: \ + if (var != DE) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xe0: \ +@@ -672,36 +672,36 @@ gconv_end (struct __gconv_step *data) + else if (var == IT) \ + ch = 0x7b; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xe1: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0xe2: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xe3: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0xe4: \ + if (var != DE && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0xe5: \ + if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xe6: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + 
break; \ + case 0xe7: \ +@@ -712,11 +712,11 @@ gconv_end (struct __gconv_step *data) + else if (var == PT || var == PT2) \ + ch = 0x7c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xe8: \ + if (var != CA && var != CA2 && var != IT && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xe9: \ +@@ -727,51 +727,51 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x60; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xea: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xec: \ + if (var != IT) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xee: \ + if (var != CA) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5e; \ + break; \ + case 0xf1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf2: \ + if (var != IT) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf4: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0xf5: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xf6: \ + if (var != DE && var != HU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch 
= 0x7c; \ + break; \ + case 0xf8: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf9: \ +@@ -780,11 +780,11 @@ gconv_end (struct __gconv_step *data) + else if (var == IT) \ + ch = 0x60; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xfb: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xfc: \ +@@ -793,93 +793,93 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x7e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x160: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0x106: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0x107: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0x10c: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5e; \ + break; \ + case 0x10d: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x110: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0x111: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0x161: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0x17d: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0x17e: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0x2dd: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x2022: \ + if (var != ES2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0x203e: \ + if (var != GB && var != CN && var != JP && var != NO && var != SE) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x20a9: \ + if (var != KR) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0x2329: \ + if (var != JP_OCR_B) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0x232a: \ + if (var != JP_OCR_B) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + default: \ + if (__glibc_unlikely (ch > 0x7f)) \ + { \ + UNICODE_TAG_HANDLER (ch, 4); \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + } \ + break; \ + } \ +diff -Nrup a/iconvdata/unicode.c b/iconvdata/unicode.c +--- a/iconvdata/unicode.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/unicode.c 2024-11-20 11:11:29.582105398 -0500 +@@ -164,7 +164,7 @@ gconv_end (struct __gconv_step *data) + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +diff -Nrup a/iconvdata/utf-16.c b/iconvdata/utf-16.c +--- a/iconvdata/utf-16.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/utf-16.c 2024-11-20 11:11:29.582105398 -0500 +@@ -207,7 +207,7 @@ gconv_end (struct __gconv_step *data) + We must catch this. If we let surrogates pass through, \ + attackers could make a security hole exploit by \ + synthesizing any desired plane 1-16 character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +diff -Nrup a/iconvdata/utf-32.c b/iconvdata/utf-32.c +--- a/iconvdata/utf-32.c 2021-08-01 21:33:43.000000000 -0400 ++++ b/iconvdata/utf-32.c 2024-11-20 11:11:29.582105398 -0500 +@@ -207,7 +207,7 @@ gconv_end (struct __gconv_step *data) + We must catch this. If we let surrogates pass through, \ + attackers could make a security hole exploit by \ + generating "irregular UTF-32" sequences. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ diff --git a/glibc-RHEL-1915-6.patch b/glibc-RHEL-1915-6.patch new file mode 100644 index 0000000..d70b98c --- /dev/null +++ b/glibc-RHEL-1915-6.patch @@ -0,0 +1,715 @@ +commit 8ef3cff9d1ceafe369f982d980678d749fb93bd2 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Support in-place conversions (bug 10460, bug 32033) + + Check if any of the input files overlaps with the output file, and use + a temporary file in this case, so that the input is not clobbered + before it is read. This fixes bug 10460. It allows iconv to be used + more easily as a functional replacement for GNU recode. + + The updated output buffer management truncates the output file + if there is no input, fixing bug 32033.
+ + Reviewed-by: DJ Delorie + + Conflicts: + NEWS - Dropped + iconv/Makefile - tests-special list diffs + iconv/iconv_prog.c - Manual merge + +diff -Nrup a/iconv/Makefile b/iconv/Makefile +--- a/iconv/Makefile 2024-11-21 11:08:26.166051531 -0500 ++++ b/iconv/Makefile 2024-11-21 11:08:51.325219751 -0500 +@@ -77,6 +77,8 @@ include $(patsubst %,$(..)libof-iterator + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out + tests-special += \ ++ $(objpfx)tst-iconv_prog-buffer-large.out \ ++ $(objpfx)tst-iconv_prog-buffer-tiny.out \ + $(objpfx)tst-iconv_prog-buffer.out \ + $(objpfx)tst-iconv_prog.out \ + # tests-special +@@ -137,3 +139,12 @@ $(objpfx)tst-iconv_prog-buffer.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog + $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ + $(evaluate-test) ++$(objpfx)tst-iconv_prog-buffer-tiny.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' \ ++ '--buffer-size=1' > $@; \ ++ $(evaluate-test) ++$(objpfx)tst-iconv_prog-buffer-large.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \ ++ $(evaluate-test) +diff -Nrup a/iconv/iconv_prog.c b/iconv/iconv_prog.c +--- a/iconv/iconv_prog.c 2024-11-21 11:08:26.167051537 -0500 ++++ b/iconv/iconv_prog.c 2024-11-21 11:10:10.028745981 -0500 +@@ -48,7 +48,11 @@ + static void print_version (FILE *stream, struct argp_state *state); + void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +-#define OPT_VERBOSE 1000 ++enum ++ { ++ OPT_VERBOSE = 1000, ++ OPT_BUFFER_SIZE, ++ }; + #define OPT_LIST 'l' + + /* Definitions of arguments for argp functions. 
*/ +@@ -64,6 +68,10 @@ static const struct argp_option options[ + { "output", 'o', N_("FILE"), 0, N_("output file") }, + { "silent", 's', NULL, 0, N_("suppress warnings") }, + { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, ++ /* This is an internal option intended for testing only. Very small ++ buffers do not work with all character sets. */ ++ { "buffer-size", OPT_BUFFER_SIZE, N_("BYTE-COUNT"), OPTION_HIDDEN, ++ N_("size of in-memory scratch buffer") }, + { NULL, 0, NULL, 0, NULL } + }; + +@@ -101,13 +109,20 @@ static int list; + /* If nonzero omit invalid character from output. */ + int omit_invalid; + ++/* Current index in argv (after command line processing) with the ++ input file name. */ ++static int current_input_file_index; ++ ++/* Size of the temporary, in-memory buffer. Exceeding it needs ++ spooling to disk in a temporary file. Controlled by --buffer-size. */ ++static size_t output_buffer_size = 1024 * 1024; ++ + /* Prototypes for the functions doing the actual work. */ +-static int process_block (iconv_t cd, char *addr, size_t len, FILE **output, +- const char *output_file); +-static int process_fd (iconv_t cd, int fd, FILE **output, +- const char *output_file); +-static int process_file (iconv_t cd, FILE *input, FILE **output, +- const char *output_file); ++static void prepare_output_file (char **argv); ++static void close_output_file (int status); ++static int process_block (iconv_t cd, char *addr, size_t len); ++static int process_fd (iconv_t cd, int fd); ++static int process_file (iconv_t cd, FILE *input); + static void print_known_names (void); + + +@@ -115,7 +130,6 @@ int + main (int argc, char *argv[]) + { + int status = EXIT_SUCCESS; +- int remaining; + __gconv_t cd; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; +@@ -127,7 +141,7 @@ main (int argc, char *argv[]) + textdomain (_libc_intl_domainname); + + /* Parse and process arguments.
*/ +- argp_parse (&argp, argc, argv, 0, &remaining, NULL); ++ argp_parse (&argp, argc, argv, 0, &current_input_file_index, NULL); + + /* List all coded character sets if wanted. */ + if (list) +@@ -162,7 +176,8 @@ main (int argc, char *argv[]) + if (from_charmap != NULL || to_charmap != NULL) + /* Construct the conversion table and do the conversion. */ + status = charmap_conversion (from_code, from_charmap, to_code, to_charmap, +- argc, remaining, argv, output_file); ++ argc, current_input_file_index, argv, ++ output_file); + else + { + struct gconv_spec conv_spec; +@@ -236,16 +251,14 @@ conversions from `%s' and to `%s' are no + _("failed to start conversion processing")); + } + +- /* The output file. Will be opened when we are ready to produce +- output. */ +- FILE *output = NULL; ++ prepare_output_file (argv); + + /* Now process the remaining files. Write them to stdout or the file + specified with the `-o' parameter. If we have no file given as + the parameter process all from stdin. */ +- if (remaining == argc) ++ if (current_input_file_index == argc) + { +- if (process_file (cd, stdin, &output, output_file) != 0) ++ if (process_file (cd, stdin) != 0) + status = EXIT_FAILURE; + } + else +@@ -254,17 +267,17 @@ conversions from `%s' and to `%s' are no + int fd, ret; + + if (verbose) +- fprintf (stderr, "%s:\n", argv[remaining]); +- if (strcmp (argv[remaining], "-") == 0) +- fd = 0; ++ fprintf (stderr, "%s:\n", argv[current_input_file_index]); ++ if (strcmp (argv[current_input_file_index], "-") == 0) ++ fd = STDIN_FILENO; + else + { +- fd = open (argv[remaining], O_RDONLY); ++ fd = open (argv[current_input_file_index], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), +- argv[remaining]); ++ argv[current_input_file_index]); + status = EXIT_FAILURE; + continue; + } +@@ -272,7 +285,7 @@ conversions from `%s' and to `%s' are no + + { + /* Read the file in pieces.
*/ +- ret = process_fd (cd, fd, &output, output_file); ++ ret = process_fd (cd, fd); + + /* Now close the file. */ + close (fd); +@@ -290,7 +303,7 @@ conversions from `%s' and to `%s' are no + } + } + } +- while (++remaining < argc); ++ while (++current_input_file_index < argc); + + /* Ensure that iconv -c still exits with failure if iconv (the + function) has failed with E2BIG instead of EILSEQ. */ +@@ -298,8 +311,7 @@ conversions from `%s' and to `%s' are no + status = EXIT_FAILURE; + + /* Close the output file now. */ +- if (output != NULL && fclose (output)) +- error (EXIT_FAILURE, errno, _("error while closing output file")); ++ close_output_file (status); + } + + return status; +@@ -329,6 +341,14 @@ parse_opt (int key, char *arg, struct ar + /* Omit invalid characters from output. */ + omit_invalid = 1; + break; ++ case OPT_BUFFER_SIZE: ++ { ++ int i = atoi (arg); ++ if (i <= 0) ++ error (EXIT_FAILURE, 0, _("invalid buffer size: %s"), arg); ++ output_buffer_size = i; ++ } ++ break; + case OPT_VERBOSE: + verbose = 1; + break; +@@ -375,59 +395,247 @@ warranty; not even for MERCHANTABILITY o + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); + } + ++/* Command line index of the last input file that overlaps with the ++ output file. Zero means no temporary file is ever required. */ ++static int last_overlapping_file_index; + +-static int +-write_output (const char *outbuf, const char *outptr, FILE **output, +- const char *output_file) ++/* This is set to true if the output is written to a temporary file. */ ++static bool output_using_temporary_file; ++ ++/* This is the file descriptor that will be used by write_output. */ ++static int output_fd = -1; ++ ++/* Pointers at the start and end of the fixed-size output buffer. */ ++static char *output_buffer_start; ++ ++/* Current write position in the output buffer. */ ++static char *output_buffer_current; ++ ++/* Remaining bytes after output_buffer_current in the output buffer. 
*/ ++static size_t output_buffer_remaining; ++ ++ ++/* Reduce the buffer size when writing directly to the output file, to ++ reduce cache utilization. */ ++static size_t copy_buffer_size = BUFSIZ; ++ ++static void ++output_error (void) ++{ ++ error (EXIT_FAILURE, errno, _("cannot open output file")); ++} ++ ++static void ++input_error (const char *path) + { +- /* We have something to write out. */ +- int errno_save = errno; ++ error (0, errno, _("cannot open input file `%s'"), path); ++} + +- if (*output == NULL) ++/* Opens output_file for writing, truncating it. */ ++static void ++open_output_direct (void) ++{ ++ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_TRUNC, 0777); ++ if (output_fd < 0) ++ output_error (); ++} ++ ++static void ++prepare_output_file (char **argv) ++{ ++ if (copy_buffer_size > output_buffer_size) ++ copy_buffer_size = output_buffer_size; ++ ++ if (output_file == NULL || strcmp (output_file, "-") == 0) ++ { ++ /* No buffering is required when writing to standard output ++ because input overlap is expected to be solved externally. */ ++ output_fd = STDOUT_FILENO; ++ output_buffer_size = copy_buffer_size; ++ } ++ else + { +- /* Determine output file. */ +- if (output_file != NULL && strcmp (output_file, "-") != 0) ++ /* If iconv creates the output file, no overlap is possible. */ ++ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_EXCL, 0777); ++ if (output_fd >= 0) ++ output_buffer_size = copy_buffer_size; ++ else + { +- *output = fopen (output_file, "w"); +- if (*output == NULL) +- error (EXIT_FAILURE, errno, _("cannot open output file")); ++ /* Otherwise, check if any of the input files overlap with the ++ output file. 
*/ ++ struct statx st; ++ if (statx (AT_FDCWD, output_file, 0, STATX_INO | STATX_MODE, &st) ++ != 0) ++ output_error (); ++ uint32_t out_dev_minor = st.stx_dev_minor; ++ uint32_t out_dev_major = st.stx_dev_major; ++ uint64_t out_ino = st.stx_ino; ++ ++ int idx = current_input_file_index; ++ while (true) ++ { ++ /* Special case: no input files means standard input. */ ++ if (argv[idx] == NULL && idx != current_input_file_index) ++ break; ++ ++ int ret; ++ if (argv[idx] == NULL || strcmp (argv[idx], "-") == 0) ++ ret = statx (STDIN_FILENO, "", AT_EMPTY_PATH, STATX_INO, &st); ++ else ++ ret = statx (AT_FDCWD, argv[idx], 0, STATX_INO, &st); ++ if (ret != 0) ++ { ++ input_error (argv[idx]); ++ exit (EXIT_FAILURE); ++ } ++ if (out_dev_minor == st.stx_dev_minor ++ && out_dev_major == st.stx_dev_major ++ && out_ino == st.stx_ino) ++ { ++ if (argv[idx] == NULL) ++ /* Corner case: index of NULL would be larger than ++ idx while converting, triggering a switch away ++ from the temporary file. */ ++ last_overlapping_file_index = INT_MAX; ++ else ++ last_overlapping_file_index = idx; ++ } ++ ++ if (argv[idx] == NULL) ++ break; ++ ++idx; ++ } ++ ++ /* If there is no overlap, avoid using a temporary file. */ ++ if (last_overlapping_file_index == 0) ++ { ++ open_output_direct (); ++ output_buffer_size = copy_buffer_size; ++ } + } +- else +- *output = stdout; + } + +- if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf) +- || ferror (*output)) ++ output_buffer_start = malloc (output_buffer_size); ++ if (output_buffer_start == NULL) ++ output_error (); ++ output_buffer_current = output_buffer_start; ++ output_buffer_remaining = output_buffer_size; ++} ++ ++/* Write out the range [first, last), terminating the process on write ++ error. */ ++static void ++write_fully (int fd, const char *first, const char *last) ++{ ++ while (first < last) + { +- /* Error occurred while printing the result. 
*/ +- error (0, 0, _("\ ++ ssize_t ret = write (fd, first, last - first); ++ if (ret == 0) ++ { ++ errno = ENOSPC; ++ output_error (); ++ } ++ if (ret < 0) ++ error (EXIT_FAILURE, errno, _("\ + conversion stopped due to problem in writing the output")); +- return -1; ++ first += ret; + } ++} + +- errno = errno_save; ++static void ++flush_output (void) ++{ ++ bool temporary_file_not_needed ++ = current_input_file_index > last_overlapping_file_index; ++ if (output_fd < 0) ++ { ++ if (temporary_file_not_needed) ++ open_output_direct (); ++ else ++ { ++ /* Create an anonymous temporary file. */ ++ FILE *fp = tmpfile (); ++ if (fp == NULL) ++ output_error (); ++ output_fd = dup (fileno (fp)); ++ if (output_fd < 0) ++ output_error (); ++ fclose (fp); ++ output_using_temporary_file = true; ++ } ++ /* Either way, no longer use a memory-only staging buffer. */ ++ output_buffer_size = copy_buffer_size; ++ } ++ else if (output_using_temporary_file && temporary_file_not_needed) ++ { ++ /* The temporary file is no longer needed. Switch to direct ++ output, replacing output_fd. */ ++ int temp_fd = output_fd; ++ open_output_direct (); ++ ++ /* Copy over the data spooled to the temporary file. */ ++ if (lseek (temp_fd, 0, SEEK_SET) < 0) ++ output_error (); ++ while (true) ++ { ++ char buf[BUFSIZ]; ++ ssize_t ret = read (temp_fd, buf, sizeof (buf)); ++ if (ret < 0) ++ output_error (); ++ if (ret == 0) ++ break; ++ write_fully (output_fd, buf, buf + ret); ++ } ++ close (temp_fd); + +- return 0; ++ /* No longer using a temporary file from now on. */ ++ output_using_temporary_file = false; ++ output_buffer_size = copy_buffer_size; ++ } ++ ++ write_fully (output_fd, output_buffer_start, output_buffer_current); ++ output_buffer_current = output_buffer_start; ++ output_buffer_remaining = output_buffer_size; + } + ++static void ++close_output_file (int status) ++{ ++ /* Do not perform a flush if a temporary file or the in-memory ++ buffer is in use and there was an error. 
It would clobber the ++ overlapping input file. */ ++ if (status != EXIT_SUCCESS && !omit_invalid && ++ (output_using_temporary_file || output_fd < 0)) ++ return; ++ ++ /* The current_input_file_index variable is now larger than ++ last_overlapping_file_index, so the flush_output call switches ++ away from the temporary file. */ ++ flush_output (); ++ ++ if (output_fd == STDOUT_FILENO) ++ { ++ /* Close standard output in safe manner, to report certain ++ ENOSPC errors. */ ++ output_fd = dup (output_fd); ++ if (output_fd < 0) ++ output_error (); ++ } ++ if (close (output_fd) < 0) ++ output_error (); ++} + + static int +-process_block (iconv_t cd, char *addr, size_t len, FILE **output, +- const char *output_file) ++process_block (iconv_t cd, char *addr, size_t len) + { +-#define OUTBUF_SIZE 32768 + const char *start = addr; +- char outbuf[OUTBUF_SIZE]; +- char *outptr; +- size_t outlen; + size_t n; + int ret = 0; + + while (len > 0) + { +- outptr = outbuf; +- outlen = OUTBUF_SIZE; +- n = iconv (cd, &addr, &len, &outptr, &outlen); ++ n = iconv (cd, &addr, &len, ++ &output_buffer_current, &output_buffer_remaining); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { +@@ -438,39 +646,34 @@ process_block (iconv_t cd, char *addr, s + errno = E2BIG; + } + +- if (outptr != outbuf) +- { +- ret = write_output (outbuf, outptr, output, output_file); +- if (ret != 0) +- break; +- } +- + if (n != (size_t) -1) + { + /* All the input test is processed. For state-dependent + character sets we have to flush the state now. */ +- outptr = outbuf; +- outlen = OUTBUF_SIZE; +- n = iconv (cd, NULL, NULL, &outptr, &outlen); +- +- if (outptr != outbuf) ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ if (n == (size_t) -1 && errno == E2BIG) + { +- ret = write_output (outbuf, outptr, output, output_file); +- if (ret != 0) +- break; ++ /* Try again if the state flush exceeded the buffer space. 
*/ ++ flush_output (); ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); + } ++ bool errno_is_EILSEQ = errno == EILSEQ; + + if (n != (size_t) -1) + break; + +- if (omit_invalid && errno == EILSEQ) ++ if (omit_invalid && errno_is_EILSEQ) + { + ret = 1; + break; + } + } + +- if (errno != E2BIG) ++ if (errno == E2BIG) ++ flush_output (); ++ else + { + /* iconv() ran into a problem. */ + switch (errno) +@@ -501,7 +704,7 @@ incomplete character or shift sequence a + + + static int +-process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) ++process_fd (iconv_t cd, int fd) + { + /* we have a problem with reading from a descriptor since we must not + provide the iconv() function an incomplete character or shift +@@ -575,16 +778,16 @@ process_fd (iconv_t cd, int fd, FILE **o + } + + /* Now we have all the input in the buffer. Process it in one run. */ +- return process_block (cd, inbuf, actlen, output, output_file); ++ return process_block (cd, inbuf, actlen); + } + + + static int +-process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file) ++process_file (iconv_t cd, FILE *input) + { + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. */ +- return process_fd (cd, fileno (input), output, output_file); ++ return process_fd (cd, fileno (input)); + } + + +diff -Nrup a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +--- a/iconv/tst-iconv_prog-buffer.sh 2024-11-21 11:08:26.168051544 -0500 ++++ b/iconv/tst-iconv_prog-buffer.sh 2024-11-21 11:08:51.326219757 -0500 +@@ -17,6 +17,12 @@ + # License along with the GNU C Library; if not, see + # <https://www.gnu.org/licenses/>. 
+ ++# Arguments: ++# root of the build tree ($(objpfx-common)) ++# test command wrapper (for running on the board/with new ld.so) ++# extra flags to pass to iconv ++# number of times to double the input files in size (default: 0) ++ + exec 2>&1 + set -e + +@@ -26,7 +32,9 @@ codir=$1 + test_program_prefix="$2" + + # Use internal converters to avoid issues with module loading. +-iconv_args="-f ASCII -t UTF-8" ++iconv_args="-f ASCII -t UTF-8 $3" ++ ++file_size_doublings=${4-0} + + failure=false + +@@ -39,7 +47,19 @@ echo HH > "$tmp/hh" + echo XY > "$tmp/xy" + echo ZT > "$tmp/zt" + echo OUT > "$tmp/out-template" ++: > "$tmp/empty" + printf '\xff' > "$tmp/0xff" ++ ++# Double all files to produce larger buffers. ++for p in "$tmp"/* ; do ++ i=0 ++ while test $i -lt $file_size_doublings; do ++ cat "$p" "$p" > "$tmp/scratch" ++ mv "$tmp/scratch" "$p" ++ i=$(($i + 1)) ++ done ++done ++ + cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped" + + run_iconv () { +@@ -113,6 +133,38 @@ expect_files abc def + run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc" + expect_files abc def abc + ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" ++expect_files ggg abc def abc ++ ++run_iconv -o "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/hh" ++expect_files hh ggg abc def abc hh ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/out" "$tmp/ggg" ++expect_files ggg out-template out-template ggg ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/ggg" ++expect_files ggg out-template hh out-template ggg ++ ++# Empty output should truncate the output file if exists. ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/err" +@@ -156,6 +236,20 @@ expect_exit 1 run_iconv -c -o "$tmp/out" + ! 
test -s "$tmp/err" + expect_files abc xy zt def + ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def" ++expect_files xy zt abc xy zt def ++ ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" \ ++ "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def" ++expect_files 0xff-wrapped ++ ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -c -o "$tmp/out" \ ++ "$tmp/abc" "$tmp/out" "$tmp/def" "$tmp/out" ++expect_files abc xy zt def xy zt ++ + # If the file does not exist yet, it should not be created on error. + + rm "$tmp/out" diff --git a/glibc-RHEL-1915-7.patch b/glibc-RHEL-1915-7.patch new file mode 100644 index 0000000..d3b62f0 --- /dev/null +++ b/glibc-RHEL-1915-7.patch @@ -0,0 +1,41 @@ +commit 75819cdd29a193cc2db980878bec305905b22bbc +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Multiple - on command line should not fail (bug 32050) + + Usually, the second and subsequent - return EOF immediately + and do not contribute to the output, but this is not an error. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 3e02db7319..dd4bc3a59a 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -287,7 +287,8 @@ conversions from `%s' and to `%s' are not supported"), + ret = process_fd (cd, fd); + + /* Now close the file. */ +- close (fd); ++ if (fd != STDIN_FILENO) ++ close (fd); + + if (ret != 0) + { +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index 54ff871d32..a9c3729d94 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -265,6 +265,11 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" + expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" + ! test -e "$tmp/out" + ++# Listing standard input multiple times should not fail (bug 32050). 
++ ++run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc" ++expect_files xy abc zt ++ + if $failure ; then + exit 1 + fi diff --git a/glibc-RHEL-1915-8.patch b/glibc-RHEL-1915-8.patch new file mode 100644 index 0000000..823cc16 --- /dev/null +++ b/glibc-RHEL-1915-8.patch @@ -0,0 +1,323 @@ +commit fa1b0d5e9f6e0353e16339430770a7a8824c0468 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Input buffering for the iconv program (bug 6050) + + Do not read the entire input file into memory. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index dd4bc3a59a..a2f1d34e45 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -118,8 +118,9 @@ static size_t output_buffer_size = 1024 * 1024; + + /* Prototypes for the functions doing the actual work. */ + static void prepare_output_file (char **argv); +-static void close_output_file (int status); +-static int process_block (iconv_t cd, char *addr, size_t len); ++static void close_output_file (__gconv_t cd, int status); ++static int process_block (iconv_t cd, char **addr, size_t *len, ++ off64_t file_offset, bool *incomplete); + static int process_fd (iconv_t cd, int fd); + static int process_file (iconv_t cd, FILE *input); + static void print_known_names (void); +@@ -311,7 +312,7 @@ conversions from `%s' and to `%s' are not supported"), + status = EXIT_FAILURE; + + /* Close the output file now. */ +- close_output_file (status); ++ close_output_file (cd, status); + } + + return status; +@@ -599,7 +600,7 @@ flush_output (void) + } + + static void +-close_output_file (int status) ++close_output_file (__gconv_t cd, int status) + { + /* Do not perform a flush if a temporary file or the in-memory + buffer is in use and there was an error. 
It would clobber the +@@ -608,10 +609,28 @@ close_output_file (int status) + (output_using_temporary_file || output_fd < 0)) + return; + +- /* The current_input_file_index variable is now larger than +- last_overlapping_file_index, so the flush_output call switches ++ /* All the input text is processed. For state-dependent character ++ sets we have to flush the state now. ++ ++ The current_input_file_index variable is now larger than ++ last_overlapping_file_index, so the flush_output calls switch + away from the temporary file. */ ++ size_t n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ if (n == (size_t) -1 && errno == E2BIG) ++ { ++ /* Try again if the state flush exceeded the buffer space. */ ++ flush_output (); ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ } ++ int saved_errno = errno; + flush_output (); ++ if (n == (size_t) -1 && !omit_invalid) ++ { ++ errno = saved_errno; ++ output_error (); ++ } + + if (output_fd == STDOUT_FILENO) + { +@@ -625,51 +644,35 @@ close_output_file (int status) + output_error (); + } + ++/* CD is the iconv handle. Input processing starts at *ADDR, and ++ consumes upto *LEN bytes. *ADDR and *LEN are updated. FILE_OFFSET ++ is the file offset of the data initially at ADDR. *INCOMPLETE is ++ set to true if conversion stops due to an incomplete input ++ sequence. 
*/ + static int +-process_block (iconv_t cd, char *addr, size_t len) ++process_block (iconv_t cd, char **addr, size_t *len, off64_t file_offset, ++ bool *incomplete) + { +- const char *start = addr; ++ const char *start = *addr; + size_t n; + int ret = 0; + +- while (len > 0) ++ while (*len > 0) + { +- n = iconv (cd, &addr, &len, ++ n = iconv (cd, addr, len, + &output_buffer_current, &output_buffer_remaining); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { + ret = 1; +- if (len == 0) ++ if (*len == 0) + n = 0; + else + errno = E2BIG; + } + + if (n != (size_t) -1) +- { +- /* All the input test is processed. For state-dependent +- character sets we have to flush the state now. */ +- n = iconv (cd, NULL, NULL, +- &output_buffer_current, &output_buffer_remaining); +- if (n == (size_t) -1 && errno == E2BIG) +- { +- /* Try again if the state flush exceeded the buffer space. */ +- flush_output (); +- n = iconv (cd, NULL, NULL, +- &output_buffer_current, &output_buffer_remaining); +- } +- bool errno_is_EILSEQ = errno == EILSEQ; +- +- if (n != (size_t) -1) +- break; +- +- if (omit_invalid && errno_is_EILSEQ) +- { +- ret = 1; +- break; +- } +- } ++ break; + + if (errno == E2BIG) + flush_output (); +@@ -680,13 +683,12 @@ process_block (iconv_t cd, char *addr, size_t len) + { + case EILSEQ: + if (! 
omit_invalid) +- error (0, 0, _("illegal input sequence at position %ld"), +- (long int) (addr - start)); ++ error (0, 0, _("illegal input sequence at position %lld"), ++ (long long int) (file_offset + (*addr - start))); + break; + case EINVAL: +- error (0, 0, _("\ +-incomplete character or shift sequence at end of buffer")); +- break; ++ *incomplete = true; ++ return ret; + case EBADF: + error (0, 0, _("internal error (illegal descriptor)")); + break; +@@ -706,79 +708,49 @@ incomplete character or shift sequence at end of buffer")); + static int + process_fd (iconv_t cd, int fd) + { +- /* we have a problem with reading from a descriptor since we must not +- provide the iconv() function an incomplete character or shift +- sequence at the end of the buffer. Since we have to deal with +- arbitrary encodings we must read the whole text in a buffer and +- process it in one step. */ +- static char *inbuf = NULL; +- static size_t maxlen = 0; +- char *inptr = inbuf; +- size_t actlen = 0; +- +- while (actlen < maxlen) ++ char inbuf[BUFSIZ]; ++ char *inbuf_end = inbuf + sizeof (inbuf); ++ size_t inbuf_used = 0; ++ off64_t file_offset = 0; ++ int status = 0; ++ bool incomplete = false; ++ ++ while (true) + { +- ssize_t n = read (fd, inptr, maxlen - actlen); +- +- if (n == 0) +- /* No more text to read. */ +- break; +- +- if (n == -1) ++ char *p = inbuf + inbuf_used; ++ ssize_t read_ret = read (fd, p, inbuf_end - p); ++ if (read_ret == 0) ++ { ++ /* On EOF, check if the previous iconv invocation saw an ++ incomplete sequence. */ ++ if (incomplete) ++ { ++ error (0, 0, _("\ ++incomplete character or shift sequence at end of buffer")); ++ return 1; ++ } ++ return 0; ++ } ++ if (read_ret < 0) + { +- /* Error while reading. 
*/ + error (0, errno, _("error while reading the input")); + return -1; + } +- +- inptr += n; +- actlen += n; ++ inbuf_used += read_ret; ++ incomplete = false; ++ p = inbuf; ++ int ret = process_block (cd, &p, &inbuf_used, file_offset, &incomplete); ++ if (ret != 0) ++ { ++ status = ret; ++ if (ret < 0) ++ break; ++ } ++ /* The next loop iteration consumes the leftover bytes. */ ++ memmove (inbuf, p, inbuf_used); ++ file_offset += read_ret - inbuf_used; + } +- +- if (actlen == maxlen) +- while (1) +- { +- ssize_t n; +- char *new_inbuf; +- +- /* Increase the buffer. */ +- new_inbuf = (char *) realloc (inbuf, maxlen + 32768); +- if (new_inbuf == NULL) +- { +- error (0, errno, _("unable to allocate buffer for input")); +- return -1; +- } +- inbuf = new_inbuf; +- maxlen += 32768; +- inptr = inbuf + actlen; +- +- do +- { +- n = read (fd, inptr, maxlen - actlen); +- +- if (n == 0) +- /* No more text to read. */ +- break; +- +- if (n == -1) +- { +- /* Error while reading. */ +- error (0, errno, _("error while reading the input")); +- return -1; +- } +- +- inptr += n; +- actlen += n; +- } +- while (actlen < maxlen); +- +- if (n == 0) +- /* Break again so we leave both loops. */ +- break; +- } +- +- /* Now we have all the input in the buffer. Process it in one run. */ +- return process_block (cd, inbuf, actlen); ++ return status; + } + + +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index a9c3729d94..23098ac56a 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -50,6 +50,9 @@ echo OUT > "$tmp/out-template" + : > "$tmp/empty" + printf '\xff' > "$tmp/0xff" + ++# Length should be a prime number, to help with buffer alignment testing. ++printf '\xc3\xa4\xe2\x80\x94\xe2\x80\x94\xc3\xa4\n' > "$tmp/utf8-sequence" ++ + # Double all files to produce larger buffers. 
+ for p in "$tmp"/* ; do + i=0 +@@ -270,6 +273,34 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" + run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc" + expect_files xy abc zt + ++# NB: Extra iconv args are ignored after this point. Actual ++# multi-byte conversion does not work with tiny buffers. ++iconv_args="-f UTF-8 -t ASCII" ++ ++printf 'x\n\xc3' > "$tmp/incomplete" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/incomplete" ++check_out <&$logfd ++ printf "%s" "$prefix" > "$tmp/prefix" ++ cat "$tmp/prefix" "$tmp/utf8-sequence" > "$tmp/tmp" ++ iconv_args="-f UTF-8 -t UCS-4" ++ run_iconv -o "$tmp/out1" "$tmp/tmp" ++ iconv_args="-f UCS-4 -t UTF-8" ++ run_iconv -o "$tmp/out" "$tmp/out1" ++ expect_files prefix utf8-sequence ++ ++ prefix="$prefix@" ++ prefix_length=$(($prefix_length + 1)) ++done ++ + if $failure ; then + exit 1 + fi diff --git a/glibc-RHEL-1915-9.patch b/glibc-RHEL-1915-9.patch new file mode 100644 index 0000000..c1828f7 --- /dev/null +++ b/glibc-RHEL-1915-9.patch @@ -0,0 +1,37 @@ +commit 079ebf7624e7fd0ad7fe94a7176a2e132c996d86 +Author: Florian Weimer +Date: Tue Sep 24 10:41:35 2024 +0200 + + iconv: Use $(run-program-prefix) for running iconv (bug 32197) + + With --enable-hardcoded-path-in-tests, $(test-program-prefix) + does not redirect to the built glibc, but we need to run + iconv (the program) against the built glibc even with + --enable-hardcoded-path-in-tests, as it is using the ABI + path for the dynamic linker (as an installed program). + Use $(run-program-prefix) instead. + + Reviewed-by: H.J. 
Lu + +diff --git a/iconv/Makefile b/iconv/Makefile +index c9af0c4d44..de9d964ed3 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -153,14 +153,14 @@ $(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ + + $(objpfx)tst-iconv_prog-buffer.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' > $@; \ + $(evaluate-test) + $(objpfx)tst-iconv_prog-buffer-tiny.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' \ + '--buffer-size=1' > $@; \ + $(evaluate-test) + $(objpfx)tst-iconv_prog-buffer-large.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' '' '22' > $@; \ + $(evaluate-test) diff --git a/glibc.spec b/glibc.spec index d04a059..bc881da 100644 --- a/glibc.spec +++ b/glibc.spec @@ -157,7 +157,7 @@ end \ Summary: The GNU libc libraries Name: glibc Version: %{glibcversion} -Release: 142%{?dist} +Release: 143%{?dist} # In general, GPLv2+ is used by programs, LGPLv2+ is used for # libraries. 
@@ -943,6 +943,15 @@ Patch704: glibc-RHEL-46725-9.patch Patch705: glibc-RHEL-46725-10.patch Patch706: glibc-RHEL-46725-11.patch Patch707: glibc-RHEL-46725-12.patch +Patch708: glibc-RHEL-1915-1.patch +Patch709: glibc-RHEL-1915-2.patch +Patch710: glibc-RHEL-1915-3.patch +Patch711: glibc-RHEL-1915-4.patch +Patch712: glibc-RHEL-1915-5.patch +Patch713: glibc-RHEL-1915-6.patch +Patch714: glibc-RHEL-1915-7.patch +Patch715: glibc-RHEL-1915-8.patch +Patch716: glibc-RHEL-1915-9.patch ############################################################################## # Continued list of core "glibc" package information: @@ -3102,6 +3111,9 @@ update_gconv_modules_cache () %endif %changelog +* Wed Nov 20 2024 Patsy Griffin - 2.34-143 +- iconv: Support in-place conversions (RHEL-1915) + * Mon Nov 18 2024 Florian Weimer - 2.34-142 - Add printf function family tests (RHEL-46725)