From 071446cc9087e895470214bb7d2aa756fb4f740d Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Mon, 23 Dec 2024 14:59:23 +0100 Subject: [PATCH] Support in-place file conversion in the iconv tool (RHEL-71530) --- glibc-RHEL-71530-1.patch | 194 +++++++ glibc-RHEL-71530-10.patch | 108 ++++ glibc-RHEL-71530-2.patch | 218 ++++++++ glibc-RHEL-71530-3.patch | 91 ++++ glibc-RHEL-71530-4.patch | 62 +++ glibc-RHEL-71530-5.patch | 1081 +++++++++++++++++++++++++++++++++++++ glibc-RHEL-71530-6.patch | 713 ++++++++++++++++++++++++ glibc-RHEL-71530-7.patch | 41 ++ glibc-RHEL-71530-8.patch | 323 +++++++++++ glibc-RHEL-71530-9.patch | 37 ++ glibc.spec | 15 +- 11 files changed, 2882 insertions(+), 1 deletion(-) create mode 100644 glibc-RHEL-71530-1.patch create mode 100644 glibc-RHEL-71530-10.patch create mode 100644 glibc-RHEL-71530-2.patch create mode 100644 glibc-RHEL-71530-3.patch create mode 100644 glibc-RHEL-71530-4.patch create mode 100644 glibc-RHEL-71530-5.patch create mode 100644 glibc-RHEL-71530-6.patch create mode 100644 glibc-RHEL-71530-7.patch create mode 100644 glibc-RHEL-71530-8.patch create mode 100644 glibc-RHEL-71530-9.patch diff --git a/glibc-RHEL-71530-1.patch b/glibc-RHEL-71530-1.patch new file mode 100644 index 0000000..c4dc954 --- /dev/null +++ b/glibc-RHEL-71530-1.patch @@ -0,0 +1,194 @@ +commit 1b0a2062c8938c7333cd118d85d9976c4e7c92af +Author: Andreas Schwab +Date: Mon Jun 10 12:19:17 2024 +0200 + + iconv: Fix matching of multi-character transliterations (bug 31859) + + Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of + the input buffer is reached. Otherwise it is a non-match, and other + patterns should be tried. 
+ +diff --git a/iconv/Makefile b/iconv/Makefile +index 63afc853ff65967a..65b4a44ab86cf0dc 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -57,6 +57,10 @@ tests = \ + tst-iconv-opt \ + # tests + ++test-srcs := \ ++ tst-translit-mchar \ ++ # test-srcs ++ + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv + install-sbin = iconvconfig +@@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out + tests-special += $(objpfx)tst-iconv_prog.out ++tests-special += $(objpfx)tst-translit-mchar.out + endif + + # Make a copy of the file because gconv module names are constructed +@@ -92,6 +97,8 @@ $(objpfx)tst-gconv-init-failure.out: \ + $(objpfx)gconv-modules $(objpfx)tst-gconv-init-failure-mod.so + endif + ++generated-dirs += tst-translit ++ + include ../Rules + + ifeq ($(run-built-tests),yes) +@@ -126,3 +133,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog + $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ + '$(run-program-env)' > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ ++ $(objpfx)tst-translit-mchar \ ++ tst-translit-locale ++ $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \ ++ '$(run-program-env)' '$(run-program-prefix-after-env)' \ ++ $< > $@; \ ++ $(evaluate-test) +diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c +index 08b7a3f71dad5f1e..44f0fd849a3f82a3 100644 +--- a/iconv/gconv_trans.c ++++ b/iconv/gconv_trans.c +@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step, + + /* Nothing found, continue searching. */ + } +- else if (cnt > 0) ++ else if (cnt > 0 && winbuf + cnt == winbufend) + /* This means that the input buffer contents matches a prefix of + an entry. Since we cannot match it unless we get more input, + we will tell the caller about it. 
*/ +diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale +new file mode 100644 +index 0000000000000000..712b08628a64dc11 +--- /dev/null ++++ b/iconv/tst-translit-locale +@@ -0,0 +1,10 @@ ++# Test multi-character transliteration rule ++ ++LC_CTYPE ++copy "POSIX" ++ ++translit_start ++"ÄÄ" "AA" ++translit_end ++ ++END LC_CTYPE +diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c +new file mode 100644 +index 0000000000000000..7d432ea6679b60fa +--- /dev/null ++++ b/iconv/tst-translit-mchar.c +@@ -0,0 +1,48 @@ ++/* Test multi-character transliterations. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ iconv_t cd; ++ /* An input sequence that shares a common prefix with a transliteration ++ rule. */ ++ char input[] = "ÄÅ"; ++ char *inptr = input; ++ char outbuf[10]; ++ char *outptr = outbuf; ++ size_t inlen = sizeof (input), outlen = sizeof (outbuf); ++ size_t n; ++ ++ xsetlocale (LC_CTYPE, "tst-translit"); ++ ++ cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); ++ TEST_VERIFY (cd != (iconv_t) -1); ++ ++ /* This call used to loop infinitely. 
*/ ++ n = iconv (cd, &inptr, &inlen, &outptr, &outlen); ++ TEST_VERIFY (iconv_close (cd) == 0); ++ return n == 0; ++} ++ ++#include +diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh +new file mode 100644 +index 0000000000000000..ab7a7f872911cf21 +--- /dev/null ++++ b/iconv/tst-translit-mchar.sh +@@ -0,0 +1,51 @@ ++#!/bin/sh ++# Testing of multi-character transliterations ++# Copyright (C) 2024 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++set -e ++ ++common_objpfx=$1 ++run_program_prefix_before_env=$2 ++run_program_env=$3 ++run_program_prefix_after_env=$4 ++ ++# Generate data files. ++# The locale only defines the LC_CTYPE category, so we expect a failure ++# due to warnings. ++ret=0 ++${run_program_prefix_before_env} \ ++${run_program_env} \ ++I18NPATH=../localedata \ ++${run_program_prefix_after_env} ${common_objpfx}locale/localedef \ ++--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$? ++if [ $ret -gt 1 ]; then ++ echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)." ++ exit 1 ++fi ++ ++set -x ++ ++# Run the test. 
++${run_program_prefix_before_env} \ ++${run_program_env} \ ++LOCPATH=${common_objpfx}iconv \ ++${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar ++ ++# Local Variables: ++# mode:shell-script ++# End: diff --git a/glibc-RHEL-71530-10.patch b/glibc-RHEL-71530-10.patch new file mode 100644 index 0000000..32fcbcb --- /dev/null +++ b/glibc-RHEL-71530-10.patch @@ -0,0 +1,108 @@ +commit 9a4b0eaf726f5404c6683d5c7c5e86f61c3f3fbc +Author: Aurelien Jarno +Date: Sat Dec 14 11:44:11 2024 +0100 + + iconv: do not report error exit with transliteration [BZ #32448] + + Commit 6cbf845fcdc7 ("iconv: Preserve iconv -c error exit on invalid + inputs (bug 32046)") changed the error exit code to report an error when + an input character has been transliterated. This looks like a bug as the + moto in the iconv program is to report an error code in the same + condition as the iconv() function. + + This happens because the STANDARD_TO_LOOP_ERR_HANDLER macro sets a + default value for result and later updates it if the transliteration + succeed. With the changes, setting the default value also marks the + input as illegal. + + Fix that by setting up the default value of result only when the + transliteration is not used. This works because __gconv_transliterate() + calls __gconv_mark_illegal_input() to return an error. At the same time + also fix the typo outself -> ourselves. + + Fixes: 6cbf845fcdc7 + Resolves: BZ #32448 + Signed-off-by: Aurelien Jarno + +diff --git a/iconv/loop.c b/iconv/loop.c +index 199fb283266fb9ca..7149cec9b215a918 100644 +--- a/iconv/loop.c ++++ b/iconv/loop.c +@@ -141,12 +141,13 @@ + points. */ + #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __gconv_mark_illegal_input (step_data); \ +- \ + if (irreversible == NULL) \ +- /* This means we are in call from __gconv_transliterate. In this \ +- case we are not doing any error recovery outself. */ \ +- break; \ ++ { \ ++ /* This means we are in call from __gconv_transliterate. 
In this \ ++ case we are not doing any error recovery ourselves. */ \ ++ result = __gconv_mark_illegal_input (step_data); \ ++ break; \ ++ } \ + \ + /* If needed, flush any conversion state, so that __gconv_transliterate \ + starts with current shift state. */ \ +@@ -157,6 +158,8 @@ + result = __gconv_transliterate \ + (step, step_data, *inptrp, \ + &inptr, inend, &outptr, irreversible); \ ++ else \ ++ result = __gconv_mark_illegal_input (step_data); \ + \ + REINIT_PARAMS; \ + \ +diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh +index ca4dbd4a3a3318fe..f3a03ac062a70b05 100644 +--- a/iconv/tst-iconv_prog.sh ++++ b/iconv/tst-iconv_prog.sh +@@ -211,12 +211,13 @@ hangarray=( + "\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" + ) + +-# List of option combinations that *should* lead to an error +-errorarray=( ++# List of option combinations with their expected return code ++testarray=( + # Converting from/to invalid character sets should cause error +-"\x00\x00;;INVALID;INVALID" +-"\x00\x00;;INVALID;UTF-8" +-"\x00\x00;;UTF-8;INVALID" ++"\x00\x00;;INVALID;INVALID;1" ++"\x00\x00;;INVALID;UTF-8;1" ++"\x00\x00;;UTF-8;INVALID;1" ++"\xc3\xa9;;UTF-8;ASCII//TRANSLIT;0" + ) + + # Requires $twobyte input, $c flag, $from, and $to to be set; sets $ret +@@ -264,7 +265,7 @@ done + + check_errtest_result () + { +- if [ "$ret" -eq "1" ]; then # we errored out as expected ++ if [ "$ret" -eq "$eret" ]; then # we got the expected return code + result="PASS" + else + result="FAIL" +@@ -277,11 +278,12 @@ check_errtest_result () + fi + } + +-for errorcommand in "${errorarray[@]}"; do +- twobyte="$(echo "$errorcommand" | cut -d";" -f 1)" +- c="$(echo "$errorcommand" | cut -d";" -f 2)" +- from="$(echo "$errorcommand" | cut -d";" -f 3)" +- to="$(echo "$errorcommand" | cut -d";" -f 4)" ++for testcommand in "${testarray[@]}"; do ++ twobyte="$(echo "$testcommand" | cut -d";" -f 1)" ++ c="$(echo "$testcommand" | cut -d";" -f 2)" ++ from="$(echo "$testcommand" | cut -d";" -f 3)" ++ 
to="$(echo "$testcommand" | cut -d";" -f 4)" ++ eret="$(echo "$testcommand" | cut -d";" -f 5)" + execute_test + check_errtest_result + done diff --git a/glibc-RHEL-71530-2.patch b/glibc-RHEL-71530-2.patch new file mode 100644 index 0000000..4e4de98 --- /dev/null +++ b/glibc-RHEL-71530-2.patch @@ -0,0 +1,218 @@ +commit 422ed8ede312f786369e4850e47b8d32beaae4e4 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Base tests for buffer management + + Reviewed-by: DJ Delorie + +diff --git a/iconv/Makefile b/iconv/Makefile +index 65b4a44ab86cf0dc..b0fa550141db5a06 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -76,8 +76,11 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out +-tests-special += $(objpfx)tst-iconv_prog.out +-tests-special += $(objpfx)tst-translit-mchar.out ++tests-special += \ ++ $(objpfx)tst-iconv_prog-buffer.out \ ++ $(objpfx)tst-iconv_prog.out \ ++ $(objpfx)tst-translit-mchar.out \ ++ # tests-special + endif + + # Make a copy of the file because gconv module names are constructed +@@ -141,3 +144,8 @@ $(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ + '$(run-program-env)' '$(run-program-prefix-after-env)' \ + $< > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-iconv_prog-buffer.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ ++ $(evaluate-test) +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +new file mode 100644 +index 0000000000000000..a27107f02b95cdc7 +--- /dev/null ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -0,0 +1,177 @@ ++#!/bin/bash ++# Test for iconv (the program) buffer management. ++# Copyright (C) 2024 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. 
++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++exec 2>&1 ++set -e ++ ++exec {logfd}>&1 ++ ++codir=$1 ++test_program_prefix="$2" ++ ++# Use internal converters to avoid issues with module loading. ++iconv_args="-f ASCII -t UTF-8" ++ ++failure=false ++ ++tmp=`mktemp -d` ++trap 'rm -rf "$tmp"' 0 ++echo ABC > "$tmp/abc" ++echo DEF > "$tmp/def" ++echo GGG > "$tmp/ggg" ++echo HH > "$tmp/hh" ++echo XY > "$tmp/xy" ++echo ZT > "$tmp/zt" ++echo OUT > "$tmp/out-template" ++printf '\xff' > "$tmp/0xff" ++cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped" ++ ++run_iconv () { ++ local c=0 ++ if test "${FUNCNAME[2]}" = main; then ++ c=1 ++ fi ++ echo "${BASH_SOURCE[$c]}:${BASH_LINENO[$c]}: iconv $iconv_args $@" >&$logfd ++ $test_program_prefix $codir/iconv/iconv_prog $iconv_args "$@" ++} ++ ++check_out_expected () { ++ if ! cmp -s "$tmp/out" "$tmp/expected" ; then ++ echo "error: iconv output difference" >&$logfd ++ echo "*** expected ***" >&$logfd ++ cat "$tmp/expected" >&$logfd ++ echo "*** actual ***" >&$logfd ++ cat "$tmp/out" >&$logfd ++ failure=true ++ fi ++} ++ ++expect_files () { ++ local f ++ ! 
test -z "$1" ++ cp "$tmp/$1" "$tmp/expected" ++ shift ++ for f in "$@" ; do ++ cat "$tmp/$f" >> "$tmp/expected" ++ done ++ check_out_expected ++} ++ ++check_out () { ++ cat > "$tmp/expected" ++ check_out_expected ++} ++ ++expect_exit () { ++ local expected=$1 ++ shift ++ # Prevent failure for stopping the script. ++ if "$@" ; then ++ actual=$? ++ else ++ actual=$? ++ fi ++ if test "$actual" -ne "$expected"; then ++ echo "error: expected exit status $expected, not $actual" >&$logfd ++ exit 1 ++ fi ++} ++ ++ignore_failure () { ++ set +e ++ "$@" ++ status=$? ++ set -e ++} ++ ++# Concatentation test. ++run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/def" ++expect_files abc def ++ ++# Single-file in-place conversion. ++run_iconv -o "$tmp/out" "$tmp/out" ++expect_files abc def ++ ++# Multiple input files with in-place conversion. ++ ++run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc" ++expect_files abc def abc ++ ++# But not if we are writing to standard output. ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv >"$tmp/out" ++expect_files out-template ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv - >"$tmp/out" ++expect_files out-template ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv /dev/null >>"$tmp/out" ++expect_files out-template ++ ++# Conversion errors should avoid clobbering an existing file if ++# it is also an input file. ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out" ++expect_files 0xff ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out" ++expect_files 0xff ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out" ++expect_files 0xff ++ ++# If errors are ignored, the file should be overwritten. ++ ++cp "$tmp/out-template" "$tmp/out" ++expect_exit 1 \ ++ run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" 2>"$tmp/err" ++! test -s "$tmp/err" ++expect_files abc def ++ ++# FIXME: This is not correct, -c should not change the exit status. 
++cp "$tmp/out-template" "$tmp/out" ++run_iconv -c -o "$tmp/out" \ ++ "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err" ++! test -s "$tmp/err" ++expect_files abc xy zt def ++ ++# If the file does not exist yet, it should not be created on error. ++ ++rm "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/0xff" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/0xff" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" ++! test -e "$tmp/out" ++ ++if $failure ; then ++ exit 1 ++fi diff --git a/glibc-RHEL-71530-3.patch b/glibc-RHEL-71530-3.patch new file mode 100644 index 0000000..740ff15 --- /dev/null +++ b/glibc-RHEL-71530-3.patch @@ -0,0 +1,91 @@ +commit 0cb64617a6f691b611406427c8e24b7f04c4983f +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Do not use mmap in iconv (the program) (bug 17703) + + On current systems, very large files are needed before + mmap becomes beneficial. Simplify the implementation. + + This exposed that inptr was not initialized correctly in + process_fd. Handling multiple input files resulted in + EFAULT in read because a null pointer was passed. This + could be observed previously if an input file was not + mappable and was reported as bug 17703. 
+ + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index a765b1af21d2bde0..88a928557e7afb0a 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -31,9 +31,6 @@ + #include + #include + #include +-#ifdef _POSIX_MAPPED_FILES +-# include +-#endif + #include + #include + #include "iconv_prog.h" +@@ -253,10 +250,6 @@ conversions from `%s' and to `%s' are not supported"), + else + do + { +-#ifdef _POSIX_MAPPED_FILES +- struct stat64 st; +- char *addr; +-#endif + int fd, ret; + + if (verbose) +@@ -276,39 +269,6 @@ conversions from `%s' and to `%s' are not supported"), + } + } + +-#ifdef _POSIX_MAPPED_FILES +- /* We have possibilities for reading the input file. First try +- to mmap() it since this will provide the fastest solution. */ +- if (fstat64 (fd, &st) == 0 +- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, +- fd, 0)) != MAP_FAILED)) +- { +- /* Yes, we can use mmap(). The descriptor is not needed +- anymore. */ +- if (close (fd) != 0) +- error (EXIT_FAILURE, errno, +- _("error while closing input `%s'"), +- argv[remaining]); +- +- ret = process_block (cd, addr, st.st_size, &output, +- output_file); +- +- /* We don't need the input data anymore. */ +- munmap ((void *) addr, st.st_size); +- +- if (ret != 0) +- { +- status = EXIT_FAILURE; +- +- if (ret < 0) +- /* We cannot go on with producing output since it might +- lead to problem because the last output might leave +- the output stream in an undefined state. */ +- break; +- } +- } +- else +-#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + ret = process_fd (cd, fd, &output, output_file); +@@ -544,7 +504,7 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) + process it in one step. 
*/ + static char *inbuf = NULL; + static size_t maxlen = 0; +- char *inptr = NULL; ++ char *inptr = inbuf; + size_t actlen = 0; + + while (actlen < maxlen) diff --git a/glibc-RHEL-71530-4.patch b/glibc-RHEL-71530-4.patch new file mode 100644 index 0000000..9e0824f --- /dev/null +++ b/glibc-RHEL-71530-4.patch @@ -0,0 +1,62 @@ +commit 00ba299787c2ea9e5c4986301e2f4965dffbfded +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + manual: __is_last is no longer part of iconv internals + + The __is_last field was replaced with a bitmask in + commit 85830c4c4688b30d3d76111aa9a26745c7b141d6 in 2000, + and multiple bits are in use today. + + Reviewed-by: DJ Delorie + +diff --git a/manual/charset.texi b/manual/charset.texi +index 427db3bc804f6244..3aaa62d088570f76 100644 +--- a/manual/charset.texi ++++ b/manual/charset.texi +@@ -2422,11 +2422,11 @@ written into the buffer to signal how much output is available. If this + conversion step is not the last one, the element must not be modified. + The @code{__outbufend} element must not be modified. + +-@item int __is_last +-This element is nonzero if this conversion step is the last one. This +-information is necessary for the recursion. See the description of the +-conversion function internals below. This element must never be +-modified. ++@item int __flags ++This field is a set of flags. The @code{__GCONV_IS_LAST} bit is set if ++this conversion step is the last one. This information is necessary for ++the recursion. See the description of the conversion function internals ++below. This element must never be modified. + + @item int __invocation_counter + The conversion function can use this element to see how many calls of +@@ -2731,8 +2731,8 @@ Otherwise the function has to emit a byte sequence to bring the state + object into the initial state. Once this all happened the other + conversion modules in the chain of conversions have to get the same + chance. 
Whether another step follows can be determined from the +-@code{__is_last} element of the step data structure to which the first +-parameter points. ++@code{__GCONV_IS_LAST} flag in the @code{__flags} field of the step ++data structure to which the first parameter points. + + The more interesting mode is when actual text has to be converted. The + first step in this case is to convert as much text as possible from the +@@ -2866,7 +2866,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data, + + /* @r{Call the steps down the chain if there are any but only} + @r{if we successfully emitted the escape sequence.} */ +- if (status == __GCONV_OK && ! data->__is_last) ++ if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST)) + status = fct (next_step, next_data, NULL, NULL, + written, 1); + @} +@@ -2892,7 +2892,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data, + + /* @r{If this is the last step, leave the loop. There is} + @r{nothing we can do.} */ +- if (data->__is_last) ++ if (data->__flags & __GCONV_IS_LAST) + @{ + /* @r{Store information about how many bytes are} + @r{available.} */ diff --git a/glibc-RHEL-71530-5.patch b/glibc-RHEL-71530-5.patch new file mode 100644 index 0000000..827ef5d --- /dev/null +++ b/glibc-RHEL-71530-5.patch @@ -0,0 +1,1081 @@ +commit 6cbf845fcdc76131d0e674cee454fe738b69c69d +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Preserve iconv -c error exit on invalid inputs (bug 32046) + + In several converters, a __GCONV_ILLEGAL_INPUT result gets overwritten + with __GCONV_FULL_OUTPUT. As a result, iconv (the function) returns + E2BIG instead of EILSEQ. The iconv program does not see the original + EILSEQ failure, does not recognize the invalid input, and may + incorrectly exit successfully. + + To address this, a new __flags bit is used to indicate a sticky input + error state. 
All __GCONV_ILLEGAL_INPUT results are replaced with a + function call that sets this new __GCONV_ENCOUNTERED_ILLEGAL_INPUT and + returns __GCONV_ILLEGAL_INPUT. The iconv program checks for + __GCONV_ENCOUNTERED_ILLEGAL_INPUT and overrides the exit status. + + The converter changes introducing __gconv_mark_illegal_input are + mostly mechanical, except for the res variable initialization in + iconvdata/iso-2022-jp.c: this error gets overwritten with __GCONV_OK + and other results in the following code. If res == + __GCONV_ILLEGAL_INPUT afterwards, STANDARD_TO_LOOP_ERR_HANDLER below + will handle it. + + The __gconv_mark_illegal_input changes do not alter the errno value + set by the iconv function. This is simpler to implement than + reviewing each __GCONV_FULL_OUTPUT result and adjust it not to + override a previous __GCONV_ILLEGAL_INPUT result. Doing it that way + would also change some E2BIG errors in to EILSEQ errors, so it had to + be done conditionally (under a flag set by the iconv program only), to + avoid confusing buffer management in other applications. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/Makefile b/iconv/Makefile +index b0fa550141db5a06..29e4f280ec4cdcbf 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -61,6 +61,10 @@ test-srcs := \ + tst-translit-mchar \ + # test-srcs + ++tests-internal = \ ++ tst-iconv-sticky-input-error \ ++ # tests-internal ++ + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv + install-sbin = iconvconfig +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index 9fece3ea14d5bf24..cd452d94ccb18e92 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -331,4 +331,34 @@ extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c); + + __END_DECLS + ++/* Internal extensions for . */ ++ ++/* Internal flags for __flags in struct __gconv_step_data. Overlaps ++ with flags for __gconv_open. 
*/ ++enum ++ { ++ /* The conversion encountered an illegal input character at one ++ point. */ ++ __GCONV_ENCOUNTERED_ILLEGAL_INPUT = 1U << 30, ++ }; ++ ++/* Mark *STEP_DATA as having seen illegal input, and return ++ __GCONV_ILLEGAL_INPUT. */ ++static inline int ++__gconv_mark_illegal_input (struct __gconv_step_data *step_data) ++{ ++ step_data->__flags |= __GCONV_ENCOUNTERED_ILLEGAL_INPUT; ++ return __GCONV_ILLEGAL_INPUT; ++} ++ ++/* Returns true if any of the conversion steps encountered illegal input. */ ++static _Bool __attribute__ ((unused)) ++__gconv_has_illegal_input (__gconv_t cd) ++{ ++ for (size_t i = 0; i < cd->__nsteps; ++i) ++ if (cd->__data[i].__flags & __GCONV_ENCOUNTERED_ILLEGAL_INPUT) ++ return true; ++ return false; ++} ++ + #endif /* gconv_int.h */ +diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c +index 257be2f8ff6a8811..f22002cf813c5951 100644 +--- a/iconv/gconv_simple.c ++++ b/iconv/gconv_simple.c +@@ -207,7 +207,7 @@ ucs4_internal_loop (struct __gconv_step *step, + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + + if (flags & __GCONV_IGNORE_ERRORS) + { +@@ -218,7 +218,7 @@ ucs4_internal_loop (struct __gconv_step *step, + + *inptrp = inptr; + *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + + put32 (outptr, inval); +@@ -276,7 +276,7 @@ ucs4_internal_loop_single (struct __gconv_step *step, + if (!(flags & __GCONV_IGNORE_ERRORS)) + { + *inptrp -= cnt - (state->__count & 7); +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + } + else +@@ -453,7 +453,7 @@ ucs4le_internal_loop (struct __gconv_step *step, + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. 
*/ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + + if (flags & __GCONV_IGNORE_ERRORS) + { +@@ -464,7 +464,7 @@ ucs4le_internal_loop (struct __gconv_step *step, + + *inptrp = inptr; + *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + + put32 (outptr, inval); +@@ -523,7 +523,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + else + { +@@ -969,7 +969,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +@@ -1012,7 +1012,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + inptr += 2; \ +@@ -1061,7 +1061,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + inptr += 4; \ +diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c +index 44f0fd849a3f82a3..54c4f3a1008a4bcd 100644 +--- a/iconv/gconv_trans.c ++++ b/iconv/gconv_trans.c +@@ -232,6 +232,6 @@ __gconv_transliterate (struct __gconv_step *step, + } + + /* Haven't found a match. 
*/ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + libc_hidden_def (__gconv_transliterate) +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 88a928557e7afb0a..5fe4fe7a6c3776f4 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -291,6 +291,11 @@ conversions from `%s' and to `%s' are not supported"), + } + while (++remaining < argc); + ++ /* Ensure that iconv -c still exits with failure if iconv (the ++ function) has failed with E2BIG instead of EILSEQ. */ ++ if (__gconv_has_illegal_input (cd)) ++ status = EXIT_FAILURE; ++ + /* Close the output file now. */ + if (output != NULL && fclose (output)) + error (EXIT_FAILURE, errno, _("error while closing output file")); +diff --git a/iconv/loop.c b/iconv/loop.c +index 5340dafc709f5ab0..199fb283266fb9ca 100644 +--- a/iconv/loop.c ++++ b/iconv/loop.c +@@ -123,8 +123,7 @@ + `continue' must reach certain points. */ + #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +@@ -142,7 +141,7 @@ + points. */ + #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + \ + if (irreversible == NULL) \ + /* This means we are in call from __gconv_transliterate. In this \ +diff --git a/iconv/tst-iconv-sticky-input-error.c b/iconv/tst-iconv-sticky-input-error.c +new file mode 100644 +index 0000000000000000..34a245f185e614ab +--- /dev/null ++++ b/iconv/tst-iconv-sticky-input-error.c +@@ -0,0 +1,135 @@ ++/* Test __GCONV_ENCOUNTERED_ILLEGAL_INPUT, as used by iconv -c (bug 32046). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* FROM is the input character set, TO the output character set. If ++ IGNORE is true, the iconv descriptor is set up in the same way as ++ iconv -c would. INPUT is the input string, EXPECTED_OUTPUT the ++ output. OUTPUT_LIMIT is a byte count, specifying how many input ++ bytes are passed to the iconv function on each invocation. 
*/ ++static void ++one_direction (const char *from, const char *to, bool ignore, ++ const char *input, const char *expected_output, ++ size_t output_limit) ++{ ++ if (test_verbose) ++ { ++ char *quoted_input = support_quote_string (input); ++ char *quoted_output = support_quote_string (expected_output); ++ printf ("info: testing from=\"%s\" to=\"%s\" ignore=%d input=\"%s\"" ++ " expected_output=\"%s\" output_limit=%zu\n", ++ from, to, (int) ignore, quoted_input, ++ quoted_output, output_limit); ++ free (quoted_output); ++ free (quoted_input); ++ } ++ ++ __gconv_t cd; ++ if (ignore) ++ { ++ struct gconv_spec conv_spec; ++ TEST_VERIFY_EXIT (__gconv_create_spec (&conv_spec, from, to) ++ == &conv_spec); ++ conv_spec.ignore = true; ++ cd = (iconv_t) -1; ++ TEST_COMPARE (__gconv_open (&conv_spec, &cd, 0), __GCONV_OK); ++ __gconv_destroy_spec (&conv_spec); ++ } ++ else ++ cd = iconv_open (to, from); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ char *input_ptr = (char *) input; ++ size_t input_len = strlen (input); ++ char output_buf[20]; ++ char *output_ptr = output_buf; ++ size_t output_len; ++ do ++ { ++ output_len = array_end (output_buf) - output_ptr; ++ if (output_len > output_limit) ++ /* Limit the buffer size as requested by the caller. */ ++ output_len = output_limit; ++ TEST_VERIFY_EXIT (output_len > 0); ++ if (input_len == 0) ++ /* Trigger final flush. */ ++ input_ptr = NULL; ++ char *old_input_ptr = input_ptr; ++ size_t ret = iconv (cd, &input_ptr, &input_len, ++ &output_ptr, &output_len); ++ if (ret == (size_t) -1) ++ { ++ if (errno != EILSEQ) ++ TEST_COMPARE (errno, E2BIG); ++ } ++ ++ if (input_ptr == old_input_ptr) ++ /* Avoid endless loop if stuck on an invalid input character. */ ++ break; ++ } ++ while (input_ptr != NULL); ++ ++ /* Test the sticky illegal input bit. 
*/ ++ TEST_VERIFY (__gconv_has_illegal_input (cd)); ++ ++ TEST_COMPARE_BLOB (expected_output, strlen (expected_output), ++ output_buf, output_ptr - output_buf); ++ ++ TEST_COMPARE (iconv_close (cd), 0); ++} ++ ++static int ++do_test (void) ++{ ++ static const char charsets[][14] = ++ { ++ "ASCII", ++ "ASCII//IGNORE", ++ "UTF-8", ++ "UTF-8//IGNORE", ++ }; ++ ++ for (size_t from_idx = 0; from_idx < array_length (charsets); ++from_idx) ++ for (size_t to_idx = 0; to_idx < array_length (charsets); ++to_idx) ++ for (int do_ignore = 0; do_ignore < 2; ++do_ignore) ++ for (int limit = 1; limit < 5; ++limit) ++ for (int skip = 0; skip < 3; ++skip) ++ { ++ const char *expected_output; ++ if (do_ignore || strstr (charsets[to_idx], "//IGNORE") != NULL) ++ expected_output = "ABXY" + skip; ++ else ++ expected_output = "AB" + skip; ++ one_direction (charsets[from_idx], charsets[to_idx], do_ignore, ++ "AB\xffXY" + skip, expected_output, limit); ++ } ++ ++ return 0; ++} ++ ++#include +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index a27107f02b95cdc7..5ff99a02a30370cb 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -150,9 +150,8 @@ expect_exit 1 \ + ! test -s "$tmp/err" + expect_files abc def + +-# FIXME: This is not correct, -c should not change the exit status. + cp "$tmp/out-template" "$tmp/out" +-run_iconv -c -o "$tmp/out" \ ++expect_exit 1 run_iconv -c -o "$tmp/out" \ + "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err" + ! test -s "$tmp/err" + expect_files abc xy zt def +diff --git a/iconvdata/cp932.c b/iconvdata/cp932.c +index bf7297b114df638c..3def70a70bbc9a23 100644 +--- a/iconvdata/cp932.c ++++ b/iconvdata/cp932.c +@@ -4559,7 +4559,7 @@ static const char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4599,7 +4599,7 @@ static const char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4634,7 +4634,7 @@ static const char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +diff --git a/iconvdata/euc-jp-ms.c b/iconvdata/euc-jp-ms.c +index d03a0e05bb392651..96c5325e9d23493d 100644 +--- a/iconvdata/euc-jp-ms.c ++++ b/iconvdata/euc-jp-ms.c +@@ -4659,7 +4659,7 @@ static const unsigned char from_ucs4_extra[229][2] = + /* This is illegal. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4689,7 +4689,7 @@ static const unsigned char from_ucs4_extra[229][2] = + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4709,7 +4709,7 @@ static const unsigned char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + } \ +@@ -4820,7 +4820,7 @@ static const unsigned char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +diff --git a/iconvdata/gbbig5.c b/iconvdata/gbbig5.c +index f05deeeb1adac612..b15a2ef932f735f9 100644 +--- a/iconvdata/gbbig5.c ++++ b/iconvdata/gbbig5.c +@@ -4831,7 +4831,7 @@ const char __from_big5_to_gb2312 [13973][2] = + { \ + /* We do not have a mapping for this character. \ + If ignore errors, map it to 0xa1bc - big5 box character */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +@@ -4922,7 +4922,7 @@ const char __from_big5_to_gb2312 [13973][2] = + { \ + /* We do not have a mapping for this character. \ + If ignore errors, map it to 0xa1f5 - gb box character */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +index 4c37f30e9891c063..d6c8ce7f682aa64d 100644 +--- a/iconvdata/ibm1364.c ++++ b/iconvdata/ibm1364.c +@@ -179,7 +179,7 @@ enum + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -219,7 +219,7 @@ enum + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -300,7 +300,7 @@ enum + \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -332,7 +332,7 @@ enum + /* This is an illegal character. */ \ + if (! 
ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c +index d96e5f86b9d984da..ba7e23f8acd7384b 100644 +--- a/iconvdata/iso646.c ++++ b/iconvdata/iso646.c +@@ -313,7 +313,7 @@ gconv_end (struct __gconv_step *data) + ch = 0xf9; \ + else if (var == JP_OCR_B) \ + /* Illegal character. */ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == YU) \ + ch = 0x17e; \ + else if (var == HU) \ +@@ -387,7 +387,7 @@ gconv_end (struct __gconv_step *data) + ch = 0xec; \ + else if (var == JP_OCR_B) \ + /* Illegal character. */ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == YU) \ + ch = 0x10d; \ + else if (var == HU) \ +@@ -403,7 +403,7 @@ gconv_end (struct __gconv_step *data) + break; \ + case 0x80 ... 0xff: \ + /* Illegal character. 
*/ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -440,17 +440,17 @@ gconv_end (struct __gconv_step *data) + case 0x23: \ + if (var == GB || var == ES || var == IT || var == FR || var == FR1 \ + || var == NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x24: \ + if (var == CN || var == HU || var == CU || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x40: \ + if (var == CA || var == CA2 || var == DE || var == ES || var == ES2 \ + || var == IT || var == YU || var == HU || var == FR || var == FR1 \ + || var == PT || var == PT2 || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5b: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -458,7 +458,7 @@ gconv_end (struct __gconv_step *data) + || var == HU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == CU) \ + ch = 0x7d; \ + break; \ +@@ -468,7 +468,7 @@ gconv_end (struct __gconv_step *data) + || var == YU || var == KR || var == HU || var == CU || var == FR \ + || var == FR1 || var == NO || var == NO2 || var == PT \ + || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5d: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -476,17 +476,17 @@ gconv_end (struct __gconv_step *data) + || var == HU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5e: \ + if (var == CA || var == CA2 || var == ES2 || var == YU || var == CU \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x60: \ + if (var == CA || var == CA2 || var == IT || var == JP_OCR_B \ + || var == YU || var == HU || var == FR || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7b: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -494,14 +494,14 @@ gconv_end (struct __gconv_step *data) + || var == CU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7c: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ + || var == ES2 || var == IT || var == YU || var == HU || var == CU \ + || var == FR || var == FR1 || var == NO || var == PT \ + || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == NO2) \ + ch = 0x7e; \ + break; \ +@@ -510,7 +510,7 @@ gconv_end (struct __gconv_step *data) + || var == ES2 || var == IT || var == YU || var == HU || var == CU \ + || var == FR || var == FR1 || var == NO || var == NO2 \ + || var == PT || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7e: \ + if (var == GB || var == CA || var == CA2 || var == DE || var == ES2 \ +@@ -518,21 +518,21 @@ gconv_end (struct __gconv_step *data) + || var == YU || var == HU || var == CU || var == FR || var == FR1 \ + || var == NO || var == NO2 || var == PT || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xa3: \ + if (var != GB && var != ES && var != IT && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x23; \ + break; \ + case 0xa4: \ + if (var != HU && var != CU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x24; \ + break; \ + case 0xa5: \ +@@ -541,7 +541,7 @@ gconv_end (struct __gconv_step *data) + else if (var == JP || var == JP_OCR_B) \ + ch = 0x5c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa7: \ + if (var == DE || var == ES || var == IT || var == PT) \ +@@ -551,11 +551,11 @@ gconv_end (struct __gconv_step *data) + else if (var == NO2) \ + ch = 0x23; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa8: \ + if (var != ES2 && var != CU && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xb0: \ +@@ -566,7 +566,7 @@ gconv_end (struct __gconv_step *data) + else if (var == PT) \ + ch = 0x7e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xb4: \ + if (var == ES2 || var == CU) \ +@@ -574,11 +574,11 @@ gconv_end (struct __gconv_step *data) + else if (var == PT2) \ + ch = 0x40; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xb5: \ + if (var != FR) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ 
+ case 0xbf: \ +@@ -587,31 +587,31 @@ gconv_end (struct __gconv_step *data) + else if (var == ES2 || var == CU) \ + ch = 0x5e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xc1: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0xc3: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc4: \ + if (var != DE && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc5: \ + if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xc6: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc7: \ +@@ -620,7 +620,7 @@ gconv_end (struct __gconv_step *data) + else if (var == PT || var == PT2) \ + ch = 0x5c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xc9: \ + if (var == CA2) \ +@@ -630,26 +630,26 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x40; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xd1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xd5: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xd6: \ + if (var != DE && 
var != HU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xd8: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xdc: \ +@@ -658,11 +658,11 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x5e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xdf: \ + if (var != DE) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xe0: \ +@@ -671,36 +671,36 @@ gconv_end (struct __gconv_step *data) + else if (var == IT) \ + ch = 0x7b; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xe1: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0xe2: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xe3: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0xe4: \ + if (var != DE && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0xe5: \ + if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xe6: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + 
break; \ + case 0xe7: \ +@@ -711,11 +711,11 @@ gconv_end (struct __gconv_step *data) + else if (var == PT || var == PT2) \ + ch = 0x7c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xe8: \ + if (var != CA && var != CA2 && var != IT && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xe9: \ +@@ -726,51 +726,51 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x60; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xea: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xec: \ + if (var != IT) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xee: \ + if (var != CA) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5e; \ + break; \ + case 0xf1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf2: \ + if (var != IT) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf4: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0xf5: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xf6: \ + if (var != DE && var != HU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch 
= 0x7c; \ + break; \ + case 0xf8: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf9: \ +@@ -779,11 +779,11 @@ gconv_end (struct __gconv_step *data) + else if (var == IT) \ + ch = 0x60; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xfb: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xfc: \ +@@ -792,93 +792,93 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x7e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x160: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0x106: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0x107: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0x10c: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5e; \ + break; \ + case 0x10d: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x110: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0x111: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0x161: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0x17d: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0x17e: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0x2dd: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x2022: \ + if (var != ES2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0x203e: \ + if (var != GB && var != CN && var != JP && var != NO && var != SE) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x20a9: \ + if (var != KR) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0x2329: \ + if (var != JP_OCR_B) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0x232a: \ + if (var != JP_OCR_B) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + default: \ + if (__glibc_unlikely (ch > 0x7f)) \ + { \ + UNICODE_TAG_HANDLER (ch, 4); \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + } \ + break; \ + } \ +diff --git a/iconvdata/unicode.c b/iconvdata/unicode.c +index d69c9887a18af2a4..79bb9886e5e773f6 100644 +--- a/iconvdata/unicode.c ++++ b/iconvdata/unicode.c +@@ -163,7 +163,7 @@ gconv_end (struct __gconv_step *data) + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c +index a869353f20e9fd77..9d9fd2a2a3df3e13 100644 +--- a/iconvdata/utf-16.c ++++ b/iconvdata/utf-16.c +@@ -206,7 +206,7 @@ gconv_end (struct __gconv_step *data) + We must catch this. If we let surrogates pass through, \ + attackers could make a security hole exploit by \ + synthesizing any desired plane 1-16 character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +diff --git a/iconvdata/utf-32.c b/iconvdata/utf-32.c +index 5693b4897569d7f0..139eefb6d853135c 100644 +--- a/iconvdata/utf-32.c ++++ b/iconvdata/utf-32.c +@@ -207,7 +207,7 @@ gconv_end (struct __gconv_step *data) + We must catch this. If we let surrogates pass through, \ + attackers could make a security hole exploit by \ + generating "irregular UTF-32" sequences. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ diff --git a/glibc-RHEL-71530-6.patch b/glibc-RHEL-71530-6.patch new file mode 100644 index 0000000..98fd5eb --- /dev/null +++ b/glibc-RHEL-71530-6.patch @@ -0,0 +1,713 @@ +commit 8ef3cff9d1ceafe369f982d980678d749fb93bd2 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Support in-place conversions (bug 10460, bug 32033) + + Check if any of the input files overlaps with the output file, and use + a temporary file in this case, so that the input is not clobbered + before it is read. This fixes bug 10460. It allows iconv to be used + more easily as a functional replacement for GNU recode. + + The updated output buffer management truncates the output file + if there is no input, fixing bug 32033. 
+ + Reviewed-by: DJ Delorie + +diff --git a/iconv/Makefile b/iconv/Makefile +index 29e4f280ec4cdcbf..c9af0c4d44cae7fb 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -81,6 +81,8 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out + tests-special += \ ++ $(objpfx)tst-iconv_prog-buffer-large.out \ ++ $(objpfx)tst-iconv_prog-buffer-tiny.out \ + $(objpfx)tst-iconv_prog-buffer.out \ + $(objpfx)tst-iconv_prog.out \ + $(objpfx)tst-translit-mchar.out \ +@@ -153,3 +155,12 @@ $(objpfx)tst-iconv_prog-buffer.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog + $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ + $(evaluate-test) ++$(objpfx)tst-iconv_prog-buffer-tiny.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' \ ++ '--buffer-size=1' > $@; \ ++ $(evaluate-test) ++$(objpfx)tst-iconv_prog-buffer-large.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \ ++ $(evaluate-test) +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 5fe4fe7a6c3776f4..3e02db7319185d45 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -47,7 +47,11 @@ + static void print_version (FILE *stream, struct argp_state *state); + void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +-#define OPT_VERBOSE 1000 ++enum ++ { ++ OPT_VERBOSE = 1000, ++ OPT_BUFFER_SIZE, ++ }; + #define OPT_LIST 'l' + + /* Definitions of arguments for argp functions. */ +@@ -63,6 +67,10 @@ static const struct argp_option options[] = + { "output", 'o', N_("FILE"), 0, N_("output file") }, + { "silent", 's', NULL, 0, N_("suppress warnings") }, + { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, ++ /* This is an internal option intended for testing only. Very small ++ buffers do not work with all character sets. 
*/ ++ { "buffer-size", OPT_BUFFER_SIZE, N_("BYTE-COUNT"), OPTION_HIDDEN, ++ N_("size of in-memory scratch buffer") }, + { NULL, 0, NULL, 0, NULL } + }; + +@@ -100,13 +108,20 @@ static int list; + /* If nonzero omit invalid character from output. */ + int omit_invalid; + ++/* Current index in argv (after command line processing) with the ++ input file name. */ ++static int current_input_file_index; ++ ++/* Size of the temporary, in-memory buffer. Exceeding it needs ++ spooling to disk in a temporary file. Controlled by --buffer_size. */ ++static size_t output_buffer_size = 1024 * 1024; ++ + /* Prototypes for the functions doing the actual work. */ +-static int process_block (iconv_t cd, char *addr, size_t len, FILE **output, +- const char *output_file); +-static int process_fd (iconv_t cd, int fd, FILE **output, +- const char *output_file); +-static int process_file (iconv_t cd, FILE *input, FILE **output, +- const char *output_file); ++static void prepare_output_file (char **argv); ++static void close_output_file (int status); ++static int process_block (iconv_t cd, char *addr, size_t len); ++static int process_fd (iconv_t cd, int fd); ++static int process_file (iconv_t cd, FILE *input); + static void print_known_names (void); + + +@@ -114,7 +129,6 @@ int + main (int argc, char *argv[]) + { + int status = EXIT_SUCCESS; +- int remaining; + __gconv_t cd; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; +@@ -126,7 +140,7 @@ main (int argc, char *argv[]) + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ +- argp_parse (&argp, argc, argv, 0, &remaining, NULL); ++ argp_parse (&argp, argc, argv, 0, &current_input_file_index, NULL); + + /* List all coded character sets if wanted. */ + if (list) +@@ -161,7 +175,8 @@ main (int argc, char *argv[]) + if (from_charmap != NULL || to_charmap != NULL) + /* Construct the conversion table and do the conversion. 
*/ + status = charmap_conversion (from_code, from_charmap, to_code, to_charmap, +- argc, remaining, argv, output_file); ++ argc, current_input_file_index, argv, ++ output_file); + else + { + struct gconv_spec conv_spec; +@@ -235,16 +250,14 @@ conversions from `%s' and to `%s' are not supported"), + _("failed to start conversion processing")); + } + +- /* The output file. Will be opened when we are ready to produce +- output. */ +- FILE *output = NULL; ++ prepare_output_file (argv); + + /* Now process the remaining files. Write them to stdout or the file + specified with the `-o' parameter. If we have no file given as + the parameter process all from stdin. */ +- if (remaining == argc) ++ if (current_input_file_index == argc) + { +- if (process_file (cd, stdin, &output, output_file) != 0) ++ if (process_file (cd, stdin) != 0) + status = EXIT_FAILURE; + } + else +@@ -253,17 +266,17 @@ conversions from `%s' and to `%s' are not supported"), + int fd, ret; + + if (verbose) +- fprintf (stderr, "%s:\n", argv[remaining]); +- if (strcmp (argv[remaining], "-") == 0) +- fd = 0; ++ fprintf (stderr, "%s:\n", argv[current_input_file_index]); ++ if (strcmp (argv[current_input_file_index], "-") == 0) ++ fd = STDIN_FILENO; + else + { +- fd = open (argv[remaining], O_RDONLY); ++ fd = open (argv[current_input_file_index], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), +- argv[remaining]); ++ argv[current_input_file_index]); + status = EXIT_FAILURE; + continue; + } +@@ -271,7 +284,7 @@ conversions from `%s' and to `%s' are not supported"), + + { + /* Read the file in pieces. */ +- ret = process_fd (cd, fd, &output, output_file); ++ ret = process_fd (cd, fd); + + /* Now close the file. 
*/ + close (fd); +@@ -289,7 +302,7 @@ conversions from `%s' and to `%s' are not supported"), + } + } + } +- while (++remaining < argc); ++ while (++current_input_file_index < argc); + + /* Ensure that iconv -c still exits with failure if iconv (the + function) has failed with E2BIG instead of EILSEQ. */ +@@ -297,8 +310,7 @@ conversions from `%s' and to `%s' are not supported"), + status = EXIT_FAILURE; + + /* Close the output file now. */ +- if (output != NULL && fclose (output)) +- error (EXIT_FAILURE, errno, _("error while closing output file")); ++ close_output_file (status); + } + + return status; +@@ -328,6 +340,14 @@ parse_opt (int key, char *arg, struct argp_state *state) + /* Omit invalid characters from output. */ + omit_invalid = 1; + break; ++ case OPT_BUFFER_SIZE: ++ { ++ int i = atoi (arg); ++ if (i <= 0) ++ error (EXIT_FAILURE, 0, _("invalid buffer size: %s"), arg); ++ output_buffer_size = i; ++ } ++ break; + case OPT_VERBOSE: + verbose = 1; + break; +@@ -374,59 +394,247 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); + } + ++/* Command line index of the last input file that overlaps with the ++ output file. Zero means no temporary file is ever required. */ ++static int last_overlapping_file_index; + +-static int +-write_output (const char *outbuf, const char *outptr, FILE **output, +- const char *output_file) ++/* This is set to true if the output is written to a temporary file. */ ++static bool output_using_temporary_file; ++ ++/* This is the file descriptor that will be used by write_output. */ ++static int output_fd = -1; ++ ++/* Pointers at the start and end of the fixed-size output buffer. */ ++static char *output_buffer_start; ++ ++/* Current write position in the output buffer. */ ++static char *output_buffer_current; ++ ++/* Remaining bytes after output_buffer_current in the output buffer. 
*/ ++static size_t output_buffer_remaining; ++ ++ ++/* Reduce the buffer size when writing directly to the output file, to ++ reduce cache utilization. */ ++static size_t copy_buffer_size = BUFSIZ; ++ ++static void ++output_error (void) ++{ ++ error (EXIT_FAILURE, errno, _("cannot open output file")); ++} ++ ++static void ++input_error (const char *path) + { +- /* We have something to write out. */ +- int errno_save = errno; ++ error (0, errno, _("cannot open input file `%s'"), path); ++} + +- if (*output == NULL) ++/* Opens output_file for writing, truncating it. */ ++static void ++open_output_direct (void) ++{ ++ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_TRUNC, 0777); ++ if (output_fd < 0) ++ output_error (); ++} ++ ++static void ++prepare_output_file (char **argv) ++{ ++ if (copy_buffer_size > output_buffer_size) ++ copy_buffer_size = output_buffer_size; ++ ++ if (output_file == NULL || strcmp (output_file, "-") == 0) + { +- /* Determine output file. */ +- if (output_file != NULL && strcmp (output_file, "-") != 0) ++ /* No buffering is required when writing to standard output ++ because input overlap is expected to be solved externally. */ ++ output_fd = STDOUT_FILENO; ++ output_buffer_size = copy_buffer_size; ++ } ++ else ++ { ++ /* If iconv creates the output file, no overlap is possible. */ ++ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_EXCL, 0777); ++ if (output_fd >= 0) ++ output_buffer_size = copy_buffer_size; ++ else + { +- *output = fopen (output_file, "w"); +- if (*output == NULL) +- error (EXIT_FAILURE, errno, _("cannot open output file")); ++ /* Otherwise, check if any of the input files overlap with the ++ output file. 
*/ ++ struct statx st; ++ if (statx (AT_FDCWD, output_file, 0, STATX_INO | STATX_MODE, &st) ++ != 0) ++ output_error (); ++ uint32_t out_dev_minor = st.stx_dev_minor; ++ uint32_t out_dev_major = st.stx_dev_major; ++ uint64_t out_ino = st.stx_ino; ++ ++ int idx = current_input_file_index; ++ while (true) ++ { ++ /* Special case: no input files means standard input. */ ++ if (argv[idx] == NULL && idx != current_input_file_index) ++ break; ++ ++ int ret; ++ if (argv[idx] == NULL || strcmp (argv[idx], "-") == 0) ++ ret = statx (STDIN_FILENO, "", AT_EMPTY_PATH, STATX_INO, &st); ++ else ++ ret = statx (AT_FDCWD, argv[idx], 0, STATX_INO, &st); ++ if (ret != 0) ++ { ++ input_error (argv[idx]); ++ exit (EXIT_FAILURE); ++ } ++ if (out_dev_minor == st.stx_dev_minor ++ && out_dev_major == st.stx_dev_major ++ && out_ino == st.stx_ino) ++ { ++ if (argv[idx] == NULL) ++ /* Corner case: index of NULL would be larger than ++ idx while converting, triggering a switch away ++ from the temporary file. */ ++ last_overlapping_file_index = INT_MAX; ++ else ++ last_overlapping_file_index = idx; ++ } ++ ++ if (argv[idx] == NULL) ++ break; ++ ++idx; ++ } ++ ++ /* If there is no overlap, avoid using a temporary file. */ ++ if (last_overlapping_file_index == 0) ++ { ++ open_output_direct (); ++ output_buffer_size = copy_buffer_size; ++ } + } +- else +- *output = stdout; + } + +- if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf) +- || ferror (*output)) ++ output_buffer_start = malloc (output_buffer_size); ++ if (output_buffer_start == NULL) ++ output_error (); ++ output_buffer_current = output_buffer_start; ++ output_buffer_remaining = output_buffer_size; ++} ++ ++/* Write out the range [first, last), terminating the process on write ++ error. */ ++static void ++write_fully (int fd, const char *first, const char *last) ++{ ++ while (first < last) + { +- /* Error occurred while printing the result. 
*/ +- error (0, 0, _("\ ++ ssize_t ret = write (fd, first, last - first); ++ if (ret == 0) ++ { ++ errno = ENOSPC; ++ output_error (); ++ } ++ if (ret < 0) ++ error (EXIT_FAILURE, errno, _("\ + conversion stopped due to problem in writing the output")); +- return -1; ++ first += ret; ++ } ++} ++ ++static void ++flush_output (void) ++{ ++ bool temporary_file_not_needed ++ = current_input_file_index > last_overlapping_file_index; ++ if (output_fd < 0) ++ { ++ if (temporary_file_not_needed) ++ open_output_direct (); ++ else ++ { ++ /* Create an anonymous temporary file. */ ++ FILE *fp = tmpfile (); ++ if (fp == NULL) ++ output_error (); ++ output_fd = dup (fileno (fp)); ++ if (output_fd < 0) ++ output_error (); ++ fclose (fp); ++ output_using_temporary_file = true; ++ } ++ /* Either way, no longer use a memory-only staging buffer. */ ++ output_buffer_size = copy_buffer_size; + } ++ else if (output_using_temporary_file && temporary_file_not_needed) ++ { ++ /* The temporary file is no longer needed. Switch to direct ++ output, replacing output_fd. */ ++ int temp_fd = output_fd; ++ open_output_direct (); ++ ++ /* Copy over the data spooled to the temporary file. */ ++ if (lseek (temp_fd, 0, SEEK_SET) < 0) ++ output_error (); ++ while (true) ++ { ++ char buf[BUFSIZ]; ++ ssize_t ret = read (temp_fd, buf, sizeof (buf)); ++ if (ret < 0) ++ output_error (); ++ if (ret == 0) ++ break; ++ write_fully (output_fd, buf, buf + ret); ++ } ++ close (temp_fd); + +- errno = errno_save; ++ /* No longer using a temporary file from now on. */ ++ output_using_temporary_file = false; ++ output_buffer_size = copy_buffer_size; ++ } + +- return 0; ++ write_fully (output_fd, output_buffer_start, output_buffer_current); ++ output_buffer_current = output_buffer_start; ++ output_buffer_remaining = output_buffer_size; + } + ++static void ++close_output_file (int status) ++{ ++ /* Do not perform a flush if a temporary file or the in-memory ++ buffer is in use and there was an error. 
It would clobber the ++ overlapping input file. */ ++ if (status != EXIT_SUCCESS && !omit_invalid && ++ (output_using_temporary_file || output_fd < 0)) ++ return; ++ ++ /* The current_input_file_index variable is now larger than ++ last_overlapping_file_index, so the flush_output call switches ++ away from the temporary file. */ ++ flush_output (); ++ ++ if (output_fd == STDOUT_FILENO) ++ { ++ /* Close standard output in safe manner, to report certain ++ ENOSPC errors. */ ++ output_fd = dup (output_fd); ++ if (output_fd < 0) ++ output_error (); ++ } ++ if (close (output_fd) < 0) ++ output_error (); ++} + + static int +-process_block (iconv_t cd, char *addr, size_t len, FILE **output, +- const char *output_file) ++process_block (iconv_t cd, char *addr, size_t len) + { +-#define OUTBUF_SIZE 32768 + const char *start = addr; +- char outbuf[OUTBUF_SIZE]; +- char *outptr; +- size_t outlen; + size_t n; + int ret = 0; + + while (len > 0) + { +- outptr = outbuf; +- outlen = OUTBUF_SIZE; +- n = iconv (cd, &addr, &len, &outptr, &outlen); ++ n = iconv (cd, &addr, &len, ++ &output_buffer_current, &output_buffer_remaining); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { +@@ -437,39 +645,34 @@ process_block (iconv_t cd, char *addr, size_t len, FILE **output, + errno = E2BIG; + } + +- if (outptr != outbuf) +- { +- ret = write_output (outbuf, outptr, output, output_file); +- if (ret != 0) +- break; +- } +- + if (n != (size_t) -1) + { + /* All the input test is processed. For state-dependent + character sets we have to flush the state now. */ +- outptr = outbuf; +- outlen = OUTBUF_SIZE; +- n = iconv (cd, NULL, NULL, &outptr, &outlen); +- +- if (outptr != outbuf) ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ if (n == (size_t) -1 && errno == E2BIG) + { +- ret = write_output (outbuf, outptr, output, output_file); +- if (ret != 0) +- break; ++ /* Try again if the state flush exceeded the buffer space. 
*/ ++ flush_output (); ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); + } ++ bool errno_is_EILSEQ = errno == EILSEQ; + + if (n != (size_t) -1) + break; + +- if (omit_invalid && errno == EILSEQ) ++ if (omit_invalid && errno_is_EILSEQ) + { + ret = 1; + break; + } + } + +- if (errno != E2BIG) ++ if (errno == E2BIG) ++ flush_output (); ++ else + { + /* iconv() ran into a problem. */ + switch (errno) +@@ -500,7 +703,7 @@ incomplete character or shift sequence at end of buffer")); + + + static int +-process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) ++process_fd (iconv_t cd, int fd) + { + /* we have a problem with reading from a descriptor since we must not + provide the iconv() function an incomplete character or shift +@@ -574,16 +777,16 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) + } + + /* Now we have all the input in the buffer. Process it in one run. */ +- return process_block (cd, inbuf, actlen, output, output_file); ++ return process_block (cd, inbuf, actlen); + } + + + static int +-process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file) ++process_file (iconv_t cd, FILE *input) + { + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. */ +- return process_fd (cd, fileno (input), output, output_file); ++ return process_fd (cd, fileno (input)); + } + + +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index 5ff99a02a30370cb..54ff871d32929997 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -17,6 +17,12 @@ + # License along with the GNU C Library; if not, see + # . 
+ ++# Arguments: ++# root of the build tree ($(objpfx-common)) ++# test command wrapper (for running on the board/with new ld.so) ++# extra flags to pass to iconv ++# number of times to double the input files in size (default: 0) ++ + exec 2>&1 + set -e + +@@ -26,7 +32,9 @@ codir=$1 + test_program_prefix="$2" + + # Use internal converters to avoid issues with module loading. +-iconv_args="-f ASCII -t UTF-8" ++iconv_args="-f ASCII -t UTF-8 $3" ++ ++file_size_doublings=${4-0} + + failure=false + +@@ -39,7 +47,19 @@ echo HH > "$tmp/hh" + echo XY > "$tmp/xy" + echo ZT > "$tmp/zt" + echo OUT > "$tmp/out-template" ++: > "$tmp/empty" + printf '\xff' > "$tmp/0xff" ++ ++# Double all files to produce larger buffers. ++for p in "$tmp"/* ; do ++ i=0 ++ while test $i -lt $file_size_doublings; do ++ cat "$p" "$p" > "$tmp/scratch" ++ mv "$tmp/scratch" "$p" ++ i=$(($i + 1)) ++ done ++done ++ + cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped" + + run_iconv () { +@@ -113,6 +133,38 @@ expect_files abc def + run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc" + expect_files abc def abc + ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" ++expect_files ggg abc def abc ++ ++run_iconv -o "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/hh" ++expect_files hh ggg abc def abc hh ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/out" "$tmp/ggg" ++expect_files ggg out-template out-template ggg ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/ggg" ++expect_files ggg out-template hh out-template ggg ++ ++# Empty output should truncate the output file if exists. ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/err" +@@ -156,6 +236,20 @@ expect_exit 1 run_iconv -c -o "$tmp/out" \ + ! 
test -s "$tmp/err" + expect_files abc xy zt def + ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def" ++expect_files xy zt abc xy zt def ++ ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" \ ++ "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def" ++expect_files 0xff-wrapped ++ ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -c -o "$tmp/out" \ ++ "$tmp/abc" "$tmp/out" "$tmp/def" "$tmp/out" ++expect_files abc xy zt def xy zt ++ + # If the file does not exist yet, it should not be created on error. + + rm "$tmp/out" diff --git a/glibc-RHEL-71530-7.patch b/glibc-RHEL-71530-7.patch new file mode 100644 index 0000000..932e4fb --- /dev/null +++ b/glibc-RHEL-71530-7.patch @@ -0,0 +1,41 @@ +commit 75819cdd29a193cc2db980878bec305905b22bbc +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Multiple - on command line should not fail (bug 32050) + + Usually, the second and subsequent - return EOF immediately + and do not contribute to the output, but this is not an error. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 3e02db7319185d45..dd4bc3a59a20799a 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -287,7 +287,8 @@ conversions from `%s' and to `%s' are not supported"), + ret = process_fd (cd, fd); + + /* Now close the file. */ +- close (fd); ++ if (fd != STDIN_FILENO) ++ close (fd); + + if (ret != 0) + { +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index 54ff871d32929997..a9c3729d948b4679 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -265,6 +265,11 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" + expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" + ! test -e "$tmp/out" + ++# Listing standard input multiple times should not fail (bug 32050). 
++ ++run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc" ++expect_files xy abc zt ++ + if $failure ; then + exit 1 + fi diff --git a/glibc-RHEL-71530-8.patch b/glibc-RHEL-71530-8.patch new file mode 100644 index 0000000..c652caf --- /dev/null +++ b/glibc-RHEL-71530-8.patch @@ -0,0 +1,323 @@ +commit fa1b0d5e9f6e0353e16339430770a7a8824c0468 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Input buffering for the iconv program (bug 6050) + + Do not read the entire input file into memory. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index dd4bc3a59a20799a..a2f1d34e4579f80f 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -118,8 +118,9 @@ static size_t output_buffer_size = 1024 * 1024; + + /* Prototypes for the functions doing the actual work. */ + static void prepare_output_file (char **argv); +-static void close_output_file (int status); +-static int process_block (iconv_t cd, char *addr, size_t len); ++static void close_output_file (__gconv_t cd, int status); ++static int process_block (iconv_t cd, char **addr, size_t *len, ++ off64_t file_offset, bool *incomplete); + static int process_fd (iconv_t cd, int fd); + static int process_file (iconv_t cd, FILE *input); + static void print_known_names (void); +@@ -311,7 +312,7 @@ conversions from `%s' and to `%s' are not supported"), + status = EXIT_FAILURE; + + /* Close the output file now. */ +- close_output_file (status); ++ close_output_file (cd, status); + } + + return status; +@@ -599,7 +600,7 @@ flush_output (void) + } + + static void +-close_output_file (int status) ++close_output_file (__gconv_t cd, int status) + { + /* Do not perform a flush if a temporary file or the in-memory + buffer is in use and there was an error. 
It would clobber the +@@ -608,10 +609,28 @@ close_output_file (int status) + (output_using_temporary_file || output_fd < 0)) + return; + +- /* The current_input_file_index variable is now larger than +- last_overlapping_file_index, so the flush_output call switches ++ /* All the input text is processed. For state-dependent character ++ sets we have to flush the state now. ++ ++ The current_input_file_index variable is now larger than ++ last_overlapping_file_index, so the flush_output calls switch + away from the temporary file. */ ++ size_t n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ if (n == (size_t) -1 && errno == E2BIG) ++ { ++ /* Try again if the state flush exceeded the buffer space. */ ++ flush_output (); ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ } ++ int saved_errno = errno; + flush_output (); ++ if (n == (size_t) -1 && !omit_invalid) ++ { ++ errno = saved_errno; ++ output_error (); ++ } + + if (output_fd == STDOUT_FILENO) + { +@@ -625,51 +644,35 @@ close_output_file (int status) + output_error (); + } + ++/* CD is the iconv handle. Input processing starts at *ADDR, and ++ consumes upto *LEN bytes. *ADDR and *LEN are updated. FILE_OFFSET ++ is the file offset of the data initially at ADDR. *INCOMPLETE is ++ set to true if conversion stops due to an incomplete input ++ sequence. 
*/ + static int +-process_block (iconv_t cd, char *addr, size_t len) ++process_block (iconv_t cd, char **addr, size_t *len, off64_t file_offset, ++ bool *incomplete) + { +- const char *start = addr; ++ const char *start = *addr; + size_t n; + int ret = 0; + +- while (len > 0) ++ while (*len > 0) + { +- n = iconv (cd, &addr, &len, ++ n = iconv (cd, addr, len, + &output_buffer_current, &output_buffer_remaining); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { + ret = 1; +- if (len == 0) ++ if (*len == 0) + n = 0; + else + errno = E2BIG; + } + + if (n != (size_t) -1) +- { +- /* All the input test is processed. For state-dependent +- character sets we have to flush the state now. */ +- n = iconv (cd, NULL, NULL, +- &output_buffer_current, &output_buffer_remaining); +- if (n == (size_t) -1 && errno == E2BIG) +- { +- /* Try again if the state flush exceeded the buffer space. */ +- flush_output (); +- n = iconv (cd, NULL, NULL, +- &output_buffer_current, &output_buffer_remaining); +- } +- bool errno_is_EILSEQ = errno == EILSEQ; +- +- if (n != (size_t) -1) +- break; +- +- if (omit_invalid && errno_is_EILSEQ) +- { +- ret = 1; +- break; +- } +- } ++ break; + + if (errno == E2BIG) + flush_output (); +@@ -680,13 +683,12 @@ process_block (iconv_t cd, char *addr, size_t len) + { + case EILSEQ: + if (! 
omit_invalid) +- error (0, 0, _("illegal input sequence at position %ld"), +- (long int) (addr - start)); ++ error (0, 0, _("illegal input sequence at position %lld"), ++ (long long int) (file_offset + (*addr - start))); + break; + case EINVAL: +- error (0, 0, _("\ +-incomplete character or shift sequence at end of buffer")); +- break; ++ *incomplete = true; ++ return ret; + case EBADF: + error (0, 0, _("internal error (illegal descriptor)")); + break; +@@ -706,79 +708,49 @@ incomplete character or shift sequence at end of buffer")); + static int + process_fd (iconv_t cd, int fd) + { +- /* we have a problem with reading from a descriptor since we must not +- provide the iconv() function an incomplete character or shift +- sequence at the end of the buffer. Since we have to deal with +- arbitrary encodings we must read the whole text in a buffer and +- process it in one step. */ +- static char *inbuf = NULL; +- static size_t maxlen = 0; +- char *inptr = inbuf; +- size_t actlen = 0; +- +- while (actlen < maxlen) ++ char inbuf[BUFSIZ]; ++ char *inbuf_end = inbuf + sizeof (inbuf); ++ size_t inbuf_used = 0; ++ off64_t file_offset = 0; ++ int status = 0; ++ bool incomplete = false; ++ ++ while (true) + { +- ssize_t n = read (fd, inptr, maxlen - actlen); +- +- if (n == 0) +- /* No more text to read. */ +- break; +- +- if (n == -1) ++ char *p = inbuf + inbuf_used; ++ ssize_t read_ret = read (fd, p, inbuf_end - p); ++ if (read_ret == 0) ++ { ++ /* On EOF, check if the previous iconv invocation saw an ++ incomplete sequence. */ ++ if (incomplete) ++ { ++ error (0, 0, _("\ ++incomplete character or shift sequence at end of buffer")); ++ return 1; ++ } ++ return 0; ++ } ++ if (read_ret < 0) + { +- /* Error while reading. 
*/ + error (0, errno, _("error while reading the input")); + return -1; + } +- +- inptr += n; +- actlen += n; ++ inbuf_used += read_ret; ++ incomplete = false; ++ p = inbuf; ++ int ret = process_block (cd, &p, &inbuf_used, file_offset, &incomplete); ++ if (ret != 0) ++ { ++ status = ret; ++ if (ret < 0) ++ break; ++ } ++ /* The next loop iteration consumes the leftover bytes. */ ++ memmove (inbuf, p, inbuf_used); ++ file_offset += read_ret - inbuf_used; + } +- +- if (actlen == maxlen) +- while (1) +- { +- ssize_t n; +- char *new_inbuf; +- +- /* Increase the buffer. */ +- new_inbuf = (char *) realloc (inbuf, maxlen + 32768); +- if (new_inbuf == NULL) +- { +- error (0, errno, _("unable to allocate buffer for input")); +- return -1; +- } +- inbuf = new_inbuf; +- maxlen += 32768; +- inptr = inbuf + actlen; +- +- do +- { +- n = read (fd, inptr, maxlen - actlen); +- +- if (n == 0) +- /* No more text to read. */ +- break; +- +- if (n == -1) +- { +- /* Error while reading. */ +- error (0, errno, _("error while reading the input")); +- return -1; +- } +- +- inptr += n; +- actlen += n; +- } +- while (actlen < maxlen); +- +- if (n == 0) +- /* Break again so we leave both loops. */ +- break; +- } +- +- /* Now we have all the input in the buffer. Process it in one run. */ +- return process_block (cd, inbuf, actlen); ++ return status; + } + + +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index a9c3729d948b4679..23098ac56a344c48 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -50,6 +50,9 @@ echo OUT > "$tmp/out-template" + : > "$tmp/empty" + printf '\xff' > "$tmp/0xff" + ++# Length should be a prime number, to help with buffer alignment testing. ++printf '\xc3\xa4\xe2\x80\x94\xe2\x80\x94\xc3\xa4\n' > "$tmp/utf8-sequence" ++ + # Double all files to produce larger buffers. 
+ for p in "$tmp"/* ; do + i=0 +@@ -270,6 +273,34 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" + run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc" + expect_files xy abc zt + ++# NB: Extra iconv args are ignored after this point. Actual ++# multi-byte conversion does not work with tiny buffers. ++iconv_args="-f UTF-8 -t ASCII" ++ ++printf 'x\n\xc3' > "$tmp/incomplete" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/incomplete" ++check_out <&$logfd ++ printf "%s" "$prefix" > "$tmp/prefix" ++ cat "$tmp/prefix" "$tmp/utf8-sequence" > "$tmp/tmp" ++ iconv_args="-f UTF-8 -t UCS-4" ++ run_iconv -o "$tmp/out1" "$tmp/tmp" ++ iconv_args="-f UCS-4 -t UTF-8" ++ run_iconv -o "$tmp/out" "$tmp/out1" ++ expect_files prefix utf8-sequence ++ ++ prefix="$prefix@" ++ prefix_length=$(($prefix_length + 1)) ++done ++ + if $failure ; then + exit 1 + fi diff --git a/glibc-RHEL-71530-9.patch b/glibc-RHEL-71530-9.patch new file mode 100644 index 0000000..7d764c7 --- /dev/null +++ b/glibc-RHEL-71530-9.patch @@ -0,0 +1,37 @@ +commit 079ebf7624e7fd0ad7fe94a7176a2e132c996d86 +Author: Florian Weimer +Date: Tue Sep 24 10:41:35 2024 +0200 + + iconv: Use $(run-program-prefix) for running iconv (bug 32197) + + With --enable-hardcoded-path-in-tests, $(test-program-prefix) + does not redirect to the built glibc, but we need to run + iconv (the program) against the built glibc even with + --enable-hardcoded-path-in-tests, as it is using the ABI + path for the dynamic linker (as an installed program). + Use $(run-program-prefix) instead. + + Reviewed-by: H.J. 
Lu + +diff --git a/iconv/Makefile b/iconv/Makefile +index c9af0c4d44cae7fb..de9d964ed3c762bf 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -153,14 +153,14 @@ $(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ + + $(objpfx)tst-iconv_prog-buffer.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' > $@; \ + $(evaluate-test) + $(objpfx)tst-iconv_prog-buffer-tiny.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' \ + '--buffer-size=1' > $@; \ + $(evaluate-test) + $(objpfx)tst-iconv_prog-buffer-large.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' '' '22' > $@; \ + $(evaluate-test) diff --git a/glibc.spec b/glibc.spec index 3a5f146..66c4e1f 100644 --- a/glibc.spec +++ b/glibc.spec @@ -145,7 +145,7 @@ Version: %{glibcversion} # - It allows using the Release number without the %%dist tag in the dependency # generator to make the generated requires interchangeable between Rawhide # and ELN (.elnYY < .fcXX). 
-%global baserelease 32 +%global baserelease 33 Release: %{baserelease}%{?dist} # Licenses: @@ -485,6 +485,16 @@ Patch167: glibc-upstream-2.39-137.patch Patch168: glibc-RHEL-12867-2.patch Patch169: glibc-RHEL-12867-3.patch Patch170: glibc-RHEL-42410.patch +Patch171: glibc-RHEL-71530-1.patch +Patch172: glibc-RHEL-71530-2.patch +Patch173: glibc-RHEL-71530-3.patch +Patch174: glibc-RHEL-71530-4.patch +Patch175: glibc-RHEL-71530-5.patch +Patch176: glibc-RHEL-71530-6.patch +Patch177: glibc-RHEL-71530-7.patch +Patch178: glibc-RHEL-71530-8.patch +Patch179: glibc-RHEL-71530-9.patch +Patch180: glibc-RHEL-71530-10.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2480,6 +2490,9 @@ update_gconv_modules_cache () %endif %changelog +* Mon Dec 23 2024 Florian Weimer - 2.39-33 +- Support in-place file conversion in the iconv tool (RHEL-71530) + * Mon Dec 16 2024 Florian Weimer - 2.39-32 - Make getenv thread-safe in more cases (RHEL-42410)