diff --git a/glibc-RHEL-71530-1.patch b/glibc-RHEL-71530-1.patch new file mode 100644 index 0000000..c4dc954 --- /dev/null +++ b/glibc-RHEL-71530-1.patch @@ -0,0 +1,194 @@ +commit 1b0a2062c8938c7333cd118d85d9976c4e7c92af +Author: Andreas Schwab +Date: Mon Jun 10 12:19:17 2024 +0200 + + iconv: Fix matching of multi-character transliterations (bug 31859) + + Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of + the input buffer is reached. Otherwise it is a non-match, and other + patterns should be tried. + +diff --git a/iconv/Makefile b/iconv/Makefile +index 63afc853ff65967a..65b4a44ab86cf0dc 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -57,6 +57,10 @@ tests = \ + tst-iconv-opt \ + # tests + ++test-srcs := \ ++ tst-translit-mchar \ ++ # test-srcs ++ + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv + install-sbin = iconvconfig +@@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out + tests-special += $(objpfx)tst-iconv_prog.out ++tests-special += $(objpfx)tst-translit-mchar.out + endif + + # Make a copy of the file because gconv module names are constructed +@@ -92,6 +97,8 @@ $(objpfx)tst-gconv-init-failure.out: \ + $(objpfx)gconv-modules $(objpfx)tst-gconv-init-failure-mod.so + endif + ++generated-dirs += tst-translit ++ + include ../Rules + + ifeq ($(run-built-tests),yes) +@@ -126,3 +133,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog + $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ + '$(run-program-env)' > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ ++ $(objpfx)tst-translit-mchar \ ++ tst-translit-locale ++ $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \ ++ '$(run-program-env)' '$(run-program-prefix-after-env)' \ ++ $< > $@; \ ++ $(evaluate-test) +diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c 
+index 08b7a3f71dad5f1e..44f0fd849a3f82a3 100644 +--- a/iconv/gconv_trans.c ++++ b/iconv/gconv_trans.c +@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step, + + /* Nothing found, continue searching. */ + } +- else if (cnt > 0) ++ else if (cnt > 0 && winbuf + cnt == winbufend) + /* This means that the input buffer contents matches a prefix of + an entry. Since we cannot match it unless we get more input, + we will tell the caller about it. */ +diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale +new file mode 100644 +index 0000000000000000..712b08628a64dc11 +--- /dev/null ++++ b/iconv/tst-translit-locale +@@ -0,0 +1,10 @@ ++# Test multi-character transliteration rule ++ ++LC_CTYPE ++copy "POSIX" ++ ++translit_start ++"ÄÄ" "AA" ++translit_end ++ ++END LC_CTYPE +diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c +new file mode 100644 +index 0000000000000000..7d432ea6679b60fa +--- /dev/null ++++ b/iconv/tst-translit-mchar.c +@@ -0,0 +1,48 @@ ++/* Test multi-character transliterations. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ iconv_t cd; ++ /* An input sequence that shares a common prefix with a transliteration ++ rule. 
*/ ++ char input[] = "ÄÅ"; ++ char *inptr = input; ++ char outbuf[10]; ++ char *outptr = outbuf; ++ size_t inlen = sizeof (input), outlen = sizeof (outbuf); ++ size_t n; ++ ++ xsetlocale (LC_CTYPE, "tst-translit"); ++ ++ cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); ++ TEST_VERIFY (cd != (iconv_t) -1); ++ ++ /* This call used to loop infinitely. */ ++ n = iconv (cd, &inptr, &inlen, &outptr, &outlen); ++ TEST_VERIFY (iconv_close (cd) == 0); ++ return n == 0; ++} ++ ++#include +diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh +new file mode 100644 +index 0000000000000000..ab7a7f872911cf21 +--- /dev/null ++++ b/iconv/tst-translit-mchar.sh +@@ -0,0 +1,51 @@ ++#!/bin/sh ++# Testing of multi-character transliterations ++# Copyright (C) 2024 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++set -e ++ ++common_objpfx=$1 ++run_program_prefix_before_env=$2 ++run_program_env=$3 ++run_program_prefix_after_env=$4 ++ ++# Generate data files. ++# The locale only defines the LC_CTYPE category, so we expect a failure ++# due to warnings. 
++ret=0 ++${run_program_prefix_before_env} \ ++${run_program_env} \ ++I18NPATH=../localedata \ ++${run_program_prefix_after_env} ${common_objpfx}locale/localedef \ ++--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$? ++if [ $ret -gt 1 ]; then ++ echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)." ++ exit 1 ++fi ++ ++set -x ++ ++# Run the test. ++${run_program_prefix_before_env} \ ++${run_program_env} \ ++LOCPATH=${common_objpfx}iconv \ ++${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar ++ ++# Local Variables: ++# mode:shell-script ++# End: diff --git a/glibc-RHEL-71530-10.patch b/glibc-RHEL-71530-10.patch new file mode 100644 index 0000000..32fcbcb --- /dev/null +++ b/glibc-RHEL-71530-10.patch @@ -0,0 +1,108 @@ +commit 9a4b0eaf726f5404c6683d5c7c5e86f61c3f3fbc +Author: Aurelien Jarno +Date: Sat Dec 14 11:44:11 2024 +0100 + + iconv: do not report error exit with transliteration [BZ #32448] + + Commit 6cbf845fcdc7 ("iconv: Preserve iconv -c error exit on invalid + inputs (bug 32046)") changed the error exit code to report an error when + an input character has been transliterated. This looks like a bug as the + motto in the iconv program is to report an error code in the same + condition as the iconv() function. + + This happens because the STANDARD_TO_LOOP_ERR_HANDLER macro sets a + default value for result and later updates it if the transliteration + succeeds. With the changes, setting the default value also marks the + input as illegal. + + Fix that by setting up the default value of result only when the + transliteration is not used. This works because __gconv_transliterate() + calls __gconv_mark_illegal_input() to return an error. At the same time + also fix the typo outself -> ourselves. 
+ + Fixes: 6cbf845fcdc7 + Resolves: BZ #32448 + Signed-off-by: Aurelien Jarno + +diff --git a/iconv/loop.c b/iconv/loop.c +index 199fb283266fb9ca..7149cec9b215a918 100644 +--- a/iconv/loop.c ++++ b/iconv/loop.c +@@ -141,12 +141,13 @@ + points. */ + #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __gconv_mark_illegal_input (step_data); \ +- \ + if (irreversible == NULL) \ +- /* This means we are in call from __gconv_transliterate. In this \ +- case we are not doing any error recovery outself. */ \ +- break; \ ++ { \ ++ /* This means we are in call from __gconv_transliterate. In this \ ++ case we are not doing any error recovery ourselves. */ \ ++ result = __gconv_mark_illegal_input (step_data); \ ++ break; \ ++ } \ + \ + /* If needed, flush any conversion state, so that __gconv_transliterate \ + starts with current shift state. */ \ +@@ -157,6 +158,8 @@ + result = __gconv_transliterate \ + (step, step_data, *inptrp, \ + &inptr, inend, &outptr, irreversible); \ ++ else \ ++ result = __gconv_mark_illegal_input (step_data); \ + \ + REINIT_PARAMS; \ + \ +diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh +index ca4dbd4a3a3318fe..f3a03ac062a70b05 100644 +--- a/iconv/tst-iconv_prog.sh ++++ b/iconv/tst-iconv_prog.sh +@@ -211,12 +211,13 @@ hangarray=( + "\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" + ) + +-# List of option combinations that *should* lead to an error +-errorarray=( ++# List of option combinations with their expected return code ++testarray=( + # Converting from/to invalid character sets should cause error +-"\x00\x00;;INVALID;INVALID" +-"\x00\x00;;INVALID;UTF-8" +-"\x00\x00;;UTF-8;INVALID" ++"\x00\x00;;INVALID;INVALID;1" ++"\x00\x00;;INVALID;UTF-8;1" ++"\x00\x00;;UTF-8;INVALID;1" ++"\xc3\xa9;;UTF-8;ASCII//TRANSLIT;0" + ) + + # Requires $twobyte input, $c flag, $from, and $to to be set; sets $ret +@@ -264,7 +265,7 @@ done + + check_errtest_result () + { +- if [ "$ret" -eq "1" ]; then # we errored out as expected ++ if [ "$ret" 
-eq "$eret" ]; then # we got the expected return code + result="PASS" + else + result="FAIL" +@@ -277,11 +278,12 @@ check_errtest_result () + fi + } + +-for errorcommand in "${errorarray[@]}"; do +- twobyte="$(echo "$errorcommand" | cut -d";" -f 1)" +- c="$(echo "$errorcommand" | cut -d";" -f 2)" +- from="$(echo "$errorcommand" | cut -d";" -f 3)" +- to="$(echo "$errorcommand" | cut -d";" -f 4)" ++for testcommand in "${testarray[@]}"; do ++ twobyte="$(echo "$testcommand" | cut -d";" -f 1)" ++ c="$(echo "$testcommand" | cut -d";" -f 2)" ++ from="$(echo "$testcommand" | cut -d";" -f 3)" ++ to="$(echo "$testcommand" | cut -d";" -f 4)" ++ eret="$(echo "$testcommand" | cut -d";" -f 5)" + execute_test + check_errtest_result + done diff --git a/glibc-RHEL-71530-2.patch b/glibc-RHEL-71530-2.patch new file mode 100644 index 0000000..4e4de98 --- /dev/null +++ b/glibc-RHEL-71530-2.patch @@ -0,0 +1,218 @@ +commit 422ed8ede312f786369e4850e47b8d32beaae4e4 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Base tests for buffer management + + Reviewed-by: DJ Delorie + +diff --git a/iconv/Makefile b/iconv/Makefile +index 65b4a44ab86cf0dc..b0fa550141db5a06 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -76,8 +76,11 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out +-tests-special += $(objpfx)tst-iconv_prog.out +-tests-special += $(objpfx)tst-translit-mchar.out ++tests-special += \ ++ $(objpfx)tst-iconv_prog-buffer.out \ ++ $(objpfx)tst-iconv_prog.out \ ++ $(objpfx)tst-translit-mchar.out \ ++ # tests-special + endif + + # Make a copy of the file because gconv module names are constructed +@@ -141,3 +144,8 @@ $(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ + '$(run-program-env)' '$(run-program-prefix-after-env)' \ + $< > $@; \ + $(evaluate-test) ++ ++$(objpfx)tst-iconv_prog-buffer.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< 
$(common-objdir) '$(test-program-prefix)' > $@; \ ++ $(evaluate-test) +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +new file mode 100644 +index 0000000000000000..a27107f02b95cdc7 +--- /dev/null ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -0,0 +1,177 @@ ++#!/bin/bash ++# Test for iconv (the program) buffer management. ++# Copyright (C) 2024 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++ ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++exec 2>&1 ++set -e ++ ++exec {logfd}>&1 ++ ++codir=$1 ++test_program_prefix="$2" ++ ++# Use internal converters to avoid issues with module loading. ++iconv_args="-f ASCII -t UTF-8" ++ ++failure=false ++ ++tmp=`mktemp -d` ++trap 'rm -rf "$tmp"' 0 ++echo ABC > "$tmp/abc" ++echo DEF > "$tmp/def" ++echo GGG > "$tmp/ggg" ++echo HH > "$tmp/hh" ++echo XY > "$tmp/xy" ++echo ZT > "$tmp/zt" ++echo OUT > "$tmp/out-template" ++printf '\xff' > "$tmp/0xff" ++cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped" ++ ++run_iconv () { ++ local c=0 ++ if test "${FUNCNAME[2]}" = main; then ++ c=1 ++ fi ++ echo "${BASH_SOURCE[$c]}:${BASH_LINENO[$c]}: iconv $iconv_args $@" >&$logfd ++ $test_program_prefix $codir/iconv/iconv_prog $iconv_args "$@" ++} ++ ++check_out_expected () { ++ if ! 
cmp -s "$tmp/out" "$tmp/expected" ; then ++ echo "error: iconv output difference" >&$logfd ++ echo "*** expected ***" >&$logfd ++ cat "$tmp/expected" >&$logfd ++ echo "*** actual ***" >&$logfd ++ cat "$tmp/out" >&$logfd ++ failure=true ++ fi ++} ++ ++expect_files () { ++ local f ++ ! test -z "$1" ++ cp "$tmp/$1" "$tmp/expected" ++ shift ++ for f in "$@" ; do ++ cat "$tmp/$f" >> "$tmp/expected" ++ done ++ check_out_expected ++} ++ ++check_out () { ++ cat > "$tmp/expected" ++ check_out_expected ++} ++ ++expect_exit () { ++ local expected=$1 ++ shift ++ # Prevent failure for stopping the script. ++ if "$@" ; then ++ actual=$? ++ else ++ actual=$? ++ fi ++ if test "$actual" -ne "$expected"; then ++ echo "error: expected exit status $expected, not $actual" >&$logfd ++ exit 1 ++ fi ++} ++ ++ignore_failure () { ++ set +e ++ "$@" ++ status=$? ++ set -e ++} ++ ++# Concatentation test. ++run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/def" ++expect_files abc def ++ ++# Single-file in-place conversion. ++run_iconv -o "$tmp/out" "$tmp/out" ++expect_files abc def ++ ++# Multiple input files with in-place conversion. ++ ++run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc" ++expect_files abc def abc ++ ++# But not if we are writing to standard output. ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv >"$tmp/out" ++expect_files out-template ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv - >"$tmp/out" ++expect_files out-template ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv /dev/null >>"$tmp/out" ++expect_files out-template ++ ++# Conversion errors should avoid clobbering an existing file if ++# it is also an input file. 
++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out" ++expect_files 0xff ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out" ++expect_files 0xff ++ ++cp "$tmp/0xff" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out" ++expect_files 0xff ++ ++# If errors are ignored, the file should be overwritten. ++ ++cp "$tmp/out-template" "$tmp/out" ++expect_exit 1 \ ++ run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" 2>"$tmp/err" ++! test -s "$tmp/err" ++expect_files abc def ++ ++# FIXME: This is not correct, -c should not change the exit status. ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -c -o "$tmp/out" \ ++ "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err" ++! test -s "$tmp/err" ++expect_files abc xy zt def ++ ++# If the file does not exist yet, it should not be created on error. ++ ++rm "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/0xff" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/0xff" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" ++! test -e "$tmp/out" ++ ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" ++! test -e "$tmp/out" ++ ++if $failure ; then ++ exit 1 ++fi diff --git a/glibc-RHEL-71530-3.patch b/glibc-RHEL-71530-3.patch new file mode 100644 index 0000000..740ff15 --- /dev/null +++ b/glibc-RHEL-71530-3.patch @@ -0,0 +1,91 @@ +commit 0cb64617a6f691b611406427c8e24b7f04c4983f +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Do not use mmap in iconv (the program) (bug 17703) + + On current systems, very large files are needed before + mmap becomes beneficial. Simplify the implementation. + + This exposed that inptr was not initialized correctly in + process_fd. Handling multiple input files resulted in + EFAULT in read because a null pointer was passed. 
This + could be observed previously if an input file was not + mappable and was reported as bug 17703. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index a765b1af21d2bde0..88a928557e7afb0a 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -31,9 +31,6 @@ + #include + #include + #include +-#ifdef _POSIX_MAPPED_FILES +-# include +-#endif + #include + #include + #include "iconv_prog.h" +@@ -253,10 +250,6 @@ conversions from `%s' and to `%s' are not supported"), + else + do + { +-#ifdef _POSIX_MAPPED_FILES +- struct stat64 st; +- char *addr; +-#endif + int fd, ret; + + if (verbose) +@@ -276,39 +269,6 @@ conversions from `%s' and to `%s' are not supported"), + } + } + +-#ifdef _POSIX_MAPPED_FILES +- /* We have possibilities for reading the input file. First try +- to mmap() it since this will provide the fastest solution. */ +- if (fstat64 (fd, &st) == 0 +- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, +- fd, 0)) != MAP_FAILED)) +- { +- /* Yes, we can use mmap(). The descriptor is not needed +- anymore. */ +- if (close (fd) != 0) +- error (EXIT_FAILURE, errno, +- _("error while closing input `%s'"), +- argv[remaining]); +- +- ret = process_block (cd, addr, st.st_size, &output, +- output_file); +- +- /* We don't need the input data anymore. */ +- munmap ((void *) addr, st.st_size); +- +- if (ret != 0) +- { +- status = EXIT_FAILURE; +- +- if (ret < 0) +- /* We cannot go on with producing output since it might +- lead to problem because the last output might leave +- the output stream in an undefined state. */ +- break; +- } +- } +- else +-#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + ret = process_fd (cd, fd, &output, output_file); +@@ -544,7 +504,7 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) + process it in one step. 
*/ + static char *inbuf = NULL; + static size_t maxlen = 0; +- char *inptr = NULL; ++ char *inptr = inbuf; + size_t actlen = 0; + + while (actlen < maxlen) diff --git a/glibc-RHEL-71530-4.patch b/glibc-RHEL-71530-4.patch new file mode 100644 index 0000000..9e0824f --- /dev/null +++ b/glibc-RHEL-71530-4.patch @@ -0,0 +1,62 @@ +commit 00ba299787c2ea9e5c4986301e2f4965dffbfded +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + manual: __is_last is no longer part of iconv internals + + The __is_last field was replaced with a bitmask in + commit 85830c4c4688b30d3d76111aa9a26745c7b141d6 in 2000, + and multiple bits are in use today. + + Reviewed-by: DJ Delorie + +diff --git a/manual/charset.texi b/manual/charset.texi +index 427db3bc804f6244..3aaa62d088570f76 100644 +--- a/manual/charset.texi ++++ b/manual/charset.texi +@@ -2422,11 +2422,11 @@ written into the buffer to signal how much output is available. If this + conversion step is not the last one, the element must not be modified. + The @code{__outbufend} element must not be modified. + +-@item int __is_last +-This element is nonzero if this conversion step is the last one. This +-information is necessary for the recursion. See the description of the +-conversion function internals below. This element must never be +-modified. ++@item int __flags ++This field is a set of flags. The @code{__GCONV_IS_LAST} bit is set if ++this conversion step is the last one. This information is necessary for ++the recursion. See the description of the conversion function internals ++below. This element must never be modified. + + @item int __invocation_counter + The conversion function can use this element to see how many calls of +@@ -2731,8 +2731,8 @@ Otherwise the function has to emit a byte sequence to bring the state + object into the initial state. Once this all happened the other + conversion modules in the chain of conversions have to get the same + chance. 
Whether another step follows can be determined from the +-@code{__is_last} element of the step data structure to which the first +-parameter points. ++@code{__GCONV_IS_LAST} flag in the @code{__flags} field of the step ++data structure to which the first parameter points. + + The more interesting mode is when actual text has to be converted. The + first step in this case is to convert as much text as possible from the +@@ -2866,7 +2866,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data, + + /* @r{Call the steps down the chain if there are any but only} + @r{if we successfully emitted the escape sequence.} */ +- if (status == __GCONV_OK && ! data->__is_last) ++ if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST)) + status = fct (next_step, next_data, NULL, NULL, + written, 1); + @} +@@ -2892,7 +2892,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data, + + /* @r{If this is the last step, leave the loop. There is} + @r{nothing we can do.} */ +- if (data->__is_last) ++ if (data->__flags & __GCONV_IS_LAST) + @{ + /* @r{Store information about how many bytes are} + @r{available.} */ diff --git a/glibc-RHEL-71530-5.patch b/glibc-RHEL-71530-5.patch new file mode 100644 index 0000000..827ef5d --- /dev/null +++ b/glibc-RHEL-71530-5.patch @@ -0,0 +1,1081 @@ +commit 6cbf845fcdc76131d0e674cee454fe738b69c69d +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Preserve iconv -c error exit on invalid inputs (bug 32046) + + In several converters, a __GCONV_ILLEGAL_INPUT result gets overwritten + with __GCONV_FULL_OUTPUT. As a result, iconv (the function) returns + E2BIG instead of EILSEQ. The iconv program does not see the original + EILSEQ failure, does not recognize the invalid input, and may + incorrectly exit successfully. + + To address this, a new __flags bit is used to indicate a sticky input + error state. 
All __GCONV_ILLEGAL_INPUT results are replaced with a + function call that sets this new __GCONV_ENCOUNTERED_ILLEGAL_INPUT and + returns __GCONV_ILLEGAL_INPUT. The iconv program checks for + __GCONV_ENCOUNTERED_ILLEGAL_INPUT and overrides the exit status. + + The converter changes introducing __gconv_mark_illegal_input are + mostly mechanical, except for the res variable initialization in + iconvdata/iso-2022-jp.c: this error gets overwritten with __GCONV_OK + and other results in the following code. If res == + __GCONV_ILLEGAL_INPUT afterwards, STANDARD_TO_LOOP_ERR_HANDLER below + will handle it. + + The __gconv_mark_illegal_input changes do not alter the errno value + set by the iconv function. This is simpler to implement than + reviewing each __GCONV_FULL_OUTPUT result and adjust it not to + override a previous __GCONV_ILLEGAL_INPUT result. Doing it that way + would also change some E2BIG errors in to EILSEQ errors, so it had to + be done conditionally (under a flag set by the iconv program only), to + avoid confusing buffer management in other applications. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/Makefile b/iconv/Makefile +index b0fa550141db5a06..29e4f280ec4cdcbf 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -61,6 +61,10 @@ test-srcs := \ + tst-translit-mchar \ + # test-srcs + ++tests-internal = \ ++ tst-iconv-sticky-input-error \ ++ # tests-internal ++ + others = iconv_prog iconvconfig + install-others-programs = $(inst_bindir)/iconv + install-sbin = iconvconfig +diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h +index 9fece3ea14d5bf24..cd452d94ccb18e92 100644 +--- a/iconv/gconv_int.h ++++ b/iconv/gconv_int.h +@@ -331,4 +331,34 @@ extern wint_t __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c); + + __END_DECLS + ++/* Internal extensions for . */ ++ ++/* Internal flags for __flags in struct __gconv_step_data. Overlaps ++ with flags for __gconv_open. 
*/ ++enum ++ { ++ /* The conversion encountered an illegal input character at one ++ point. */ ++ __GCONV_ENCOUNTERED_ILLEGAL_INPUT = 1U << 30, ++ }; ++ ++/* Mark *STEP_DATA as having seen illegal input, and return ++ __GCONV_ILLEGAL_INPUT. */ ++static inline int ++__gconv_mark_illegal_input (struct __gconv_step_data *step_data) ++{ ++ step_data->__flags |= __GCONV_ENCOUNTERED_ILLEGAL_INPUT; ++ return __GCONV_ILLEGAL_INPUT; ++} ++ ++/* Returns true if any of the conversion steps encountered illegal input. */ ++static _Bool __attribute__ ((unused)) ++__gconv_has_illegal_input (__gconv_t cd) ++{ ++ for (size_t i = 0; i < cd->__nsteps; ++i) ++ if (cd->__data[i].__flags & __GCONV_ENCOUNTERED_ILLEGAL_INPUT) ++ return true; ++ return false; ++} ++ + #endif /* gconv_int.h */ +diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c +index 257be2f8ff6a8811..f22002cf813c5951 100644 +--- a/iconv/gconv_simple.c ++++ b/iconv/gconv_simple.c +@@ -207,7 +207,7 @@ ucs4_internal_loop (struct __gconv_step *step, + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + + if (flags & __GCONV_IGNORE_ERRORS) + { +@@ -218,7 +218,7 @@ ucs4_internal_loop (struct __gconv_step *step, + + *inptrp = inptr; + *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + + put32 (outptr, inval); +@@ -276,7 +276,7 @@ ucs4_internal_loop_single (struct __gconv_step *step, + if (!(flags & __GCONV_IGNORE_ERRORS)) + { + *inptrp -= cnt - (state->__count & 7); +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + } + else +@@ -453,7 +453,7 @@ ucs4le_internal_loop (struct __gconv_step *step, + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. 
*/ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + + if (flags & __GCONV_IGNORE_ERRORS) + { +@@ -464,7 +464,7 @@ ucs4le_internal_loop (struct __gconv_step *step, + + *inptrp = inptr; + *outptrp = outptr; +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + + put32 (outptr, inval); +@@ -523,7 +523,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + else + { +@@ -969,7 +969,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +@@ -1012,7 +1012,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + inptr += 2; \ +@@ -1061,7 +1061,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, + character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + inptr += 4; \ +diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c +index 44f0fd849a3f82a3..54c4f3a1008a4bcd 100644 +--- a/iconv/gconv_trans.c ++++ b/iconv/gconv_trans.c +@@ -232,6 +232,6 @@ __gconv_transliterate (struct __gconv_step *step, + } + + /* Haven't found a match. 
*/ +- return __GCONV_ILLEGAL_INPUT; ++ return __gconv_mark_illegal_input (step_data); + } + libc_hidden_def (__gconv_transliterate) +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 88a928557e7afb0a..5fe4fe7a6c3776f4 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -291,6 +291,11 @@ conversions from `%s' and to `%s' are not supported"), + } + while (++remaining < argc); + ++ /* Ensure that iconv -c still exits with failure if iconv (the ++ function) has failed with E2BIG instead of EILSEQ. */ ++ if (__gconv_has_illegal_input (cd)) ++ status = EXIT_FAILURE; ++ + /* Close the output file now. */ + if (output != NULL && fclose (output)) + error (EXIT_FAILURE, errno, _("error while closing output file")); +diff --git a/iconv/loop.c b/iconv/loop.c +index 5340dafc709f5ab0..199fb283266fb9ca 100644 +--- a/iconv/loop.c ++++ b/iconv/loop.c +@@ -123,8 +123,7 @@ + `continue' must reach certain points. */ + #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ +- \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +@@ -142,7 +141,7 @@ + points. */ + #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + \ + if (irreversible == NULL) \ + /* This means we are in call from __gconv_transliterate. In this \ +diff --git a/iconv/tst-iconv-sticky-input-error.c b/iconv/tst-iconv-sticky-input-error.c +new file mode 100644 +index 0000000000000000..34a245f185e614ab +--- /dev/null ++++ b/iconv/tst-iconv-sticky-input-error.c +@@ -0,0 +1,135 @@ ++/* Test __GCONV_ENCOUNTERED_ILLEGAL_INPUT, as used by iconv -c (bug 32046). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* FROM is the input character set, TO the output character set. If ++ IGNORE is true, the iconv descriptor is set up in the same way as ++ iconv -c would. INPUT is the input string, EXPECTED_OUTPUT the ++ output. OUTPUT_LIMIT is a byte count, specifying how many output ++ bytes the iconv function may produce on each invocation. 
*/ ++static void ++one_direction (const char *from, const char *to, bool ignore, ++ const char *input, const char *expected_output, ++ size_t output_limit) ++{ ++ if (test_verbose) ++ { ++ char *quoted_input = support_quote_string (input); ++ char *quoted_output = support_quote_string (expected_output); ++ printf ("info: testing from=\"%s\" to=\"%s\" ignore=%d input=\"%s\"" ++ " expected_output=\"%s\" output_limit=%zu\n", ++ from, to, (int) ignore, quoted_input, ++ quoted_output, output_limit); ++ free (quoted_output); ++ free (quoted_input); ++ } ++ ++ __gconv_t cd; ++ if (ignore) ++ { ++ struct gconv_spec conv_spec; ++ TEST_VERIFY_EXIT (__gconv_create_spec (&conv_spec, from, to) ++ == &conv_spec); ++ conv_spec.ignore = true; ++ cd = (iconv_t) -1; ++ TEST_COMPARE (__gconv_open (&conv_spec, &cd, 0), __GCONV_OK); ++ __gconv_destroy_spec (&conv_spec); ++ } ++ else ++ cd = iconv_open (to, from); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ char *input_ptr = (char *) input; ++ size_t input_len = strlen (input); ++ char output_buf[20]; ++ char *output_ptr = output_buf; ++ size_t output_len; ++ do ++ { ++ output_len = array_end (output_buf) - output_ptr; ++ if (output_len > output_limit) ++ /* Limit the buffer size as requested by the caller. */ ++ output_len = output_limit; ++ TEST_VERIFY_EXIT (output_len > 0); ++ if (input_len == 0) ++ /* Trigger final flush. */ ++ input_ptr = NULL; ++ char *old_input_ptr = input_ptr; ++ size_t ret = iconv (cd, &input_ptr, &input_len, ++ &output_ptr, &output_len); ++ if (ret == (size_t) -1) ++ { ++ if (errno != EILSEQ) ++ TEST_COMPARE (errno, E2BIG); ++ } ++ ++ if (input_ptr == old_input_ptr) ++ /* Avoid endless loop if stuck on an invalid input character. */ ++ break; ++ } ++ while (input_ptr != NULL); ++ ++ /* Test the sticky illegal input bit. 
*/ ++ TEST_VERIFY (__gconv_has_illegal_input (cd)); ++ ++ TEST_COMPARE_BLOB (expected_output, strlen (expected_output), ++ output_buf, output_ptr - output_buf); ++ ++ TEST_COMPARE (iconv_close (cd), 0); ++} ++ ++static int ++do_test (void) ++{ ++ static const char charsets[][14] = ++ { ++ "ASCII", ++ "ASCII//IGNORE", ++ "UTF-8", ++ "UTF-8//IGNORE", ++ }; ++ ++ for (size_t from_idx = 0; from_idx < array_length (charsets); ++from_idx) ++ for (size_t to_idx = 0; to_idx < array_length (charsets); ++to_idx) ++ for (int do_ignore = 0; do_ignore < 2; ++do_ignore) ++ for (int limit = 1; limit < 5; ++limit) ++ for (int skip = 0; skip < 3; ++skip) ++ { ++ const char *expected_output; ++ if (do_ignore || strstr (charsets[to_idx], "//IGNORE") != NULL) ++ expected_output = "ABXY" + skip; ++ else ++ expected_output = "AB" + skip; ++ one_direction (charsets[from_idx], charsets[to_idx], do_ignore, ++ "AB\xffXY" + skip, expected_output, limit); ++ } ++ ++ return 0; ++} ++ ++#include +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index a27107f02b95cdc7..5ff99a02a30370cb 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -150,9 +150,8 @@ expect_exit 1 \ + ! test -s "$tmp/err" + expect_files abc def + +-# FIXME: This is not correct, -c should not change the exit status. + cp "$tmp/out-template" "$tmp/out" +-run_iconv -c -o "$tmp/out" \ ++expect_exit 1 run_iconv -c -o "$tmp/out" \ + "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err" + ! test -s "$tmp/err" + expect_files abc xy zt def +diff --git a/iconvdata/cp932.c b/iconvdata/cp932.c +index bf7297b114df638c..3def70a70bbc9a23 100644 +--- a/iconvdata/cp932.c ++++ b/iconvdata/cp932.c +@@ -4559,7 +4559,7 @@ static const char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4599,7 +4599,7 @@ static const char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4634,7 +4634,7 @@ static const char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +diff --git a/iconvdata/euc-jp-ms.c b/iconvdata/euc-jp-ms.c +index d03a0e05bb392651..96c5325e9d23493d 100644 +--- a/iconvdata/euc-jp-ms.c ++++ b/iconvdata/euc-jp-ms.c +@@ -4659,7 +4659,7 @@ static const unsigned char from_ucs4_extra[229][2] = + /* This is illegal. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4689,7 +4689,7 @@ static const unsigned char from_ucs4_extra[229][2] = + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -4709,7 +4709,7 @@ static const unsigned char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + } \ +@@ -4820,7 +4820,7 @@ static const unsigned char from_ucs4_extra[229][2] = + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +diff --git a/iconvdata/gbbig5.c b/iconvdata/gbbig5.c +index f05deeeb1adac612..b15a2ef932f735f9 100644 +--- a/iconvdata/gbbig5.c ++++ b/iconvdata/gbbig5.c +@@ -4831,7 +4831,7 @@ const char __from_big5_to_gb2312 [13973][2] = + { \ + /* We do not have a mapping for this character. \ + If ignore errors, map it to 0xa1bc - big5 box character */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +@@ -4922,7 +4922,7 @@ const char __from_big5_to_gb2312 [13973][2] = + { \ + /* We do not have a mapping for this character. \ + If ignore errors, map it to 0xa1f5 - gb box character */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + \ +diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c +index 4c37f30e9891c063..d6c8ce7f682aa64d 100644 +--- a/iconvdata/ibm1364.c ++++ b/iconvdata/ibm1364.c +@@ -179,7 +179,7 @@ enum + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -219,7 +219,7 @@ enum + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -300,7 +300,7 @@ enum + \ + if (! ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +@@ -332,7 +332,7 @@ enum + /* This is an illegal character. */ \ + if (! 
ignore_errors_p ()) \ + { \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + ++*irreversible; \ +diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c +index d96e5f86b9d984da..ba7e23f8acd7384b 100644 +--- a/iconvdata/iso646.c ++++ b/iconvdata/iso646.c +@@ -313,7 +313,7 @@ gconv_end (struct __gconv_step *data) + ch = 0xf9; \ + else if (var == JP_OCR_B) \ + /* Illegal character. */ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == YU) \ + ch = 0x17e; \ + else if (var == HU) \ +@@ -387,7 +387,7 @@ gconv_end (struct __gconv_step *data) + ch = 0xec; \ + else if (var == JP_OCR_B) \ + /* Illegal character. */ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == YU) \ + ch = 0x10d; \ + else if (var == HU) \ +@@ -403,7 +403,7 @@ gconv_end (struct __gconv_step *data) + break; \ + case 0x80 ... 0xff: \ + /* Illegal character. 
*/ \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + } \ + \ +@@ -440,17 +440,17 @@ gconv_end (struct __gconv_step *data) + case 0x23: \ + if (var == GB || var == ES || var == IT || var == FR || var == FR1 \ + || var == NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x24: \ + if (var == CN || var == HU || var == CU || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x40: \ + if (var == CA || var == CA2 || var == DE || var == ES || var == ES2 \ + || var == IT || var == YU || var == HU || var == FR || var == FR1 \ + || var == PT || var == PT2 || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5b: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -458,7 +458,7 @@ gconv_end (struct __gconv_step *data) + || var == HU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == CU) \ + ch = 0x7d; \ + break; \ +@@ -468,7 +468,7 @@ gconv_end (struct __gconv_step *data) + || var == YU || var == KR || var == HU || var == CU || var == FR \ + || var == FR1 || var == NO || var == NO2 || var == PT \ + || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5d: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -476,17 +476,17 @@ gconv_end (struct __gconv_step *data) + || var == HU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + break; \ + case 0x5e: \ + if (var == CA || var == CA2 || var == ES2 || var == YU || var == CU \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x60: \ + if (var == CA || var == CA2 || var == IT || var == JP_OCR_B \ + || var == YU || var == HU || var == FR || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7b: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ +@@ -494,14 +494,14 @@ gconv_end (struct __gconv_step *data) + || var == CU || var == FR || var == FR1 || var == NO \ + || var == NO2 || var == PT || var == PT2 || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7c: \ + if (var == CA || var == CA2 || var == DE || var == DK || var == ES \ + || var == ES2 || var == IT || var == YU || var == HU || var == CU \ + || var == FR || var == FR1 || var == NO || var == PT \ + || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + else if (var == NO2) \ + ch = 0x7e; \ + break; \ +@@ -510,7 +510,7 @@ gconv_end (struct __gconv_step *data) + || var == ES2 || var == IT || var == YU || var == HU || var == CU \ + || var == FR || var == FR1 || var == NO || var == NO2 \ + || var == PT || var == PT2 || var == SE || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x7e: \ + if (var == GB || var == CA || var == CA2 || var == DE || var == ES2 \ +@@ -518,21 +518,21 @@ gconv_end (struct __gconv_step *data) + || var == YU || var == HU || var == CU || var == FR || var == FR1 \ + || var == NO || var == NO2 || var == PT || var == SE \ + || var == SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xa3: \ + if (var != GB && var != ES && var != IT && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x23; \ + break; \ + case 0xa4: \ + if (var != HU && var != CU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x24; \ + break; \ + case 0xa5: \ +@@ -541,7 +541,7 @@ gconv_end (struct __gconv_step *data) + else if (var == JP || var == JP_OCR_B) \ + ch = 0x5c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa7: \ + if (var == DE || var == ES || var == IT || var == PT) \ +@@ -551,11 +551,11 @@ gconv_end (struct __gconv_step *data) + else if (var == NO2) \ + ch = 0x23; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xa8: \ + if (var != ES2 && var != CU && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xb0: \ +@@ -566,7 +566,7 @@ gconv_end (struct __gconv_step *data) + else if (var == PT) \ + ch = 0x7e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xb4: \ + if (var == ES2 || var == CU) \ +@@ -574,11 +574,11 @@ gconv_end (struct __gconv_step *data) + else if (var == PT2) \ + ch = 0x40; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xb5: \ + if (var != FR) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ 
+ case 0xbf: \ +@@ -587,31 +587,31 @@ gconv_end (struct __gconv_step *data) + else if (var == ES2 || var == CU) \ + ch = 0x5e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xc1: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0xc3: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc4: \ + if (var != DE && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc5: \ + if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xc6: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xc7: \ +@@ -620,7 +620,7 @@ gconv_end (struct __gconv_step *data) + else if (var == PT || var == PT2) \ + ch = 0x5c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xc9: \ + if (var == CA2) \ +@@ -630,26 +630,26 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x40; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xd1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xd5: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xd6: \ + if (var != DE && 
var != HU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xd8: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0xdc: \ +@@ -658,11 +658,11 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x5e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xdf: \ + if (var != DE) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xe0: \ +@@ -671,36 +671,36 @@ gconv_end (struct __gconv_step *data) + else if (var == IT) \ + ch = 0x7b; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xe1: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0xe2: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0xe3: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0xe4: \ + if (var != DE && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0xe5: \ + if (var != DK && var != NO && var != NO2 && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xe6: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + 
break; \ + case 0xe7: \ +@@ -711,11 +711,11 @@ gconv_end (struct __gconv_step *data) + else if (var == PT || var == PT2) \ + ch = 0x7c; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xe8: \ + if (var != CA && var != CA2 && var != IT && var != FR && var != FR1) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xe9: \ +@@ -726,51 +726,51 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x60; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xea: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0xec: \ + if (var != IT) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xee: \ + if (var != CA) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5e; \ + break; \ + case 0xf1: \ + if (var != ES && var != ES2 && var != CU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf2: \ + if (var != IT) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf4: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0xf5: \ + if (var != PT && var != PT2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0xf6: \ + if (var != DE && var != HU && var != SE && var != SE2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch 
= 0x7c; \ + break; \ + case 0xf8: \ + if (var != DK && var != NO && var != NO2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0xf9: \ +@@ -779,11 +779,11 @@ gconv_end (struct __gconv_step *data) + else if (var == IT) \ + ch = 0x60; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0xfb: \ + if (var != CA && var != CA2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0xfc: \ +@@ -792,93 +792,93 @@ gconv_end (struct __gconv_step *data) + else if (var == SE2) \ + ch = 0x7e; \ + else \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + break; \ + case 0x160: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0x106: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + case 0x107: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7d; \ + break; \ + case 0x10c: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5e; \ + break; \ + case 0x10d: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x110: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0x111: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7c; \ + break; \ + case 0x161: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = 
__gconv_mark_illegal_input (step_data); \ + ch = 0x7b; \ + break; \ + case 0x17d: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0x17e: \ + if (var != YU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x60; \ + break; \ + case 0x2dd: \ + if (var != HU) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x2022: \ + if (var != ES2) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x40; \ + break; \ + case 0x203e: \ + if (var != GB && var != CN && var != JP && var != NO && var != SE) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x7e; \ + break; \ + case 0x20a9: \ + if (var != KR) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5c; \ + break; \ + case 0x2329: \ + if (var != JP_OCR_B) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5b; \ + break; \ + case 0x232a: \ + if (var != JP_OCR_B) \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + ch = 0x5d; \ + break; \ + default: \ + if (__glibc_unlikely (ch > 0x7f)) \ + { \ + UNICODE_TAG_HANDLER (ch, 4); \ +- failure = __GCONV_ILLEGAL_INPUT; \ ++ failure = __gconv_mark_illegal_input (step_data); \ + } \ + break; \ + } \ +diff --git a/iconvdata/unicode.c b/iconvdata/unicode.c +index d69c9887a18af2a4..79bb9886e5e773f6 100644 +--- a/iconvdata/unicode.c ++++ b/iconvdata/unicode.c +@@ -163,7 +163,7 @@ gconv_end (struct __gconv_step *data) + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. 
*/ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +diff --git a/iconvdata/utf-16.c b/iconvdata/utf-16.c +index a869353f20e9fd77..9d9fd2a2a3df3e13 100644 +--- a/iconvdata/utf-16.c ++++ b/iconvdata/utf-16.c +@@ -206,7 +206,7 @@ gconv_end (struct __gconv_step *data) + We must catch this. If we let surrogates pass through, \ + attackers could make a security hole exploit by \ + synthesizing any desired plane 1-16 character. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ +diff --git a/iconvdata/utf-32.c b/iconvdata/utf-32.c +index 5693b4897569d7f0..139eefb6d853135c 100644 +--- a/iconvdata/utf-32.c ++++ b/iconvdata/utf-32.c +@@ -207,7 +207,7 @@ gconv_end (struct __gconv_step *data) + We must catch this. If we let surrogates pass through, \ + attackers could make a security hole exploit by \ + generating "irregular UTF-32" sequences. */ \ +- result = __GCONV_ILLEGAL_INPUT; \ ++ result = __gconv_mark_illegal_input (step_data); \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ diff --git a/glibc-RHEL-71530-6.patch b/glibc-RHEL-71530-6.patch new file mode 100644 index 0000000..98fd5eb --- /dev/null +++ b/glibc-RHEL-71530-6.patch @@ -0,0 +1,713 @@ +commit 8ef3cff9d1ceafe369f982d980678d749fb93bd2 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Support in-place conversions (bug 10460, bug 32033) + + Check if any of the input files overlaps with the output file, and use + a temporary file in this case, so that the input is not clobbered + before it is read. This fixes bug 10460. It allows to use iconv + more easily as a functional replacement for GNU recode. + + The updated output buffer management truncates the output file + if there is no input, fixing bug 32033.
+ + Reviewed-by: DJ Delorie + +diff --git a/iconv/Makefile b/iconv/Makefile +index 29e4f280ec4cdcbf..c9af0c4d44cae7fb 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -81,6 +81,8 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) + ifeq ($(run-built-tests),yes) + xtests-special += $(objpfx)test-iconvconfig.out + tests-special += \ ++ $(objpfx)tst-iconv_prog-buffer-large.out \ ++ $(objpfx)tst-iconv_prog-buffer-tiny.out \ + $(objpfx)tst-iconv_prog-buffer.out \ + $(objpfx)tst-iconv_prog.out \ + $(objpfx)tst-translit-mchar.out \ +@@ -153,3 +155,12 @@ $(objpfx)tst-iconv_prog-buffer.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog + $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ + $(evaluate-test) ++$(objpfx)tst-iconv_prog-buffer-tiny.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' \ ++ '--buffer-size=1' > $@; \ ++ $(evaluate-test) ++$(objpfx)tst-iconv_prog-buffer-large.out: \ ++ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog ++ $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \ ++ $(evaluate-test) +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 5fe4fe7a6c3776f4..3e02db7319185d45 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -47,7 +47,11 @@ + static void print_version (FILE *stream, struct argp_state *state); + void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +-#define OPT_VERBOSE 1000 ++enum ++ { ++ OPT_VERBOSE = 1000, ++ OPT_BUFFER_SIZE, ++ }; + #define OPT_LIST 'l' + + /* Definitions of arguments for argp functions. */ +@@ -63,6 +67,10 @@ static const struct argp_option options[] = + { "output", 'o', N_("FILE"), 0, N_("output file") }, + { "silent", 's', NULL, 0, N_("suppress warnings") }, + { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") }, ++ /* This is an internal option intended for testing only. Very small ++ buffers do not work with all character sets. 
*/ ++ { "buffer-size", OPT_BUFFER_SIZE, N_("BYTE-COUNT"), OPTION_HIDDEN, ++ N_("size of in-memory scratch buffer") }, + { NULL, 0, NULL, 0, NULL } + }; + +@@ -100,13 +108,20 @@ static int list; + /* If nonzero omit invalid character from output. */ + int omit_invalid; + ++/* Current index in argv (after command line processing) with the ++ input file name. */ ++static int current_input_file_index; ++ ++/* Size of the temporary, in-memory buffer. Exceeding it needs ++ spooling to disk in a temporary file. Controlled by --buffer-size. */ ++static size_t output_buffer_size = 1024 * 1024; ++ + /* Prototypes for the functions doing the actual work. */ +-static int process_block (iconv_t cd, char *addr, size_t len, FILE **output, +- const char *output_file); +-static int process_fd (iconv_t cd, int fd, FILE **output, +- const char *output_file); +-static int process_file (iconv_t cd, FILE *input, FILE **output, +- const char *output_file); ++static void prepare_output_file (char **argv); ++static void close_output_file (int status); ++static int process_block (iconv_t cd, char *addr, size_t len); ++static int process_fd (iconv_t cd, int fd); ++static int process_file (iconv_t cd, FILE *input); + static void print_known_names (void); + + +@@ -114,7 +129,6 @@ int + main (int argc, char *argv[]) + { + int status = EXIT_SUCCESS; +- int remaining; + __gconv_t cd; + struct charmap_t *from_charmap = NULL; + struct charmap_t *to_charmap = NULL; +@@ -126,7 +140,7 @@ main (int argc, char *argv[]) + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ +- argp_parse (&argp, argc, argv, 0, &remaining, NULL); ++ argp_parse (&argp, argc, argv, 0, &current_input_file_index, NULL); + + /* List all coded character sets if wanted. */ + if (list) +@@ -161,7 +175,8 @@ main (int argc, char *argv[]) + if (from_charmap != NULL || to_charmap != NULL) + /* Construct the conversion table and do the conversion.
*/ + status = charmap_conversion (from_code, from_charmap, to_code, to_charmap, +- argc, remaining, argv, output_file); ++ argc, current_input_file_index, argv, ++ output_file); + else + { + struct gconv_spec conv_spec; +@@ -235,16 +250,14 @@ conversions from `%s' and to `%s' are not supported"), + _("failed to start conversion processing")); + } + +- /* The output file. Will be opened when we are ready to produce +- output. */ +- FILE *output = NULL; ++ prepare_output_file (argv); + + /* Now process the remaining files. Write them to stdout or the file + specified with the `-o' parameter. If we have no file given as + the parameter process all from stdin. */ +- if (remaining == argc) ++ if (current_input_file_index == argc) + { +- if (process_file (cd, stdin, &output, output_file) != 0) ++ if (process_file (cd, stdin) != 0) + status = EXIT_FAILURE; + } + else +@@ -253,17 +266,17 @@ conversions from `%s' and to `%s' are not supported"), + int fd, ret; + + if (verbose) +- fprintf (stderr, "%s:\n", argv[remaining]); +- if (strcmp (argv[remaining], "-") == 0) +- fd = 0; ++ fprintf (stderr, "%s:\n", argv[current_input_file_index]); ++ if (strcmp (argv[current_input_file_index], "-") == 0) ++ fd = STDIN_FILENO; + else + { +- fd = open (argv[remaining], O_RDONLY); ++ fd = open (argv[current_input_file_index], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), +- argv[remaining]); ++ argv[current_input_file_index]); + status = EXIT_FAILURE; + continue; + } +@@ -271,7 +284,7 @@ conversions from `%s' and to `%s' are not supported"), + + { + /* Read the file in pieces. */ +- ret = process_fd (cd, fd, &output, output_file); ++ ret = process_fd (cd, fd); + + /* Now close the file. 
*/ + close (fd); +@@ -289,7 +302,7 @@ conversions from `%s' and to `%s' are not supported"), + } + } + } +- while (++remaining < argc); ++ while (++current_input_file_index < argc); + + /* Ensure that iconv -c still exits with failure if iconv (the + function) has failed with E2BIG instead of EILSEQ. */ +@@ -297,8 +310,7 @@ conversions from `%s' and to `%s' are not supported"), + status = EXIT_FAILURE; + + /* Close the output file now. */ +- if (output != NULL && fclose (output)) +- error (EXIT_FAILURE, errno, _("error while closing output file")); ++ close_output_file (status); + } + + return status; +@@ -328,6 +340,14 @@ parse_opt (int key, char *arg, struct argp_state *state) + /* Omit invalid characters from output. */ + omit_invalid = 1; + break; ++ case OPT_BUFFER_SIZE: ++ { ++ int i = atoi (arg); ++ if (i <= 0) ++ error (EXIT_FAILURE, 0, _("invalid buffer size: %s"), arg); ++ output_buffer_size = i; ++ } ++ break; + case OPT_VERBOSE: + verbose = 1; + break; +@@ -374,59 +394,247 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); + } + ++/* Command line index of the last input file that overlaps with the ++ output file. Zero means no temporary file is ever required. */ ++static int last_overlapping_file_index; + +-static int +-write_output (const char *outbuf, const char *outptr, FILE **output, +- const char *output_file) ++/* This is set to true if the output is written to a temporary file. */ ++static bool output_using_temporary_file; ++ ++/* This is the file descriptor that output is written to. */ ++static int output_fd = -1; ++ ++/* Pointer to the start of the fixed-size output buffer. */ ++static char *output_buffer_start; ++ ++/* Current write position in the output buffer. */ ++static char *output_buffer_current; ++ ++/* Remaining bytes after output_buffer_current in the output buffer.
*/ ++static size_t output_buffer_remaining; ++ ++ ++/* Reduce the buffer size when writing directly to the output file, to ++ reduce cache utilization. */ ++static size_t copy_buffer_size = BUFSIZ; ++ ++static void ++output_error (void) ++{ ++ error (EXIT_FAILURE, errno, _("cannot open output file")); ++} ++ ++static void ++input_error (const char *path) + { +- /* We have something to write out. */ +- int errno_save = errno; ++ error (0, errno, _("cannot open input file `%s'"), path); ++} + +- if (*output == NULL) ++/* Opens output_file for writing, truncating it. */ ++static void ++open_output_direct (void) ++{ ++ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_TRUNC, 0777); ++ if (output_fd < 0) ++ output_error (); ++} ++ ++static void ++prepare_output_file (char **argv) ++{ ++ if (copy_buffer_size > output_buffer_size) ++ copy_buffer_size = output_buffer_size; ++ ++ if (output_file == NULL || strcmp (output_file, "-") == 0) + { +- /* Determine output file. */ +- if (output_file != NULL && strcmp (output_file, "-") != 0) ++ /* No buffering is required when writing to standard output ++ because input overlap is expected to be solved externally. */ ++ output_fd = STDOUT_FILENO; ++ output_buffer_size = copy_buffer_size; ++ } ++ else ++ { ++ /* If iconv creates the output file, no overlap is possible. */ ++ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_EXCL, 0777); ++ if (output_fd >= 0) ++ output_buffer_size = copy_buffer_size; ++ else + { +- *output = fopen (output_file, "w"); +- if (*output == NULL) +- error (EXIT_FAILURE, errno, _("cannot open output file")); ++ /* Otherwise, check if any of the input files overlap with the ++ output file. 
*/ ++ struct statx st; ++ if (statx (AT_FDCWD, output_file, 0, STATX_INO | STATX_MODE, &st) ++ != 0) ++ output_error (); ++ uint32_t out_dev_minor = st.stx_dev_minor; ++ uint32_t out_dev_major = st.stx_dev_major; ++ uint64_t out_ino = st.stx_ino; ++ ++ int idx = current_input_file_index; ++ while (true) ++ { ++ /* Special case: no input files means standard input. */ ++ if (argv[idx] == NULL && idx != current_input_file_index) ++ break; ++ ++ int ret; ++ if (argv[idx] == NULL || strcmp (argv[idx], "-") == 0) ++ ret = statx (STDIN_FILENO, "", AT_EMPTY_PATH, STATX_INO, &st); ++ else ++ ret = statx (AT_FDCWD, argv[idx], 0, STATX_INO, &st); ++ if (ret != 0) ++ { ++ input_error (argv[idx]); ++ exit (EXIT_FAILURE); ++ } ++ if (out_dev_minor == st.stx_dev_minor ++ && out_dev_major == st.stx_dev_major ++ && out_ino == st.stx_ino) ++ { ++ if (argv[idx] == NULL) ++ /* Corner case: index of NULL would be larger than ++ idx while converting, triggering a switch away ++ from the temporary file. */ ++ last_overlapping_file_index = INT_MAX; ++ else ++ last_overlapping_file_index = idx; ++ } ++ ++ if (argv[idx] == NULL) ++ break; ++ ++idx; ++ } ++ ++ /* If there is no overlap, avoid using a temporary file. */ ++ if (last_overlapping_file_index == 0) ++ { ++ open_output_direct (); ++ output_buffer_size = copy_buffer_size; ++ } + } +- else +- *output = stdout; + } + +- if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf) +- || ferror (*output)) ++ output_buffer_start = malloc (output_buffer_size); ++ if (output_buffer_start == NULL) ++ output_error (); ++ output_buffer_current = output_buffer_start; ++ output_buffer_remaining = output_buffer_size; ++} ++ ++/* Write out the range [first, last), terminating the process on write ++ error. */ ++static void ++write_fully (int fd, const char *first, const char *last) ++{ ++ while (first < last) + { +- /* Error occurred while printing the result. 
*/ +- error (0, 0, _("\ ++ ssize_t ret = write (fd, first, last - first); ++ if (ret == 0) ++ { ++ errno = ENOSPC; ++ output_error (); ++ } ++ if (ret < 0) ++ error (EXIT_FAILURE, errno, _("\ + conversion stopped due to problem in writing the output")); +- return -1; ++ first += ret; ++ } ++} ++ ++static void ++flush_output (void) ++{ ++ bool temporary_file_not_needed ++ = current_input_file_index > last_overlapping_file_index; ++ if (output_fd < 0) ++ { ++ if (temporary_file_not_needed) ++ open_output_direct (); ++ else ++ { ++ /* Create an anonymous temporary file. */ ++ FILE *fp = tmpfile (); ++ if (fp == NULL) ++ output_error (); ++ output_fd = dup (fileno (fp)); ++ if (output_fd < 0) ++ output_error (); ++ fclose (fp); ++ output_using_temporary_file = true; ++ } ++ /* Either way, no longer use a memory-only staging buffer. */ ++ output_buffer_size = copy_buffer_size; + } ++ else if (output_using_temporary_file && temporary_file_not_needed) ++ { ++ /* The temporary file is no longer needed. Switch to direct ++ output, replacing output_fd. */ ++ int temp_fd = output_fd; ++ open_output_direct (); ++ ++ /* Copy over the data spooled to the temporary file. */ ++ if (lseek (temp_fd, 0, SEEK_SET) < 0) ++ output_error (); ++ while (true) ++ { ++ char buf[BUFSIZ]; ++ ssize_t ret = read (temp_fd, buf, sizeof (buf)); ++ if (ret < 0) ++ output_error (); ++ if (ret == 0) ++ break; ++ write_fully (output_fd, buf, buf + ret); ++ } ++ close (temp_fd); + +- errno = errno_save; ++ /* No longer using a temporary file from now on. */ ++ output_using_temporary_file = false; ++ output_buffer_size = copy_buffer_size; ++ } + +- return 0; ++ write_fully (output_fd, output_buffer_start, output_buffer_current); ++ output_buffer_current = output_buffer_start; ++ output_buffer_remaining = output_buffer_size; + } + ++static void ++close_output_file (int status) ++{ ++ /* Do not perform a flush if a temporary file or the in-memory ++ buffer is in use and there was an error. 
It would clobber the ++ overlapping input file. */ ++ if (status != EXIT_SUCCESS && !omit_invalid && ++ (output_using_temporary_file || output_fd < 0)) ++ return; ++ ++ /* The current_input_file_index variable is now larger than ++ last_overlapping_file_index, so the flush_output call switches ++ away from the temporary file. */ ++ flush_output (); ++ ++ if (output_fd == STDOUT_FILENO) ++ { ++ /* Close standard output in safe manner, to report certain ++ ENOSPC errors. */ ++ output_fd = dup (output_fd); ++ if (output_fd < 0) ++ output_error (); ++ } ++ if (close (output_fd) < 0) ++ output_error (); ++} + + static int +-process_block (iconv_t cd, char *addr, size_t len, FILE **output, +- const char *output_file) ++process_block (iconv_t cd, char *addr, size_t len) + { +-#define OUTBUF_SIZE 32768 + const char *start = addr; +- char outbuf[OUTBUF_SIZE]; +- char *outptr; +- size_t outlen; + size_t n; + int ret = 0; + + while (len > 0) + { +- outptr = outbuf; +- outlen = OUTBUF_SIZE; +- n = iconv (cd, &addr, &len, &outptr, &outlen); ++ n = iconv (cd, &addr, &len, ++ &output_buffer_current, &output_buffer_remaining); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { +@@ -437,39 +645,34 @@ process_block (iconv_t cd, char *addr, size_t len, FILE **output, + errno = E2BIG; + } + +- if (outptr != outbuf) +- { +- ret = write_output (outbuf, outptr, output, output_file); +- if (ret != 0) +- break; +- } +- + if (n != (size_t) -1) + { + /* All the input test is processed. For state-dependent + character sets we have to flush the state now. */ +- outptr = outbuf; +- outlen = OUTBUF_SIZE; +- n = iconv (cd, NULL, NULL, &outptr, &outlen); +- +- if (outptr != outbuf) ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ if (n == (size_t) -1 && errno == E2BIG) + { +- ret = write_output (outbuf, outptr, output, output_file); +- if (ret != 0) +- break; ++ /* Try again if the state flush exceeded the buffer space. 
*/ ++ flush_output (); ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); + } ++ bool errno_is_EILSEQ = errno == EILSEQ; + + if (n != (size_t) -1) + break; + +- if (omit_invalid && errno == EILSEQ) ++ if (omit_invalid && errno_is_EILSEQ) + { + ret = 1; + break; + } + } + +- if (errno != E2BIG) ++ if (errno == E2BIG) ++ flush_output (); ++ else + { + /* iconv() ran into a problem. */ + switch (errno) +@@ -500,7 +703,7 @@ incomplete character or shift sequence at end of buffer")); + + + static int +-process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) ++process_fd (iconv_t cd, int fd) + { + /* we have a problem with reading from a descriptor since we must not + provide the iconv() function an incomplete character or shift +@@ -574,16 +777,16 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file) + } + + /* Now we have all the input in the buffer. Process it in one run. */ +- return process_block (cd, inbuf, actlen, output, output_file); ++ return process_block (cd, inbuf, actlen); + } + + + static int +-process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file) ++process_file (iconv_t cd, FILE *input) + { + /* This should be safe since we use this function only for `stdin' and + we haven't read anything so far. */ +- return process_fd (cd, fileno (input), output, output_file); ++ return process_fd (cd, fileno (input)); + } + + +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index 5ff99a02a30370cb..54ff871d32929997 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -17,6 +17,12 @@ + # License along with the GNU C Library; if not, see + # . 
+ ++# Arguments: ++# root of the build tree ($(objpfx-common)) ++# test command wrapper (for running on the board/with new ld.so) ++# extra flags to pass to iconv ++# number of times to double the input files in size (default: 0) ++ + exec 2>&1 + set -e + +@@ -26,7 +32,9 @@ codir=$1 + test_program_prefix="$2" + + # Use internal converters to avoid issues with module loading. +-iconv_args="-f ASCII -t UTF-8" ++iconv_args="-f ASCII -t UTF-8 $3" ++ ++file_size_doublings=${4-0} + + failure=false + +@@ -39,7 +47,19 @@ echo HH > "$tmp/hh" + echo XY > "$tmp/xy" + echo ZT > "$tmp/zt" + echo OUT > "$tmp/out-template" ++: > "$tmp/empty" + printf '\xff' > "$tmp/0xff" ++ ++# Double all files to produce larger buffers. ++for p in "$tmp"/* ; do ++ i=0 ++ while test $i -lt $file_size_doublings; do ++ cat "$p" "$p" > "$tmp/scratch" ++ mv "$tmp/scratch" "$p" ++ i=$(($i + 1)) ++ done ++done ++ + cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped" + + run_iconv () { +@@ -113,6 +133,38 @@ expect_files abc def + run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc" + expect_files abc def abc + ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" ++expect_files ggg abc def abc ++ ++run_iconv -o "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/hh" ++expect_files hh ggg abc def abc hh ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/out" "$tmp/ggg" ++expect_files ggg out-template out-template ggg ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/ggg" ++expect_files ggg out-template hh out-template ggg ++ ++# Empty output should truncate the output file if exists. ++ ++cp "$tmp/out-template" "$tmp/out" ++run_iconv -o "$tmp/out" "$tmp/err" +@@ -156,6 +236,20 @@ expect_exit 1 run_iconv -c -o "$tmp/out" \ + ! 
test -s "$tmp/err" + expect_files abc xy zt def + ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def" ++expect_files xy zt abc xy zt def ++ ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -o "$tmp/out" \ ++ "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def" ++expect_files 0xff-wrapped ++ ++cp "$tmp/0xff-wrapped" "$tmp/out" ++expect_exit 1 run_iconv -c -o "$tmp/out" \ ++ "$tmp/abc" "$tmp/out" "$tmp/def" "$tmp/out" ++expect_files abc xy zt def xy zt ++ + # If the file does not exist yet, it should not be created on error. + + rm "$tmp/out" diff --git a/glibc-RHEL-71530-7.patch b/glibc-RHEL-71530-7.patch new file mode 100644 index 0000000..932e4fb --- /dev/null +++ b/glibc-RHEL-71530-7.patch @@ -0,0 +1,41 @@ +commit 75819cdd29a193cc2db980878bec305905b22bbc +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Multiple - on command line should not fail (bug 32050) + + Usually, the second and subsequent - return EOF immediately + and do not contribute to the output, but this is not an error. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index 3e02db7319185d45..dd4bc3a59a20799a 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -287,7 +287,8 @@ conversions from `%s' and to `%s' are not supported"), + ret = process_fd (cd, fd); + + /* Now close the file. */ +- close (fd); ++ if (fd != STDIN_FILENO) ++ close (fd); + + if (ret != 0) + { +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index 54ff871d32929997..a9c3729d948b4679 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -265,6 +265,11 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" + expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" + ! test -e "$tmp/out" + ++# Listing standard input multiple times should not fail (bug 32050). 
++ ++run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc" ++expect_files xy abc zt ++ + if $failure ; then + exit 1 + fi diff --git a/glibc-RHEL-71530-8.patch b/glibc-RHEL-71530-8.patch new file mode 100644 index 0000000..c652caf --- /dev/null +++ b/glibc-RHEL-71530-8.patch @@ -0,0 +1,323 @@ +commit fa1b0d5e9f6e0353e16339430770a7a8824c0468 +Author: Florian Weimer +Date: Fri Sep 20 13:10:54 2024 +0200 + + iconv: Input buffering for the iconv program (bug 6050) + + Do not read the entire input file into memory. + + Reviewed-by: DJ Delorie + +diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c +index dd4bc3a59a20799a..a2f1d34e4579f80f 100644 +--- a/iconv/iconv_prog.c ++++ b/iconv/iconv_prog.c +@@ -118,8 +118,9 @@ static size_t output_buffer_size = 1024 * 1024; + + /* Prototypes for the functions doing the actual work. */ + static void prepare_output_file (char **argv); +-static void close_output_file (int status); +-static int process_block (iconv_t cd, char *addr, size_t len); ++static void close_output_file (__gconv_t cd, int status); ++static int process_block (iconv_t cd, char **addr, size_t *len, ++ off64_t file_offset, bool *incomplete); + static int process_fd (iconv_t cd, int fd); + static int process_file (iconv_t cd, FILE *input); + static void print_known_names (void); +@@ -311,7 +312,7 @@ conversions from `%s' and to `%s' are not supported"), + status = EXIT_FAILURE; + + /* Close the output file now. */ +- close_output_file (status); ++ close_output_file (cd, status); + } + + return status; +@@ -599,7 +600,7 @@ flush_output (void) + } + + static void +-close_output_file (int status) ++close_output_file (__gconv_t cd, int status) + { + /* Do not perform a flush if a temporary file or the in-memory + buffer is in use and there was an error. 
It would clobber the +@@ -608,10 +609,28 @@ close_output_file (int status) + (output_using_temporary_file || output_fd < 0)) + return; + +- /* The current_input_file_index variable is now larger than +- last_overlapping_file_index, so the flush_output call switches ++ /* All the input text is processed. For state-dependent character ++ sets we have to flush the state now. ++ ++ The current_input_file_index variable is now larger than ++ last_overlapping_file_index, so the flush_output calls switch + away from the temporary file. */ ++ size_t n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ if (n == (size_t) -1 && errno == E2BIG) ++ { ++ /* Try again if the state flush exceeded the buffer space. */ ++ flush_output (); ++ n = iconv (cd, NULL, NULL, ++ &output_buffer_current, &output_buffer_remaining); ++ } ++ int saved_errno = errno; + flush_output (); ++ if (n == (size_t) -1 && !omit_invalid) ++ { ++ errno = saved_errno; ++ output_error (); ++ } + + if (output_fd == STDOUT_FILENO) + { +@@ -625,51 +644,35 @@ close_output_file (int status) + output_error (); + } + ++/* CD is the iconv handle. Input processing starts at *ADDR, and ++ consumes upto *LEN bytes. *ADDR and *LEN are updated. FILE_OFFSET ++ is the file offset of the data initially at ADDR. *INCOMPLETE is ++ set to true if conversion stops due to an incomplete input ++ sequence. 
*/ + static int +-process_block (iconv_t cd, char *addr, size_t len) ++process_block (iconv_t cd, char **addr, size_t *len, off64_t file_offset, ++ bool *incomplete) + { +- const char *start = addr; ++ const char *start = *addr; + size_t n; + int ret = 0; + +- while (len > 0) ++ while (*len > 0) + { +- n = iconv (cd, &addr, &len, ++ n = iconv (cd, addr, len, + &output_buffer_current, &output_buffer_remaining); + + if (n == (size_t) -1 && omit_invalid && errno == EILSEQ) + { + ret = 1; +- if (len == 0) ++ if (*len == 0) + n = 0; + else + errno = E2BIG; + } + + if (n != (size_t) -1) +- { +- /* All the input test is processed. For state-dependent +- character sets we have to flush the state now. */ +- n = iconv (cd, NULL, NULL, +- &output_buffer_current, &output_buffer_remaining); +- if (n == (size_t) -1 && errno == E2BIG) +- { +- /* Try again if the state flush exceeded the buffer space. */ +- flush_output (); +- n = iconv (cd, NULL, NULL, +- &output_buffer_current, &output_buffer_remaining); +- } +- bool errno_is_EILSEQ = errno == EILSEQ; +- +- if (n != (size_t) -1) +- break; +- +- if (omit_invalid && errno_is_EILSEQ) +- { +- ret = 1; +- break; +- } +- } ++ break; + + if (errno == E2BIG) + flush_output (); +@@ -680,13 +683,12 @@ process_block (iconv_t cd, char *addr, size_t len) + { + case EILSEQ: + if (! 
omit_invalid) +- error (0, 0, _("illegal input sequence at position %ld"), +- (long int) (addr - start)); ++ error (0, 0, _("illegal input sequence at position %lld"), ++ (long long int) (file_offset + (*addr - start))); + break; + case EINVAL: +- error (0, 0, _("\ +-incomplete character or shift sequence at end of buffer")); +- break; ++ *incomplete = true; ++ return ret; + case EBADF: + error (0, 0, _("internal error (illegal descriptor)")); + break; +@@ -706,79 +708,49 @@ incomplete character or shift sequence at end of buffer")); + static int + process_fd (iconv_t cd, int fd) + { +- /* we have a problem with reading from a descriptor since we must not +- provide the iconv() function an incomplete character or shift +- sequence at the end of the buffer. Since we have to deal with +- arbitrary encodings we must read the whole text in a buffer and +- process it in one step. */ +- static char *inbuf = NULL; +- static size_t maxlen = 0; +- char *inptr = inbuf; +- size_t actlen = 0; +- +- while (actlen < maxlen) ++ char inbuf[BUFSIZ]; ++ char *inbuf_end = inbuf + sizeof (inbuf); ++ size_t inbuf_used = 0; ++ off64_t file_offset = 0; ++ int status = 0; ++ bool incomplete = false; ++ ++ while (true) + { +- ssize_t n = read (fd, inptr, maxlen - actlen); +- +- if (n == 0) +- /* No more text to read. */ +- break; +- +- if (n == -1) ++ char *p = inbuf + inbuf_used; ++ ssize_t read_ret = read (fd, p, inbuf_end - p); ++ if (read_ret == 0) ++ { ++ /* On EOF, check if the previous iconv invocation saw an ++ incomplete sequence. */ ++ if (incomplete) ++ { ++ error (0, 0, _("\ ++incomplete character or shift sequence at end of buffer")); ++ return 1; ++ } ++ return 0; ++ } ++ if (read_ret < 0) + { +- /* Error while reading. 
*/ + error (0, errno, _("error while reading the input")); + return -1; + } +- +- inptr += n; +- actlen += n; ++ inbuf_used += read_ret; ++ incomplete = false; ++ p = inbuf; ++ int ret = process_block (cd, &p, &inbuf_used, file_offset, &incomplete); ++ if (ret != 0) ++ { ++ status = ret; ++ if (ret < 0) ++ break; ++ } ++ /* The next loop iteration consumes the leftover bytes. */ ++ memmove (inbuf, p, inbuf_used); ++ file_offset += read_ret - inbuf_used; + } +- +- if (actlen == maxlen) +- while (1) +- { +- ssize_t n; +- char *new_inbuf; +- +- /* Increase the buffer. */ +- new_inbuf = (char *) realloc (inbuf, maxlen + 32768); +- if (new_inbuf == NULL) +- { +- error (0, errno, _("unable to allocate buffer for input")); +- return -1; +- } +- inbuf = new_inbuf; +- maxlen += 32768; +- inptr = inbuf + actlen; +- +- do +- { +- n = read (fd, inptr, maxlen - actlen); +- +- if (n == 0) +- /* No more text to read. */ +- break; +- +- if (n == -1) +- { +- /* Error while reading. */ +- error (0, errno, _("error while reading the input")); +- return -1; +- } +- +- inptr += n; +- actlen += n; +- } +- while (actlen < maxlen); +- +- if (n == 0) +- /* Break again so we leave both loops. */ +- break; +- } +- +- /* Now we have all the input in the buffer. Process it in one run. */ +- return process_block (cd, inbuf, actlen); ++ return status; + } + + +diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh +index a9c3729d948b4679..23098ac56a344c48 100644 +--- a/iconv/tst-iconv_prog-buffer.sh ++++ b/iconv/tst-iconv_prog-buffer.sh +@@ -50,6 +50,9 @@ echo OUT > "$tmp/out-template" + : > "$tmp/empty" + printf '\xff' > "$tmp/0xff" + ++# Length should be a prime number, to help with buffer alignment testing. ++printf '\xc3\xa4\xe2\x80\x94\xe2\x80\x94\xc3\xa4\n' > "$tmp/utf8-sequence" ++ + # Double all files to produce larger buffers. 
+ for p in "$tmp"/* ; do + i=0 +@@ -270,6 +273,34 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def" + run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc" + expect_files xy abc zt + ++# NB: Extra iconv args are ignored after this point. Actual ++# multi-byte conversion does not work with tiny buffers. ++iconv_args="-f UTF-8 -t ASCII" ++ ++printf 'x\n\xc3' > "$tmp/incomplete" ++expect_exit 1 run_iconv -o "$tmp/out" "$tmp/incomplete" ++check_out <&$logfd ++ printf "%s" "$prefix" > "$tmp/prefix" ++ cat "$tmp/prefix" "$tmp/utf8-sequence" > "$tmp/tmp" ++ iconv_args="-f UTF-8 -t UCS-4" ++ run_iconv -o "$tmp/out1" "$tmp/tmp" ++ iconv_args="-f UCS-4 -t UTF-8" ++ run_iconv -o "$tmp/out" "$tmp/out1" ++ expect_files prefix utf8-sequence ++ ++ prefix="$prefix@" ++ prefix_length=$(($prefix_length + 1)) ++done ++ + if $failure ; then + exit 1 + fi diff --git a/glibc-RHEL-71530-9.patch b/glibc-RHEL-71530-9.patch new file mode 100644 index 0000000..7d764c7 --- /dev/null +++ b/glibc-RHEL-71530-9.patch @@ -0,0 +1,37 @@ +commit 079ebf7624e7fd0ad7fe94a7176a2e132c996d86 +Author: Florian Weimer +Date: Tue Sep 24 10:41:35 2024 +0200 + + iconv: Use $(run-program-prefix) for running iconv (bug 32197) + + With --enable-hardcoded-path-in-tests, $(test-program-prefix) + does not redirect to the built glibc, but we need to run + iconv (the program) against the built glibc even with + --enable-hardcoded-path-in-tests, as it is using the ABI + path for the dynamic linker (as an installed program). + Use $(run-program-prefix) instead. + + Reviewed-by: H.J. 
Lu + +diff --git a/iconv/Makefile b/iconv/Makefile +index c9af0c4d44cae7fb..de9d964ed3c762bf 100644 +--- a/iconv/Makefile ++++ b/iconv/Makefile +@@ -153,14 +153,14 @@ $(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \ + + $(objpfx)tst-iconv_prog-buffer.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' > $@; \ + $(evaluate-test) + $(objpfx)tst-iconv_prog-buffer-tiny.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' \ + '--buffer-size=1' > $@; \ + $(evaluate-test) + $(objpfx)tst-iconv_prog-buffer-large.out: \ + tst-iconv_prog-buffer.sh $(objpfx)iconv_prog +- $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \ ++ $(BASH) $< $(common-objdir) '$(run-program-prefix)' '' '22' > $@; \ + $(evaluate-test) diff --git a/glibc-RHEL-75555.patch b/glibc-RHEL-75555.patch new file mode 100644 index 0000000..ef56105 --- /dev/null +++ b/glibc-RHEL-75555.patch @@ -0,0 +1,115 @@ +commit abeae3c0061c0599ac2f012b270d6b4c8f59c82f +Author: Florian Weimer +Date: Thu Jan 16 18:45:25 2025 +0100 + + Linux: Fixes for getrandom fork handling + + Careful updates of grnd_alloc.len are required to ensure that + after fork, grnd_alloc.states does not contain entries that + are also encountered by __getrandom_reset_state in TCBs. + For the same reason, it is necessary to overwrite the TCB state + pointer with NULL before updating grnd_alloc.states in + __getrandom_vdso_release. + + Before this change, different TCBs could share the same getrandom + state after multi-threaded fork. This would be a critical security + bug (predictable randomness) if not caught during development. + + The additional check in stdlib/tst-arc4random-thread makes it more + likely that the test fails due to the bugs mentioned above. 
+ + Both __getrandom_reset_state and __getrandom_vdso_release could + put reserved NULL pointers into the states array. This is also + fixed with this commit. After these changes, no null pointers were + observed in the states array during testing. + + Reviewed-by: Adhemerval Zanella + +diff --git a/stdlib/tst-arc4random-thread.c b/stdlib/tst-arc4random-thread.c +index d1259626c62af5ad..5e8a4761fc39165a 100644 +--- a/stdlib/tst-arc4random-thread.c ++++ b/stdlib/tst-arc4random-thread.c +@@ -49,7 +49,7 @@ static const int sizes[] = { 12, 15, 16, 17, 24, 31, max_size }; + struct blob + { + unsigned int size; +- int thread_id; ++ int thread_id; /* -1 means after fork. */ + unsigned int index; + unsigned char bytes[max_size]; + }; +@@ -323,6 +323,20 @@ do_test_func (const char *fname, void (*func)(unsigned char *, size_t)) + } + } + ++ for (struct blob *p = dynarray_blob_begin (&global_result); ++ p < end; ++p) ++ { ++ unsigned int sum = 0; ++ for (unsigned int i = 0; i < p->size; ++i) ++ sum += p->bytes[i]; ++ if (sum == 0) ++ { ++ support_record_failure (); ++ printf ("error: all-zero result of length %u on thread %d\n", ++ p->size, p->thread_id); ++ } ++ } ++ + dynarray_blob_free (&global_result); + + return 0; +diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c +index d3eab66a1af6229e..d93901f6ea5cbc6b 100644 +--- a/sysdeps/unix/sysv/linux/getrandom.c ++++ b/sysdeps/unix/sysv/linux/getrandom.c +@@ -168,6 +168,11 @@ vgetrandom_get_state (void) + if (grnd_alloc.len > 0 || vgetrandom_get_state_alloc ()) + state = grnd_alloc.states[--grnd_alloc.len]; + ++ /* Barrier needed by fork: The state must be gone from the array ++ through len update before it becomes visible in the TCB. (There ++ is also a release barrier implied by the unlock, but issue a ++ stronger barrier to help fork.) 
*/ ++ atomic_thread_fence_seq_cst (); + __libc_lock_unlock (grnd_alloc.lock); + internal_signal_restore_set (&set); + +@@ -278,7 +283,10 @@ void + __getrandom_reset_state (struct pthread *curp) + { + #ifdef HAVE_GETRANDOM_VSYSCALL +- if (grnd_alloc.states == NULL || curp->getrandom_buf == NULL) ++ /* The pointer can be reserved if the fork happened during a ++ getrandom call. */ ++ void *buf = release_ptr (curp->getrandom_buf); ++ if (grnd_alloc.states == NULL || buf == NULL) + return; + assert (grnd_alloc.len < grnd_alloc.cap); + grnd_alloc.states[grnd_alloc.len++] = release_ptr (curp->getrandom_buf); +@@ -294,11 +302,23 @@ void + __getrandom_vdso_release (struct pthread *curp) + { + #ifdef HAVE_GETRANDOM_VSYSCALL +- if (curp->getrandom_buf == NULL) ++ /* The pointer can be reserved if the thread was canceled in a ++ signal handler. */ ++ void *buf = release_ptr (curp->getrandom_buf); ++ if (buf == NULL) + return; + + __libc_lock_lock (grnd_alloc.lock); +- grnd_alloc.states[grnd_alloc.len++] = curp->getrandom_buf; ++ ++ size_t len = grnd_alloc.len; ++ grnd_alloc.states[len] = curp->getrandom_buf; ++ curp->getrandom_buf = NULL; ++ /* Barrier needed by fork: The state must vanish from the TCB before ++ it becomes visible in the states array. Also avoid exposing the ++ previous entry value at the same index in the states array (which ++ may be in use by another thread). */ ++ atomic_thread_fence_seq_cst (); ++ grnd_alloc.len = len + 1; + __libc_lock_unlock (grnd_alloc.lock); + #endif + } diff --git a/glibc-RHEL-75809-2.patch b/glibc-RHEL-75809-2.patch new file mode 100644 index 0000000..15d865e --- /dev/null +++ b/glibc-RHEL-75809-2.patch @@ -0,0 +1,232 @@ +commit 36fcdfbbc5463e55581fec67141df3493fb81f7e +Author: Florian Weimer +Date: Fri Jan 24 08:04:23 2025 +0100 + + Revert "stdlib: Support malloc-managed environ arrays for compatibility" + + This reverts commit b62759db04b8ed7f829c06f1d7c3b8fb70616493. 
+ + Reason for revert: Incompatible with “env -i” and coreutils (bug 32588). + + Reviewed-by: H.J. Lu + +diff --git a/csu/init-first.c b/csu/init-first.c +index 77b5b4941beb3a73..a2cb456ccf9ac5e6 100644 +--- a/csu/init-first.c ++++ b/csu/init-first.c +@@ -61,7 +61,6 @@ _init_first (int argc, char **argv, char **envp) + __libc_argc = argc; + __libc_argv = argv; + __environ = envp; +- __environ_startup = envp; + + #ifndef SHARED + /* First the initialization which normally would be done by the +diff --git a/csu/libc-start.c b/csu/libc-start.c +index 260027c2396e1f52..d784de0f0bdd70c8 100644 +--- a/csu/libc-start.c ++++ b/csu/libc-start.c +@@ -244,7 +244,6 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + char **ev = &argv[argc + 1]; + + __environ = ev; +- __environ_startup = ev; + + /* Store the lowest stack address. This is done in ld.so if this is + the code for the DSO. */ +diff --git a/include/unistd.h b/include/unistd.h +index ada957f9d04d272a..e241603b8131a9e9 100644 +--- a/include/unistd.h ++++ b/include/unistd.h +@@ -203,9 +203,6 @@ libc_hidden_proto (__tcsetpgrp) + extern int __libc_enable_secure attribute_relro; + rtld_hidden_proto (__libc_enable_secure) + +-/* Original value of __environ. Initialized by _init_first (dynamic) +- or __libc_start_main (static). */ +-extern char **__environ_startup attribute_hidden; + + /* Various internal function. */ + extern void __libc_check_standard_fds (void) attribute_hidden; +diff --git a/posix/environ.c b/posix/environ.c +index 2430b47d8eee148c..a0ed0d80eab207f8 100644 +--- a/posix/environ.c ++++ b/posix/environ.c +@@ -10,5 +10,3 @@ weak_alias (__environ, environ) + /* The SVR4 ABI says `_environ' will be the name to use + in case the user overrides the weak alias `environ'. 
*/ + weak_alias (__environ, _environ) +- +-char **__environ_startup; +diff --git a/stdlib/Makefile b/stdlib/Makefile +index 217600ba60e3c7d4..ff1418f5bb2ea5c9 100644 +--- a/stdlib/Makefile ++++ b/stdlib/Makefile +@@ -312,7 +312,6 @@ tests := \ + tst-setcontext9 \ + tst-setcontext10 \ + tst-setcontext11 \ +- tst-setenv-malloc \ + tst-stdbit-Wconversion \ + tst-stdbit-builtins \ + tst-stdc_bit_ceil \ +diff --git a/stdlib/setenv.c b/stdlib/setenv.c +index 79982aa12ac20078..d12401ca77cee5a7 100644 +--- a/stdlib/setenv.c ++++ b/stdlib/setenv.c +@@ -191,52 +191,52 @@ __add_to_environ (const char *name, const char *value, const char *combined, + ep[1] = NULL; + else + { +- /* We cannot use __environ as is and need a larger allocation. */ +- +- if (start_environ == __environ_startup +- || __environ_is_from_array_list (start_environ)) +- { +- /* Allocate a new array, managed in the list. */ +- struct environ_array *target_array +- = __environ_new_array (required_size); +- if (target_array == NULL) +- { +- UNLOCK; +- return -1; +- } +- result_environ = &target_array->array[0]; +- +- /* Copy over the __environ array contents. This code +- handles the case start_environ == ep == NULL, too. */ +- size_t i; +- for (i = 0; start_environ + i < ep; ++i) +- /* Regular store because unless there has been direct +- manipulation of the environment, target_array is still +- a private copy. */ +- result_environ[i] = atomic_load_relaxed (start_environ + i); +- } ++ /* We cannot use __environ as is and need to copy over the ++ __environ contents into an array managed via ++ __environ_array_list. */ ++ ++ struct environ_array *target_array; ++ if (__environ_array_list != NULL ++ && required_size <= __environ_array_list->allocated) ++ /* Existing array has enough room. Contents is copied below. */ ++ target_array = __environ_array_list; + else + { +- /* Otherwise the application installed its own pointer. +- Historically, this pointer was managed using realloc. +- Continue doing so. 
This disables multi-threading +- support. */ +- result_environ = __libc_reallocarray (start_environ, +- required_size, +- sizeof (*result_environ)); +- if (result_environ == NULL) ++ /* Allocate a new array. */ ++ target_array = __environ_new_array (required_size); ++ if (target_array == NULL) + { + UNLOCK; + return -1; + } + } + ++ /* Copy over the __environ array contents. This forward ++ copy slides backwards part of the array if __environ ++ points into target_array->array. This happens if an ++ application makes an assignment like: ++ ++ environ = &environ[1]; ++ ++ The forward copy avoids clobbering values that still ++ needing copying. This code handles the case ++ start_environ == ep == NULL, too. */ ++ size_t i; ++ for (i = 0; start_environ + i < ep; ++i) ++ /* Regular store because unless there has been direct ++ manipulation of the environment, target_array is still ++ a private copy. */ ++ target_array->array[i] = atomic_load_relaxed (start_environ + i); ++ + /* This is the new place where we should add the element. */ +- ep = result_environ + (required_size - 2); ++ ep = target_array->array + i; + + /* Add the null terminator in case there was a pointer there + previously. */ + ep[1] = NULL; ++ ++ /* And __environ should be repointed to our array. */ ++ result_environ = &target_array->array[0]; + } + } + +diff --git a/stdlib/tst-setenv-malloc.c b/stdlib/tst-setenv-malloc.c +deleted file mode 100644 +index 18a9d36842e67aa5..0000000000000000 +--- a/stdlib/tst-setenv-malloc.c ++++ /dev/null +@@ -1,64 +0,0 @@ +-/* Test using setenv with a malloc-allocated environ variable. +- Copyright (C) 2025 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. 
+- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-/* This test is not in the scope for POSIX or any other standard, but +- some applications assume that environ is a heap-allocated pointer +- after a call to setenv on an empty environment. */ +- +-#include +-#include +-#include +-#include +- +-static const char *original_path; +-static char **save_environ; +- +-static void +-rewrite_environ (void) +-{ +- save_environ = environ; +- environ = xmalloc (sizeof (*environ)); +- *environ = NULL; +- TEST_COMPARE (setenv ("A", "1", 1), 0); +- TEST_COMPARE (setenv ("B", "2", 1), 0); +- TEST_VERIFY (environ != save_environ); +- TEST_COMPARE_STRING (environ[0], "A=1"); +- TEST_COMPARE_STRING (environ[1], "B=2"); +- TEST_COMPARE_STRING (environ[2], NULL); +- TEST_COMPARE_STRING (getenv ("PATH"), NULL); +- free (environ); +- environ = save_environ; +- TEST_COMPARE_STRING (getenv ("PATH"), original_path); +-} +- +-static int +-do_test (void) +-{ +- original_path = getenv ("PATH"); +- rewrite_environ (); +- +- /* Test again after reallocated the environment due to an initial +- setenv call. 
*/ +- TEST_COMPARE (setenv ("TST_SETENV_MALLOC", "1", 1), 0); +- TEST_VERIFY (environ != save_environ); +- rewrite_environ (); +- +- return 0; +-} +- +-#include diff --git a/glibc-RHEL-75809-3.patch b/glibc-RHEL-75809-3.patch new file mode 100644 index 0000000..aff1b42 --- /dev/null +++ b/glibc-RHEL-75809-3.patch @@ -0,0 +1,299 @@ +commit 12b4a1fc6ecfc278a87159164bdf1d682deb18e2 +Author: Florian Weimer +Date: Fri Jan 24 10:40:28 2025 +0100 + + stdlib: Re-implement free (environ) compatibility kludge for setenv + + For the originally failing application (userhelper from usermode), + it is not actually necessary to call realloc on the environ + pointer. Yes, there will be a memory leak because the application + assigns a heap-allocated pointer to environ that it never frees, + but this leak was always there: the old realloc-based setenv had + a hidden internal variable, last_environ, that was used in a similar + way to __environ_array_list. The application is not impacted by + the leak anyway because the relevant operations do not happen in + a loop. + + The change here just uses a separate heap allocation and points + environ to that. This means that if an application calls + free (environ) and restores the environ pointer to the value + at process start, and does not modify the environment further, + nothing bad happens. + + This change should not invalidate any previous testing that went into + the original getenv thread safety change, commit 7a61e7f557a97ab597d6 + ("stdlib: Make getenv thread-safe in more cases"). + + The new test cases are modeled in part on the env -i use case from + bug 32588 (with !DO_MALLOC && !DO_EARLY_SETENV), and the previous + stdlib/tst-setenv-malloc test. The DO_MALLOC && !DO_EARLY_SETENV + case in the new test should approximate what userhelper from the + usermode package does.
+ + Reviewed-by: Carlos O'Donell + +diff --git a/stdlib/Makefile b/stdlib/Makefile +index ff1418f5bb2ea5c9..f4dec9be46a573b9 100644 +--- a/stdlib/Makefile ++++ b/stdlib/Makefile +@@ -275,6 +275,10 @@ tests := \ + tst-canon-bz26341 \ + tst-cxa_atexit \ + tst-environ \ ++ tst-environ-change-1 \ ++ tst-environ-change-2 \ ++ tst-environ-change-3 \ ++ tst-environ-change-4 \ + tst-getenv-signal \ + tst-getenv-thread \ + tst-getenv-unsetenv \ +diff --git a/stdlib/setenv.c b/stdlib/setenv.c +index d12401ca77cee5a7..20b0e1673c9557de 100644 +--- a/stdlib/setenv.c ++++ b/stdlib/setenv.c +@@ -118,24 +118,21 @@ __environ_new_array (size_t required_size) + else + new_size = __environ_array_list->allocated * 2; + +- size_t new_size_in_bytes; +- if (__builtin_mul_overflow (new_size, sizeof (char *), +- &new_size_in_bytes) +- || __builtin_add_overflow (new_size_in_bytes, +- offsetof (struct environ_array, +- array), +- &new_size_in_bytes)) ++ /* Zero-initialize everything, so that getenv can only ++ observe valid or null pointers. */ ++ char **new_array = calloc (new_size, sizeof (*new_array)); ++ if (new_array == NULL) ++ return NULL; ++ ++ struct environ_array *target_array = malloc (sizeof (*target_array)); ++ if (target_array == NULL) + { +- __set_errno (ENOMEM); ++ free (new_array); + return NULL; + } + +- /* Zero-initialize everything, so that getenv can only +- observe valid or null pointers. */ +- struct environ_array *target_array = calloc (1, new_size_in_bytes); +- if (target_array == NULL) +- return NULL; + target_array->allocated = new_size; ++ target_array->array = new_array; + assert (new_size >= target_array->allocated); + + /* Put it onto the list. */ +@@ -236,7 +233,7 @@ __add_to_environ (const char *name, const char *value, const char *combined, + ep[1] = NULL; + + /* And __environ should be repointed to our array. 
*/ +- result_environ = &target_array->array[0]; ++ result_environ = target_array->array; + } + } + +@@ -403,6 +400,7 @@ __libc_setenv_freemem (void) + /* Clear all backing arrays. */ + while (__environ_array_list != NULL) + { ++ free (__environ_array_list->array); + void *ptr = __environ_array_list; + __environ_array_list = __environ_array_list->next; + free (ptr); +diff --git a/stdlib/setenv.h b/stdlib/setenv.h +index 036f4274aa29b722..42b86fff1008bc81 100644 +--- a/stdlib/setenv.h ++++ b/stdlib/setenv.h +@@ -29,9 +29,18 @@ + of environment values used before. */ + struct environ_array + { +- struct environ_array *next; /* Previously used environment array. */ ++ /* The actual environment array. Use a separate allocation (and not ++ a flexible array member) so that calls like free (environ) that ++ have been encountered in some applications do not crash ++ immediately. With such a call, if the application restores the ++ original environ pointer at process start and does not modify the ++ environment again, a use-after-free situation only occurs during ++ __libc_freeres, which is only called during memory debugging. ++ With subsequent setenv calls, there is still heap corruption, but ++ that happened with the old realloc-based implementation, too. */ ++ char **array; + size_t allocated; /* Number of allocated array elments. */ +- char *array[]; /* The actual environment array. */ ++ struct environ_array *next; /* Previously used environment array. */ + }; + + /* After initialization, and until the user resets environ (perhaps by +@@ -44,7 +53,7 @@ static inline bool + __environ_is_from_array_list (char **ep) + { + struct environ_array *eal = atomic_load_relaxed (&__environ_array_list); +- return eal != NULL && &eal->array[0] == ep; ++ return eal != NULL && eal->array == ep; + } + + /* Counter for detecting concurrent modification in unsetenv. 
+diff --git a/stdlib/tst-environ-change-1.c b/stdlib/tst-environ-change-1.c +new file mode 100644 +index 0000000000000000..4241ad4c63ea2e33 +--- /dev/null ++++ b/stdlib/tst-environ-change-1.c +@@ -0,0 +1,3 @@ ++#define DO_EARLY_SETENV 0 ++#define DO_MALLOC 0 ++#include "tst-environ-change-skeleton.c" +diff --git a/stdlib/tst-environ-change-2.c b/stdlib/tst-environ-change-2.c +new file mode 100644 +index 0000000000000000..b20be124902125e8 +--- /dev/null ++++ b/stdlib/tst-environ-change-2.c +@@ -0,0 +1,3 @@ ++#define DO_EARLY_SETENV 0 ++#define DO_MALLOC 1 ++#include "tst-environ-change-skeleton.c" +diff --git a/stdlib/tst-environ-change-3.c b/stdlib/tst-environ-change-3.c +new file mode 100644 +index 0000000000000000..e77996a6cb0ac601 +--- /dev/null ++++ b/stdlib/tst-environ-change-3.c +@@ -0,0 +1,3 @@ ++#define DO_EARLY_SETENV 1 ++#define DO_MALLOC 0 ++#include "tst-environ-change-skeleton.c" +diff --git a/stdlib/tst-environ-change-4.c b/stdlib/tst-environ-change-4.c +new file mode 100644 +index 0000000000000000..633ef7bda84eb2a8 +--- /dev/null ++++ b/stdlib/tst-environ-change-4.c +@@ -0,0 +1,3 @@ ++#define DO_EARLY_SETENV 1 ++#define DO_MALLOC 1 ++#include "tst-environ-change-skeleton.c" +diff --git a/stdlib/tst-environ-change-skeleton.c b/stdlib/tst-environ-change-skeleton.c +new file mode 100644 +index 0000000000000000..c9b02844369207d9 +--- /dev/null ++++ b/stdlib/tst-environ-change-skeleton.c +@@ -0,0 +1,118 @@ ++/* Test deallocation of the environ pointer. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This test is not in the scope for POSIX or any other standard, but ++ some applications assume that environ is a heap-allocated pointer ++ after a call to setenv on an empty environment. They also try to ++ save and restore environ in an attempt to undo a temporary ++ modification of the environment array, but this does not work if ++ setenv was called before. ++ ++ Before including this file, these macros need to be defined ++ to 0 or 1: ++ ++ DO_EARLY_SETENV If 1, perform a setenv call before changing environ. ++ DO_MALLOC If 1, use a heap pointer for the empty environment. ++ ++ Note that this test will produce errors under valgrind and other ++ memory tracers that call __libc_freeres because free (environ) ++ deallocates a pointer still used internally. 
*/ ++ ++#include ++#include ++#include ++#include ++ ++static void ++check_rewritten (void) ++{ ++ TEST_COMPARE_STRING (environ[0], "tst_environ_change_a=1"); ++ TEST_COMPARE_STRING (environ[1], "tst_environ_change_b=2"); ++ TEST_COMPARE_STRING (environ[2], NULL); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_a"), "1"); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_b"), "2"); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_early"), NULL); ++ TEST_COMPARE_STRING (getenv ("PATH"), NULL); ++} ++ ++static int ++do_test (void) ++{ ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_a"), NULL); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_b"), NULL); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_early_setenv"), NULL); ++#if DO_EARLY_SETENV ++ TEST_COMPARE (setenv ("tst_environ_change_early_setenv", "1", 1), 0); ++#else ++ /* Must come back after environ reset. */ ++ char *original_path = xstrdup (getenv ("PATH")); ++#endif ++ ++ char **save_environ = environ; ++#if DO_MALLOC ++ environ = xmalloc (sizeof (*environ)); ++#else ++ char *environ_array[1]; ++ environ = environ_array; ++#endif ++ *environ = NULL; ++ TEST_COMPARE (setenv ("tst_environ_change_a", "1", 1), 0); ++ TEST_COMPARE (setenv ("tst_environ_change_b", "2", 1), 0); ++#if !DO_EARLY_SETENV ++ /* Early setenv results in reuse of the heap-allocated environ array ++ that does not change as more pointers are added to it. */ ++ TEST_VERIFY (environ != save_environ); ++#endif ++ check_rewritten (); ++ ++ bool check_environ = true; ++#if DO_MALLOC ++ /* Disable further checks if the free call clobbers the environ ++ contents. Whether that is the case depends on the internal ++ setenv allocation policy and the heap layout. */ ++ check_environ = environ != save_environ; ++ /* Invalid: Causes internal use-after-free condition. Yet this has ++ to be supported for compatibility with some applications. 
*/ ++ free (environ); ++#endif ++ ++ environ = save_environ; ++ ++#if DO_EARLY_SETENV ++ /* With an early setenv, the internal environ array was overwritten. ++ Historically, this triggered a use-after-free problem because of ++ the use of realloc internally in setenv, but it may appear as if ++ the original environment had been restored. In the current code, ++ we can only support this if the free (environ) above call did not ++ clobber the array, otherwise getenv will see invalid pointers. ++ Due to the use-after-free, invalid pointers could be seen with ++ the old implementation as well, but the triggering conditions ++ were different. */ ++ if (check_environ) ++ check_rewritten (); ++#else ++ TEST_VERIFY (check_environ); ++ TEST_COMPARE_STRING (getenv ("PATH"), original_path); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_a"), NULL); ++ TEST_COMPARE_STRING (getenv ("tst_environ_change_b"), NULL); ++#endif ++ ++ return 0; ++} ++ ++#include diff --git a/glibc-RHEL-75809.patch b/glibc-RHEL-75809.patch new file mode 100644 index 0000000..c66091d --- /dev/null +++ b/glibc-RHEL-75809.patch @@ -0,0 +1,245 @@ +commit b62759db04b8ed7f829c06f1d7c3b8fb70616493 +Author: Florian Weimer +Date: Wed Jan 22 13:48:56 2025 +0100 + + stdlib: Support malloc-managed environ arrays for compatibility + + Some applications set environ to a heap-allocated pointer, call + setenv (expecting it to call realloc), free environ, and then + restore the original environ pointer. This breaks after + commit 7a61e7f557a97ab597d6fca5e2d1f13f65685c61 ("stdlib: Make + getenv thread-safe in more cases") because after the setenv call, + the environ pointer does not point to the start of a heap allocation. + Instead, setenv creates a separate allocation and changes environ + to point into that. This means that the free call in the application + results in heap corruption. 
+ + The interim approach was more compatible with other libcs because + it does not assume that the incoming environ pointer is allocated + as if by malloc (if it was written by the application). However, + it seems to be more important to stay compatible with previous + glibc version: assume the incoming pointer is heap allocated, + and preserve this property after setenv calls. + + Reviewed-by: Carlos O'Donell + +diff --git a/csu/init-first.c b/csu/init-first.c +index a2cb456ccf9ac5e6..77b5b4941beb3a73 100644 +--- a/csu/init-first.c ++++ b/csu/init-first.c +@@ -61,6 +61,7 @@ _init_first (int argc, char **argv, char **envp) + __libc_argc = argc; + __libc_argv = argv; + __environ = envp; ++ __environ_startup = envp; + + #ifndef SHARED + /* First the initialization which normally would be done by the +diff --git a/csu/libc-start.c b/csu/libc-start.c +index d784de0f0bdd70c8..260027c2396e1f52 100644 +--- a/csu/libc-start.c ++++ b/csu/libc-start.c +@@ -244,6 +244,7 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + char **ev = &argv[argc + 1]; + + __environ = ev; ++ __environ_startup = ev; + + /* Store the lowest stack address. This is done in ld.so if this is + the code for the DSO. */ +diff --git a/include/unistd.h b/include/unistd.h +index e241603b8131a9e9..ada957f9d04d272a 100644 +--- a/include/unistd.h ++++ b/include/unistd.h +@@ -203,6 +203,9 @@ libc_hidden_proto (__tcsetpgrp) + extern int __libc_enable_secure attribute_relro; + rtld_hidden_proto (__libc_enable_secure) + ++/* Original value of __environ. Initialized by _init_first (dynamic) ++ or __libc_start_main (static). */ ++extern char **__environ_startup attribute_hidden; + + /* Various internal function. 
*/ + extern void __libc_check_standard_fds (void) attribute_hidden; +diff --git a/posix/environ.c b/posix/environ.c +index a0ed0d80eab207f8..2430b47d8eee148c 100644 +--- a/posix/environ.c ++++ b/posix/environ.c +@@ -10,3 +10,5 @@ weak_alias (__environ, environ) + /* The SVR4 ABI says `_environ' will be the name to use + in case the user overrides the weak alias `environ'. */ + weak_alias (__environ, _environ) ++ ++char **__environ_startup; +diff --git a/stdlib/Makefile b/stdlib/Makefile +index ff1418f5bb2ea5c9..217600ba60e3c7d4 100644 +--- a/stdlib/Makefile ++++ b/stdlib/Makefile +@@ -312,6 +312,7 @@ tests := \ + tst-setcontext9 \ + tst-setcontext10 \ + tst-setcontext11 \ ++ tst-setenv-malloc \ + tst-stdbit-Wconversion \ + tst-stdbit-builtins \ + tst-stdc_bit_ceil \ +diff --git a/stdlib/setenv.c b/stdlib/setenv.c +index d12401ca77cee5a7..79982aa12ac20078 100644 +--- a/stdlib/setenv.c ++++ b/stdlib/setenv.c +@@ -191,52 +191,52 @@ __add_to_environ (const char *name, const char *value, const char *combined, + ep[1] = NULL; + else + { +- /* We cannot use __environ as is and need to copy over the +- __environ contents into an array managed via +- __environ_array_list. */ +- +- struct environ_array *target_array; +- if (__environ_array_list != NULL +- && required_size <= __environ_array_list->allocated) +- /* Existing array has enough room. Contents is copied below. */ +- target_array = __environ_array_list; +- else ++ /* We cannot use __environ as is and need a larger allocation. */ ++ ++ if (start_environ == __environ_startup ++ || __environ_is_from_array_list (start_environ)) + { +- /* Allocate a new array. */ +- target_array = __environ_new_array (required_size); ++ /* Allocate a new array, managed in the list. */ ++ struct environ_array *target_array ++ = __environ_new_array (required_size); + if (target_array == NULL) + { + UNLOCK; + return -1; + } ++ result_environ = &target_array->array[0]; ++ ++ /* Copy over the __environ array contents. 
This code ++ handles the case start_environ == ep == NULL, too. */ ++ size_t i; ++ for (i = 0; start_environ + i < ep; ++i) ++ /* Regular store because unless there has been direct ++ manipulation of the environment, target_array is still ++ a private copy. */ ++ result_environ[i] = atomic_load_relaxed (start_environ + i); ++ } ++ else ++ { ++ /* Otherwise the application installed its own pointer. ++ Historically, this pointer was managed using realloc. ++ Continue doing so. This disables multi-threading ++ support. */ ++ result_environ = __libc_reallocarray (start_environ, ++ required_size, ++ sizeof (*result_environ)); ++ if (result_environ == NULL) ++ { ++ UNLOCK; ++ return -1; ++ } + } +- +- /* Copy over the __environ array contents. This forward +- copy slides backwards part of the array if __environ +- points into target_array->array. This happens if an +- application makes an assignment like: +- +- environ = &environ[1]; +- +- The forward copy avoids clobbering values that still +- needing copying. This code handles the case +- start_environ == ep == NULL, too. */ +- size_t i; +- for (i = 0; start_environ + i < ep; ++i) +- /* Regular store because unless there has been direct +- manipulation of the environment, target_array is still +- a private copy. */ +- target_array->array[i] = atomic_load_relaxed (start_environ + i); + + /* This is the new place where we should add the element. */ +- ep = target_array->array + i; ++ ep = result_environ + (required_size - 2); + + /* Add the null terminator in case there was a pointer there + previously. */ + ep[1] = NULL; +- +- /* And __environ should be repointed to our array. */ +- result_environ = &target_array->array[0]; + } + } + +diff --git a/stdlib/tst-setenv-malloc.c b/stdlib/tst-setenv-malloc.c +new file mode 100644 +index 0000000000000000..18a9d36842e67aa5 +--- /dev/null ++++ b/stdlib/tst-setenv-malloc.c +@@ -0,0 +1,64 @@ ++/* Test using setenv with a malloc-allocated environ variable. 
++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This test is not in the scope for POSIX or any other standard, but ++ some applications assume that environ is a heap-allocated pointer ++ after a call to setenv on an empty environment. */ ++ ++#include ++#include ++#include ++#include ++ ++static const char *original_path; ++static char **save_environ; ++ ++static void ++rewrite_environ (void) ++{ ++ save_environ = environ; ++ environ = xmalloc (sizeof (*environ)); ++ *environ = NULL; ++ TEST_COMPARE (setenv ("A", "1", 1), 0); ++ TEST_COMPARE (setenv ("B", "2", 1), 0); ++ TEST_VERIFY (environ != save_environ); ++ TEST_COMPARE_STRING (environ[0], "A=1"); ++ TEST_COMPARE_STRING (environ[1], "B=2"); ++ TEST_COMPARE_STRING (environ[2], NULL); ++ TEST_COMPARE_STRING (getenv ("PATH"), NULL); ++ free (environ); ++ environ = save_environ; ++ TEST_COMPARE_STRING (getenv ("PATH"), original_path); ++} ++ ++static int ++do_test (void) ++{ ++ original_path = getenv ("PATH"); ++ rewrite_environ (); ++ ++ /* Test again after reallocated the environment due to an initial ++ setenv call. 
*/ ++ TEST_COMPARE (setenv ("TST_SETENV_MALLOC", "1", 1), 0); ++ TEST_VERIFY (environ != save_environ); ++ rewrite_environ (); ++ ++ return 0; ++} ++ ++#include diff --git a/glibc-upstream-2.39-138.patch b/glibc-upstream-2.39-138.patch new file mode 100644 index 0000000..c2ed486 --- /dev/null +++ b/glibc-upstream-2.39-138.patch @@ -0,0 +1,55 @@ +commit 9a0e174a39a3a65f628c6a55e29fe35f6d67bf42 +Author: Michael Jeanson +Date: Thu Nov 7 22:23:49 2024 +0100 + + nptl: initialize rseq area prior to registration + + Per the rseq syscall documentation, 3 fields are required to be + initialized by userspace prior to registration, they are 'cpu_id', + 'rseq_cs' and 'flags'. Since we have no guarantee that 'struct pthread' + is cleared on all architectures, explicitly set those 3 fields prior to + registration. + + Signed-off-by: Michael Jeanson + Reviewed-by: Florian Weimer + (cherry picked from commit 97f60abd25628425971f07e9b0e7f8eec0741235) + +diff --git a/nptl/descr.h b/nptl/descr.h +index 4697f633e16c7359..a83df327e4bcba2e 100644 +--- a/nptl/descr.h ++++ b/nptl/descr.h +@@ -417,6 +417,8 @@ struct pthread + { + uint32_t cpu_id_start; + uint32_t cpu_id; ++ uint64_t rseq_cs; ++ uint32_t flags; + }; + char pad[32]; /* Original rseq area size. */ + } rseq_area __attribute__ ((aligned (32))); +diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h +index 7ea935b4adab8c20..37a8f630b6519ff0 100644 +--- a/sysdeps/unix/sysv/linux/rseq-internal.h ++++ b/sysdeps/unix/sysv/linux/rseq-internal.h +@@ -51,11 +51,21 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) + /* The initial implementation used only 20 bytes out of 32, + but still expected size 32. */ + size = RSEQ_AREA_SIZE_INITIAL; ++ ++ /* Initialize the rseq fields that are read by the kernel on ++ registration, there is no guarantee that struct pthread is ++ cleared on all architectures. 
*/ ++ THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); ++ THREAD_SETMEM (self, rseq_area.rseq_cs, 0); ++ THREAD_SETMEM (self, rseq_area.flags, 0); ++ + int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area, + size, 0, RSEQ_SIG); + if (!INTERNAL_SYSCALL_ERROR_P (ret)) + return true; + } ++ /* When rseq is disabled by tunables or the registration fails, inform ++ userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED. */ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); + return false; + } diff --git a/glibc-upstream-2.39-139.patch b/glibc-upstream-2.39-139.patch new file mode 100644 index 0000000..20d6367 --- /dev/null +++ b/glibc-upstream-2.39-139.patch @@ -0,0 +1,29 @@ +commit 350db2839387659e1500a54d276e401c9c6b2dee +Author: Michael Jeanson +Date: Wed Nov 20 14:15:42 2024 -0500 + + nptl: initialize cpu_id_start prior to rseq registration + + When adding explicit initialization of rseq fields prior to + registration, I glossed over the fact that 'cpu_id_start' is also + documented as initialized by user-space. + + While current kernels don't validate the content of this field on + registration, future ones could. + + Signed-off-by: Michael Jeanson + Reviewed-by: Mathieu Desnoyers + (cherry picked from commit d9f40387d3305d97e30a8cf8724218c42a63680a) + +diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h +index 37a8f630b6519ff0..ef3eab1fefd4d90d 100644 +--- a/sysdeps/unix/sysv/linux/rseq-internal.h ++++ b/sysdeps/unix/sysv/linux/rseq-internal.h +@@ -56,6 +56,7 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq) + registration, there is no guarantee that struct pthread is + cleared on all architectures. 
*/ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); ++ THREAD_SETMEM (self, rseq_area.cpu_id_start, 0); + THREAD_SETMEM (self, rseq_area.rseq_cs, 0); + THREAD_SETMEM (self, rseq_area.flags, 0); + diff --git a/glibc-upstream-2.39-140.patch b/glibc-upstream-2.39-140.patch new file mode 100644 index 0000000..aa67ae2 --- /dev/null +++ b/glibc-upstream-2.39-140.patch @@ -0,0 +1,28 @@ +commit aa8768999e94fcee1695feb766c69dd8a93b706b +Author: H.J. Lu +Date: Fri May 17 20:00:38 2024 -0700 + + Pass -nostdlib -nostartfiles together with -r [BZ #31753] + + Since -r in GCC 6/7/8 doesn't imply -nostdlib -nostartfiles, update the + link-static-libc.out rule to also pass -nostdlib -nostartfiles. This + fixes BZ #31753. + + Signed-off-by: H.J. Lu + Reviewed-by: Florian Weimer + (cherry picked from commit 2be3352f0b1ebaa39596393fffe1062275186669) + +diff --git a/Makefile b/Makefile +index 37bf70aa4ad4403f..ae9bc09327dd2d5b 100644 +--- a/Makefile ++++ b/Makefile +@@ -581,7 +581,8 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh + # definitions of any symbols. + tests-special += $(objpfx)link-static-libc.out + $(objpfx)link-static-libc.out: +- $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \ ++ $(LINK.o) $(whole-archive) -nostdlib -nostartfiles -r \ ++ $(objpfx)libc.a -o /dev/null > $@ 2>&1; \ + $(evaluate-test) + + # Print test summary for tests in $1 .sum file; diff --git a/glibc-upstream-2.39-141.patch b/glibc-upstream-2.39-141.patch new file mode 100644 index 0000000..2801ac0 --- /dev/null +++ b/glibc-upstream-2.39-141.patch @@ -0,0 +1,172 @@ +commit 51da74a97e0f024fd89b57304b3ab010a3cfaef1 +Author: Sam James +Date: Mon Dec 9 23:11:25 2024 +0000 + + malloc: add indirection for malloc(-like) functions in tests [BZ #32366] + + GCC 15 introduces allocation dead code removal (DCE) for PR117370 in + r15-5255-g7828dc070510f8. 
This breaks various glibc tests which want + to assert various properties of the allocator without doing anything + obviously useful with the allocated memory. + + Alexander Monakov rightly pointed out that we can and should do better + than passing -fno-malloc-dce to paper over the problem. Not least because + GCC 14 already does such DCE where there's no testing of malloc's return + value against NULL, and LLVM has such optimisations too. + + Handle this by providing malloc (and friends) wrappers with a volatile + function pointer to obscure that we're calling malloc (et. al) from the + compiler. + + Reviewed-by: Paul Eggert + (cherry picked from commit a9944a52c967ce76a5894c30d0274b824df43c7a) + +diff --git a/malloc/tst-aligned-alloc.c b/malloc/tst-aligned-alloc.c +index 91167d1392c0e626..b0f05a8fec78d5e8 100644 +--- a/malloc/tst-aligned-alloc.c ++++ b/malloc/tst-aligned-alloc.c +@@ -25,6 +25,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int + do_test (void) + { +diff --git a/malloc/tst-compathooks-off.c b/malloc/tst-compathooks-off.c +index d0106f3fb74ff3b1..4cce6e5a8076f6b6 100644 +--- a/malloc/tst-compathooks-off.c ++++ b/malloc/tst-compathooks-off.c +@@ -25,6 +25,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + extern void (*volatile __free_hook) (void *, const void *); + extern void *(*volatile __malloc_hook)(size_t, const void *); + extern void *(*volatile __realloc_hook)(void *, size_t, const void *); +diff --git a/malloc/tst-malloc-aux.h b/malloc/tst-malloc-aux.h +new file mode 100644 +index 0000000000000000..54908b4a2464d510 +--- /dev/null ++++ b/malloc/tst-malloc-aux.h +@@ -0,0 +1,41 @@ ++/* Wrappers for malloc-like functions to allow testing the implementation ++ without optimization. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#ifndef TST_MALLOC_AUX_H ++#define TST_MALLOC_AUX_H ++ ++#include ++#include ++ ++static void *(*volatile aligned_alloc_indirect)(size_t, size_t) = aligned_alloc; ++static void *(*volatile calloc_indirect)(size_t, size_t) = calloc; ++static void *(*volatile malloc_indirect)(size_t) = malloc; ++static void *(*volatile realloc_indirect)(void*, size_t) = realloc; ++ ++#undef aligned_alloc ++#undef calloc ++#undef malloc ++#undef realloc ++ ++#define aligned_alloc aligned_alloc_indirect ++#define calloc calloc_indirect ++#define malloc malloc_indirect ++#define realloc realloc_indirect ++ ++#endif /* TST_MALLOC_AUX_H */ +diff --git a/malloc/tst-malloc-check.c b/malloc/tst-malloc-check.c +index fde8863ad7561a71..cc88bff3b39a421c 100644 +--- a/malloc/tst-malloc-check.c ++++ b/malloc/tst-malloc-check.c +@@ -20,6 +20,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int errors = 0; + + static void +diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c +index 8e9e0d5fa2b4b907..2b91377e54cdc485 100644 +--- a/malloc/tst-malloc-too-large.c ++++ b/malloc/tst-malloc-too-large.c +@@ -43,6 +43,7 @@ + #include + #include + ++#include "tst-malloc-aux.h" + + /* This function prepares for each 'too-large memory allocation' test by + performing a small successful 
malloc/free and resetting errno prior to +diff --git a/malloc/tst-malloc.c b/malloc/tst-malloc.c +index f7a6e4654c374d01..68af399022543111 100644 +--- a/malloc/tst-malloc.c ++++ b/malloc/tst-malloc.c +@@ -22,6 +22,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int errors = 0; + + static void +diff --git a/malloc/tst-realloc.c b/malloc/tst-realloc.c +index f50499ecb114d574..74a28fb45ed80bf5 100644 +--- a/malloc/tst-realloc.c ++++ b/malloc/tst-realloc.c +@@ -23,6 +23,8 @@ + #include + #include + ++#include "tst-malloc-aux.h" ++ + static int + do_test (void) + { +diff --git a/support/support.h b/support/support.h +index ba21ec9b5add7c02..1a77f7979330d60c 100644 +--- a/support/support.h ++++ b/support/support.h +@@ -113,7 +113,7 @@ void *xposix_memalign (size_t alignment, size_t n) + __attribute_malloc__ __attribute_alloc_align__ ((1)) + __attribute_alloc_size__ ((2)) __attr_dealloc_free __returns_nonnull; + char *xasprintf (const char *format, ...) +- __attribute__ ((format (printf, 1, 2), malloc)) __attr_dealloc_free ++ __attribute__ ((format (printf, 1, 2), __malloc__)) __attr_dealloc_free + __returns_nonnull; + char *xstrdup (const char *) __attr_dealloc_free __returns_nonnull; + char *xstrndup (const char *, size_t) __attr_dealloc_free __returns_nonnull; +diff --git a/test-skeleton.c b/test-skeleton.c +index ae185a4f2821de00..690f26e7cf229622 100644 +--- a/test-skeleton.c ++++ b/test-skeleton.c +@@ -27,7 +27,6 @@ + #include + #include + #include +-#include + #include + #include + #include diff --git a/glibc-upstream-2.39-142.patch b/glibc-upstream-2.39-142.patch new file mode 100644 index 0000000..72c8751 --- /dev/null +++ b/glibc-upstream-2.39-142.patch @@ -0,0 +1,44 @@ +commit 2c882bf9c15d206aaf04766d1b8e3ae5b1002cc2 +Author: H.J. Lu +Date: Thu Dec 5 08:39:44 2024 +0800 + + math: Exclude internal math symbols for tests [BZ #32414] + + Since internal tests don't have access to internal symbols in libm, + exclude them for internal tests. 
Also make tst-strtod5 and tst-strtod5i + depend on $(libm) to support older versions of GCC which can't inline + copysign family functions. This fixes BZ #32414. + + Signed-off-by: H.J. Lu + Reviewed-by: Sunil K Pandey + (cherry picked from commit 5df09b444835fca6e64b3d4b4a5beb19b3b2ba21) + +diff --git a/include/math.h b/include/math.h +index fa11a710a6c152a4..035fd160ffb9e032 100644 +--- a/include/math.h ++++ b/include/math.h +@@ -130,7 +130,10 @@ fabsf128 (_Float128 x) + } + # endif + +-# if !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0) ++ ++/* NB: Internal tests don't have access to internal symbols. */ ++# if !IS_IN (testsuite_internal) \ ++ && !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0) + # ifndef NO_MATH_REDIRECT + /* Declare some functions for use within GLIBC. Compilers typically + inline those functions as a single instruction. Use an asm to +diff --git a/stdlib/Makefile b/stdlib/Makefile +index 70d7291c6e3454a8..ff1418f5bb2ea5c9 100644 +--- a/stdlib/Makefile ++++ b/stdlib/Makefile +@@ -607,6 +607,8 @@ $(objpfx)bug-strtod2: $(libm) + $(objpfx)tst-strtod-round: $(libm) + $(objpfx)tst-tininess: $(libm) + $(objpfx)tst-strtod-underflow: $(libm) ++$(objpfx)tst-strtod5: $(libm) ++$(objpfx)tst-strtod5i: $(libm) + $(objpfx)tst-strtod6: $(libm) + $(objpfx)tst-strtod-nan-locale: $(libm) + $(objpfx)tst-strtod-nan-sign: $(libm) diff --git a/glibc-upstream-2.39-143.patch b/glibc-upstream-2.39-143.patch new file mode 100644 index 0000000..ade64c9 --- /dev/null +++ b/glibc-upstream-2.39-143.patch @@ -0,0 +1,30 @@ +commit 2c8a7f14fac3628b6a06cc76cdfda54a7ac20386 +Author: Florian Weimer +Date: Tue Dec 17 18:12:03 2024 +0100 + + x86: Avoid integer truncation with large cache sizes (bug 32470) + + Some hypervisors report 1 TiB L3 cache size. This results + in some variables incorrectly getting zeroed, causing crashes + in memcpy/memmove because invariants are violated. 
+ + (cherry picked from commit 61c3450db96dce96ad2b24b4f0b548e6a46d68e5) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index 5a98f70364220da4..1f68968a9a457586 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -959,11 +959,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + non_temporal_threshold = maximum_non_temporal_threshold; + + /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ +- unsigned int minimum_rep_movsb_threshold; ++ unsigned long int minimum_rep_movsb_threshold; + /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for + VEC_SIZE == 64 or 32. For VEC_SIZE == 16, the default REP MOVSB + threshold is 2048 * (VEC_SIZE / 16). */ +- unsigned int rep_movsb_threshold; ++ unsigned long int rep_movsb_threshold; + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) + && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) + { diff --git a/glibc-upstream-2.39-144.patch b/glibc-upstream-2.39-144.patch new file mode 100644 index 0000000..17ca3b3 --- /dev/null +++ b/glibc-upstream-2.39-144.patch @@ -0,0 +1,250 @@ +commit 61daaa76390e0ff73eade3a688d3626b7e7e0c20 +Author: Noah Goldstein +Date: Fri May 24 12:38:50 2024 -0500 + + x86: Improve large memset perf with non-temporal stores [RHEL-29312] + + Previously we used `rep stosb` for all medium/large memsets. This is + notably worse than non-temporal stores for large (above a + few MBs) memsets. + See: + https://docs.google.com/spreadsheets/d/1opzukzvum4n6-RUVHTGddV6RjAEil4P2uMjjQGLbLcU/edit?usp=sharing + For data using different strategies for large memset on ICX and SKX. + + Using non-temporal stores can be up to 3x faster on ICX and 2x faster + on SKX. Historically, these numbers would not have been so good + because of the zero-over-zero writeback optimization that `rep stosb` + is able to do.
But, the zero-over-zero writeback optimization has been + removed as a potential side-channel attack, so there is no longer any + good reason to only rely on `rep stosb` for large memsets. On the flip + side, non-temporal writes can avoid data in their RFO requests saving + memory bandwidth. + + All of the other changes to the file are to re-organize the + code-blocks to maintain "good" alignment given the new code added in + the `L(stosb_local)` case. + + The results from running the GLIBC memset benchmarks on TGL-client for + N=20 runs: + + Geometric Mean across the suite New / Old EVEX256: 0.979 + Geometric Mean across the suite New / Old EVEX512: 0.979 + Geometric Mean across the suite New / Old AVX2 : 0.986 + Geometric Mean across the suite New / Old SSE2 : 0.979 + + Most of the cases are essentially unchanged, this is mostly to show + that adding the non-temporal case didn't add any regressions to the + other cases. + + The results on the memset-large benchmark suite on TGL-client for N=20 + runs: + + Geometric Mean across the suite New / Old EVEX256: 0.926 + Geometric Mean across the suite New / Old EVEX512: 0.925 + Geometric Mean across the suite New / Old AVX2 : 0.928 + Geometric Mean across the suite New / Old SSE2 : 0.924 + + So roughly a 7.5% speedup. This is lower than what we see on servers + (likely because clients typically have faster single-core bandwidth so + saving bandwidth on RFOs is less impactful), but still advantageous. + + Full test-suite passes on x86_64 w/ and w/o multiarch. + Reviewed-by: H.J. Lu + + (cherry picked from commit 5bf0ab80573d66e4ae5d94b094659094336da90f) + +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index 97839a22483b0613..637caadb406b2544 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -21,10 +21,13 @@ + 2. If size is less than VEC, use integer register stores.
+ 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores. + 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores. +- 5. On machines ERMS feature, if size is greater or equal than +- __x86_rep_stosb_threshold then REP STOSB will be used. +- 6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with +- 4 VEC stores and store 4 * VEC at a time until done. */ ++ 5. If size is more to 4 * VEC_SIZE, align to 1 * VEC_SIZE with ++ 4 VEC stores and store 4 * VEC at a time until done. ++ 6. On machines ERMS feature, if size is range ++ [__x86_rep_stosb_threshold, __x86_shared_non_temporal_threshold) ++ then REP STOSB will be used. ++ 7. If size >= __x86_shared_non_temporal_threshold, use a ++ non-temporal stores. */ + + #include + +@@ -147,6 +150,41 @@ L(entry_from_wmemset): + VMOVU %VMM(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VMM(0), (%rdi) + VZEROUPPER_RETURN ++ ++ /* If have AVX512 mask instructions put L(less_vec) close to ++ entry as it doesn't take much space and is likely a hot target. */ ++#ifdef USE_LESS_VEC_MASK_STORE ++ /* Align to ensure the L(less_vec) logic all fits in 1x cache lines. */ ++ .p2align 6,, 47 ++ .p2align 4 ++L(less_vec): ++L(less_vec_from_wmemset): ++ /* Less than 1 VEC. */ ++# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 ++# error Unsupported VEC_SIZE! ++# endif ++ /* Clear high bits from edi. Only keeping bits relevant to page ++ cross check. Note that we are using rax which is set in ++ MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ ++ andl $(PAGE_SIZE - 1), %edi ++ /* Check if VEC_SIZE store cross page. Mask stores suffer ++ serious performance degradation when it has to fault suppress. */ ++ cmpl $(PAGE_SIZE - VEC_SIZE), %edi ++ /* This is generally considered a cold target. 
*/ ++ ja L(cross_page) ++# if VEC_SIZE > 32 ++ movq $-1, %rcx ++ bzhiq %rdx, %rcx, %rcx ++ kmovq %rcx, %k1 ++# else ++ movl $-1, %ecx ++ bzhil %edx, %ecx, %ecx ++ kmovd %ecx, %k1 ++# endif ++ vmovdqu8 %VMM(0), (%rax){%k1} ++ VZEROUPPER_RETURN ++#endif ++ + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMSET_SYMBOL (__memset, unaligned)) + +@@ -185,54 +223,6 @@ L(last_2x_vec): + #endif + VZEROUPPER_RETURN + +- /* If have AVX512 mask instructions put L(less_vec) close to +- entry as it doesn't take much space and is likely a hot target. +- */ +-#ifdef USE_LESS_VEC_MASK_STORE +- .p2align 4,, 10 +-L(less_vec): +-L(less_vec_from_wmemset): +- /* Less than 1 VEC. */ +-# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 +-# error Unsupported VEC_SIZE! +-# endif +- /* Clear high bits from edi. Only keeping bits relevant to page +- cross check. Note that we are using rax which is set in +- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ +- andl $(PAGE_SIZE - 1), %edi +- /* Check if VEC_SIZE store cross page. Mask stores suffer +- serious performance degradation when it has to fault suppress. +- */ +- cmpl $(PAGE_SIZE - VEC_SIZE), %edi +- /* This is generally considered a cold target. */ +- ja L(cross_page) +-# if VEC_SIZE > 32 +- movq $-1, %rcx +- bzhiq %rdx, %rcx, %rcx +- kmovq %rcx, %k1 +-# else +- movl $-1, %ecx +- bzhil %edx, %ecx, %ecx +- kmovd %ecx, %k1 +-# endif +- vmovdqu8 %VMM(0), (%rax){%k1} +- VZEROUPPER_RETURN +- +-# if defined USE_MULTIARCH && IS_IN (libc) +- /* Include L(stosb_local) here if including L(less_vec) between +- L(stosb_more_2x_vec) and ENTRY. This is to cache align the +- L(stosb_more_2x_vec) target. 
*/ +- .p2align 4,, 10 +-L(stosb_local): +- movzbl %sil, %eax +- mov %RDX_LP, %RCX_LP +- mov %RDI_LP, %RDX_LP +- rep stosb +- mov %RDX_LP, %RAX_LP +- VZEROUPPER_RETURN +-# endif +-#endif +- + #if defined USE_MULTIARCH && IS_IN (libc) + .p2align 4 + L(stosb_more_2x_vec): +@@ -318,21 +308,33 @@ L(return_vzeroupper): + ret + #endif + +- .p2align 4,, 10 +-#ifndef USE_LESS_VEC_MASK_STORE +-# if defined USE_MULTIARCH && IS_IN (libc) ++#ifdef USE_WITH_AVX2 ++ .p2align 4 ++#else ++ .p2align 4,, 4 ++#endif ++ ++#if defined USE_MULTIARCH && IS_IN (libc) + /* If no USE_LESS_VEC_MASK put L(stosb_local) here. Will be in + range for 2-byte jump encoding. */ + L(stosb_local): ++ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ++ jae L(nt_memset) + movzbl %sil, %eax + mov %RDX_LP, %RCX_LP + mov %RDI_LP, %RDX_LP + rep stosb ++# if (defined USE_WITH_SSE2) || (defined USE_WITH_AVX512) ++ /* Use xchg to save 1-byte (this helps align targets below). */ ++ xchg %RDX_LP, %RAX_LP ++# else + mov %RDX_LP, %RAX_LP +- VZEROUPPER_RETURN + # endif ++ VZEROUPPER_RETURN ++#endif ++#ifndef USE_LESS_VEC_MASK_STORE + /* Define L(less_vec) only if not otherwise defined. */ +- .p2align 4 ++ .p2align 4,, 12 + L(less_vec): + /* Broadcast esi to partial register (i.e VEC_SIZE == 32 broadcast to + xmm). This is only does anything for AVX2. */ +@@ -423,4 +425,35 @@ L(between_2_3): + movb %SET_REG8, -1(%LESS_VEC_REG, %rdx) + #endif + ret +-END (MEMSET_SYMBOL (__memset, unaligned_erms)) ++ ++#if defined USE_MULTIARCH && IS_IN (libc) ++# ifdef USE_WITH_AVX512 ++ /* Force align so the loop doesn't cross a cache-line. */ ++ .p2align 4 ++# endif ++ .p2align 4,, 7 ++ /* Memset using non-temporal stores. */ ++L(nt_memset): ++ VMOVU %VMM(0), (VEC_SIZE * 0)(%rdi) ++ leaq (VEC_SIZE * -4)(%rdi, %rdx), %rdx ++ /* Align DST. 
*/ ++ orq $(VEC_SIZE * 1 - 1), %rdi ++ incq %rdi ++ .p2align 4,, 7 ++L(nt_loop): ++ VMOVNT %VMM(0), (VEC_SIZE * 0)(%rdi) ++ VMOVNT %VMM(0), (VEC_SIZE * 1)(%rdi) ++ VMOVNT %VMM(0), (VEC_SIZE * 2)(%rdi) ++ VMOVNT %VMM(0), (VEC_SIZE * 3)(%rdi) ++ subq $(VEC_SIZE * -4), %rdi ++ cmpq %rdx, %rdi ++ jb L(nt_loop) ++ sfence ++ VMOVU %VMM(0), (VEC_SIZE * 0)(%rdx) ++ VMOVU %VMM(0), (VEC_SIZE * 1)(%rdx) ++ VMOVU %VMM(0), (VEC_SIZE * 2)(%rdx) ++ VMOVU %VMM(0), (VEC_SIZE * 3)(%rdx) ++ VZEROUPPER_RETURN ++#endif ++ ++END(MEMSET_SYMBOL(__memset, unaligned_erms)) diff --git a/glibc-upstream-2.39-145.patch b/glibc-upstream-2.39-145.patch new file mode 100644 index 0000000..1248613 --- /dev/null +++ b/glibc-upstream-2.39-145.patch @@ -0,0 +1,143 @@ +commit 994b129a35ca5218ecddd1add74aea68f1314560 +Author: Noah Goldstein +Date: Fri Sep 27 15:50:10 2024 -0700 + + x86/string: Fixup alignment of main loop in str{n}cmp-evex [BZ #32212] + + The loop should be aligned to 32-bytes so that it can ideally run out + the DSB. This is particularly important on Skylake-Server where + deficiencies in its DSB implementation make it prone to not being + able to run loops out of the DSB. + + For example running strcmp-evex on 200Mb string: + + 32-byte aligned loop: + - 43,399,578,766 idq.dsb_uops + not 32-byte aligned loop: + - 6,060,139,704 idq.dsb_uops + + This results in a 25% performance degradation for the non-aligned + version. + + The fix is to just ensure the code layout is such that the loop is + aligned. (Which was previously the case but was accidentally dropped + in 84e7c46df). + + NB: The fix was actually 64-byte alignment. This is because 64-byte + alignment generally produces more stable performance than 32-byte + aligned code (cache line crosses can affect perf), so if we are going + past 16-byte alignment, might as well go to 64. 64-byte alignment + also matches most other functions we over-align, so it creates a + common point of optimization.
+ + Times are reported as ratio of Time_With_Patch / + Time_Without_Patch. Lower is better. + + The value being reported is the geometric mean of the ratio across + all tests in bench-strcmp and bench-strncmp. + + Note this patch is only attempting to improve the Skylake-Server + strcmp for long strings. The rest of the numbers are only to test for + regressions. + + Tigerlake Results Strings <= 512: + strcmp : 1.026 + strncmp: 0.949 + + Tigerlake Results Strings > 512: + strcmp : 0.994 + strncmp: 0.998 + + Skylake-Server Results Strings <= 512: + strcmp : 0.945 + strncmp: 0.943 + + Skylake-Server Results Strings > 512: + strcmp : 0.778 + strncmp: 1.000 + + The 2.6% regression on TGL-strcmp is due to slowdowns caused by + changes in alignment of code handling small sizes (most on the + page-cross logic). These should be safe to ignore because 1) We + previously only 16-byte aligned the function so this behavior is not + new and was essentially up to chance before this patch and 2) this + type of alignment related regression on small sizes really only comes + up in tight micro-benchmark loops and is unlikely to have any effect + on real-world performance. + + Reviewed-by: H.J. Lu + (cherry picked from commit 483443d3211532903d7e790211af5a1d55fdb1f3) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index 06730ab2a18f72a0..cea034f394ab45e2 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -209,7 +209,9 @@ + returned. */ + + .section SECTION(.text), "ax", @progbits +- .align 16 ++ /* Align 64 bytes here. This is to get the L(loop) block ideally ++ aligned for the DSB. */ ++ .align 64 + .type STRCMP, @function + .globl STRCMP + # ifdef USE_AS_STRCASECMP_L +@@ -509,9 +511,7 @@ L(ret4): + ret + # endif + +- /* 32 byte align here ensures the main loop is ideally aligned +- for DSB. */ +- .p2align 5 ++ .p2align 4,, 4 + L(more_3x_vec): + /* Safe to compare 4x vectors.
*/ + VMOVU (VEC_SIZE)(%rdi), %VMM(0) +@@ -1426,10 +1426,9 @@ L(less_32_till_page): + L(ret_zero_page_cross_slow_case0): + xorl %eax, %eax + ret +-# endif +- +- ++# else + .p2align 4,, 10 ++# endif + L(less_16_till_page): + cmpl $((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax + ja L(less_8_till_page) +@@ -1482,8 +1481,12 @@ L(less_16_till_page): + # endif + jmp L(prepare_loop_aligned) + +- +- ++# ifndef USE_AS_STRNCMP ++ /* Fits in aligning bytes. */ ++L(ret_zero_4_loop): ++ xorl %eax, %eax ++ ret ++# endif + + .p2align 4,, 10 + L(less_8_till_page): +@@ -1554,6 +1557,7 @@ L(ret_less_8_wcs): + + # ifdef USE_AS_STRNCMP + .p2align 4,, 2 ++L(ret_zero_4_loop): + L(ret_zero_page_cross_slow_case1): + xorl %eax, %eax + ret +@@ -1586,10 +1590,6 @@ L(less_4_loop): + subq $-(CHAR_PER_VEC * 4), %rdx + # endif + jmp L(prepare_loop_aligned) +- +-L(ret_zero_4_loop): +- xorl %eax, %eax +- ret + L(ret_less_4_loop): + xorl %r8d, %eax + subl %r8d, %eax diff --git a/glibc-upstream-2.39-146.patch b/glibc-upstream-2.39-146.patch new file mode 100644 index 0000000..9d16b94 --- /dev/null +++ b/glibc-upstream-2.39-146.patch @@ -0,0 +1,57 @@ +commit 808a84a8b81468b517a4d721fdc62069cb8c211f +Author: Siddhesh Poyarekar +Date: Tue Jan 21 16:11:06 2025 -0500 + + Fix underallocation of abort_msg_s struct (CVE-2025-0395) + + Include the space needed to store the length of the message itself, in + addition to the message string. This resolves BZ #32582. 
+ + Signed-off-by: Siddhesh Poyarekar + Reviewed: Adhemerval Zanella + (cherry picked from commit 68ee0f704cb81e9ad0a78c644a83e1e9cd2ee578) + +diff --git a/assert/assert.c b/assert/assert.c +index c29629f5f68921a0..b6e37d694cf4b779 100644 +--- a/assert/assert.c ++++ b/assert/assert.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -65,7 +66,8 @@ __assert_fail_base (const char *fmt, const char *assertion, const char *file, + (void) __fxprintf (NULL, "%s", str); + (void) fflush (stderr); + +- total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1); ++ total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1, ++ GLRO(dl_pagesize)); + struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + if (__glibc_likely (buf != MAP_FAILED)) +diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c +index f9e3425e04496a26..089c47b04b8af049 100644 +--- a/sysdeps/posix/libc_fatal.c ++++ b/sysdeps/posix/libc_fatal.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -105,7 +106,8 @@ __libc_message_impl (const char *fmt, ...) + { + WRITEV_FOR_FATAL (fd, iov, iovcnt, total); + +- total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1); ++ total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1, ++ GLRO(dl_pagesize)); + struct abort_msg_s *buf = __mmap (NULL, total, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); diff --git a/glibc.spec b/glibc.spec index 2b4940e..8b0776b 100644 --- a/glibc.spec +++ b/glibc.spec @@ -145,7 +145,7 @@ Version: %{glibcversion} # - It allows using the Release number without the %%dist tag in the dependency # generator to make the generated requires interchangeable between Rawhide # and ELN (.elnYY < .fcXX). 
-%global baserelease 32 +%global baserelease 37 Release: %{baserelease}%{?dist}.alma.1 # Licenses: @@ -485,8 +485,31 @@ Patch167: glibc-upstream-2.39-137.patch Patch168: glibc-RHEL-12867-2.patch Patch169: glibc-RHEL-12867-3.patch Patch170: glibc-RHEL-42410.patch +Patch171: glibc-RHEL-71530-1.patch +Patch172: glibc-RHEL-71530-2.patch +Patch173: glibc-RHEL-71530-3.patch +Patch174: glibc-RHEL-71530-4.patch +Patch175: glibc-RHEL-71530-5.patch +Patch176: glibc-RHEL-71530-6.patch +Patch177: glibc-RHEL-71530-7.patch +Patch178: glibc-RHEL-71530-8.patch +Patch179: glibc-RHEL-71530-9.patch +Patch180: glibc-RHEL-71530-10.patch +Patch181: glibc-upstream-2.39-138.patch +Patch182: glibc-upstream-2.39-139.patch +Patch183: glibc-upstream-2.39-140.patch +Patch184: glibc-upstream-2.39-141.patch +Patch185: glibc-upstream-2.39-142.patch +Patch186: glibc-upstream-2.39-143.patch +Patch187: glibc-upstream-2.39-144.patch +Patch188: glibc-upstream-2.39-145.patch +Patch189: glibc-upstream-2.39-146.patch +Patch190: glibc-RHEL-75809.patch +Patch191: glibc-RHEL-75555.patch +Patch192: glibc-RHEL-75809-2.patch +Patch193: glibc-RHEL-75809-3.patch -# AlmaLinux backport: https://sourceware.org/bugzilla/show_bug.cgi?id=32470 +# AlmaLinux Patch Patch1000: glibc-upstream-2.39-bz-32470.patch ############################################################################## @@ -2486,13 +2509,37 @@ update_gconv_modules_cache () %endif %changelog - -* Wed Jan 08 2025 Eduard Abdullin - 2.39-32.alma.1 +* Tue Feb 11 2025 Eduard Abdullin - 2.39-37.alma.1 - Overwrite target for x86_64_v2 -* Mon Dec 23 2024 Koichiro Iwao - 2.39-23.alma.2 +* Tue Feb 11 2025 Koichiro Iwao - 2.39-37.alma.1 - Apply patch from upstream BZ #32470 +* Fri Jan 24 2025 Florian Weimer - 2.39-37 +- setenv: Rework free(environ) compatibility support (RHEL-75809) + +* Thu Jan 23 2025 Florian Weimer - 2.39-36 +- CVE-2025-0577: vDSO getrandom predictable randomness after fork (RHEL-75555) + +* Thu Jan 23 2025 Florian Weimer - 2.39-35 +- Restore 
compatibility with environ/malloc usage pattern (RHEL-75809) + +* Thu Jan 23 2025 Florian Weimer - 2.39-34 +- Sync with upstream branch release/2.39/master, + commit 808a84a8b81468b517a4d721fdc62069cb8c211f: +- Fix underallocation of abort_msg_s struct (CVE-2025-0395) +- x86/string: Fixup alignment of main loop in str{n}cmp-evex [BZ #32212] +- x86: Improve large memset perf with non-temporal stores [RHEL-29312] +- x86: Avoid integer truncation with large cache sizes (bug 32470) +- math: Exclude internal math symbols for tests [BZ #32414] +- malloc: add indirection for malloc(-like) functions in tests [BZ #32366] +- Pass -nostdlib -nostartfiles together with -r [BZ #31753] +- nptl: initialize cpu_id_start prior to rseq registration +- nptl: initialize rseq area prior to registration + +* Mon Dec 23 2024 Florian Weimer - 2.39-33 +- Support in-place file conversion in the iconv tool (RHEL-71530) + * Mon Dec 16 2024 Florian Weimer - 2.39-32 - Make getenv thread-safe in more cases (RHEL-42410) @@ -2564,7 +2611,6 @@ update_gconv_modules_cache () * Mon Nov 4 2024 Florian Weimer - 2.39-24 - Switch to the upstream way of building xtests - * Tue Oct 29 2024 Troy Dawson - 2.39-23 - Bump release for October 2024 mass rebuild: Resolves: RHEL-64018