iconv: Support in-place conversions (RHEL-1915)

Resolves: RHEL-1915
This commit is contained in:
Patsy Griffin 2024-11-20 21:09:16 -05:00
parent e56f6d3b52
commit 1ae6242e8b
10 changed files with 2901 additions and 1 deletions

337
glibc-RHEL-1915-1.patch Normal file
View File

@ -0,0 +1,337 @@
commit f58a8c1c15d8b5d8a08e8553f82867202b88a5cc
Author: Paul Pluzhnikov <ppluzhnikov@google.com>
Date: Sat May 27 06:48:33 2023 +0000
Fix misspellings in iconv/ and iconvdata/ -- BZ 25337
All the changes are in comments or '#error' messages.
Applying this commit results in bit-identical rebuild of iconvdata/*.so
Reviewed-by: Florian Weimer <fw@deneb.enyo.de>
diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
index 5696058298..0cf3226be6 100644
--- a/iconv/gconv_charset.c
+++ b/iconv/gconv_charset.c
@@ -181,10 +181,10 @@ __gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
implementation has always handled them. Only suffixes in the tocode are
processed and handled. The reality is that invalid input in the input
character set should only be ignored if the fromcode specifies IGNORE.
- The current implementation ignores invalid intput in the input character
+ The current implementation ignores invalid input in the input character
set if the tocode contains IGNORE. We preserve this behavior for
backwards compatibility. In the future we may split the handling of
- IGNORE to allow a finer grained specification of ignorning invalid input
+ IGNORE to allow a finer grained specification of ignoring invalid input
and/or ignoring invalid output. */
conv_spec->translit = ptc.translit;
conv_spec->ignore = ptc.ignore;
diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
index 00744aad56..07815b0eee 100644
--- a/iconv/gconv_charset.h
+++ b/iconv/gconv_charset.h
@@ -48,7 +48,7 @@
/* This function copies in-order, characters from the source 's' that are
- either alpha-numeric or one in one of these: "_-.,:/" - into the destination
+ either alphanumeric or one in one of these: "_-.,:/" - into the destination
'wp' while dropping all other characters. In the process, it converts all
alphabetical characters to upper case. It then appends up to two '/'
characters so that the total number of '/'es in the destination is 2. */
diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c
index c76011d6bc..ee9e97e1bd 100644
--- a/iconv/gconv_conf.c
+++ b/iconv/gconv_conf.c
@@ -153,7 +153,7 @@ static void
add_alias (char *rp)
{
/* We now expect two more string. The strings are normalized
- (converted to UPPER case) and strored in the alias database. */
+ (converted to UPPER case) and stored in the alias database. */
char *from, *to, *wp;
while (__isspace_l (*rp, _nl_C_locobj_ptr))
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 4b247a815f..19d042faff 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -172,7 +172,7 @@ __libc_lock_define (extern, __gconv_lock attribute_hidden)
})
-/* Return in *HANDLE, a decriptor for the transformation. The function expects
+/* Return in *HANDLE, a descriptor for the transformation. The function expects
the specification of the transformation in the structure pointed to by
CONV_SPEC. It only reads *CONV_SPEC and does not take ownership of it. */
extern int __gconv_open (struct gconv_spec *conv_spec,
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index c60cffad4c..e936e171d7 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -56,7 +56,7 @@ __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
/* Transform from the internal, UCS4-like format, to UCS4. The
difference between the internal ucs4 format and the real UCS4
- format is, if any, the endianess. The Unicode/ISO 10646 says that
+ format is, if any, the endianness. The Unicode/ISO 10646 says that
unless some higher protocol specifies it differently, the byte
order is big endian.*/
#define DEFINE_INIT 0
@@ -100,7 +100,7 @@ internal_ucs4_loop (struct __gconv_step *step,
*inptrp = inptr + n_convert * 4;
*outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
-# error "This endianess is not supported."
+# error "This endianness is not supported."
#endif
/* Determine the status. */
@@ -153,7 +153,7 @@ internal_ucs4_loop_single (struct __gconv_step *step,
(*outptrp)[2] = state->__value.__wchb[2];
(*outptrp)[3] = state->__value.__wchb[3];
#else
-# error "This endianess is not supported."
+# error "This endianness is not supported."
#endif
*outptrp += 4;
@@ -347,7 +347,7 @@ internal_ucs4le_loop (struct __gconv_step *step,
*inptrp = inptr + n_convert * 4;
*outptrp = __mempcpy (outptr, inptr, n_convert * 4);
#else
-# error "This endianess is not supported."
+# error "This endianness is not supported."
#endif
/* Determine the status. */
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 1d90938d71..bee898c63c 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -537,7 +537,7 @@ incomplete character or shift sequence at end of buffer"));
static int
process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
{
- /* we have a problem with reading from a desriptor since we must not
+ /* we have a problem with reading from a descriptor since we must not
provide the iconv() function an incomplete character or shift
sequence at the end of the buffer. Since we have to deal with
arbitrary encodings we must read the whole text in a buffer and
diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c
index a319e2f762..f3f4baa4e7 100644
--- a/iconv/iconvconfig.c
+++ b/iconv/iconvconfig.c
@@ -445,7 +445,7 @@ static void
add_alias (char *rp)
{
/* We now expect two more string. The strings are normalized
- (converted to UPPER case) and strored in the alias database. */
+ (converted to UPPER case) and stored in the alias database. */
char *from;
char *to;
char *wp;
diff --git a/iconvdata/bug-iconv8.c b/iconvdata/bug-iconv8.c
index e32d891b5c..1ebb674c91 100644
--- a/iconvdata/bug-iconv8.c
+++ b/iconvdata/bug-iconv8.c
@@ -29,7 +29,7 @@ do_test (void)
/*
* result: -1 84 0 0 (84=EILSEQ)
*
- * Error is returnd but inbuf is consumed.
+ * Error is returned but inbuf is consumed.
*
* \x83\xd9 is valid shift-jis sequence but no character is assigned
* to it.
diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c
index 4d6ec71139..5203f30e79 100644
--- a/iconvdata/ibm1364.c
+++ b/iconvdata/ibm1364.c
@@ -91,7 +91,7 @@
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_curcs = *curcsp; \
diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c
index 2939d4d29e..fe2fe1f15b 100644
--- a/iconvdata/ibm930.c
+++ b/iconvdata/ibm930.c
@@ -80,7 +80,7 @@
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_curcs = *curcsp; \
diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c
index 95935b8b36..4db0699a68 100644
--- a/iconvdata/ibm933.c
+++ b/iconvdata/ibm933.c
@@ -79,7 +79,7 @@
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_curcs = *curcsp; \
diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c
index 1d8240a758..3c3d697a24 100644
--- a/iconvdata/ibm935.c
+++ b/iconvdata/ibm935.c
@@ -80,7 +80,7 @@
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_curcs = *curcsp; \
diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c
index 9e02aba122..1586036c1e 100644
--- a/iconvdata/ibm937.c
+++ b/iconvdata/ibm937.c
@@ -80,7 +80,7 @@
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_curcs = *curcsp; \
diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c
index ce719cb29f..9b053c696e 100644
--- a/iconvdata/ibm939.c
+++ b/iconvdata/ibm939.c
@@ -80,7 +80,7 @@
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_curcs = *curcsp; \
diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c
index d0c3ca4f03..36727f0865 100644
--- a/iconvdata/iso-2022-cn-ext.c
+++ b/iconvdata/iso-2022-cn-ext.c
@@ -154,7 +154,7 @@ enum
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_set = *setp; \
diff --git a/iconvdata/iso-2022-cn.c b/iconvdata/iso-2022-cn.c
index 73eb5e77c6..5660ead668 100644
--- a/iconvdata/iso-2022-cn.c
+++ b/iconvdata/iso-2022-cn.c
@@ -102,7 +102,7 @@ enum
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_set = *setp; \
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
index d341a14f51..c722bdbfc3 100644
--- a/iconvdata/iso-2022-jp-3.c
+++ b/iconvdata/iso-2022-jp-3.c
@@ -156,7 +156,7 @@ enum
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
saved_state = *statep; \
diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c
index f31dfb92e6..b023d3cf8e 100644
--- a/iconvdata/iso-2022-jp.c
+++ b/iconvdata/iso-2022-jp.c
@@ -249,7 +249,7 @@ gconv_end (struct __gconv_step *data)
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_set = *setp; \
@@ -679,7 +679,7 @@ static const cvlist_t conversion_lists[4] =
the character is unknown. \
The CJK character sets partially overlap when seen as subsets \
of ISO 10646; therefore there is no single correct result. \
- We use a preferrence order which depends on the language tag. */ \
+ We use a preference order which depends on the language tag. */ \
\
if (ch <= 0x7f) \
{ \
diff --git a/iconvdata/iso-2022-kr.c b/iconvdata/iso-2022-kr.c
index e71198aee9..fd785fd8f9 100644
--- a/iconvdata/iso-2022-kr.c
+++ b/iconvdata/iso-2022-kr.c
@@ -100,7 +100,7 @@ enum
/* Since we might have to reset input pointer we must be able to save
- and retore the state. */
+ and restore the state. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_set = *setp; \
diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c
index f7111a3759..1800dc8fdb 100644
--- a/iconvdata/iso646.c
+++ b/iconvdata/iso646.c
@@ -21,7 +21,7 @@
zillions of ISO 646 derivates and supporting them all in a separate
module is overkill since these coded character sets are hardly ever
used anymore (except ANSI_X3.4-1968 == ASCII, which is compatible
- with ISO 8859-1). The European variants are superceded by the
+ with ISO 8859-1). The European variants are superseded by the
various ISO 8859-? standards and the Asian variants are embedded in
larger character sets. Therefore this implementation is simply
here to make it possible to do the conversion if it is necessary.
diff --git a/iconvdata/sjis.c b/iconvdata/sjis.c
index 93c28db13e..5ab821bbff 100644
--- a/iconvdata/sjis.c
+++ b/iconvdata/sjis.c
@@ -2001,7 +2001,7 @@ static const char from_ucs4_greek[193][2] =
/* The mapping of the Kanji is horrible. The glyphs covered by Shift JIS
- are spreaded all around the Unicode CJK area. We use one big table
+ are spread all around the Unicode CJK area. We use one big table
since using the gaps will not buy us much.
The following table can be generated using
diff --git a/iconvdata/tst-table.sh b/iconvdata/tst-table.sh
index d5b1f3c87d..bc6f542b24 100755
--- a/iconvdata/tst-table.sh
+++ b/iconvdata/tst-table.sh
@@ -44,7 +44,7 @@ if test ${charset} = GB18030; then
mv ${objpfx}tst-${charset}.truncated.table ${objpfx}tst-${charset}.charmap.table
fi
-# Precomputed expexted differences between the charmap and iconv forward.
+# Precomputed expected differences between the charmap and iconv forward.
precomposed=${charset}.precomposed
# Precompute expected differences between the charmap and iconv backward.

218
glibc-RHEL-1915-2.patch Normal file
View File

@ -0,0 +1,218 @@
commit 422ed8ede312f786369e4850e47b8d32beaae4e4
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 20 13:10:54 2024 +0200
iconv: Base tests for buffer management
Reviewed-by: DJ Delorie <dj@redhat.com>
Conflicts:
iconv/Makefile - tests-special list differences
diff --git a/iconv/Makefile b/iconv/Makefile
index 65b4a44ab8..b0fa550141 100644
--- a/iconv/Makefile 2024-11-18 12:41:42.539981355 -0500
+++ b/iconv/Makefile 2024-11-18 12:40:54.861651890 -0500
@@ -72,7 +72,10 @@ include $(patsubst %,$(..)libof-iterator
ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out
-tests-special += $(objpfx)tst-iconv_prog.out
+tests-special += \
+ $(objpfx)tst-iconv_prog-buffer.out \
+ $(objpfx)tst-iconv_prog.out \
+ # tests-special
endif
# Make a copy of the file because gconv module names are constructed
@@ -125,3 +128,8 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_p
$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
'$(run-program-env)' > $@; \
$(evaluate-test)
+
+$(objpfx)tst-iconv_prog-buffer.out: \
+ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
+ $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \
+ $(evaluate-test)
diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh
new file mode 100644
index 0000000000..a27107f02b
--- /dev/null
+++ b/iconv/tst-iconv_prog-buffer.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# Test for iconv (the program) buffer management.
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+exec 2>&1
+set -e
+
+exec {logfd}>&1
+
+codir=$1
+test_program_prefix="$2"
+
+# Use internal converters to avoid issues with module loading.
+iconv_args="-f ASCII -t UTF-8"
+
+failure=false
+
+tmp=`mktemp -d`
+trap 'rm -rf "$tmp"' 0
+echo ABC > "$tmp/abc"
+echo DEF > "$tmp/def"
+echo GGG > "$tmp/ggg"
+echo HH > "$tmp/hh"
+echo XY > "$tmp/xy"
+echo ZT > "$tmp/zt"
+echo OUT > "$tmp/out-template"
+printf '\xff' > "$tmp/0xff"
+cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped"
+
+run_iconv () {
+ local c=0
+ if test "${FUNCNAME[2]}" = main; then
+ c=1
+ fi
+ echo "${BASH_SOURCE[$c]}:${BASH_LINENO[$c]}: iconv $iconv_args $@" >&$logfd
+ $test_program_prefix $codir/iconv/iconv_prog $iconv_args "$@"
+}
+
+check_out_expected () {
+ if ! cmp -s "$tmp/out" "$tmp/expected" ; then
+ echo "error: iconv output difference" >&$logfd
+ echo "*** expected ***" >&$logfd
+ cat "$tmp/expected" >&$logfd
+ echo "*** actual ***" >&$logfd
+ cat "$tmp/out" >&$logfd
+ failure=true
+ fi
+}
+
+expect_files () {
+ local f
+ ! test -z "$1"
+ cp "$tmp/$1" "$tmp/expected"
+ shift
+ for f in "$@" ; do
+ cat "$tmp/$f" >> "$tmp/expected"
+ done
+ check_out_expected
+}
+
+check_out () {
+ cat > "$tmp/expected"
+ check_out_expected
+}
+
+expect_exit () {
+ local expected=$1
+ shift
+ # Prevent failure for stopping the script.
+ if "$@" ; then
+ actual=$?
+ else
+ actual=$?
+ fi
+ if test "$actual" -ne "$expected"; then
+ echo "error: expected exit status $expected, not $actual" >&$logfd
+ exit 1
+ fi
+}
+
+ignore_failure () {
+ set +e
+ "$@"
+ status=$?
+ set -e
+}
+
+# Concatentation test.
+run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/def"
+expect_files abc def
+
+# Single-file in-place conversion.
+run_iconv -o "$tmp/out" "$tmp/out"
+expect_files abc def
+
+# Multiple input files with in-place conversion.
+
+run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc"
+expect_files abc def abc
+
+# But not if we are writing to standard output.
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv </dev/null >>"$tmp/out"
+expect_files out-template
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv - </dev/null >>"$tmp/out"
+expect_files out-template
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv /dev/null >>"$tmp/out"
+expect_files out-template
+
+# Conversion errors should avoid clobbering an existing file if
+# it is also an input file.
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out"
+expect_files 0xff
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out"
+expect_files 0xff
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out"
+expect_files 0xff
+
+# If errors are ignored, the file should be overwritten.
+
+cp "$tmp/out-template" "$tmp/out"
+expect_exit 1 \
+ run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" 2>"$tmp/err"
+! test -s "$tmp/err"
+expect_files abc def
+
+# FIXME: This is not correct, -c should not change the exit status.
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -c -o "$tmp/out" \
+ "$tmp/abc" "$tmp/0xff-wrapped" "$tmp/def" 2>"$tmp/err"
+! test -s "$tmp/err"
+expect_files abc xy zt def
+
+# If the file does not exist yet, it should not be created on error.
+
+rm "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/0xff"
+! test -e "$tmp/out"
+
+expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/0xff"
+! test -e "$tmp/out"
+
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def"
+! test -e "$tmp/out"
+
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def"
+! test -e "$tmp/out"
+
+if $failure ; then
+ exit 1
+fi

91
glibc-RHEL-1915-3.patch Normal file
View File

@ -0,0 +1,91 @@
commit 0cb64617a6f691b611406427c8e24b7f04c4983f
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 20 13:10:54 2024 +0200
iconv: Do not use mmap in iconv (the program) (bug 17703)
On current systems, very large files are needed before
mmap becomes beneficial. Simplify the implementation.
This exposed that inptr was not initialized correctly in
process_fd. Handling multiple input files resulted in
EFAULT in read because a null pointer was passed. This
could be observed previously if an input file was not
mappable and was reported as bug 17703.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index a765b1af21..88a928557e 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -31,9 +31,6 @@
#include <string.h>
#include <unistd.h>
#include <libintl.h>
-#ifdef _POSIX_MAPPED_FILES
-# include <sys/mman.h>
-#endif
#include <charmap.h>
#include <gconv_int.h>
#include "iconv_prog.h"
@@ -253,10 +250,6 @@ conversions from `%s' and to `%s' are not supported"),
else
do
{
-#ifdef _POSIX_MAPPED_FILES
- struct stat64 st;
- char *addr;
-#endif
int fd, ret;
if (verbose)
@@ -276,39 +269,6 @@ conversions from `%s' and to `%s' are not supported"),
}
}
-#ifdef _POSIX_MAPPED_FILES
- /* We have possibilities for reading the input file. First try
- to mmap() it since this will provide the fastest solution. */
- if (fstat64 (fd, &st) == 0
- && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
- fd, 0)) != MAP_FAILED))
- {
- /* Yes, we can use mmap(). The descriptor is not needed
- anymore. */
- if (close (fd) != 0)
- error (EXIT_FAILURE, errno,
- _("error while closing input `%s'"),
- argv[remaining]);
-
- ret = process_block (cd, addr, st.st_size, &output,
- output_file);
-
- /* We don't need the input data anymore. */
- munmap ((void *) addr, st.st_size);
-
- if (ret != 0)
- {
- status = EXIT_FAILURE;
-
- if (ret < 0)
- /* We cannot go on with producing output since it might
- lead to problem because the last output might leave
- the output stream in an undefined state. */
- break;
- }
- }
- else
-#endif /* _POSIX_MAPPED_FILES */
{
/* Read the file in pieces. */
ret = process_fd (cd, fd, &output, output_file);
@@ -544,7 +504,7 @@ process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
process it in one step. */
static char *inbuf = NULL;
static size_t maxlen = 0;
- char *inptr = NULL;
+ char *inptr = inbuf;
size_t actlen = 0;
while (actlen < maxlen)

62
glibc-RHEL-1915-4.patch Normal file
View File

@ -0,0 +1,62 @@
commit 00ba299787c2ea9e5c4986301e2f4965dffbfded
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 20 13:10:54 2024 +0200
manual: __is_last is no longer part of iconv internals
The __is_last field was replaced with a bitmask in
commit 85830c4c4688b30d3d76111aa9a26745c7b141d6 in 2000,
and multiple bits are in use today.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/manual/charset.texi b/manual/charset.texi
index 427db3bc80..3aaa62d088 100644
--- a/manual/charset.texi
+++ b/manual/charset.texi
@@ -2422,11 +2422,11 @@ written into the buffer to signal how much output is available. If this
conversion step is not the last one, the element must not be modified.
The @code{__outbufend} element must not be modified.
-@item int __is_last
-This element is nonzero if this conversion step is the last one. This
-information is necessary for the recursion. See the description of the
-conversion function internals below. This element must never be
-modified.
+@item int __flags
+This field is a set of flags. The @code{__GCONV_IS_LAST} bit is set if
+this conversion step is the last one. This information is necessary for
+the recursion. See the description of the conversion function internals
+below. This element must never be modified.
@item int __invocation_counter
The conversion function can use this element to see how many calls of
@@ -2731,8 +2731,8 @@ Otherwise the function has to emit a byte sequence to bring the state
object into the initial state. Once this all happened the other
conversion modules in the chain of conversions have to get the same
chance. Whether another step follows can be determined from the
-@code{__is_last} element of the step data structure to which the first
-parameter points.
+@code{__GCONV_IS_LAST} flag in the @code{__flags} field of the step
+data structure to which the first parameter points.
The more interesting mode is when actual text has to be converted. The
first step in this case is to convert as much text as possible from the
@@ -2866,7 +2866,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data,
/* @r{Call the steps down the chain if there are any but only}
@r{if we successfully emitted the escape sequence.} */
- if (status == __GCONV_OK && ! data->__is_last)
+ if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST))
status = fct (next_step, next_data, NULL, NULL,
written, 1);
@}
@@ -2892,7 +2892,7 @@ gconv (struct __gconv_step *step, struct __gconv_step_data *data,
/* @r{If this is the last step, leave the loop. There is}
@r{nothing we can do.} */
- if (data->__is_last)
+ if (data->__flags & __GCONV_IS_LAST)
@{
/* @r{Store information about how many bytes are}
@r{available.} */

1064
glibc-RHEL-1915-5.patch Normal file

File diff suppressed because it is too large Load Diff

715
glibc-RHEL-1915-6.patch Normal file
View File

@ -0,0 +1,715 @@
commit 8ef3cff9d1ceafe369f982d980678d749fb93bd2
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 20 13:10:54 2024 +0200
iconv: Support in-place conversions (bug 10460, bug 32033)
Check if any of the input files overlaps with the output file, and use
a temporary file in this case, so that the input is no clobbered
before it is read. This fixes bug 10460. It allows to use iconv
more easily as a functional replacement for GNU recode.
The updated output buffer management truncates the output file
if there is no input, fixing bug 32033.
Reviewed-by: DJ Delorie <dj@redhat.com>
Conflicts:
NEWS - Dropped
iconv/Makefile - tests-special list diffs
iconv/iconv_prog.c - Manual merge
diff -Nrup a/iconv/Makefile b/iconv/Makefile
--- a/iconv/Makefile 2024-11-21 11:08:26.166051531 -0500
+++ b/iconv/Makefile 2024-11-21 11:08:51.325219751 -0500
@@ -77,6 +77,8 @@ include $(patsubst %,$(..)libof-iterator
ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out
tests-special += \
+ $(objpfx)tst-iconv_prog-buffer-large.out \
+ $(objpfx)tst-iconv_prog-buffer-tiny.out \
$(objpfx)tst-iconv_prog-buffer.out \
$(objpfx)tst-iconv_prog.out \
# tests-special
@@ -137,3 +139,12 @@ $(objpfx)tst-iconv_prog-buffer.out: \
tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
$(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \
$(evaluate-test)
+$(objpfx)tst-iconv_prog-buffer-tiny.out: \
+ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
+ $(BASH) $< $(common-objdir) '$(test-program-prefix)' \
+ '--buffer-size=1' > $@; \
+ $(evaluate-test)
+$(objpfx)tst-iconv_prog-buffer-large.out: \
+ tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
+ $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \
+ $(evaluate-test)
diff -Nrup a/iconv/iconv_prog.c b/iconv/iconv_prog.c
--- a/iconv/iconv_prog.c 2024-11-21 11:08:26.167051537 -0500
+++ b/iconv/iconv_prog.c 2024-11-21 11:10:10.028745981 -0500
@@ -48,7 +48,11 @@
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
-#define OPT_VERBOSE 1000
+enum
+ {
+ OPT_VERBOSE = 1000,
+ OPT_BUFFER_SIZE,
+ };
#define OPT_LIST 'l'
/* Definitions of arguments for argp functions. */
@@ -64,6 +68,10 @@ static const struct argp_option options[
{ "output", 'o', N_("FILE"), 0, N_("output file") },
{ "silent", 's', NULL, 0, N_("suppress warnings") },
{ "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
+ /* This is an internal option intended for testing only. Very small
+ buffers do not work with all character sets. */
+ { "buffer-size", OPT_BUFFER_SIZE, N_("BYTE-COUNT"), OPTION_HIDDEN,
+ N_("size of in-memory scratch buffer") },
{ NULL, 0, NULL, 0, NULL }
};
@@ -101,13 +109,20 @@ static int list;
/* If nonzero omit invalid character from output. */
int omit_invalid;
+/* Current index in argv (after command line processing) with the
+ input file name. */
+static int current_input_file_index;
+
+/* Size of the temporary, in-memory buffer. Exceeding it needs
+ spooling to disk in a temporary file. Controlled by --buffer_size. */
+static size_t output_buffer_size = 1024 * 1024;
+
/* Prototypes for the functions doing the actual work. */
-static int process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file);
-static int process_fd (iconv_t cd, int fd, FILE **output,
- const char *output_file);
-static int process_file (iconv_t cd, FILE *input, FILE **output,
- const char *output_file);
+static void prepare_output_file (char **argv);
+static void close_output_file (int status);
+static int process_block (iconv_t cd, char *addr, size_t len);
+static int process_fd (iconv_t cd, int fd);
+static int process_file (iconv_t cd, FILE *input);
static void print_known_names (void);
@@ -115,7 +130,6 @@ int
main (int argc, char *argv[])
{
int status = EXIT_SUCCESS;
- int remaining;
__gconv_t cd;
struct charmap_t *from_charmap = NULL;
struct charmap_t *to_charmap = NULL;
@@ -127,7 +141,7 @@ main (int argc, char *argv[])
textdomain (_libc_intl_domainname);
/* Parse and process arguments. */
- argp_parse (&argp, argc, argv, 0, &remaining, NULL);
+ argp_parse (&argp, argc, argv, 0, &current_input_file_index, NULL);
/* List all coded character sets if wanted. */
if (list)
@@ -162,7 +176,8 @@ main (int argc, char *argv[])
if (from_charmap != NULL || to_charmap != NULL)
/* Construct the conversion table and do the conversion. */
status = charmap_conversion (from_code, from_charmap, to_code, to_charmap,
- argc, remaining, argv, output_file);
+ argc, current_input_file_index, argv,
+ output_file);
else
{
struct gconv_spec conv_spec;
@@ -236,16 +251,14 @@ conversions from `%s' and to `%s' are no
_("failed to start conversion processing"));
}
- /* The output file. Will be opened when we are ready to produce
- output. */
- FILE *output = NULL;
+ prepare_output_file (argv);
/* Now process the remaining files. Write them to stdout or the file
specified with the `-o' parameter. If we have no file given as
the parameter process all from stdin. */
- if (remaining == argc)
+ if (current_input_file_index == argc)
{
- if (process_file (cd, stdin, &output, output_file) != 0)
+ if (process_file (cd, stdin) != 0)
status = EXIT_FAILURE;
}
else
@@ -254,17 +267,17 @@ conversions from `%s' and to `%s' are no
int fd, ret;
if (verbose)
- fprintf (stderr, "%s:\n", argv[remaining]);
- if (strcmp (argv[remaining], "-") == 0)
- fd = 0;
+ fprintf (stderr, "%s:\n", argv[current_input_file_index]);
+ if (strcmp (argv[current_input_file_index], "-") == 0)
+ fd = STDIN_FILENO;
else
{
- fd = open (argv[remaining], O_RDONLY);
+ fd = open (argv[current_input_file_index], O_RDONLY);
if (fd == -1)
{
error (0, errno, _("cannot open input file `%s'"),
- argv[remaining]);
+ argv[current_input_file_index]);
status = EXIT_FAILURE;
continue;
}
@@ -272,7 +285,7 @@ conversions from `%s' and to `%s' are no
{
/* Read the file in pieces. */
- ret = process_fd (cd, fd, &output, output_file);
+ ret = process_fd (cd, fd);
/* Now close the file. */
close (fd);
@@ -290,7 +303,7 @@ conversions from `%s' and to `%s' are no
}
}
}
- while (++remaining < argc);
+ while (++current_input_file_index < argc);
/* Ensure that iconv -c still exits with failure if iconv (the
function) has failed with E2BIG instead of EILSEQ. */
@@ -298,8 +311,7 @@ conversions from `%s' and to `%s' are no
status = EXIT_FAILURE;
/* Close the output file now. */
- if (output != NULL && fclose (output))
- error (EXIT_FAILURE, errno, _("error while closing output file"));
+ close_output_file (status);
}
return status;
@@ -329,6 +341,14 @@ parse_opt (int key, char *arg, struct ar
/* Omit invalid characters from output. */
omit_invalid = 1;
break;
+ case OPT_BUFFER_SIZE:
+ {
+ int i = atoi (arg);
+ if (i <= 0)
+ error (EXIT_FAILURE, 0, _("invalid buffer size: %s"), arg);
+ output_buffer_size = i;
+ }
+ break;
case OPT_VERBOSE:
verbose = 1;
break;
@@ -375,59 +395,247 @@ warranty; not even for MERCHANTABILITY o
fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
}
+/* Command line index of the last input file that overlaps with the
+ output file. Zero means no temporary file is ever required. */
+static int last_overlapping_file_index;
-static int
-write_output (const char *outbuf, const char *outptr, FILE **output,
- const char *output_file)
+/* This is set to true if the output is written to a temporary file. */
+static bool output_using_temporary_file;
+
+/* This is the file descriptor that will be used by write_output. */
+static int output_fd = -1;
+
+/* Pointers at the start and end of the fixed-size output buffer. */
+static char *output_buffer_start;
+
+/* Current write position in the output buffer. */
+static char *output_buffer_current;
+
+/* Remaining bytes after output_buffer_current in the output buffer. */
+static size_t output_buffer_remaining;
+
+
+/* Reduce the buffer size when writing directly to the output file, to
+ reduce cache utilization. */
+static size_t copy_buffer_size = BUFSIZ;
+
+static void
+output_error (void)
+{
+ error (EXIT_FAILURE, errno, _("cannot open output file"));
+}
+
+static void
+input_error (const char *path)
{
- /* We have something to write out. */
- int errno_save = errno;
+ error (0, errno, _("cannot open input file `%s'"), path);
+}
- if (*output == NULL)
+/* Opens output_file for writing, truncating it. */
+static void
+open_output_direct (void)
+{
+ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_TRUNC, 0777);
+ if (output_fd < 0)
+ output_error ();
+}
+
+static void
+prepare_output_file (char **argv)
+{
+ if (copy_buffer_size > output_buffer_size)
+ copy_buffer_size = output_buffer_size;
+
+ if (output_file == NULL || strcmp (output_file, "-") == 0)
+ {
+ /* No buffering is required when writing to standard output
+ because input overlap is expected to be solved externally. */
+ output_fd = STDOUT_FILENO;
+ output_buffer_size = copy_buffer_size;
+ }
+ else
{
- /* Determine output file. */
- if (output_file != NULL && strcmp (output_file, "-") != 0)
+ /* If iconv creates the output file, no overlap is possible. */
+ output_fd = open64 (output_file, O_WRONLY | O_CREAT | O_EXCL, 0777);
+ if (output_fd >= 0)
+ output_buffer_size = copy_buffer_size;
+ else
{
- *output = fopen (output_file, "w");
- if (*output == NULL)
- error (EXIT_FAILURE, errno, _("cannot open output file"));
+ /* Otherwise, check if any of the input files overlap with the
+ output file. */
+ struct statx st;
+ if (statx (AT_FDCWD, output_file, 0, STATX_INO | STATX_MODE, &st)
+ != 0)
+ output_error ();
+ uint32_t out_dev_minor = st.stx_dev_minor;
+ uint32_t out_dev_major = st.stx_dev_major;
+ uint64_t out_ino = st.stx_ino;
+
+ int idx = current_input_file_index;
+ while (true)
+ {
+ /* Special case: no input files means standard input. */
+ if (argv[idx] == NULL && idx != current_input_file_index)
+ break;
+
+ int ret;
+ if (argv[idx] == NULL || strcmp (argv[idx], "-") == 0)
+ ret = statx (STDIN_FILENO, "", AT_EMPTY_PATH, STATX_INO, &st);
+ else
+ ret = statx (AT_FDCWD, argv[idx], 0, STATX_INO, &st);
+ if (ret != 0)
+ {
+ input_error (argv[idx]);
+ exit (EXIT_FAILURE);
+ }
+ if (out_dev_minor == st.stx_dev_minor
+ && out_dev_major == st.stx_dev_major
+ && out_ino == st.stx_ino)
+ {
+ if (argv[idx] == NULL)
+ /* Corner case: index of NULL would be larger than
+ idx while converting, triggering a switch away
+ from the temporary file. */
+ last_overlapping_file_index = INT_MAX;
+ else
+ last_overlapping_file_index = idx;
+ }
+
+ if (argv[idx] == NULL)
+ break;
+ ++idx;
+ }
+
+ /* If there is no overlap, avoid using a temporary file. */
+ if (last_overlapping_file_index == 0)
+ {
+ open_output_direct ();
+ output_buffer_size = copy_buffer_size;
+ }
}
- else
- *output = stdout;
}
- if (fwrite (outbuf, 1, outptr - outbuf, *output) < (size_t) (outptr - outbuf)
- || ferror (*output))
+ output_buffer_start = malloc (output_buffer_size);
+ if (output_buffer_start == NULL)
+ output_error ();
+ output_buffer_current = output_buffer_start;
+ output_buffer_remaining = output_buffer_size;
+}
+
+/* Write out the range [first, last), terminating the process on write
+ error. */
+static void
+write_fully (int fd, const char *first, const char *last)
+{
+ while (first < last)
{
- /* Error occurred while printing the result. */
- error (0, 0, _("\
+ ssize_t ret = write (fd, first, last - first);
+ if (ret == 0)
+ {
+ errno = ENOSPC;
+ output_error ();
+ }
+ if (ret < 0)
+ error (EXIT_FAILURE, errno, _("\
conversion stopped due to problem in writing the output"));
- return -1;
+ first += ret;
}
+}
- errno = errno_save;
+static void
+flush_output (void)
+{
+ bool temporary_file_not_needed
+ = current_input_file_index > last_overlapping_file_index;
+ if (output_fd < 0)
+ {
+ if (temporary_file_not_needed)
+ open_output_direct ();
+ else
+ {
+ /* Create an anonymous temporary file. */
+ FILE *fp = tmpfile ();
+ if (fp == NULL)
+ output_error ();
+ output_fd = dup (fileno (fp));
+ if (output_fd < 0)
+ output_error ();
+ fclose (fp);
+ output_using_temporary_file = true;
+ }
+ /* Either way, no longer use a memory-only staging buffer. */
+ output_buffer_size = copy_buffer_size;
+ }
+ else if (output_using_temporary_file && temporary_file_not_needed)
+ {
+ /* The temporary file is no longer needed. Switch to direct
+ output, replacing output_fd. */
+ int temp_fd = output_fd;
+ open_output_direct ();
+
+ /* Copy over the data spooled to the temporary file. */
+ if (lseek (temp_fd, 0, SEEK_SET) < 0)
+ output_error ();
+ while (true)
+ {
+ char buf[BUFSIZ];
+ ssize_t ret = read (temp_fd, buf, sizeof (buf));
+ if (ret < 0)
+ output_error ();
+ if (ret == 0)
+ break;
+ write_fully (output_fd, buf, buf + ret);
+ }
+ close (temp_fd);
- return 0;
+ /* No longer using a temporary file from now on. */
+ output_using_temporary_file = false;
+ output_buffer_size = copy_buffer_size;
+ }
+
+ write_fully (output_fd, output_buffer_start, output_buffer_current);
+ output_buffer_current = output_buffer_start;
+ output_buffer_remaining = output_buffer_size;
}
+static void
+close_output_file (int status)
+{
+ /* Do not perform a flush if a temporary file or the in-memory
+ buffer is in use and there was an error. It would clobber the
+ overlapping input file. */
+ if (status != EXIT_SUCCESS && !omit_invalid &&
+ (output_using_temporary_file || output_fd < 0))
+ return;
+
+ /* The current_input_file_index variable is now larger than
+ last_overlapping_file_index, so the flush_output call switches
+ away from the temporary file. */
+ flush_output ();
+
+ if (output_fd == STDOUT_FILENO)
+ {
+ /* Close standard output in safe manner, to report certain
+ ENOSPC errors. */
+ output_fd = dup (output_fd);
+ if (output_fd < 0)
+ output_error ();
+ }
+ if (close (output_fd) < 0)
+ output_error ();
+}
static int
-process_block (iconv_t cd, char *addr, size_t len, FILE **output,
- const char *output_file)
+process_block (iconv_t cd, char *addr, size_t len)
{
-#define OUTBUF_SIZE 32768
const char *start = addr;
- char outbuf[OUTBUF_SIZE];
- char *outptr;
- size_t outlen;
size_t n;
int ret = 0;
while (len > 0)
{
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, &addr, &len, &outptr, &outlen);
+ n = iconv (cd, &addr, &len,
+ &output_buffer_current, &output_buffer_remaining);
if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
{
@@ -438,39 +646,34 @@ process_block (iconv_t cd, char *addr, s
errno = E2BIG;
}
- if (outptr != outbuf)
- {
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
- }
-
if (n != (size_t) -1)
{
/* All the input test is processed. For state-dependent
character sets we have to flush the state now. */
- outptr = outbuf;
- outlen = OUTBUF_SIZE;
- n = iconv (cd, NULL, NULL, &outptr, &outlen);
-
- if (outptr != outbuf)
+ n = iconv (cd, NULL, NULL,
+ &output_buffer_current, &output_buffer_remaining);
+ if (n == (size_t) -1 && errno == E2BIG)
{
- ret = write_output (outbuf, outptr, output, output_file);
- if (ret != 0)
- break;
+ /* Try again if the state flush exceeded the buffer space. */
+ flush_output ();
+ n = iconv (cd, NULL, NULL,
+ &output_buffer_current, &output_buffer_remaining);
}
+ bool errno_is_EILSEQ = errno == EILSEQ;
if (n != (size_t) -1)
break;
- if (omit_invalid && errno == EILSEQ)
+ if (omit_invalid && errno_is_EILSEQ)
{
ret = 1;
break;
}
}
- if (errno != E2BIG)
+ if (errno == E2BIG)
+ flush_output ();
+ else
{
/* iconv() ran into a problem. */
switch (errno)
@@ -501,7 +704,7 @@ incomplete character or shift sequence a
static int
-process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
+process_fd (iconv_t cd, int fd)
{
/* we have a problem with reading from a descriptor since we must not
provide the iconv() function an incomplete character or shift
@@ -575,16 +778,16 @@ process_fd (iconv_t cd, int fd, FILE **o
}
/* Now we have all the input in the buffer. Process it in one run. */
- return process_block (cd, inbuf, actlen, output, output_file);
+ return process_block (cd, inbuf, actlen);
}
static int
-process_file (iconv_t cd, FILE *input, FILE **output, const char *output_file)
+process_file (iconv_t cd, FILE *input)
{
/* This should be safe since we use this function only for `stdin' and
we haven't read anything so far. */
- return process_fd (cd, fileno (input), output, output_file);
+ return process_fd (cd, fileno (input));
}
diff -Nrup a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh
--- a/iconv/tst-iconv_prog-buffer.sh 2024-11-21 11:08:26.168051544 -0500
+++ b/iconv/tst-iconv_prog-buffer.sh 2024-11-21 11:08:51.326219757 -0500
@@ -17,6 +17,12 @@
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
+# Arguments:
+# root of the build tree ($(objpfx-common))
+# test command wrapper (for running on the board/with new ld.so)
+# extra flags to pass to iconv
+# number of times to double the input files in size (default: 0)
+
exec 2>&1
set -e
@@ -26,7 +32,9 @@ codir=$1
test_program_prefix="$2"
# Use internal converters to avoid issues with module loading.
-iconv_args="-f ASCII -t UTF-8"
+iconv_args="-f ASCII -t UTF-8 $3"
+
+file_size_doublings=${4-0}
failure=false
@@ -39,7 +47,19 @@ echo HH > "$tmp/hh"
echo XY > "$tmp/xy"
echo ZT > "$tmp/zt"
echo OUT > "$tmp/out-template"
+: > "$tmp/empty"
printf '\xff' > "$tmp/0xff"
+
+# Double all files to produce larger buffers.
+for p in "$tmp"/* ; do
+ i=0
+ while test $i -lt $file_size_doublings; do
+ cat "$p" "$p" > "$tmp/scratch"
+ mv "$tmp/scratch" "$p"
+ i=$(($i + 1))
+ done
+done
+
cat "$tmp/xy" "$tmp/0xff" "$tmp/zt" > "$tmp/0xff-wrapped"
run_iconv () {
@@ -113,6 +133,38 @@ expect_files abc def
run_iconv -o "$tmp/out" "$tmp/out" "$tmp/abc"
expect_files abc def abc
+run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out"
+expect_files ggg abc def abc
+
+run_iconv -o "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/hh"
+expect_files hh ggg abc def abc hh
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/out" "$tmp/ggg"
+expect_files ggg out-template out-template ggg
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" "$tmp/ggg" "$tmp/out" "$tmp/hh" "$tmp/out" "$tmp/ggg"
+expect_files ggg out-template hh out-template ggg
+
+# Empty output should truncate the output file if exists.
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" </dev/null
+expect_files empty
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" - </dev/null
+expect_files empty
+
+cp "$tmp/out-template" "$tmp/out"
+run_iconv -o "$tmp/out" /dev/null
+expect_files empty
+
+cp "$tmp/out-template" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/0xff"
+expect_files empty
+
# But not if we are writing to standard output.
cp "$tmp/out-template" "$tmp/out"
@@ -142,8 +194,36 @@ cp "$tmp/0xff" "$tmp/out"
expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out"
expect_files 0xff
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" < "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" - < "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/out"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/out"
+expect_files 0xff-wrapped
+
# If errors are ignored, the file should be overwritten.
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out"
+expect_files xy zt
+
+cp "$tmp/0xff" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def"
+expect_files abc def
+
cp "$tmp/out-template" "$tmp/out"
expect_exit 1 \
run_iconv -c -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def" 2>"$tmp/err"
@@ -156,6 +236,20 @@ expect_exit 1 run_iconv -c -o "$tmp/out"
! test -s "$tmp/err"
expect_files abc xy zt def
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def"
+expect_files xy zt abc xy zt def
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -o "$tmp/out" \
+ "$tmp/out" "$tmp/abc" "$tmp/out" "$tmp/def"
+expect_files 0xff-wrapped
+
+cp "$tmp/0xff-wrapped" "$tmp/out"
+expect_exit 1 run_iconv -c -o "$tmp/out" \
+ "$tmp/abc" "$tmp/out" "$tmp/def" "$tmp/out"
+expect_files abc xy zt def xy zt
+
# If the file does not exist yet, it should not be created on error.
rm "$tmp/out"

41
glibc-RHEL-1915-7.patch Normal file
View File

@ -0,0 +1,41 @@
commit 75819cdd29a193cc2db980878bec305905b22bbc
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 20 13:10:54 2024 +0200
iconv: Multiple - on command line should not fail (bug 32050)
Usually, the second and subsequent - return EOF immediately
and do not contribute to the output, but this is not an error.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 3e02db7319..dd4bc3a59a 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -287,7 +287,8 @@ conversions from `%s' and to `%s' are not supported"),
ret = process_fd (cd, fd);
/* Now close the file. */
- close (fd);
+ if (fd != STDIN_FILENO)
+ close (fd);
if (ret != 0)
{
diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh
index 54ff871d32..a9c3729d94 100644
--- a/iconv/tst-iconv_prog-buffer.sh
+++ b/iconv/tst-iconv_prog-buffer.sh
@@ -265,6 +265,11 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" "$tmp/0xff" "$tmp/def"
expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def"
! test -e "$tmp/out"
+# Listing standard input multiple times should not fail (bug 32050).
+
+run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc"
+expect_files xy abc zt
+
if $failure ; then
exit 1
fi

323
glibc-RHEL-1915-8.patch Normal file
View File

@ -0,0 +1,323 @@
commit fa1b0d5e9f6e0353e16339430770a7a8824c0468
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 20 13:10:54 2024 +0200
iconv: Input buffering for the iconv program (bug 6050)
Do not read the entire input file into memory.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index dd4bc3a59a..a2f1d34e45 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -118,8 +118,9 @@ static size_t output_buffer_size = 1024 * 1024;
/* Prototypes for the functions doing the actual work. */
static void prepare_output_file (char **argv);
-static void close_output_file (int status);
-static int process_block (iconv_t cd, char *addr, size_t len);
+static void close_output_file (__gconv_t cd, int status);
+static int process_block (iconv_t cd, char **addr, size_t *len,
+ off64_t file_offset, bool *incomplete);
static int process_fd (iconv_t cd, int fd);
static int process_file (iconv_t cd, FILE *input);
static void print_known_names (void);
@@ -311,7 +312,7 @@ conversions from `%s' and to `%s' are not supported"),
status = EXIT_FAILURE;
/* Close the output file now. */
- close_output_file (status);
+ close_output_file (cd, status);
}
return status;
@@ -599,7 +600,7 @@ flush_output (void)
}
static void
-close_output_file (int status)
+close_output_file (__gconv_t cd, int status)
{
/* Do not perform a flush if a temporary file or the in-memory
buffer is in use and there was an error. It would clobber the
@@ -608,10 +609,28 @@ close_output_file (int status)
(output_using_temporary_file || output_fd < 0))
return;
- /* The current_input_file_index variable is now larger than
- last_overlapping_file_index, so the flush_output call switches
+ /* All the input text is processed. For state-dependent character
+ sets we have to flush the state now.
+
+ The current_input_file_index variable is now larger than
+ last_overlapping_file_index, so the flush_output calls switch
away from the temporary file. */
+ size_t n = iconv (cd, NULL, NULL,
+ &output_buffer_current, &output_buffer_remaining);
+ if (n == (size_t) -1 && errno == E2BIG)
+ {
+ /* Try again if the state flush exceeded the buffer space. */
+ flush_output ();
+ n = iconv (cd, NULL, NULL,
+ &output_buffer_current, &output_buffer_remaining);
+ }
+ int saved_errno = errno;
flush_output ();
+ if (n == (size_t) -1 && !omit_invalid)
+ {
+ errno = saved_errno;
+ output_error ();
+ }
if (output_fd == STDOUT_FILENO)
{
@@ -625,51 +644,35 @@ close_output_file (int status)
output_error ();
}
+/* CD is the iconv handle. Input processing starts at *ADDR, and
+ consumes upto *LEN bytes. *ADDR and *LEN are updated. FILE_OFFSET
+ is the file offset of the data initially at ADDR. *INCOMPLETE is
+ set to true if conversion stops due to an incomplete input
+ sequence. */
static int
-process_block (iconv_t cd, char *addr, size_t len)
+process_block (iconv_t cd, char **addr, size_t *len, off64_t file_offset,
+ bool *incomplete)
{
- const char *start = addr;
+ const char *start = *addr;
size_t n;
int ret = 0;
- while (len > 0)
+ while (*len > 0)
{
- n = iconv (cd, &addr, &len,
+ n = iconv (cd, addr, len,
&output_buffer_current, &output_buffer_remaining);
if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
{
ret = 1;
- if (len == 0)
+ if (*len == 0)
n = 0;
else
errno = E2BIG;
}
if (n != (size_t) -1)
- {
- /* All the input test is processed. For state-dependent
- character sets we have to flush the state now. */
- n = iconv (cd, NULL, NULL,
- &output_buffer_current, &output_buffer_remaining);
- if (n == (size_t) -1 && errno == E2BIG)
- {
- /* Try again if the state flush exceeded the buffer space. */
- flush_output ();
- n = iconv (cd, NULL, NULL,
- &output_buffer_current, &output_buffer_remaining);
- }
- bool errno_is_EILSEQ = errno == EILSEQ;
-
- if (n != (size_t) -1)
- break;
-
- if (omit_invalid && errno_is_EILSEQ)
- {
- ret = 1;
- break;
- }
- }
+ break;
if (errno == E2BIG)
flush_output ();
@@ -680,13 +683,12 @@ process_block (iconv_t cd, char *addr, size_t len)
{
case EILSEQ:
if (! omit_invalid)
- error (0, 0, _("illegal input sequence at position %ld"),
- (long int) (addr - start));
+ error (0, 0, _("illegal input sequence at position %lld"),
+ (long long int) (file_offset + (*addr - start)));
break;
case EINVAL:
- error (0, 0, _("\
-incomplete character or shift sequence at end of buffer"));
- break;
+ *incomplete = true;
+ return ret;
case EBADF:
error (0, 0, _("internal error (illegal descriptor)"));
break;
@@ -706,79 +708,49 @@ incomplete character or shift sequence at end of buffer"));
static int
process_fd (iconv_t cd, int fd)
{
- /* we have a problem with reading from a descriptor since we must not
- provide the iconv() function an incomplete character or shift
- sequence at the end of the buffer. Since we have to deal with
- arbitrary encodings we must read the whole text in a buffer and
- process it in one step. */
- static char *inbuf = NULL;
- static size_t maxlen = 0;
- char *inptr = inbuf;
- size_t actlen = 0;
-
- while (actlen < maxlen)
+ char inbuf[BUFSIZ];
+ char *inbuf_end = inbuf + sizeof (inbuf);
+ size_t inbuf_used = 0;
+ off64_t file_offset = 0;
+ int status = 0;
+ bool incomplete = false;
+
+ while (true)
{
- ssize_t n = read (fd, inptr, maxlen - actlen);
-
- if (n == 0)
- /* No more text to read. */
- break;
-
- if (n == -1)
+ char *p = inbuf + inbuf_used;
+ ssize_t read_ret = read (fd, p, inbuf_end - p);
+ if (read_ret == 0)
+ {
+ /* On EOF, check if the previous iconv invocation saw an
+ incomplete sequence. */
+ if (incomplete)
+ {
+ error (0, 0, _("\
+incomplete character or shift sequence at end of buffer"));
+ return 1;
+ }
+ return 0;
+ }
+ if (read_ret < 0)
{
- /* Error while reading. */
error (0, errno, _("error while reading the input"));
return -1;
}
-
- inptr += n;
- actlen += n;
+ inbuf_used += read_ret;
+ incomplete = false;
+ p = inbuf;
+ int ret = process_block (cd, &p, &inbuf_used, file_offset, &incomplete);
+ if (ret != 0)
+ {
+ status = ret;
+ if (ret < 0)
+ break;
+ }
+ /* The next loop iteration consumes the leftover bytes. */
+ memmove (inbuf, p, inbuf_used);
+ file_offset += read_ret - inbuf_used;
}
-
- if (actlen == maxlen)
- while (1)
- {
- ssize_t n;
- char *new_inbuf;
-
- /* Increase the buffer. */
- new_inbuf = (char *) realloc (inbuf, maxlen + 32768);
- if (new_inbuf == NULL)
- {
- error (0, errno, _("unable to allocate buffer for input"));
- return -1;
- }
- inbuf = new_inbuf;
- maxlen += 32768;
- inptr = inbuf + actlen;
-
- do
- {
- n = read (fd, inptr, maxlen - actlen);
-
- if (n == 0)
- /* No more text to read. */
- break;
-
- if (n == -1)
- {
- /* Error while reading. */
- error (0, errno, _("error while reading the input"));
- return -1;
- }
-
- inptr += n;
- actlen += n;
- }
- while (actlen < maxlen);
-
- if (n == 0)
- /* Break again so we leave both loops. */
- break;
- }
-
- /* Now we have all the input in the buffer. Process it in one run. */
- return process_block (cd, inbuf, actlen);
+ return status;
}
diff --git a/iconv/tst-iconv_prog-buffer.sh b/iconv/tst-iconv_prog-buffer.sh
index a9c3729d94..23098ac56a 100644
--- a/iconv/tst-iconv_prog-buffer.sh
+++ b/iconv/tst-iconv_prog-buffer.sh
@@ -50,6 +50,9 @@ echo OUT > "$tmp/out-template"
: > "$tmp/empty"
printf '\xff' > "$tmp/0xff"
+# Length should be a prime number, to help with buffer alignment testing.
+printf '\xc3\xa4\xe2\x80\x94\xe2\x80\x94\xc3\xa4\n' > "$tmp/utf8-sequence"
+
# Double all files to produce larger buffers.
for p in "$tmp"/* ; do
i=0
@@ -270,6 +273,34 @@ expect_exit 1 run_iconv -o "$tmp/out" "$tmp/abc" - < "$tmp/0xff" "$tmp/def"
run_iconv -o "$tmp/out" "$tmp/xy" - - "$tmp/zt" < "$tmp/abc"
expect_files xy abc zt
+# NB: Extra iconv args are ignored after this point. Actual
+# multi-byte conversion does not work with tiny buffers.
+iconv_args="-f UTF-8 -t ASCII"
+
+printf 'x\n\xc3' > "$tmp/incomplete"
+expect_exit 1 run_iconv -o "$tmp/out" "$tmp/incomplete"
+check_out <<EOF
+x
+EOF
+
+# Test buffering behavior if the buffer ends with an incomplete
+# multi-byte sequence.
+prefix=""
+prefix_length=0
+while test $prefix_length -lt 12; do
+ echo "info: testing prefix length $prefix_length" 2>&$logfd
+ printf "%s" "$prefix" > "$tmp/prefix"
+ cat "$tmp/prefix" "$tmp/utf8-sequence" > "$tmp/tmp"
+ iconv_args="-f UTF-8 -t UCS-4"
+ run_iconv -o "$tmp/out1" "$tmp/tmp"
+ iconv_args="-f UCS-4 -t UTF-8"
+ run_iconv -o "$tmp/out" "$tmp/out1"
+ expect_files prefix utf8-sequence
+
+ prefix="$prefix@"
+ prefix_length=$(($prefix_length + 1))
+done
+
if $failure ; then
exit 1
fi

37
glibc-RHEL-1915-9.patch Normal file
View File

@ -0,0 +1,37 @@
commit 079ebf7624e7fd0ad7fe94a7176a2e132c996d86
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Sep 24 10:41:35 2024 +0200
iconv: Use $(run-program-prefix) for running iconv (bug 32197)
With --enable-hardcoded-path-in-tests, $(test-program-prefix)
does not redirect to the built glibc, but we need to run
iconv (the program) against the built glibc even with
--enable-hardcoded-path-in-tests, as it is using the ABI
path for the dynamic linker (as an installed program).
Use $(run-program-prefix) instead.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
diff --git a/iconv/Makefile b/iconv/Makefile
index c9af0c4d44..de9d964ed3 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -153,14 +153,14 @@ $(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
$(objpfx)tst-iconv_prog-buffer.out: \
tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
- $(BASH) $< $(common-objdir) '$(test-program-prefix)' > $@; \
+ $(BASH) $< $(common-objdir) '$(run-program-prefix)' > $@; \
$(evaluate-test)
$(objpfx)tst-iconv_prog-buffer-tiny.out: \
tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
- $(BASH) $< $(common-objdir) '$(test-program-prefix)' \
+ $(BASH) $< $(common-objdir) '$(run-program-prefix)' \
'--buffer-size=1' > $@; \
$(evaluate-test)
$(objpfx)tst-iconv_prog-buffer-large.out: \
tst-iconv_prog-buffer.sh $(objpfx)iconv_prog
- $(BASH) $< $(common-objdir) '$(test-program-prefix)' '' '22' > $@; \
+ $(BASH) $< $(common-objdir) '$(run-program-prefix)' '' '22' > $@; \
$(evaluate-test)

View File

@ -157,7 +157,7 @@ end \
Summary: The GNU libc libraries
Name: glibc
Version: %{glibcversion}
Release: 142%{?dist}
Release: 143%{?dist}
# In general, GPLv2+ is used by programs, LGPLv2+ is used for
# libraries.
@ -943,6 +943,15 @@ Patch704: glibc-RHEL-46725-9.patch
Patch705: glibc-RHEL-46725-10.patch
Patch706: glibc-RHEL-46725-11.patch
Patch707: glibc-RHEL-46725-12.patch
Patch708: glibc-RHEL-1915-1.patch
Patch709: glibc-RHEL-1915-2.patch
Patch710: glibc-RHEL-1915-3.patch
Patch711: glibc-RHEL-1915-4.patch
Patch712: glibc-RHEL-1915-5.patch
Patch713: glibc-RHEL-1915-6.patch
Patch714: glibc-RHEL-1915-7.patch
Patch715: glibc-RHEL-1915-8.patch
Patch716: glibc-RHEL-1915-9.patch
##############################################################################
# Continued list of core "glibc" package information:
@ -3102,6 +3111,9 @@ update_gconv_modules_cache ()
%endif
%changelog
* Wed Nov 20 2024 Patsy Griffin <patsy@redhat.com> - 2.34-143
- iconv: Support in-place conversions (RHEL-1915)
* Mon Nov 18 2024 Florian Weimer <fweimer@redhat.com> - 2.34-142
- Add printf function family tests (RHEL-46725)