Compare commits

..

No commits in common. "c8-beta" and "c8" have entirely different histories.
c8-beta ... c8

29 changed files with 6416 additions and 443 deletions

View File

@ -0,0 +1,203 @@
Author: Charles Fol <folcharles@gmail.com>
Date: Thu Mar 28 12:25:38 2024 -0300
iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961)
ISO-2022-CN-EXT uses escape sequences to indicate character set changes
(as specified by RFC 1922). While the SOdesignation has the expected
bounds checks, neither SS2designation nor SS3designation have its;
allowing a write overflow of 1, 2, or 3 bytes with fixed values:
'$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'.
Checked on aarch64-linux-gnu.
Co-authored-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index 646e2ccd11478646..c959758a90ed954f 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -75,7 +75,7 @@ ifeq (yes,$(build-shared))
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 \
- bug-iconv15
+ bug-iconv15 tst-iconv-iso-2022-cn-ext
ifeq ($(have-thread-library),yes)
tests += bug-iconv3
endif
@@ -325,6 +325,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so))
+$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \
+ $(addprefix $(objpfx),$(modules.so))
$(objpfx)iconv-test.out: run-iconv-test.sh \
$(addprefix $(objpfx), $(gconv-modules)) \
diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c
index c21a7187b4d7808e..bd9493c12d95070b 100644
--- a/iconvdata/iso-2022-cn-ext.c
+++ b/iconvdata/iso-2022-cn-ext.c
@@ -575,6 +575,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
{ \
const char *escseq; \
\
+ if (outptr + 4 > outend) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
assert (used == CNS11643_2_set); /* XXX */ \
escseq = "*H"; \
*outptr++ = ESC; \
@@ -588,6 +594,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
{ \
const char *escseq; \
\
+ if (outptr + 4 > outend) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ \
assert ((used >> 5) >= 3 && (used >> 5) <= 7); \
escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \
*outptr++ = ESC; \
diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c
new file mode 100644
index 0000000000000000..96a8765fd5369681
--- /dev/null
+++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c
@@ -0,0 +1,128 @@
+/* Verify ISO-2022-CN-EXT does not write out of the bounds.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <errno.h>
+#include <iconv.h>
+#include <sys/mman.h>
+
+#include <support/xunistd.h>
+#include <support/check.h>
+#include <support/support.h>
+
+/* The test sets up a two memory page buffer with the second page marked
+ PROT_NONE to trigger a fault if the conversion writes beyond the exact
+ expected amount. Then we carry out various conversions and precisely
+ place the start of the output buffer in order to trigger a SIGSEGV if the
+ process writes anywhere between 1 and page sized bytes more (only one
+ PROT_NONE page is setup as a canary) than expected. These tests exercise
+ all three of the cases in ISO-2022-CN-EXT where the converter must switch
+ character sets and may run out of buffer space while doing the
+ operation. */
+
+static int
+do_test (void)
+{
+ iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8");
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+ char *ntf;
+ size_t ntfsize;
+ char *outbufbase;
+ {
+ int pgz = getpagesize ();
+ TEST_VERIFY_EXIT (pgz > 0);
+ ntfsize = 2 * pgz;
+
+ ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE
+ | MAP_ANONYMOUS, -1);
+ xmprotect (ntf + pgz, pgz, PROT_NONE);
+
+ outbufbase = ntf + pgz;
+ }
+
+ /* Check if SOdesignation escape sequence does not trigger an OOB write. */
+ {
+ char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2";
+
+ for (int i = 0; i < 9; i++)
+ {
+ char *inp = inbuf;
+ size_t inleft = sizeof (inbuf) - 1;
+
+ char *outp = outbufbase - i;
+ size_t outleft = i;
+
+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
+ == (size_t) -1);
+ TEST_COMPARE (errno, E2BIG);
+
+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
+ }
+ }
+
+ /* Same as before for SS2designation. */
+ {
+ char inbuf[] = "㴽 \xe3\xb4\xbd";
+
+ for (int i = 0; i < 14; i++)
+ {
+ char *inp = inbuf;
+ size_t inleft = sizeof (inbuf) - 1;
+
+ char *outp = outbufbase - i;
+ size_t outleft = i;
+
+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
+ == (size_t) -1);
+ TEST_COMPARE (errno, E2BIG);
+
+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
+ }
+ }
+
+ /* Same as before for SS3designation. */
+ {
+ char inbuf[] = "劄 \xe5\x8a\x84";
+
+ for (int i = 0; i < 14; i++)
+ {
+ char *inp = inbuf;
+ size_t inleft = sizeof (inbuf) - 1;
+
+ char *outp = outbufbase - i;
+ size_t outleft = i;
+
+ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
+ == (size_t) -1);
+ TEST_COMPARE (errno, E2BIG);
+
+ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
+ }
+ }
+
+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
+
+ xmunmap (ntf, ntfsize);
+
+ return 0;
+}
+
+#include <support/test-driver.c>

View File

@ -0,0 +1,31 @@
commit 87801a8fd06db1d654eea3e4f7626ff476a9bdaa
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Apr 25 15:00:45 2024 +0200
CVE-2024-33599: nscd: Stack-based buffer overflow in netgroup cache (bug 31677)
Using alloca matches what other caches do. The request length is
bounded by MAXKEYLEN.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
index 5ee4413ef9384ec9..60c8225639a33b6b 100644
--- a/nscd/netgroupcache.c
+++ b/nscd/netgroupcache.c
@@ -503,12 +503,13 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
= (struct indataset *) mempool_alloc (db,
sizeof (*dataset) + req->key_len,
1);
- struct indataset dataset_mem;
bool cacheable = true;
if (__glibc_unlikely (dataset == NULL))
{
cacheable = false;
- dataset = &dataset_mem;
+ /* The alloca is safe because nscd_run_worker verfies that
+ key_len is not larger than MAXKEYLEN. */
+ dataset = alloca (sizeof (*dataset) + req->key_len);
}
datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,

View File

@ -0,0 +1,52 @@
commit 7835b00dbce53c3c87bbbb1754a95fb5e58187aa
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Apr 25 15:01:07 2024 +0200
CVE-2024-33600: nscd: Do not send missing not-found response in addgetnetgrentX (bug 31678)
If we failed to add a not-found response to the cache, the dataset
point can be null, resulting in a null pointer dereference.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
index 60c8225639a33b6b..a3e04b4c43e6acae 100644
--- a/nscd/netgroupcache.c
+++ b/nscd/netgroupcache.c
@@ -148,7 +148,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
/* No such service. */
cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
&key_copy);
- goto writeout;
+ goto maybe_cache_add;
}
memset (&data, '\0', sizeof (data));
@@ -349,7 +349,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
{
cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
&key_copy);
- goto writeout;
+ goto maybe_cache_add;
}
total = buffilled;
@@ -411,14 +411,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
}
if (he == NULL && fd != -1)
- {
- /* We write the dataset before inserting it to the database
- since while inserting this thread might block and so would
- unnecessarily let the receiver wait. */
- writeout:
+ /* We write the dataset before inserting it to the database since
+ while inserting this thread might block and so would
+ unnecessarily let the receiver wait. */
writeall (fd, &dataset->resp, dataset->head.recsize);
- }
+ maybe_cache_add:
if (cacheable)
{
/* If necessary, we also propagate the data to disk. */

View File

@ -0,0 +1,53 @@
commit b048a482f088e53144d26a61c390bed0210f49f2
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Apr 25 15:01:07 2024 +0200
CVE-2024-33600: nscd: Avoid null pointer crashes after notfound response (bug 31678)
The addgetnetgrentX call in addinnetgrX may have failed to produce
a result, so the result variable in addinnetgrX can be NULL.
Use db->negtimeout as the fallback value if there is no result data;
the timeout is also overwritten below.
Also avoid sending a second not-found response. (The client
disconnects after receiving the first response, so the data stream did
not go out of sync even without this fix.) It is still beneficial to
add the negative response to the mapping, so that the client can get
it from there in the future, instead of going through the socket.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
index a3e04b4c43e6acae..f656872ae8c3b888 100644
--- a/nscd/netgroupcache.c
+++ b/nscd/netgroupcache.c
@@ -512,14 +512,15 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,
sizeof (innetgroup_response_header),
- he == NULL ? 0 : dh->nreloads + 1, result->head.ttl);
+ he == NULL ? 0 : dh->nreloads + 1,
+ result == NULL ? db->negtimeout : result->head.ttl);
/* Set the notfound status and timeout based on the result from
getnetgrent. */
- dataset->head.notfound = result->head.notfound;
+ dataset->head.notfound = result == NULL || result->head.notfound;
dataset->head.timeout = timeout;
dataset->resp.version = NSCD_VERSION;
- dataset->resp.found = result->resp.found;
+ dataset->resp.found = result != NULL && result->resp.found;
/* Until we find a matching entry the result is 0. */
dataset->resp.result = 0;
@@ -567,7 +568,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
goto out;
}
- if (he == NULL)
+ /* addgetnetgrentX may have already sent a notfound response. Do
+ not send another one. */
+ if (he == NULL && dataset->resp.found)
{
/* We write the dataset before inserting it to the database
since while inserting this thread might block and so would

View File

@ -0,0 +1,383 @@
commit c04a21e050d64a1193a6daab872bca2528bda44b
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Apr 25 15:01:07 2024 +0200
CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680)
This avoids potential memory corruption when the underlying NSS
callback function does not use the buffer space to store all strings
(e.g., for constant strings).
Instead of custom buffer management, two scratch buffers are used.
This increases stack usage somewhat.
Scratch buffer allocation failure is handled by return -1
(an invalid timeout value) instead of terminating the process.
This fixes bug 31679.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
index f656872ae8c3b888..dd180f8083e7c9f9 100644
--- a/nscd/netgroupcache.c
+++ b/nscd/netgroupcache.c
@@ -24,6 +24,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
+#include <scratch_buffer.h>
#include "../inet/netgroup.h"
#include "nscd.h"
@@ -66,6 +67,16 @@ struct dataset
char strdata[0];
};
+/* Send a notfound response to FD. Always returns -1 to indicate an
+ ephemeral error. */
+static time_t
+send_notfound (int fd)
+{
+ if (fd != -1)
+ TEMP_FAILURE_RETRY (send (fd, &notfound, sizeof (notfound), MSG_NOSIGNAL));
+ return -1;
+}
+
/* Sends a notfound message and prepares a notfound dataset to write to the
cache. Returns true if there was enough memory to allocate the dataset and
returns the dataset in DATASETP, total bytes to write in TOTALP and the
@@ -84,8 +95,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
total = sizeof (notfound);
timeout = time (NULL) + db->negtimeout;
- if (fd != -1)
- TEMP_FAILURE_RETRY (send (fd, &notfound, total, MSG_NOSIGNAL));
+ send_notfound (fd);
dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1);
/* If we cannot permanently store the result, so be it. */
@@ -110,11 +120,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
return cacheable;
}
+struct addgetnetgrentX_scratch
+{
+ /* This is the result that the caller should use. It can be NULL,
+ point into buffer, or it can be in the cache. */
+ struct dataset *dataset;
+
+ struct scratch_buffer buffer;
+
+ /* Used internally in addgetnetgrentX as a staging area. */
+ struct scratch_buffer tmp;
+
+ /* Number of bytes in buffer that are actually used. */
+ size_t buffer_used;
+};
+
+static void
+addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch)
+{
+ scratch->dataset = NULL;
+ scratch_buffer_init (&scratch->buffer);
+ scratch_buffer_init (&scratch->tmp);
+
+ /* Reserve space for the header. */
+ scratch->buffer_used = sizeof (struct dataset);
+ static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space),
+ "initial buffer space");
+ memset (scratch->tmp.data, 0, sizeof (struct dataset));
+}
+
+static void
+addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch)
+{
+ scratch_buffer_free (&scratch->buffer);
+ scratch_buffer_free (&scratch->tmp);
+}
+
+/* Copy LENGTH bytes from S into SCRATCH. Returns NULL if SCRATCH
+ could not be resized, otherwise a pointer to the copy. */
+static char *
+addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch,
+ const char *s, size_t length)
+{
+ while (true)
+ {
+ size_t remaining = scratch->buffer.length - scratch->buffer_used;
+ if (remaining >= length)
+ break;
+ if (!scratch_buffer_grow_preserve (&scratch->buffer))
+ return NULL;
+ }
+ char *copy = scratch->buffer.data + scratch->buffer_used;
+ memcpy (copy, s, length);
+ scratch->buffer_used += length;
+ return copy;
+}
+
+/* Copy S into SCRATCH, including its null terminator. Returns false
+ if SCRATCH could not be resized. */
+static bool
+addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s)
+{
+ if (s == NULL)
+ s = "";
+ return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL;
+}
+
+/* Caller must initialize and free *SCRATCH. If the return value is
+ negative, this function has sent a notfound response. */
static time_t
addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
const char *key, uid_t uid, struct hashentry *he,
- struct datahead *dh, struct dataset **resultp,
- void **tofreep)
+ struct datahead *dh, struct addgetnetgrentX_scratch *scratch)
{
if (__glibc_unlikely (debug_level > 0))
{
@@ -133,14 +210,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
char *key_copy = NULL;
struct __netgrent data;
- size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len);
- size_t buffilled = sizeof (*dataset);
- char *buffer = NULL;
size_t nentries = 0;
size_t group_len = strlen (key) + 1;
struct name_list *first_needed
= alloca (sizeof (struct name_list) + group_len);
- *tofreep = NULL;
if (netgroup_database == NULL
&& __nss_database_lookup2 ("netgroup", NULL, NULL, &netgroup_database))
@@ -152,8 +225,6 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
}
memset (&data, '\0', sizeof (data));
- buffer = xmalloc (buflen);
- *tofreep = buffer;
first_needed->next = first_needed;
memcpy (first_needed->name, key, group_len);
data.needed_groups = first_needed;
@@ -196,8 +267,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
while (1)
{
int e;
- status = getfct.f (&data, buffer + buffilled,
- buflen - buffilled - req->key_len, &e);
+ status = getfct.f (&data, scratch->tmp.data,
+ scratch->tmp.length, &e);
if (status == NSS_STATUS_SUCCESS)
{
if (data.type == triple_val)
@@ -205,68 +276,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
const char *nhost = data.val.triple.host;
const char *nuser = data.val.triple.user;
const char *ndomain = data.val.triple.domain;
-
- size_t hostlen = strlen (nhost ?: "") + 1;
- size_t userlen = strlen (nuser ?: "") + 1;
- size_t domainlen = strlen (ndomain ?: "") + 1;
-
- if (nhost == NULL || nuser == NULL || ndomain == NULL
- || nhost > nuser || nuser > ndomain)
- {
- const char *last = nhost;
- if (last == NULL
- || (nuser != NULL && nuser > last))
- last = nuser;
- if (last == NULL
- || (ndomain != NULL && ndomain > last))
- last = ndomain;
-
- size_t bufused
- = (last == NULL
- ? buffilled
- : last + strlen (last) + 1 - buffer);
-
- /* We have to make temporary copies. */
- size_t needed = hostlen + userlen + domainlen;
-
- if (buflen - req->key_len - bufused < needed)
- {
- buflen += MAX (buflen, 2 * needed);
- /* Save offset in the old buffer. We don't
- bother with the NULL check here since
- we'll do that later anyway. */
- size_t nhostdiff = nhost - buffer;
- size_t nuserdiff = nuser - buffer;
- size_t ndomaindiff = ndomain - buffer;
-
- char *newbuf = xrealloc (buffer, buflen);
- /* Fix up the triplet pointers into the new
- buffer. */
- nhost = (nhost ? newbuf + nhostdiff
- : NULL);
- nuser = (nuser ? newbuf + nuserdiff
- : NULL);
- ndomain = (ndomain ? newbuf + ndomaindiff
- : NULL);
- *tofreep = buffer = newbuf;
- }
-
- nhost = memcpy (buffer + bufused,
- nhost ?: "", hostlen);
- nuser = memcpy ((char *) nhost + hostlen,
- nuser ?: "", userlen);
- ndomain = memcpy ((char *) nuser + userlen,
- ndomain ?: "", domainlen);
- }
-
- char *wp = buffer + buffilled;
- wp = memmove (wp, nhost ?: "", hostlen);
- wp += hostlen;
- wp = memmove (wp, nuser ?: "", userlen);
- wp += userlen;
- wp = memmove (wp, ndomain ?: "", domainlen);
- wp += domainlen;
- buffilled = wp - buffer;
+ if (!(addgetnetgrentX_append (scratch, nhost)
+ && addgetnetgrentX_append (scratch, nuser)
+ && addgetnetgrentX_append (scratch, ndomain)))
+ return send_notfound (fd);
++nentries;
}
else
@@ -318,8 +331,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
}
else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
{
- buflen *= 2;
- *tofreep = buffer = xrealloc (buffer, buflen);
+ if (!scratch_buffer_grow (&scratch->tmp))
+ return send_notfound (fd);
}
else if (status == NSS_STATUS_RETURN
|| status == NSS_STATUS_NOTFOUND
@@ -352,10 +365,17 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
goto maybe_cache_add;
}
- total = buffilled;
+ /* Capture the result size without the key appended. */
+ total = scratch->buffer_used;
+
+ /* Make a copy of the key. The scratch buffer must not move after
+ this point. */
+ key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len);
+ if (key_copy == NULL)
+ return send_notfound (fd);
/* Fill in the dataset. */
- dataset = (struct dataset *) buffer;
+ dataset = scratch->buffer.data;
timeout = datahead_init_pos (&dataset->head, total + req->key_len,
total - offsetof (struct dataset, resp),
he == NULL ? 0 : dh->nreloads + 1,
@@ -364,11 +384,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
dataset->resp.version = NSCD_VERSION;
dataset->resp.found = 1;
dataset->resp.nresults = nentries;
- dataset->resp.result_len = buffilled - sizeof (*dataset);
-
- assert (buflen - buffilled >= req->key_len);
- key_copy = memcpy (buffer + buffilled, key, req->key_len);
- buffilled += req->key_len;
+ dataset->resp.result_len = total - sizeof (*dataset);
/* Now we can determine whether on refill we have to create a new
record or not. */
@@ -399,7 +415,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
if (__glibc_likely (newp != NULL))
{
/* Adjust pointer into the memory block. */
- key_copy = (char *) newp + (key_copy - buffer);
+ key_copy = (char *) newp + (key_copy - (char *) dataset);
dataset = memcpy (newp, dataset, total + req->key_len);
cacheable = true;
@@ -440,7 +456,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
}
out:
- *resultp = dataset;
+ scratch->dataset = dataset;
return timeout;
}
@@ -461,6 +477,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
if (user != NULL)
key = (char *) rawmemchr (key, '\0') + 1;
const char *domain = *key++ ? key : NULL;
+ struct addgetnetgrentX_scratch scratch;
+
+ addgetnetgrentX_scratch_init (&scratch);
if (__glibc_unlikely (debug_level > 0))
{
@@ -476,12 +495,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
group, group_len,
db, uid);
time_t timeout;
- void *tofree;
if (result != NULL)
- {
- timeout = result->head.timeout;
- tofree = NULL;
- }
+ timeout = result->head.timeout;
else
{
request_header req_get =
@@ -490,7 +505,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
.key_len = group_len
};
timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL,
- &result, &tofree);
+ &scratch);
+ result = scratch.dataset;
+ if (timeout < 0)
+ goto out;
}
struct indataset
@@ -604,7 +622,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
}
out:
- free (tofree);
+ addgetnetgrentX_scratch_free (&scratch);
return timeout;
}
@@ -614,11 +632,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req,
const char *key, uid_t uid, struct hashentry *he,
struct datahead *dh)
{
- struct dataset *ignore;
- void *tofree;
- time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh,
- &ignore, &tofree);
- free (tofree);
+ struct addgetnetgrentX_scratch scratch;
+ addgetnetgrentX_scratch_init (&scratch);
+ time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch);
+ addgetnetgrentX_scratch_free (&scratch);
+ if (timeout < 0)
+ timeout = 0;
return timeout;
}
@@ -662,5 +681,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he,
.key_len = he->len
};
- return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh);
+ int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
+ he, dh);
+ if (timeout < 0)
+ timeout = 0;
+ return timeout;
}

View File

@ -0,0 +1,88 @@
commit fe06fb313bddf7e4530056897d4a706606e49377
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Aug 1 23:31:23 2024 +0200
elf: Clarify and invert second argument of _dl_allocate_tls_init
Also remove an outdated comment: _dl_allocate_tls_init is
called as part of pthread_create.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff -Nrup a/elf/dl-tls.c b/elf/dl-tls.c
--- a/elf/dl-tls.c 2024-08-09 20:22:18.516110414 -0400
+++ b/elf/dl-tls.c 2024-08-09 20:23:26.182493188 -0400
@@ -553,9 +553,14 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_m
/* Allocate initial TLS. RESULT should be a non-NULL pointer to storage
for the TLS space. The DTV may be resized, and so this function may
call malloc to allocate that space. The loader's GL(dl_load_tls_lock)
- is taken when manipulating global TLS-related data in the loader. */
+ is taken when manipulating global TLS-related data in the loader.
+
+ If MAIN_THREAD, this is the first call during process
+ initialization. In this case, TLS initialization for secondary
+ (audit) namespaces is skipped because that has already been handled
+ by dlopen. */
void *
-_dl_allocate_tls_init (void *result, bool init_tls)
+_dl_allocate_tls_init (void *result, bool main_thread)
{
if (result == NULL)
/* The memory allocation failed. */
@@ -634,7 +639,7 @@ _dl_allocate_tls_init (void *result, boo
because it would already be set by the audit setup. However,
subsequent thread creation would need to follow the default
behaviour. */
- if (map->l_ns != LM_ID_BASE && !init_tls)
+ if (map->l_ns != LM_ID_BASE && main_thread)
continue;
memset (__mempcpy (dest, map->l_tls_initimage,
map->l_tls_initimage_size), '\0',
@@ -662,7 +667,7 @@ _dl_allocate_tls (void *mem)
{
return _dl_allocate_tls_init (mem == NULL
? _dl_allocate_tls_storage ()
- : allocate_dtv (mem), true);
+ : allocate_dtv (mem), false);
}
rtld_hidden_def (_dl_allocate_tls)
diff -Nrup a/elf/rtld.c b/elf/rtld.c
--- a/elf/rtld.c 2024-08-09 20:22:18.516110414 -0400
+++ b/elf/rtld.c 2024-08-09 20:24:17.584783701 -0400
@@ -2478,7 +2478,7 @@ ERROR: '%s': cannot process note segment
into the main thread's TLS area, which we allocated above.
Note: thread-local variables must only be accessed after completing
the next step. */
- _dl_allocate_tls_init (tcbp, false);
+ _dl_allocate_tls_init (tcbp, true);
/* And finally install it for the main thread. */
if (! tls_init_tp_called)
diff -Nrup a/nptl/allocatestack.c b/nptl/allocatestack.c
--- a/nptl/allocatestack.c 2024-08-09 20:22:17.808106408 -0400
+++ b/nptl/allocatestack.c 2024-08-09 20:25:49.436302396 -0400
@@ -244,7 +244,7 @@ get_cached_stack (size_t *sizep, void **
memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
/* Re-initialize the TLS. */
- _dl_allocate_tls_init (TLS_TPADJ (result), true);
+ _dl_allocate_tls_init (TLS_TPADJ (result), false);
return result;
}
diff -Nrup a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
--- a/sysdeps/generic/ldsodefs.h 2024-08-09 20:22:18.517110419 -0400
+++ b/sysdeps/generic/ldsodefs.h 2024-08-09 20:34:20.093186159 -0400
@@ -1185,10 +1185,8 @@ extern void _dl_get_tls_static_info (siz
extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden;
-/* These are internal entry points to the two halves of _dl_allocate_tls,
- only used within rtld.c itself at startup time. */
extern void *_dl_allocate_tls_storage (void) attribute_hidden;
-extern void *_dl_allocate_tls_init (void *, bool);
+extern void *_dl_allocate_tls_init (void *result, bool main_thread);
rtld_hidden_proto (_dl_allocate_tls_init)
/* Deallocate memory allocated with _dl_allocate_tls. */

View File

@ -0,0 +1,526 @@
commit 5097cd344fd243fb8deb6dec96e8073753f962f9
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Aug 1 23:31:30 2024 +0200
elf: Avoid re-initializing already allocated TLS in dlopen (bug 31717)
The old code used l_init_called as an indicator for whether TLS
initialization was complete. However, it is possible that
TLS for an object is initialized, written to, and then dlopen
for this object is called again, and l_init_called is not true at
this point. Previously, this resulted in TLS being initialized
twice, discarding any interim writes (technically introducing a
use-after-free bug even).
This commit introduces an explicit per-object flag, l_tls_in_slotinfo.
It indicates whether _dl_add_to_slotinfo has been called for this
object. This flag is used to avoid double-initialization of TLS.
In update_tls_slotinfo, the first_static_tls micro-optimization
is removed because preserving the initalization flag for subsequent
use by the second loop for static TLS is a bit complicated, and
another per-object flag does not seem to be worth it. Furthermore,
the l_init_called flag is dropped from the second loop (for static
TLS initialization) because l_need_tls_init on its own prevents
double-initialization.
The remaining l_init_called usage in resize_scopes and update_scopes
is just an optimization due to the use of scope_has_map, so it is
not changed in this commit.
The isupper check ensures that libc.so.6 is TLS is not reverted.
Such a revert happens if l_need_tls_init is not cleared in
_dl_allocate_tls_init for the main_thread case, now that
l_init_called is not checked anymore in update_tls_slotinfo
in elf/dl-open.c.
Reported-by: Jonathon Anderson <janderson@rice.edu>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Conflicts:
elf/Makefile - tests and module-names differences
elf/Makefile - add $(libdl) where needed
diff -Nrup a/elf/Makefile b/elf/Makefile
--- a/elf/Makefile 2024-08-09 21:34:38.159713929 -0400
+++ b/elf/Makefile 2024-08-16 13:22:32.268485608 -0400
@@ -369,6 +369,10 @@ tests += \
tst-dlmopen-dlerror \
tst-dlmopen-gethostbyname \
tst-dlmopen-twice \
+ tst-dlopen-tlsreinit1 \
+ tst-dlopen-tlsreinit2 \
+ tst-dlopen-tlsreinit3 \
+ tst-dlopen-tlsreinit4 \
tst-dlopenfail \
tst-dlopenfail-2 \
tst-dlopenrpath \
@@ -720,6 +724,9 @@ modules-names = \
tst-dlmopen-gethostbyname-mod \
tst-dlmopen-twice-mod1 \
tst-dlmopen-twice-mod2 \
+ tst-dlopen-tlsreinitmod1 \
+ tst-dlopen-tlsreinitmod2 \
+ tst-dlopen-tlsreinitmod3 \
tst-dlopenfaillinkmod \
tst-dlopenfailmod1 \
tst-dlopenfailmod2 \
@@ -2775,3 +2782,30 @@ $(objpfx)tst-recursive-tls.out: \
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
$(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c
$(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$*
+
+# Order matters here. The test needs the constructor for
+# tst-dlopen-tlsreinitmod2.so to be called first.
+$(objpfx)tst-dlopen-tlsreinitmod3.so: $(libdl)
+$(objpfx)tst-dlopen-tlsreinit3: $(libdl)
+$(objpfx)tst-dlopen-tlsreinitmod1.so: $(libdl)
+$(objpfx)tst-dlopen-tlsreinit1: $(libdl)
+LDFLAGS-tst-dlopen-tlsreinitmod1.so = -Wl,--no-as-needed
+$(objpfx)tst-dlopen-tlsreinitmod1.so: \
+ $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so
+LDFLAGS-tst-dlopen-tlsreinit2 = -Wl,--no-as-needed
+$(objpfx)tst-dlopen-tlsreinit2: \
+ $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so
+LDFLAGS-tst-dlopen-tlsreinit4 = -Wl,--no-as-needed
+$(objpfx)tst-dlopen-tlsreinit4: \
+ $(objpfx)tst-dlopen-tlsreinitmod3.so $(objpfx)tst-dlopen-tlsreinitmod2.so
+# tst-dlopen-tlsreinitmod2.so is underlinked and refers to
+# tst-dlopen-tlsreinitmod3.so. The dependency is provided via
+# $(objpfx)tst-dlopen-tlsreinitmod1.so.
+tst-dlopen-tlsreinitmod2.so-no-z-defs = yes
+$(objpfx)tst-dlopen-tlsreinit.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \
+ $(objpfx)tst-dlopen-tlsreinitmod2.so $(objpfx)tst-dlopen-tlsreinitmod3.so
+# Reuse an audit module which provides ample debug logging.
+$(objpfx)tst-dlopen-tlsreinit3.out: $(objpfx)tst-auditmod1.so
+tst-dlopen-tlsreinit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so
+$(objpfx)tst-dlopen-tlsreinit4.out: $(objpfx)tst-auditmod1.so
+tst-dlopen-tlsreinit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so
diff -Nrup a/elf/dl-open.c b/elf/dl-open.c
--- a/elf/dl-open.c 2024-08-09 21:34:37.782711770 -0400
+++ b/elf/dl-open.c 2024-08-13 16:18:12.501292054 -0400
@@ -361,17 +361,8 @@ resize_tls_slotinfo (struct link_map *ne
{
bool any_tls = false;
for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
- {
- struct link_map *imap = new->l_searchlist.r_list[i];
-
- /* Only add TLS memory if this object is loaded now and
- therefore is not yet initialized. */
- if (! imap->l_init_called && imap->l_tls_blocksize > 0)
- {
- _dl_add_to_slotinfo (imap, false);
- any_tls = true;
- }
- }
+ if (_dl_add_to_slotinfo (new->l_searchlist.r_list[i], false))
+ any_tls = true;
return any_tls;
}
@@ -381,22 +372,8 @@ resize_tls_slotinfo (struct link_map *ne
static void
update_tls_slotinfo (struct link_map *new)
{
- unsigned int first_static_tls = new->l_searchlist.r_nlist;
for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
- {
- struct link_map *imap = new->l_searchlist.r_list[i];
-
- /* Only add TLS memory if this object is loaded now and
- therefore is not yet initialized. */
- if (! imap->l_init_called && imap->l_tls_blocksize > 0)
- {
- _dl_add_to_slotinfo (imap, true);
-
- if (imap->l_need_tls_init
- && first_static_tls == new->l_searchlist.r_nlist)
- first_static_tls = i;
- }
- }
+ _dl_add_to_slotinfo (new->l_searchlist.r_list[i], true);
size_t newgen = GL(dl_tls_generation) + 1;
if (__glibc_unlikely (newgen == 0))
@@ -408,13 +385,11 @@ TLS generation counter wrapped! Please
/* We need a second pass for static tls data, because
_dl_update_slotinfo must not be run while calls to
_dl_add_to_slotinfo are still pending. */
- for (unsigned int i = first_static_tls; i < new->l_searchlist.r_nlist; ++i)
+ for (unsigned int i = 0; i < new->l_searchlist.r_nlist; ++i)
{
struct link_map *imap = new->l_searchlist.r_list[i];
- if (imap->l_need_tls_init
- && ! imap->l_init_called
- && imap->l_tls_blocksize > 0)
+ if (imap->l_need_tls_init && imap->l_tls_blocksize > 0)
{
/* For static TLS we have to allocate the memory here and
now, but we can delay updating the DTV. */
diff -Nrup a/elf/dl-tls.c b/elf/dl-tls.c
--- a/elf/dl-tls.c 2024-08-09 21:34:38.162713946 -0400
+++ b/elf/dl-tls.c 2024-08-13 16:23:19.819877383 -0400
@@ -633,17 +633,21 @@ _dl_allocate_tls_init (void *result, boo
some platforms use in static programs requires it. */
dtv[map->l_tls_modid].pointer.val = dest;
- /* Copy the initialization image and clear the BSS part. For
- audit modules or dependencies with initial-exec TLS, we can not
- set the initial TLS image on default loader initialization
- because it would already be set by the audit setup. However,
- subsequent thread creation would need to follow the default
- behaviour. */
+ /* Copy the initialization image and clear the BSS part.
+ For audit modules or dependencies with initial-exec TLS,
+ we can not set the initial TLS image on default loader
+ initialization because it would already be set by the
+ audit setup, which uses the dlopen code and already
+ clears l_need_tls_init. Calls with !main_thread from
+ pthread_create need to initialze TLS for the current
+ thread regardless of namespace. */
if (map->l_ns != LM_ID_BASE && main_thread)
continue;
memset (__mempcpy (dest, map->l_tls_initimage,
map->l_tls_initimage_size), '\0',
map->l_tls_blocksize - map->l_tls_initimage_size);
+ if (main_thread)
+ map->l_need_tls_init = 0;
}
total += cnt;
@@ -1100,9 +1104,32 @@ _dl_tls_initial_modid_limit_setup (void)
}
-void
+/* Add module to slot information data. If DO_ADD is false, only the
+ required memory is allocated. Must be called with
+ GL (dl_load_tls_lock) acquired. If the function has already been
+ called for the link map L with !DO_ADD, then this function will not
+ raise an exception, otherwise it is possible that it encounters a
+ memory allocation failure.
+
+ Return false if L has already been added to the slotinfo data, or
+ if L has no TLS data. If the returned value is true, L has been
+ added with this call (DO_ADD), or has been added in a previous call
+ (!DO_ADD).
+
+ The expected usage is as follows: Call _dl_add_to_slotinfo for
+ several link maps with DO_ADD set to false, and record if any calls
+ result in a true result. If there was a true result, call
+ _dl_add_to_slotinfo again, this time with DO_ADD set to true. (For
+ simplicity, it's possible to call the function for link maps where
+ the previous result was false.) The return value from the second
+ round of calls can be ignored. If there was true result initially,
+ call _dl_update_slotinfo to update the TLS generation counter. */
+bool
_dl_add_to_slotinfo (struct link_map *l, bool do_add)
{
+ if (l->l_tls_blocksize == 0 || l->l_tls_in_slotinfo)
+ return false;
+
/* Now that we know the object is loaded successfully add
modules containing TLS data to the dtv info table. We
might have to increase its size. */
@@ -1158,5 +1185,8 @@ cannot create TLS data structures"));
atomic_store_relaxed (&listp->slotinfo[idx].map, l);
atomic_store_relaxed (&listp->slotinfo[idx].gen,
GL(dl_tls_generation) + 1);
+ l->l_tls_in_slotinfo = true;
}
+
+ return true;
}
diff -Nrup a/elf/tst-dlopen-tlsreinit1.c b/elf/tst-dlopen-tlsreinit1.c
--- a/elf/tst-dlopen-tlsreinit1.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinit1.c 2024-08-09 21:57:01.495424018 -0400
@@ -0,0 +1,40 @@
+/* Test that dlopen preserves already accessed TLS (bug 31717).
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdbool.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+#include <ctype.h>
+
+static int
+do_test (void)
+{
+ void *handle = xdlopen ("tst-dlopen-tlsreinitmod1.so", RTLD_NOW);
+
+ bool *tlsreinitmod3_tested = xdlsym (handle, "tlsreinitmod3_tested");
+ TEST_VERIFY (*tlsreinitmod3_tested);
+
+ xdlclose (handle);
+
+ /* This crashes if the libc.so.6 TLS image has been reverted. */
+ TEST_VERIFY (!isupper ('@'));
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff -Nrup a/elf/tst-dlopen-tlsreinit2.c b/elf/tst-dlopen-tlsreinit2.c
--- a/elf/tst-dlopen-tlsreinit2.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinit2.c 2024-08-09 21:59:14.657192089 -0400
@@ -0,0 +1,39 @@
+/* Test that dlopen preserves already accessed TLS (bug 31717).
+ Variant with initially-linked modules.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+
+
+static int
+do_test (void)
+{
+ /* Defined in tst-dlopen-tlsreinitmod3.so. */
+ extern bool tlsreinitmod3_tested;
+ TEST_VERIFY (tlsreinitmod3_tested);
+
+ /* This crashes if the libc.so.6 TLS image has been reverted. */
+ TEST_VERIFY (!isupper ('@'));
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff -Nrup a/elf/tst-dlopen-tlsreinit3.c b/elf/tst-dlopen-tlsreinit3.c
--- a/elf/tst-dlopen-tlsreinit3.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinit3.c 2024-08-09 22:00:43.985707332 -0400
@@ -0,0 +1,2 @@
+/* Same code, but run with LD_AUDIT=tst-auditmod1.so. */
+#include "tst-dlopen-tlsreinit1.c"
diff -Nrup a/elf/tst-dlopen-tlsreinit4.c b/elf/tst-dlopen-tlsreinit4.c
--- a/elf/tst-dlopen-tlsreinit4.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinit4.c 2024-08-09 22:01:30.058973079 -0400
@@ -0,0 +1,2 @@
+/* Same code, but run with LD_AUDIT=tst-auditmod1.so. */
+#include "tst-dlopen-tlsreinit2.c"
diff -Nrup a/elf/tst-dlopen-tlsreinitmod1.c b/elf/tst-dlopen-tlsreinitmod1.c
--- a/elf/tst-dlopen-tlsreinitmod1.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinitmod1.c 2024-08-09 22:02:42.337389978 -0400
@@ -0,0 +1,20 @@
+/* Test that dlopen preserves already accessed TLS (bug 31717), module 1.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This module triggers loading of tst-dlopen-tlsreinitmod2.so and
+ tst-dlopen-tlsreinitmod3.so. */
diff -Nrup a/elf/tst-dlopen-tlsreinitmod2.c b/elf/tst-dlopen-tlsreinitmod2.c
--- a/elf/tst-dlopen-tlsreinitmod2.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinitmod2.c 2024-08-09 22:03:54.322805189 -0400
@@ -0,0 +1,30 @@
+/* Test that dlopen preserves already accessed TLS (bug 31717), module 2.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+/* Defined in tst-dlopen-tlsreinitmod3.so. This an underlinked symbol
+ dependency. */
+extern void call_tlsreinitmod3 (void);
+
+static void __attribute__ ((constructor))
+tlsreinitmod2_init (void)
+{
+ puts ("info: constructor of tst-dlopen-tlsreinitmod2.so invoked");
+ call_tlsreinitmod3 ();
+}
diff -Nrup a/elf/tst-dlopen-tlsreinitmod3.c b/elf/tst-dlopen-tlsreinitmod3.c
--- a/elf/tst-dlopen-tlsreinitmod3.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-dlopen-tlsreinitmod3.c 2024-08-09 22:07:26.980031781 -0400
@@ -0,0 +1,102 @@
+/* Test that dlopen preserves already accessed TLS (bug 31717), module 3.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/* Used to verify from the main program that the test ran. */
+bool tlsreinitmod3_tested;
+
+/* This TLS variable must not revert back to the initial state after
+ dlopen. */
+static __thread int tlsreinitmod3_state = 1;
+
+/* Set from the ELF constructor during dlopen. */
+static bool tlsreinitmod3_constructed;
+
+/* Second half of test, behind a compiler barrier. The compiler
+ barrier is necessary to prevent carrying over TLS address
+ information from call_tlsreinitmod3 to call_tlsreinitmod3_tail. */
+void call_tlsreinitmod3_tail (void *self) __attribute__ ((weak));
+
+/* Called from tst-dlopen-tlsreinitmod2.so. */
+void
+call_tlsreinitmod3 (void)
+{
+ printf ("info: call_tlsreinitmod3 invoked (state=%d)\n",
+ tlsreinitmod3_state);
+
+ if (tlsreinitmod3_constructed)
+ {
+ puts ("error: call_tlsreinitmod3 called after ELF constructor");
+ fflush (stdout);
+ /* Cannot rely on test harness due to dynamic linking. */
+ _exit (1);
+ }
+
+ tlsreinitmod3_state = 2;
+
+ /* Self-dlopen. This will run the ELF constructor. */
+ void *self = dlopen ("tst-dlopen-tlsreinitmod3.so", RTLD_NOW);
+ if (self == NULL)
+ {
+ printf ("error: dlopen: %s\n", dlerror ());
+ fflush (stdout);
+ /* Cannot rely on test harness due to dynamic linking. */
+ _exit (1);
+ }
+
+ call_tlsreinitmod3_tail (self);
+}
+
+void
+call_tlsreinitmod3_tail (void *self)
+{
+ printf ("info: dlopen returned in tlsreinitmod3 (state=%d)\n",
+ tlsreinitmod3_state);
+
+ if (!tlsreinitmod3_constructed)
+ {
+ puts ("error: dlopen did not call tlsreinitmod3 ELF constructor");
+ fflush (stdout);
+ /* Cannot rely on test harness due to dynamic linking. */
+ _exit (1);
+ }
+
+ if (tlsreinitmod3_state != 2)
+ {
+ puts ("error: TLS state reverted in tlsreinitmod3");
+ fflush (stdout);
+ /* Cannot rely on test harness due to dynamic linking. */
+ _exit (1);
+ }
+
+ dlclose (self);
+
+ /* Signal test completion to the main program. */
+ tlsreinitmod3_tested = true;
+}
+
+static void __attribute__ ((constructor))
+tlsreinitmod3_init (void)
+{
+ puts ("info: constructor of tst-dlopen-tlsreinitmod3.so invoked");
+ tlsreinitmod3_constructed = true;
+}
diff -Nrup a/include/link.h b/include/link.h
--- a/include/link.h 2024-08-09 21:34:37.642710968 -0400
+++ b/include/link.h 2024-08-09 22:09:03.764585534 -0400
@@ -210,6 +210,7 @@ struct link_map
unsigned int l_free_initfini:1; /* Nonzero if l_initfini can be
freed, ie. not allocated with
the dummy malloc in ld.so. */
+ unsigned int l_tls_in_slotinfo:1; /* TLS slotinfo updated in dlopen. */
/* NODELETE status of the map. Only valid for maps of type
lt_loaded. Lazy binding sets l_nodelete_active directly,
diff -Nrup a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
--- a/sysdeps/generic/ldsodefs.h 2024-08-09 21:34:38.164713958 -0400
+++ b/sysdeps/generic/ldsodefs.h 2024-08-13 16:41:52.398600434 -0400
@@ -1218,13 +1218,7 @@ extern void *_dl_open (const char *name,
extern int _dl_scope_free (void *) attribute_hidden;
-/* Add module to slot information data. If DO_ADD is false, only the
- required memory is allocated. Must be called with GL
- (dl_load_tls_lock) acquired. If the function has already been called
- for the link map L with !do_add, then this function will not raise
- an exception, otherwise it is possible that it encounters a memory
- allocation failure. */
-extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add)
+extern bool _dl_add_to_slotinfo (struct link_map *l, bool do_add)
attribute_hidden;
/* Update slot information data for at least the generation of the

View File

@ -0,0 +1,16 @@
Author: Patsy Griffin <patsy@redhat.com>
Fix a typo in naming tst-dlopen-tlsreinit1.out
diff -Nrup a/elf/Makefile b/elf/Makefile
--- a/elf/Makefile 2024-08-16 13:27:00.700921146 -0400
+++ b/elf/Makefile 2024-08-16 13:29:12.951628406 -0400
@@ -2802,7 +2802,7 @@ $(objpfx)tst-dlopen-tlsreinit4: \
# tst-dlopen-tlsreinitmod3.so. The dependency is provided via
# $(objpfx)tst-dlopen-tlsreinitmod1.so.
tst-dlopen-tlsreinitmod2.so-no-z-defs = yes
-$(objpfx)tst-dlopen-tlsreinit.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \
+$(objpfx)tst-dlopen-tlsreinit1.out: $(objpfx)tst-dlopen-tlsreinitmod1.so \
$(objpfx)tst-dlopen-tlsreinitmod2.so $(objpfx)tst-dlopen-tlsreinitmod3.so
# Reuse an audit module which provides ample debug logging.
$(objpfx)tst-dlopen-tlsreinit3.out: $(objpfx)tst-auditmod1.so

View File

@ -0,0 +1,43 @@
commit afe42e935b3ee97bac9a7064157587777259c60e
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Jun 3 10:49:40 2024 +0200
elf: Avoid some free (NULL) calls in _dl_update_slotinfo
This has been confirmed to work around some interposed mallocs. Here
is a discussion of the impact test ust/libc-wrapper/test_libc-wrapper
in lttng-tools:
New TLS usage in libgcc_s.so.1, compatibility impact
<https://inbox.sourceware.org/libc-alpha/8734v1ieke.fsf@oldenburg.str.redhat.com/>
Reportedly, this patch also papers over a similar issue when tcmalloc
2.9.1 is not compiled with -ftls-model=initial-exec. Of course the
goal really should be to compile mallocs with the initial-exec TLS
model, but this commit appears to be a useful interim workaround.
Fixes commit d2123d68275acc0f061e73d5f86ca504e0d5a344 ("elf: Fix slow
tls access after dlopen [BZ #19924]").
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 7b3dd9ab60..670dbc42fc 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -819,7 +819,14 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
dtv entry free it. Note: this is not AS-safe. */
/* XXX Ideally we will at some point create a memory
pool. */
- free (dtv[modid].pointer.to_free);
+ /* Avoid calling free on a null pointer. Some mallocs
+ incorrectly use dynamic TLS, and depending on how the
+ free function was compiled, it could call
+ __tls_get_addr before the null pointer check in the
+ free implementation. Checking here papers over at
+ least some dynamic TLS usage by interposed mallocs. */
+ if (dtv[modid].pointer.to_free != NULL)
+ free (dtv[modid].pointer.to_free);
dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
dtv[modid].pointer.to_free = NULL;

View File

@ -0,0 +1,501 @@
commit 018f0fc3b818d4d1460a4e2384c24802504b1d20
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Jul 1 17:42:04 2024 +0200
elf: Support recursive use of dynamic TLS in interposed malloc
It turns out that quite a few applications use bundled mallocs that
have been built to use global-dynamic TLS (instead of the recommended
initial-exec TLS). The previous workaround from
commit afe42e935b3ee97bac9a7064157587777259c60e ("elf: Avoid some
free (NULL) calls in _dl_update_slotinfo") does not fix all
encountered cases unfortunatelly.
This change avoids the TLS generation update for recursive use
of TLS from a malloc that was called during a TLS update. This
is possible because an interposed malloc has a fixed module ID and
TLS slot. (It cannot be unloaded.) If an initially-loaded module ID
is encountered in __tls_get_addr and the dynamic linker is already
in the middle of a TLS update, use the outdated DTV, thus avoiding
another call into malloc. It's still necessary to update the
DTV to the most recent generation, to get out of the slow path,
which is why the check for recursion is needed.
The bookkeeping is done using a global counter instead of per-thread
flag because TLS access in the dynamic linker is tricky.
All this will go away once the dynamic linker stops using malloc
for TLS, likely as part of a change that pre-allocates all TLS
during pthread_create/dlopen.
Fixes commit d2123d68275acc0f061e73d5f86ca504e0d5a344 ("elf: Fix slow
tls access after dlopen [BZ #19924]").
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Conflicts:
elf/Makefile - tests and module-names differences
elf/Makefile - add $(libdl) for tst-recursive-tlsmallocmod
diff -Nrup a/elf/Makefile b/elf/Makefile
--- a/elf/Makefile 2024-07-28 20:45:16.055243849 -0400
+++ b/elf/Makefile 2024-07-28 20:44:23.446952825 -0400
@@ -391,6 +391,7 @@ tests += \
tst-nodeps2 \
tst-noload \
tst-null-argv \
+ tst-recursive-tls \
tst-relsort1 \
tst-rtld-run-static \
tst-sonamemove-dlopen \
@@ -749,6 +750,23 @@ modules-names = \
tst-nodeps1-mod \
tst-nodeps2-mod \
tst-null-argv-lib \
+ tst-recursive-tlsmallocmod \
+ tst-recursive-tlsmod0 \
+ tst-recursive-tlsmod1 \
+ tst-recursive-tlsmod2 \
+ tst-recursive-tlsmod3 \
+ tst-recursive-tlsmod4 \
+ tst-recursive-tlsmod5 \
+ tst-recursive-tlsmod6 \
+ tst-recursive-tlsmod7 \
+ tst-recursive-tlsmod8 \
+ tst-recursive-tlsmod9 \
+ tst-recursive-tlsmod10 \
+ tst-recursive-tlsmod11 \
+ tst-recursive-tlsmod12 \
+ tst-recursive-tlsmod13 \
+ tst-recursive-tlsmod14 \
+ tst-recursive-tlsmod15 \
tst-relsort1mod1 \
tst-relsort1mod2 \
tst-sonamemove-linkmod1 \
@@ -2358,6 +2376,9 @@ LDLIBS-tst-absolute-zero-lib.so = tst-ab
$(objpfx)tst-absolute-zero-lib.so: $(LDLIBS-tst-absolute-zero-lib.so)
$(objpfx)tst-absolute-zero: $(objpfx)tst-absolute-zero-lib.so
+$(objpfx)tst-recursive-tlsmallocmod: $(libdl)
+$(objpfx)tst-recursive-tlsmallocmod.so: $(libdl)
+
# Both the main program and the DSO for tst-libc_dlvsym need to link
# against libdl.
$(objpfx)tst-libc_dlvsym: $(libdl)
@@ -2746,3 +2767,11 @@ CFLAGS-tst-tlsgap-mod0.c += -mtls-dialec
CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
endif
+
+$(objpfx)tst-recursive-tls: $(objpfx)tst-recursive-tlsmallocmod.so $(libdl)
+# More objects than DTV_SURPLUS, to trigger DTV reallocation.
+$(objpfx)tst-recursive-tls.out: \
+ $(patsubst %,$(objpfx)tst-recursive-tlsmod%.so, \
+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
+$(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c
+ $(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$*
diff -Nrup a/elf/dl-tls.c b/elf/dl-tls.c
--- a/elf/dl-tls.c 2024-07-28 20:45:16.086244021 -0400
+++ b/elf/dl-tls.c 2024-07-28 20:44:23.443952808 -0400
@@ -71,6 +71,31 @@
/* Default for dl_tls_static_optional. */
#define OPTIONAL_TLS 512
+/* Used to count the number of threads currently executing dynamic TLS
+ updates. Used to avoid recursive malloc calls in __tls_get_addr
+ for an interposed malloc that uses global-dynamic TLS (which is not
+ recommended); see _dl_tls_allocate_active checks. This could be a
+ per-thread flag, but would need TLS access in the dynamic linker. */
+unsigned int _dl_tls_threads_in_update;
+
+static inline void
+_dl_tls_allocate_begin (void)
+{
+ atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1);
+}
+
+static inline void
+_dl_tls_allocate_end (void)
+{
+ atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1);
+}
+
+static inline bool
+_dl_tls_allocate_active (void)
+{
+ return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0;
+}
+
/* Compute the static TLS surplus based on the namespace count and the
TLS space that can be used for optimizations. */
static inline int
@@ -426,12 +451,18 @@ _dl_allocate_tls_storage (void)
size += TLS_PRE_TCB_SIZE;
#endif
- /* Perform the allocation. Reserve space for the required alignment
- and the pointer to the original allocation. */
+ /* Reserve space for the required alignment and the pointer to the
+ original allocation. */
size_t alignment = GL(dl_tls_static_align);
+
+ /* Perform the allocation. */
+ _dl_tls_allocate_begin ();
void *allocated = malloc (size + alignment + sizeof (void *));
if (__glibc_unlikely (allocated == NULL))
- return NULL;
+ {
+ _dl_tls_allocate_end ();
+ return NULL;
+ }
/* Perform alignment and allocate the DTV. */
#if TLS_TCB_AT_TP
@@ -467,6 +498,8 @@ _dl_allocate_tls_storage (void)
result = allocate_dtv (result);
if (result == NULL)
free (allocated);
+
+ _dl_tls_allocate_end ();
return result;
}
@@ -484,6 +517,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_m
size_t newsize = max_modid + DTV_SURPLUS;
size_t oldsize = dtv[-1].counter;
+ _dl_tls_allocate_begin ();
if (dtv == GL(dl_initial_dtv))
{
/* This is the initial dtv that was either statically allocated in
@@ -503,6 +537,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_m
if (newp == NULL)
oom ();
}
+ _dl_tls_allocate_end ();
newp[0].counter = newsize;
@@ -677,7 +712,9 @@ allocate_dtv_entry (size_t alignment, si
if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
{
/* The alignment is supported by malloc. */
+ _dl_tls_allocate_begin ();
void *ptr = malloc (size);
+ _dl_tls_allocate_end ();
return (struct dtv_pointer) { ptr, ptr };
}
@@ -689,7 +726,10 @@ allocate_dtv_entry (size_t alignment, si
/* Perform the allocation. This is the pointer we need to free
later. */
+ _dl_tls_allocate_begin ();
void *start = malloc (alloc_size);
+ _dl_tls_allocate_end ();
+
if (start == NULL)
return (struct dtv_pointer) {};
@@ -827,7 +867,11 @@ _dl_update_slotinfo (unsigned long int r
free implementation. Checking here papers over at
least some dynamic TLS usage by interposed mallocs. */
if (dtv[modid].pointer.to_free != NULL)
- free (dtv[modid].pointer.to_free);
+ {
+ _dl_tls_allocate_begin ();
+ free (dtv[modid].pointer.to_free);
+ _dl_tls_allocate_end ();
+ }
dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
dtv[modid].pointer.to_free = NULL;
@@ -957,10 +1001,22 @@ __tls_get_addr (GET_ADDR_ARGS)
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
if (__glibc_unlikely (dtv[0].counter != gen))
{
- /* Update DTV up to the global generation, see CONCURRENCY NOTES
- in _dl_update_slotinfo. */
- gen = atomic_load_acquire (&GL(dl_tls_generation));
- return update_get_addr (GET_ADDR_PARAM, gen);
+ if (_dl_tls_allocate_active ()
+ && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)
+ /* This is a reentrant __tls_get_addr call, but we can
+ satisfy it because it's an initially-loaded module ID.
+ These TLS slotinfo slots do not change, so the
+ out-of-date generation counter does not matter. However,
+ if not in a TLS update, still update_get_addr below, to
+ get off the slow path eventually. */
+ ;
+ else
+ {
+ /* Update DTV up to the global generation, see CONCURRENCY NOTES
+ in _dl_update_slotinfo. */
+ gen = atomic_load_acquire (&GL(dl_tls_generation));
+ return update_get_addr (GET_ADDR_PARAM, gen);
+ }
}
void *p = dtv[GET_ADDR_MODULE].pointer.val;
@@ -970,7 +1026,7 @@ __tls_get_addr (GET_ADDR_ARGS)
return (char *) p + GET_ADDR_OFFSET;
}
-#endif
+#endif /* SHARED */
/* Look up the module's TLS block as for __tls_get_addr,
@@ -1019,6 +1075,25 @@ _dl_tls_get_addr_soft (struct link_map *
return data;
}
+size_t _dl_tls_initial_modid_limit;
+
+void
+_dl_tls_initial_modid_limit_setup (void)
+{
+ struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+ size_t idx;
+ for (idx = 0; idx < listp->len; ++idx)
+ {
+ struct link_map *l = listp->slotinfo[idx].map;
+ if (l == NULL
+ /* The object can be unloaded, so its modid can be
+ reassociated. */
+ || !(l->l_type == lt_executable || l->l_type == lt_library))
+ break;
+ }
+ _dl_tls_initial_modid_limit = idx;
+}
+
void
_dl_add_to_slotinfo (struct link_map *l, bool do_add)
@@ -1051,9 +1126,11 @@ _dl_add_to_slotinfo (struct link_map *l,
the first slot. */
assert (idx == 0);
+ _dl_tls_allocate_begin ();
listp = (struct dtv_slotinfo_list *)
malloc (sizeof (struct dtv_slotinfo_list)
+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+ _dl_tls_allocate_end ();
if (listp == NULL)
{
/* We ran out of memory while resizing the dtv slotinfo list. */
diff -Nrup a/elf/rtld.c b/elf/rtld.c
--- a/elf/rtld.c 2024-07-28 20:45:15.726242029 -0400
+++ b/elf/rtld.c 2024-07-28 20:44:23.443952808 -0400
@@ -797,6 +797,8 @@ init_tls (size_t naudit)
_dl_fatal_printf ("\
cannot allocate TLS data structures for initial thread\n");
+ _dl_tls_initial_modid_limit_setup ();
+
/* Store for detection of the special case by __tls_get_addr
so it knows not to pass this dtv to the normal realloc. */
GL(dl_initial_dtv) = GET_DTV (tcbp);
diff -Nrup a/elf/tst-recursive-tls.c b/elf/tst-recursive-tls.c
--- a/elf/tst-recursive-tls.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-recursive-tls.c 2024-07-28 20:44:23.443952808 -0400
@@ -0,0 +1,60 @@
+/* Test with interposed malloc with dynamic TLS.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+
+/* Defined in tst-recursive-tlsmallocmod.so. */
+extern __thread unsigned int malloc_subsytem_counter;
+
+static int
+do_test (void)
+{
+ /* 16 is large enough to exercise the DTV resizing case. */
+ void *handles[16];
+
+ for (unsigned int i = 0; i < array_length (handles); ++i)
+ {
+ /* Re-use the TLS slot for module 0. */
+ if (i > 0)
+ xdlclose (handles[0]);
+
+ char soname[30];
+ snprintf (soname, sizeof (soname), "tst-recursive-tlsmod%u.so", i);
+ handles[i] = xdlopen (soname, RTLD_NOW);
+
+ if (i > 0)
+ {
+ handles[0] = xdlopen ("tst-recursive-tlsmod0.so", RTLD_NOW);
+ int (*fptr) (void) = xdlsym (handles[0], "get_threadvar_0");
+ /* May trigger TLS storage allocation using malloc. */
+ TEST_COMPARE (fptr (), 0);
+ }
+ }
+
+ for (unsigned int i = 0; i < array_length (handles); ++i)
+ xdlclose (handles[i]);
+
+ printf ("info: malloc subsystem calls: %u\n", malloc_subsytem_counter);
+ TEST_VERIFY (malloc_subsytem_counter > 0);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff -Nrup a/elf/tst-recursive-tlsmallocmod.c b/elf/tst-recursive-tlsmallocmod.c
--- a/elf/tst-recursive-tlsmallocmod.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-recursive-tlsmallocmod.c 2024-07-28 20:44:23.443952808 -0400
@@ -0,0 +1,64 @@
+/* Interposed malloc with dynamic TLS.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <dlfcn.h>
+
+__thread unsigned int malloc_subsytem_counter;
+
+static __typeof (malloc) *malloc_fptr;
+static __typeof (free) *free_fptr;
+static __typeof (calloc) *calloc_fptr;
+static __typeof (realloc) *realloc_fptr;
+
+static void __attribute__ ((constructor))
+init (void)
+{
+ malloc_fptr = dlsym (RTLD_NEXT, "malloc");
+ free_fptr = dlsym (RTLD_NEXT, "free");
+ calloc_fptr = dlsym (RTLD_NEXT, "calloc");
+ realloc_fptr = dlsym (RTLD_NEXT, "realloc");
+}
+
+void *
+malloc (size_t size)
+{
+ ++malloc_subsytem_counter;
+ return malloc_fptr (size);
+}
+
+void
+free (void *ptr)
+{
+ ++malloc_subsytem_counter;
+ return free_fptr (ptr);
+}
+
+void *
+calloc (size_t a, size_t b)
+{
+ ++malloc_subsytem_counter;
+ return calloc_fptr (a, b);
+}
+
+void *
+realloc (void *ptr, size_t size)
+{
+ ++malloc_subsytem_counter;
+ return realloc_fptr (ptr, size);
+}
diff -Nrup a/elf/tst-recursive-tlsmodN.c b/elf/tst-recursive-tlsmodN.c
--- a/elf/tst-recursive-tlsmodN.c 1969-12-31 19:00:00.000000000 -0500
+++ b/elf/tst-recursive-tlsmodN.c 2024-07-28 20:44:23.443952808 -0400
@@ -0,0 +1,28 @@
+/* Test module with global-dynamic TLS. Used to trigger DTV reallocation.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Compiled with VAR and FUNC set via -D. FUNC requires some
+ relocation against TLS variable VAR. */
+
+__thread int VAR;
+
+int
+FUNC (void)
+{
+ return VAR;
+}
diff -Nrup a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
--- a/sysdeps/generic/ldsodefs.h 2024-07-28 20:45:15.698241875 -0400
+++ b/sysdeps/generic/ldsodefs.h 2024-07-28 20:44:23.444952814 -0400
@@ -1235,6 +1235,20 @@ extern struct link_map *_dl_update_sloti
size_t gen)
attribute_hidden;
+/* The last TLS module ID that is initially loaded, plus 1. TLS
+ addresses for modules with IDs lower than that can be obtained from
+ the DTV even if its generation is outdated. */
+extern size_t _dl_tls_initial_modid_limit attribute_hidden attribute_relro;
+
+/* Compute _dl_tls_initial_modid_limit. To be called after initial
+ relocation. */
+void _dl_tls_initial_modid_limit_setup (void) attribute_hidden;
+
+/* Number of threads currently in a TLS update. This is used to
+ detect reentrant __tls_get_addr calls without a per-thread
+ flag. */
+extern unsigned int _dl_tls_threads_in_update attribute_hidden;
+
/* Look up the module's TLS block as for __tls_get_addr,
but never touch anything. Return null if it's not allocated yet. */
extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden;
diff -Nrup a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
--- a/sysdeps/x86_64/dl-tls.c 2024-07-28 20:45:15.699241880 -0400
+++ b/sysdeps/x86_64/dl-tls.c 2024-07-28 20:44:23.444952814 -0400
@@ -41,7 +41,10 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
dtv_t *dtv = THREAD_DTV ();
size_t gen = atomic_load_acquire (&GL(dl_tls_generation));
- if (__glibc_unlikely (dtv[0].counter != gen))
+ if (__glibc_unlikely (dtv[0].counter != gen)
+ /* See comment in __tls_get_addr in elf/dl-tls.c. */
+ && !(_dl_tls_allocate_active ()
+ && GET_ADDR_MODULE < _dl_tls_initial_modid_limit))
return update_get_addr (GET_ADDR_PARAM, gen);
return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);

View File

@ -0,0 +1,51 @@
commit 9b7651410375ec8848a1944992d663d514db4ba7
Author: Stefan Liebler <stli@linux.ibm.com>
Date: Thu Jul 11 11:28:53 2024 +0200
s390x: Fix segfault in wcsncmp [BZ #31934]
The z13/vector-optimized wcsncmp implementation segfaults if n=1
and there is only one character (equal on both strings) before
the page end. Then it loads and compares one character and misses
to check n again. The following load fails.
This patch removes the extra load and compare of the first character
and just start with the loop which uses vector-load-to-block-boundary.
This code-path also checks n.
With this patch both tests are passing:
- the simplified one mentioned in the bugzilla 31934
- the full one in Florian Weimer's patch:
"manual: Document a GNU extension for strncmp/wcsncmp"
(https://patchwork.sourceware.org/project/glibc/patch/874j9eml6y.fsf@oldenburg.str.redhat.com/):
On s390x-linux-gnu (z16), the new wcsncmp test fails due to bug 31934.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/sysdeps/s390/wcsncmp-vx.S b/sysdeps/s390/wcsncmp-vx.S
index bf6dfa6bc2..8b081567a2 100644
--- a/sysdeps/s390/wcsncmp-vx.S
+++ b/sysdeps/s390/wcsncmp-vx.S
@@ -59,14 +59,7 @@ ENTRY(WCSNCMP_Z13)
sllg %r4,%r4,2 /* Convert character-count to byte-count. */
locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */
- /* Check first character without vector load. */
- lghi %r5,4 /* current_len = 4 bytes. */
- /* Check s1/2[0]. */
- lt %r0,0(%r2)
- l %r1,0(%r3)
- je .Lend_cmp_one_char
- crjne %r0,%r1,.Lend_cmp_one_char
-
+ lghi %r5,0 /* current_len = 0 bytes. */
.Lloop:
vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */
vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */
@@ -167,7 +160,6 @@ ENTRY(WCSNCMP_Z13)
srl %r4,2 /* And convert it to character-index. */
vlgvf %r0,%v16,0(%r4) /* Load character-values. */
vlgvf %r1,%v17,0(%r4)
-.Lend_cmp_one_char:
cr %r0,%r1
je .Lend_equal
lghi %r2,1

View File

@ -0,0 +1,248 @@
commit 54252394c25ddf0062e288d4a6ab7a885f8ae009
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Jun 27 16:26:56 2024 +0200
Enhanced test coverage for strncmp, wcsncmp
Add string/test-strncmp-nonarray and
wcsmbs/test-wcsncmp-nonarray.
This is the test that uncovered bug 31934. Test run time
is more than one minute on a fairly current system, so turn
these into xtests that do not run automatically.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
diff -Nrup a/string/Makefile b/string/Makefile
--- a/string/Makefile 2024-09-17 14:28:06.030433788 -0400
+++ b/string/Makefile 2024-09-17 14:33:56.846338606 -0400
@@ -62,7 +62,11 @@ tests := tester inl-tester noinl-tester
test-endian-sign-conversion
# This test allocates a lot of memory and can run for a long time.
-xtests = tst-strcoll-overflow
+xtests += tst-strcoll-overflow
+
+# This test runs for a long time.
+xtests += test-strncmp-nonarray
+
# This test needs libdl.
ifeq (yes,$(build-shared))
diff -Nrup a/string/test-Xncmp-nonarray.c b/string/test-Xncmp-nonarray.c
--- a/string/test-Xncmp-nonarray.c 1969-12-31 19:00:00.000000000 -0500
+++ b/string/test-Xncmp-nonarray.c 2024-09-17 14:27:27.538224809 -0400
@@ -0,0 +1,183 @@
+/* Test non-array inputs to string comparison functions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This skeleton file is included from string/test-strncmp-nonarray.c and
+ wcsmbs/test-wcsncmp-nonarray.c to test that reading of the arrays stops
+ at the first null character.
+
+ TEST_IDENTIFIER must be the test function identifier. TEST_NAME is
+ the same as a string.
+
+ CHAR must be defined as the character type. */
+
+#include <array_length.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/next_to_fault.h>
+#include <support/test-driver.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+/* Much shorter than test-Xnlen-nonarray.c because of deeply nested loops. */
+enum { buffer_length = 80 };
+
+/* The test buffer layout follows what is described test-Xnlen-nonarray.c,
+ except that there two buffers, left and right. The variables
+ a_count, zero_count, start_offset are all duplicated. */
+
+/* Return the maximum string length for a string that starts at
+ start_offset. */
+static int
+string_length (int a_count, int start_offset)
+{
+ if (start_offset == buffer_length || start_offset >= a_count)
+ return 0;
+ else
+ return a_count - start_offset;
+}
+
+/* This is the valid maximum length argument computation for
+ strnlen/wcsnlen. See text-Xnlen-nonarray.c. */
+static int
+maximum_length (int start_offset, int zero_count)
+{
+ if (start_offset == buffer_length)
+ return 0;
+ else if (zero_count > 0)
+ /* Effectively unbounded, but we need to stop fairly low,
+ otherwise testing takes too long. */
+ return buffer_length + 32;
+ else
+ return buffer_length - start_offset;
+}
+
+typedef __typeof (TEST_IDENTIFIER) *proto_t;
+
+#define TEST_MAIN
+#include "test-string.h"
+
+IMPL (TEST_IDENTIFIER, 1)
+
+static int
+test_main (void)
+{
+ TEST_VERIFY_EXIT (sysconf (_SC_PAGESIZE) >= buffer_length);
+ test_init ();
+
+ struct support_next_to_fault left_ntf
+ = support_next_to_fault_allocate (buffer_length * sizeof (CHAR));
+ CHAR *left_buffer = (CHAR *) left_ntf.buffer;
+ struct support_next_to_fault right_ntf
+ = support_next_to_fault_allocate (buffer_length * sizeof (CHAR));
+ CHAR *right_buffer = (CHAR *) right_ntf.buffer;
+
+ FOR_EACH_IMPL (impl, 0)
+ {
+ printf ("info: testing %s\n", impl->name);
+ for (size_t i = 0; i < buffer_length; ++i)
+ left_buffer[i] = 'A';
+
+ for (int left_zero_count = 0; left_zero_count <= buffer_length;
+ ++left_zero_count)
+ {
+ if (left_zero_count > 0)
+ left_buffer[buffer_length - left_zero_count] = 0;
+ int left_a_count = buffer_length - left_zero_count;
+ for (size_t i = 0; i < buffer_length; ++i)
+ right_buffer[i] = 'A';
+ for (int right_zero_count = 0; right_zero_count <= buffer_length;
+ ++right_zero_count)
+ {
+ if (right_zero_count > 0)
+ right_buffer[buffer_length - right_zero_count] = 0;
+ int right_a_count = buffer_length - right_zero_count;
+ for (int left_start_offset = 0;
+ left_start_offset <= buffer_length;
+ ++left_start_offset)
+ {
+ CHAR *left_start_pointer = left_buffer + left_start_offset;
+ int left_maxlen
+ = maximum_length (left_start_offset, left_zero_count);
+ int left_length
+ = string_length (left_a_count, left_start_offset);
+ for (int right_start_offset = 0;
+ right_start_offset <= buffer_length;
+ ++right_start_offset)
+ {
+ CHAR *right_start_pointer
+ = right_buffer + right_start_offset;
+ int right_maxlen
+ = maximum_length (right_start_offset, right_zero_count);
+ int right_length
+ = string_length (right_a_count, right_start_offset);
+
+ /* Maximum length is modelled after strnlen/wcsnlen,
+ and must be valid for both pointer arguments at
+ the same time. */
+ int maxlen = MIN (left_maxlen, right_maxlen);
+
+ for (int length_argument = 0; length_argument <= maxlen;
+ ++length_argument)
+ {
+ if (test_verbose)
+ {
+ printf ("left: zero_count=%d"
+ " a_count=%d start_offset=%d\n",
+ left_zero_count, left_a_count,
+ left_start_offset);
+ printf ("right: zero_count=%d"
+ " a_count=%d start_offset=%d\n",
+ right_zero_count, right_a_count,
+ right_start_offset);
+ printf ("length argument: %d\n",
+ length_argument);
+ }
+
+ /* Effective lengths bounded by length argument.
+ The effective length determines the
+ outcome of the comparison. */
+ int left_effective
+ = MIN (left_length, length_argument);
+ int right_effective
+ = MIN (right_length, length_argument);
+ if (left_effective == right_effective)
+ TEST_COMPARE (CALL (impl,
+ left_start_pointer,
+ right_start_pointer,
+ length_argument), 0);
+ else if (left_effective < right_effective)
+ TEST_COMPARE (CALL (impl,
+ left_start_pointer,
+ right_start_pointer,
+ length_argument) < 0, 1);
+ else
+ TEST_COMPARE (CALL (impl,
+ left_start_pointer,
+ right_start_pointer,
+ length_argument) > 0, 1);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff -Nrup a/string/test-strncmp-nonarray.c b/string/test-strncmp-nonarray.c
--- a/string/test-strncmp-nonarray.c 1969-12-31 19:00:00.000000000 -0500
+++ b/string/test-strncmp-nonarray.c 2024-09-17 14:27:27.538224809 -0400
@@ -0,0 +1,4 @@
+#define TEST_IDENTIFIER strncmp
+#define TEST_NAME "strncmp"
+typedef char CHAR;
+#include "test-Xncmp-nonarray.c"
diff -Nrup a/wcsmbs/Makefile b/wcsmbs/Makefile
--- a/wcsmbs/Makefile 2018-08-01 01:10:47.000000000 -0400
+++ b/wcsmbs/Makefile 2024-09-17 14:31:04.725404041 -0400
@@ -53,6 +53,9 @@ tests := tst-wcstof wcsmbs-tst1 tst-wcsn
tst-wcstod-round test-char-types tst-fgetwc-after-eof \
tst-wcstod-nan-sign $(addprefix test-,$(strop-tests))
+# This test runs for a long time.
+xtests += test-wcsncmp-nonarray
+
include ../Rules
ifeq ($(run-built-tests),yes)
diff -Nrup a/wcsmbs/test-wcsncmp-nonarray.c b/wcsmbs/test-wcsncmp-nonarray.c
--- a/wcsmbs/test-wcsncmp-nonarray.c 1969-12-31 19:00:00.000000000 -0500
+++ b/wcsmbs/test-wcsncmp-nonarray.c 2024-09-17 14:27:27.539224815 -0400
@@ -0,0 +1,5 @@
+#include <wchar.h>
+#define TEST_IDENTIFIER wcsncmp
+#define TEST_NAME "wcsncmp"
+typedef wchar_t CHAR;
+#include "../string/test-Xncmp-nonarray.c"

View File

@ -0,0 +1,34 @@
commit 56e098118a31753a9f755948bb5a47bc7111e214
Author: Andreas Schwab <schwab@suse.de>
Date: Thu Aug 15 12:14:35 2019 +0200
Update i386 libm-test-ulps
Conflicts: ChangeLog removed
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index e83bae71b4..2232296fe0 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -1158,8 +1158,8 @@ float128: 4
idouble: 4
ifloat: 5
ifloat128: 4
-ildouble: 7
-ldouble: 7
+ildouble: 8
+ldouble: 8
Function: Imaginary part of "clog10_upward":
double: 2
@@ -2222,8 +2222,8 @@ float128: 8
idouble: 5
ifloat: 5
ifloat128: 8
-ildouble: 5
-ldouble: 5
+ildouble: 6
+ldouble: 6
Function: "log":
double: 1

View File

@ -0,0 +1,26 @@
Author: Patsy Griffin <patsy@redhat.com>
i386: update ulps
This change fixes 3 test failures:
math/test-ildouble-lgamma
math/test-ldouble-finite-lgamma
math/test-ldouble-lgamma
This is a downstream only patch as upstream removed entries for
i{float,double,ldouble} by commit: 1c15464ca05f36db5c582856d3770d5e8bde9d61.
The ldouble change is already upstream.
--- a/sysdeps/i386/fpu/libm-test-ulps 2024-08-06 15:51:18.182808710 -0400
+++ b/sysdeps/i386/fpu/libm-test-ulps 2024-08-06 18:01:50.579719841 -0400
@@ -2030,8 +2030,8 @@ double: 5
float: 5
idouble: 5
ifloat: 5
-ildouble: 5
-ldouble: 5
+ildouble: 6
+ldouble: 6
Function: "hypot":
double: 1

View File

@ -0,0 +1,374 @@
commit 03e1378f94173fc192a81e421457198f7b8a34a0
Author: Alex Butler <Alex.Butler@arm.com>
Date: Tue Jun 16 12:44:24 2020 +0000
aarch64: MTE compatible strncmp
Add support for MTE to strncmp. Regression tested with xcheck and benchmarked
with glibc's benchtests on the Cortex-A53, Cortex-A72, and Neoverse N1.
The existing implementation assumes that any access to the pages in which the
string resides is safe. This assumption is not true when MTE is enabled. This
patch updates the algorithm to ensure that accesses remain within the bounds
of an MTE tag (16-byte chunks) and improves overall performance.
Co-authored-by: Branislav Rankov <branislav.rankov@arm.com>
Co-authored-by: Wilco Dijkstra <wilco.dijkstra@arm.com>
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index c5141fab8a..ba2563490e 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -25,7 +25,6 @@
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
/* Parameters and result. */
#define src1 x0
@@ -46,15 +45,31 @@
#define tmp3 x10
#define zeroones x11
#define pos x12
-#define limit_wd x13
-#define mask x14
-#define endloop x15
+#define mask x13
+#define endloop x14
#define count mask
+#define offset pos
+#define neg_offset x15
-ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
- DELOUSE (0)
- DELOUSE (1)
- DELOUSE (2)
+/* Define endian dependent shift operations.
+ On big-endian early bytes are at MSB and on little-endian LSB.
+ LS_FW means shifting towards early bytes.
+ LS_BK means shifting towards later bytes.
+ */
+#ifdef __AARCH64EB__
+#define LS_FW lsl
+#define LS_BK lsr
+#else
+#define LS_FW lsr
+#define LS_BK lsl
+#endif
+
+ .text
+ .p2align 6
+ .rep 9
+ nop /* Pad so that the loop below fits a cache line. */
+ .endr
+ENTRY_ALIGN (strncmp, 0)
cbz limit, L(ret0)
eor tmp1, src1, src2
mov zeroones, #REP8_01
@@ -62,9 +77,6 @@ ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
and count, src1, #7
b.ne L(misaligned8)
cbnz count, L(mutual_align)
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
@@ -74,56 +86,52 @@ L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
L(start_realigned):
- subs limit_wd, limit_wd, #1
+ subs limit, limit, #8
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
+ csinv endloop, diff, xzr, hi /* Last Dword or differences. */
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
ccmp endloop, #0, #0, eq
b.eq L(loop_aligned)
/* End of performance-critical section -- one 64B cache line. */
- /* Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, L(not_limit)
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq L(not_limit)
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- /* Make sure that the NUL byte is marked in the syndrome. */
- orr has_nul, has_nul, mask
-
-L(not_limit):
+L(full_check):
+#ifndef __AARCH64EB__
orr syndrome, diff, has_nul
-
-#ifndef __AARCH64EB__
+ add limit, limit, 8 /* Rewind limit to before last subs. */
+L(syndrome_check):
+ /* Limit was reached. Check if the NUL byte or the difference
+ is before the limit. */
rev syndrome, syndrome
rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
clz pos, syndrome
rev data2, data2
lsl data1, data1, pos
+ cmp limit, pos, lsr #3
lsl data2, data2, pos
/* But we need to zero-extend (char is unsigned) the value and then
perform a signed 32-bit subtraction. */
lsr data1, data1, #56
sub result, data1, data2, lsr #56
- RET
+ csel result, result, xzr, hi
+ ret
#else
+ /* Not reached the limit, must have found the end or a diff. */
+ tbz limit, #63, L(not_limit)
+ add tmp1, limit, 8
+ cbz limit, L(not_limit)
+
+ lsl limit, tmp1, #3 /* Bits -> bytes. */
+ mov mask, #~0
+ lsr mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ /* Make sure that the NUL byte is marked in the syndrome. */
+ orr has_nul, has_nul, mask
+
+L(not_limit):
/* For big-endian we cannot use the trick with the syndrome value
as carry-propagation can corrupt the upper bits if the trailing
bytes in the string contain 0x01. */
@@ -134,7 +142,7 @@ L(not_limit):
cmp data1, data2
cset result, ne
cneg result, result, lo
- RET
+ ret
1:
/* Re-compute the NUL-byte detection, using a byte-reversed value. */
rev tmp3, data1
@@ -144,17 +152,18 @@ L(not_limit):
rev has_nul, has_nul
orr syndrome, diff, has_nul
clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
+ /* The most-significant-non-zero bit of the syndrome marks either the
+ first bit that is different, or the top bit of the first zero byte.
Shifting left now will bring the critical information into the
top bits. */
+L(end_quick):
lsl data1, data1, pos
lsl data2, data2, pos
/* But we need to zero-extend (char is unsigned) the value and then
perform a signed 32-bit subtraction. */
lsr data1, data1, #56
sub result, data1, data2, lsr #56
- RET
+ ret
#endif
L(mutual_align):
@@ -169,22 +178,12 @@ L(mutual_align):
neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
ldr data2, [src2], #8
mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#endif
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, count
- add tmp3, tmp3, count
+ LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
+ /* Adjust the limit and ensure it doesn't overflow. */
+ adds limit, limit, count
+ csinv limit, limit, xzr, lo
orr data1, data1, tmp2
orr data2, data2, tmp2
- add limit_wd, limit_wd, tmp3, lsr #3
b L(start_realigned)
.p2align 6
@@ -203,18 +202,15 @@ L(byte_loop):
b.eq L(byte_loop)
L(done):
sub result, data1, data2
- RET
-
+ ret
/* Align the SRC1 to a dword by doing a bytewise compare and then do
the dword loop. */
L(try_misaligned_words):
- lsr limit_wd, limit, #3
- cbz count, L(do_misaligned)
+ cbz count, L(src1_aligned)
neg count, count
and count, count, #7
sub limit, limit, count
- lsr limit_wd, limit, #3
L(page_end_loop):
ldrb data1w, [src1], #1
@@ -225,48 +221,98 @@ L(page_end_loop):
subs count, count, #1
b.hi L(page_end_loop)
-L(do_misaligned):
- /* Prepare ourselves for the next page crossing. Unlike the aligned
- loop, we fetch 1 less dword because we risk crossing bounds on
- SRC2. */
- mov count, #8
- subs limit_wd, limit_wd, #1
- b.lo L(done_loop)
+ /* The following diagram explains the comparison of misaligned strings.
+ The bytes are shown in natural order. For little-endian, it is
+ reversed in the registers. The "x" bytes are before the string.
+ The "|" separates data that is loaded at one time.
+ src1 | a a a a a a a a | b b b c c c c c | . . .
+ src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
+ After shifting in each step, the data looks like this:
+ STEP_A STEP_B STEP_C
+ data1 a a a a a a a a b b b c c c c c b b b c c c c c
+ data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
+ The bytes with "0" are eliminated from the syndrome via mask.
+ Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
+ time from SRC2. The comparison happens in 3 steps. After each step
+ the loop can exit, or read from SRC1 or SRC2. */
+L(src1_aligned):
+ /* Calculate offset from 8 byte alignment to string start in bits. No
+ need to mask offset since shifts are ignoring upper bits. */
+ lsl offset, src2, #3
+ bic src2, src2, #0xf
+ mov mask, -1
+ neg neg_offset, offset
+ ldr data1, [src1], #8
+ ldp tmp1, tmp2, [src2], #16
+ LS_BK mask, mask, neg_offset
+ and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
+ /* Skip the first compare if data in tmp1 is irrelevant. */
+ tbnz offset, 6, L(misaligned_mid_loop)
+
L(loop_misaligned):
- and tmp2, src2, #0xff8
- eor tmp2, tmp2, #0xff8
- cbz tmp2, L(page_end_loop)
+ /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
+ LS_FW data2, tmp1, offset
+ LS_BK tmp1, tmp2, neg_offset
+ subs limit, limit, #8
+ orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
+ sub has_nul, data1, zeroones
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr tmp3, data1, #REP8_7f
+ csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
+ orr tmp3, endloop, has_nul
+ cbnz tmp3, L(full_check)
ldr data1, [src1], #8
- ldr data2, [src2], #8
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
- subs limit_wd, limit_wd, #1
- b.pl L(loop_misaligned)
+L(misaligned_mid_loop):
+ /* STEP_B: Compare first part of data1 to second part of tmp2. */
+ LS_FW data2, tmp2, offset
+#ifdef __AARCH64EB__
+ /* For big-endian we do a byte reverse to avoid carry-propagation
+ problem described above. This way we can reuse the has_nul in the
+ next step and also use syndrome value trick at the end. */
+ rev tmp3, data1
+ #define data1_fixed tmp3
+#else
+ #define data1_fixed data1
+#endif
+ sub has_nul, data1_fixed, zeroones
+ orr tmp3, data1_fixed, #REP8_7f
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
+#ifdef __AARCH64EB__
+ rev has_nul, has_nul
+#endif
+ cmp limit, neg_offset, lsr #3
+ orr syndrome, diff, has_nul
+ bic syndrome, syndrome, mask /* Ignore later bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
-L(done_loop):
- /* We found a difference or a NULL before the limit was reached. */
- and limit, limit, #7
- cbz limit, L(not_limit)
- /* Read the last word. */
- sub src1, src1, 8
- sub src2, src2, 8
- ldr data1, [src1, limit]
- ldr data2, [src2, limit]
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
+ /* STEP_C: Compare second part of data1 to first part of tmp1. */
+ ldp tmp1, tmp2, [src2], #16
+ cmp limit, #8
+ LS_BK data2, tmp1, neg_offset
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ and syndrome, syndrome, mask /* Ignore earlier bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
+
+ ldr data1, [src1], #8
+ sub limit, limit, #8
+ b L(loop_misaligned)
+
+#ifdef __AARCH64EB__
+L(syndrome_check):
+ clz pos, syndrome
+ cmp pos, limit, lsl #3
+ b.lo L(end_quick)
+#endif
L(ret0):
mov result, #0
- RET
+ ret
END (strncmp)
libc_hidden_builtin_def (strncmp)

View File

@ -0,0 +1,335 @@
commit 2f47198b04a02097f438ecb765306fa39568a006
Author: Rajalakshmi Srinivasaraghavan <rajis@linux.ibm.com>
Date: Fri Dec 2 14:26:41 2022 -0600
powerpc64: Remove old strncmp optimization
This patch cleans up the power4 strncmp optimization for powerpc64 which
is unlikely to be used anywhere.
Tested on ppc64le with and without --disable-multi-arch flag.
Reviewed-by: Paul E. Murphy <murphyp@linux.ibm.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
sysdeps/powerpc/powerpc64/power4/strncmp.S
(copyright year changes upstream)
sysdeps/powerpc/powerpc64/multiarch/strncmp.c - fix indentation
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 626845a43c4e8ded..5b20dab108de14ab 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -12,8 +12,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
strnlen-power8 strnlen-power7 strnlen-ppc64 \
strcasecmp-power7 strcasecmp_l-power7 \
strncase-power7 strncase_l-power7 \
- strncmp-power8 strncmp-power7 \
- strncmp-power4 strncmp-ppc64 \
+ strncmp-power8 strncmp-power7 strncmp-ppc64 \
strchr-power8 strchr-power7 strchr-ppc64 \
strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \
diff -Nrup a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 2024-11-21 16:22:27.914201581 -0500
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 2024-11-21 16:25:31.721479517 -0500
@@ -154,8 +154,6 @@ __libc_ifunc_impl_list (const char *name
__strncmp_power8)
IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX,
__strncmp_power7)
- IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4,
- __strncmp_power4)
IFUNC_IMPL_ADD (array, i, strncmp, 1,
__strncmp_ppc))
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
deleted file mode 100644
index 6ead3b6374749e6a..0000000000000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define STRNCMP __strncmp_power4
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/power4/strncmp.S>
diff -Nrup a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c 2024-11-21 19:52:23.420623986 -0500
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c 2024-11-21 19:52:02.088475579 -0500
@@ -26,7 +26,6 @@
# include "init-arch.h"
extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
-extern __typeof (strncmp) __strncmp_power4 attribute_hidden;
extern __typeof (strncmp) __strncmp_power7 attribute_hidden;
extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
# ifdef __LITTLE_ENDIAN__
@@ -41,11 +40,9 @@ libc_ifunc_redirected (__redirect_strncm
(hwcap2 & PPC_FEATURE2_ARCH_3_00)
? __strncmp_power9 :
# endif
- (hwcap2 & PPC_FEATURE2_ARCH_2_07)
- ? __strncmp_power8
- : (hwcap & PPC_FEATURE_HAS_VSX)
- ? __strncmp_power7
- : (hwcap & PPC_FEATURE_POWER4)
- ? __strncmp_power4
- : __strncmp_ppc);
+ (hwcap2 & PPC_FEATURE2_ARCH_2_07)
+ ? __strncmp_power8
+ : (hwcap & PPC_FEATURE_HAS_VSX)
+ ? __strncmp_power7
+ : __strncmp_ppc);
#endif
diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S
deleted file mode 100644
index cf5f4e5fb8fb2522..0000000000000000
--- a/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/* Optimized strcmp implementation for PowerPC64.
- Copyright (C) 2003-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* See strlen.s for comments on how the end-of-string testing works. */
-
-/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
-
-ENTRY_TOCLESS (STRNCMP, 4)
- CALL_MCOUNT 3
-
-#define rTMP2 r0
-#define rRTN r3
-#define rSTR1 r3 /* first string arg */
-#define rSTR2 r4 /* second string arg */
-#define rN r5 /* max string length */
-#define rWORD1 r6 /* current word in s1 */
-#define rWORD2 r7 /* current word in s2 */
-#define rWORD3 r10
-#define rWORD4 r11
-#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
-#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
-#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
-#define rBITDIF r11 /* bits that differ in s1 & s2 words */
-#define rTMP r12
-
- dcbt 0,rSTR1
- or rTMP, rSTR2, rSTR1
- lis r7F7F, 0x7f7f
- dcbt 0,rSTR2
- clrldi. rTMP, rTMP, 61
- cmpldi cr1, rN, 0
- lis rFEFE, -0x101
- bne L(unaligned)
-/* We are doubleword aligned so set up for two loops. first a double word
- loop, then fall into the byte loop if any residual. */
- srdi. rTMP, rN, 3
- clrldi rN, rN, 61
- addi rFEFE, rFEFE, -0x101
- addi r7F7F, r7F7F, 0x7f7f
- cmpldi cr1, rN, 0
- beq L(unaligned)
-
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
- ld rWORD1, 0(rSTR1)
- ld rWORD2, 0(rSTR2)
- sldi rTMP, rFEFE, 32
- insrdi r7F7F, r7F7F, 32, 0
- add rFEFE, rFEFE, rTMP
- b L(g1)
-
-L(g0):
- ldu rWORD1, 8(rSTR1)
- bne- cr1, L(different)
- ldu rWORD2, 8(rSTR2)
-L(g1): add rTMP, rFEFE, rWORD1
- nor rNEG, r7F7F, rWORD1
- bdz L(tail)
- and. rTMP, rTMP, rNEG
- cmpd cr1, rWORD1, rWORD2
- beq+ L(g0)
-
-/* OK. We've hit the end of the string. We need to be careful that
- we don't compare two strings as different because of gunk beyond
- the end of the strings... */
-
-#ifdef __LITTLE_ENDIAN__
-L(endstring):
- addi rTMP2, rTMP, -1
- beq cr1, L(equal)
- andc rTMP2, rTMP2, rTMP
- rldimi rTMP2, rTMP2, 1, 0
- and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
- and rWORD1, rWORD1, rTMP2
- cmpd cr1, rWORD1, rWORD2
- beq cr1, L(equal)
- xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
- neg rNEG, rBITDIF
- and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
- cntlzd rNEG, rNEG /* bitcount of the bit. */
- andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
- sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
- sld rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- sradi rRTN, rRTN, 63 /* must return an int. */
- ori rRTN, rRTN, 1
- blr
-L(equal):
- li rRTN, 0
- blr
-
-L(different):
- ld rWORD1, -8(rSTR1)
- xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
- neg rNEG, rBITDIF
- and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
- cntlzd rNEG, rNEG /* bitcount of the bit. */
- andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
- sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
- sld rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- sradi rRTN, rRTN, 63
- ori rRTN, rRTN, 1
- blr
-L(highbit):
- sradi rRTN, rWORD2, 63
- ori rRTN, rRTN, 1
- blr
-
-#else
-L(endstring):
- and rTMP, r7F7F, rWORD1
- beq cr1, L(equal)
- add rTMP, rTMP, r7F7F
- xor. rBITDIF, rWORD1, rWORD2
- andc rNEG, rNEG, rTMP
- blt- L(highbit)
- cntlzd rBITDIF, rBITDIF
- cntlzd rNEG, rNEG
- addi rNEG, rNEG, 7
- cmpd cr1, rNEG, rBITDIF
- sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- sradi rRTN, rRTN, 63 /* must return an int. */
- ori rRTN, rRTN, 1
- blr
-L(equal):
- li rRTN, 0
- blr
-
-L(different):
- ld rWORD1, -8(rSTR1)
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- sradi rRTN, rRTN, 63
- ori rRTN, rRTN, 1
- blr
-L(highbit):
- sradi rRTN, rWORD2, 63
- ori rRTN, rRTN, 1
- blr
-#endif
-
-/* Oh well. In this case, we just do a byte-by-byte comparison. */
- .align 4
-L(tail):
- and. rTMP, rTMP, rNEG
- cmpd cr1, rWORD1, rWORD2
- bne- L(endstring)
- addi rSTR1, rSTR1, 8
- bne- cr1, L(different)
- addi rSTR2, rSTR2, 8
- cmpldi cr1, rN, 0
-L(unaligned):
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
- ble cr1, L(ux)
-L(uz):
- lbz rWORD1, 0(rSTR1)
- lbz rWORD2, 0(rSTR2)
- .align 4
-L(u1):
- cmpdi cr1, rWORD1, 0
- bdz L(u4)
- cmpd rWORD1, rWORD2
- beq- cr1, L(u4)
- bne- L(u4)
- lbzu rWORD3, 1(rSTR1)
- lbzu rWORD4, 1(rSTR2)
- cmpdi cr1, rWORD3, 0
- bdz L(u3)
- cmpd rWORD3, rWORD4
- beq- cr1, L(u3)
- bne- L(u3)
- lbzu rWORD1, 1(rSTR1)
- lbzu rWORD2, 1(rSTR2)
- cmpdi cr1, rWORD1, 0
- bdz L(u4)
- cmpd rWORD1, rWORD2
- beq- cr1, L(u4)
- bne- L(u4)
- lbzu rWORD3, 1(rSTR1)
- lbzu rWORD4, 1(rSTR2)
- cmpdi cr1, rWORD3, 0
- bdz L(u3)
- cmpd rWORD3, rWORD4
- beq- cr1, L(u3)
- bne- L(u3)
- lbzu rWORD1, 1(rSTR1)
- lbzu rWORD2, 1(rSTR2)
- b L(u1)
-
-L(u3): sub rRTN, rWORD3, rWORD4
- blr
-L(u4): sub rRTN, rWORD1, rWORD2
- blr
-L(ux):
- li rRTN, 0
- blr
-END (STRNCMP)
-libc_hidden_builtin_def (strncmp)

View File

@ -0,0 +1,595 @@
commit 92fdb11ae7aa1ab6b18622670ea702205cd6fdc5
Author: Adhemerval Zanella Netto <adhemerval.zanella@linaro.org>
Date: Tue Feb 28 14:23:59 2023 -0300
powerpc: Remove powerpc64 strncmp variants
The default, and power7 implementation just adds word aligned
access when inputs have the same aligment. The unaligned case
is still done by byte operations.
This is already covered by the generic implementation, which also add
the unaligned input optimization.
Checked on powerpc64-linux-gnu built without multi-arch for powerpc64,
power7, power8, and power9 (build for le).
Reviewed-by: Rajalakshmi Srinivasaraghavan <rajis@linux.ibm.com>
Conflicts:
sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S
sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S
sysdeps/powerpc/powerpc64/power7/strncmp.S
sysdeps/powerpc/powerpc64/strncmp.S
(copyright year changes upstream)
sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c - PPC_FEATURE difference
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 5b20dab108de14ab..0ee7ce39d6470d80 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -12,7 +12,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
strnlen-power8 strnlen-power7 strnlen-ppc64 \
strcasecmp-power7 strcasecmp_l-power7 \
strncase-power7 strncase_l-power7 \
- strncmp-power8 strncmp-power7 strncmp-ppc64 \
+ strncmp-power8 strncmp-ppc64 \
strchr-power8 strchr-power7 strchr-ppc64 \
strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 914e7d5e28a98b5d..2c84d287ee76a7ea 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -152,8 +152,6 @@ __libc_ifunc_impl_list (const char *name
#endif
IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_2_07,
__strncmp_power8)
- IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX,
- __strncmp_power7)
IFUNC_IMPL_ADD (array, i, strncmp, 1,
__strncmp_ppc))
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S
deleted file mode 100644
index 8282ff076c9e00ce..0000000000000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define STRNCMP __strncmp_power7
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/power7/strncmp.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S
deleted file mode 100644
index c6f325650c9ace3b..0000000000000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if defined SHARED && IS_IN (libc)
-# define STRNCMP __strncmp_ppc
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strncmp; __GI_strncmp = __strncmp_ppc
-#endif
-
-#include <sysdeps/powerpc/powerpc64/strncmp.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.c
new file mode 100644
index 0000000000000000..09cc009a913ed169
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.c
@@ -0,0 +1,7 @@
+#if defined SHARED && IS_IN (libc)
+# define STRNCMP __strncmp_ppc
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strncmp_ppc, __GI_strncmp, __strncmp_ppc);
+#endif
+#include <string/strncmp.c>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
index 275a558e4afa7d61..df2c0707a919ad79 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
@@ -26,7 +26,6 @@
# include "init-arch.h"
extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
-extern __typeof (strncmp) __strncmp_power7 attribute_hidden;
extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
# ifdef __LITTLE_ENDIAN__
extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
@@ -42,7 +41,5 @@ libc_ifunc_redirected (__redirect_strncm
# endif
(hwcap2 & PPC_FEATURE2_ARCH_2_07)
? __strncmp_power8
- : (hwcap & PPC_FEATURE_HAS_VSX)
- ? __strncmp_power7
- : __strncmp_ppc);
+ : __strncmp_ppc);
#endif
diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S
deleted file mode 100644
index d91aeb6077a558f2..0000000000000000
--- a/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Optimized strcmp implementation for POWER7/PowerPC64.
- Copyright (C) 2010-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* See strlen.s for comments on how the end-of-string testing works. */
-
-/* int [r3] strncmp (const char *s1 [r3],
- const char *s2 [r4],
- size_t size [r5]) */
-
-ENTRY_TOCLESS (STRNCMP, 5)
- CALL_MCOUNT 3
-
-#define rTMP2 r0
-#define rRTN r3
-#define rSTR1 r3 /* first string arg */
-#define rSTR2 r4 /* second string arg */
-#define rN r5 /* max string length */
-#define rWORD1 r6 /* current word in s1 */
-#define rWORD2 r7 /* current word in s2 */
-#define rWORD3 r10
-#define rWORD4 r11
-#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
-#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
-#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
-#define rBITDIF r11 /* bits that differ in s1 & s2 words */
-#define rTMP r12
-
- dcbt 0,rSTR1
- nop
- or rTMP,rSTR2,rSTR1
- lis r7F7F,0x7f7f
- dcbt 0,rSTR2
- nop
- clrldi. rTMP,rTMP,61
- cmpldi cr1,rN,0
- lis rFEFE,-0x101
- bne L(unaligned)
-/* We are doubleword aligned so set up for two loops. first a double word
- loop, then fall into the byte loop if any residual. */
- srdi. rTMP,rN,3
- clrldi rN,rN,61
- addi rFEFE,rFEFE,-0x101
- addi r7F7F,r7F7F,0x7f7f
- cmpldi cr1,rN,0
- beq L(unaligned)
-
- mtctr rTMP
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- sldi rTMP,rFEFE,32
- insrdi r7F7F,r7F7F,32,0
- add rFEFE,rFEFE,rTMP
- b L(g1)
-
-L(g0):
- ldu rWORD1,8(rSTR1)
- bne cr1,L(different)
- ldu rWORD2,8(rSTR2)
-L(g1): add rTMP,rFEFE,rWORD1
- nor rNEG,r7F7F,rWORD1
- bdz L(tail)
- and. rTMP,rTMP,rNEG
- cmpd cr1,rWORD1,rWORD2
- beq L(g0)
-
-/* OK. We've hit the end of the string. We need to be careful that
- we don't compare two strings as different because of gunk beyond
- the end of the strings... */
-
-#ifdef __LITTLE_ENDIAN__
-L(endstring):
- addi rTMP2, rTMP, -1
- beq cr1, L(equal)
- andc rTMP2, rTMP2, rTMP
- rldimi rTMP2, rTMP2, 1, 0
- and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
- and rWORD1, rWORD1, rTMP2
- cmpd cr1, rWORD1, rWORD2
- beq cr1, L(equal)
- cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
- addi rNEG, rBITDIF, 1
- orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
- sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
- andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
- andc rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt L(highbit)
- sradi rRTN, rRTN, 63 /* must return an int. */
- ori rRTN, rRTN, 1
- blr
-L(equal):
- li rRTN, 0
- blr
-
-L(different):
- ld rWORD1, -8(rSTR1)
- cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
- addi rNEG, rBITDIF, 1
- orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
- sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
- andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
- andc rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt L(highbit)
- sradi rRTN, rRTN, 63
- ori rRTN, rRTN, 1
- blr
-L(highbit):
- sradi rRTN, rWORD2, 63
- ori rRTN, rRTN, 1
- blr
-
-#else
-L(endstring):
- and rTMP,r7F7F,rWORD1
- beq cr1,L(equal)
- add rTMP,rTMP,r7F7F
- xor. rBITDIF,rWORD1,rWORD2
- andc rNEG,rNEG,rTMP
- blt L(highbit)
- cntlzd rBITDIF,rBITDIF
- cntlzd rNEG,rNEG
- addi rNEG,rNEG,7
- cmpd cr1,rNEG,rBITDIF
- sub rRTN,rWORD1,rWORD2
- blt cr1,L(equal)
- sradi rRTN,rRTN,63 /* must return an int. */
- ori rRTN,rRTN,1
- blr
-L(equal):
- li rRTN,0
- blr
-
-L(different):
- ld rWORD1,-8(rSTR1)
- xor. rBITDIF,rWORD1,rWORD2
- sub rRTN,rWORD1,rWORD2
- blt L(highbit)
- sradi rRTN,rRTN,63
- ori rRTN,rRTN,1
- blr
-L(highbit):
- sradi rRTN,rWORD2,63
- ori rRTN,rRTN,1
- blr
-#endif
-
-/* Oh well. In this case, we just do a byte-by-byte comparison. */
- .align 4
-L(tail):
- and. rTMP,rTMP,rNEG
- cmpd cr1,rWORD1,rWORD2
- bne L(endstring)
- addi rSTR1,rSTR1,8
- bne cr1,L(different)
- addi rSTR2,rSTR2,8
- cmpldi cr1,rN,0
-L(unaligned):
- mtctr rN
- ble cr1,L(ux)
-L(uz):
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
- .align 4
-L(u1):
- cmpdi cr1,rWORD1,0
- bdz L(u4)
- cmpd rWORD1,rWORD2
- beq cr1,L(u4)
- bne L(u4)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- cmpdi cr1,rWORD3,0
- bdz L(u3)
- cmpd rWORD3,rWORD4
- beq cr1,L(u3)
- bne L(u3)
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- cmpdi cr1,rWORD1,0
- bdz L(u4)
- cmpd rWORD1,rWORD2
- beq cr1,L(u4)
- bne L(u4)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- cmpdi cr1,rWORD3,0
- bdz L(u3)
- cmpd rWORD3,rWORD4
- beq cr1,L(u3)
- bne L(u3)
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- b L(u1)
-
-L(u3): sub rRTN,rWORD3,rWORD4
- blr
-L(u4): sub rRTN,rWORD1,rWORD2
- blr
-L(ux):
- li rRTN,0
- blr
-END (STRNCMP)
-libc_hidden_builtin_def (strncmp)
diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S
deleted file mode 100644
index 40a230242cbdf733..0000000000000000
--- a/sysdeps/powerpc/powerpc64/strncmp.S
+++ /dev/null
@@ -1,210 +0,0 @@
-/* Optimized strcmp implementation for PowerPC64.
- Copyright (C) 2003-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* See strlen.s for comments on how the end-of-string testing works. */
-
-/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-ENTRY_TOCLESS (STRNCMP, 4)
- CALL_MCOUNT 3
-
-#define rTMP2 r0
-#define rRTN r3
-#define rSTR1 r3 /* first string arg */
-#define rSTR2 r4 /* second string arg */
-#define rN r5 /* max string length */
-#define rWORD1 r6 /* current word in s1 */
-#define rWORD2 r7 /* current word in s2 */
-#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
-#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
-#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
-#define rBITDIF r11 /* bits that differ in s1 & s2 words */
-#define rTMP r12
-
- dcbt 0,rSTR1
- or rTMP, rSTR2, rSTR1
- lis r7F7F, 0x7f7f
- dcbt 0,rSTR2
- clrldi. rTMP, rTMP, 61
- cmpldi cr1, rN, 0
- lis rFEFE, -0x101
- bne L(unaligned)
-/* We are doubleword aligned so set up for two loops. first a double word
- loop, then fall into the byte loop if any residual. */
- srdi. rTMP, rN, 3
- clrldi rN, rN, 61
- addi rFEFE, rFEFE, -0x101
- addi r7F7F, r7F7F, 0x7f7f
- cmpldi cr1, rN, 0
- beq L(unaligned)
-
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
- ld rWORD1, 0(rSTR1)
- ld rWORD2, 0(rSTR2)
- sldi rTMP, rFEFE, 32
- insrdi r7F7F, r7F7F, 32, 0
- add rFEFE, rFEFE, rTMP
- b L(g1)
-
-L(g0):
- ldu rWORD1, 8(rSTR1)
- bne- cr1, L(different)
- ldu rWORD2, 8(rSTR2)
-L(g1): add rTMP, rFEFE, rWORD1
- nor rNEG, r7F7F, rWORD1
- bdz L(tail)
- and. rTMP, rTMP, rNEG
- cmpd cr1, rWORD1, rWORD2
- beq+ L(g0)
-
-/* OK. We've hit the end of the string. We need to be careful that
- we don't compare two strings as different because of gunk beyond
- the end of the strings... */
-
-#ifdef __LITTLE_ENDIAN__
-L(endstring):
- addi rTMP2, rTMP, -1
- beq cr1, L(equal)
- andc rTMP2, rTMP2, rTMP
- rldimi rTMP2, rTMP2, 1, 0
- and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
- and rWORD1, rWORD1, rTMP2
- cmpd cr1, rWORD1, rWORD2
- beq cr1, L(equal)
- xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
- neg rNEG, rBITDIF
- and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
- cntlzd rNEG, rNEG /* bitcount of the bit. */
- andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
- sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
- sld rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- sradi rRTN, rRTN, 63 /* must return an int. */
- ori rRTN, rRTN, 1
- blr
-L(equal):
- li rRTN, 0
- blr
-
-L(different):
- ld rWORD1, -8(rSTR1)
- xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
- neg rNEG, rBITDIF
- and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
- cntlzd rNEG, rNEG /* bitcount of the bit. */
- andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
- sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
- sld rWORD2, rWORD2, rNEG
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- sradi rRTN, rRTN, 63
- ori rRTN, rRTN, 1
- blr
-L(highbit):
- sradi rRTN, rWORD2, 63
- ori rRTN, rRTN, 1
- blr
-
-#else
-L(endstring):
- and rTMP, r7F7F, rWORD1
- beq cr1, L(equal)
- add rTMP, rTMP, r7F7F
- xor. rBITDIF, rWORD1, rWORD2
- andc rNEG, rNEG, rTMP
- blt- L(highbit)
- cntlzd rBITDIF, rBITDIF
- cntlzd rNEG, rNEG
- addi rNEG, rNEG, 7
- cmpd cr1, rNEG, rBITDIF
- sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- sradi rRTN, rRTN, 63 /* must return an int. */
- ori rRTN, rRTN, 1
- blr
-L(equal):
- li rRTN, 0
- blr
-
-L(different):
- ld rWORD1, -8(rSTR1)
- xor. rBITDIF, rWORD1, rWORD2
- sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- sradi rRTN, rRTN, 63
- ori rRTN, rRTN, 1
- blr
-L(highbit):
- sradi rRTN, rWORD2, 63
- ori rRTN, rRTN, 1
- blr
-#endif
-
-/* Oh well. In this case, we just do a byte-by-byte comparison. */
- .align 4
-L(tail):
- and. rTMP, rTMP, rNEG
- cmpd cr1, rWORD1, rWORD2
- bne- L(endstring)
- addi rSTR1, rSTR1, 8
- bne- cr1, L(different)
- addi rSTR2, rSTR2, 8
- cmpldi cr1, rN, 0
-L(unaligned):
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
- bgt cr1, L(uz)
-L(ux):
- li rRTN, 0
- blr
- .align 4
-L(uz):
- lbz rWORD1, 0(rSTR1)
- lbz rWORD2, 0(rSTR2)
- nop
- b L(u1)
-L(u0):
- lbzu rWORD2, 1(rSTR2)
-L(u1):
- bdz L(u3)
- cmpdi cr1, rWORD1, 0
- cmpd rWORD1, rWORD2
- beq- cr1, L(u3)
- lbzu rWORD1, 1(rSTR1)
- bne- L(u2)
- lbzu rWORD2, 1(rSTR2)
- bdz L(u3)
- cmpdi cr1, rWORD1, 0
- cmpd rWORD1, rWORD2
- bne- L(u3)
- lbzu rWORD1, 1(rSTR1)
- bne+ cr1, L(u0)
-
-L(u2): lbzu rWORD1, -1(rSTR1)
-L(u3): sub rRTN, rWORD1, rWORD2
- blr
-END (STRNCMP)
-libc_hidden_builtin_def (strncmp)

View File

@ -0,0 +1,845 @@
From e1d3312015e8f70344620375aedf91afe7e7e7a4 Mon Sep 17 00:00:00 2001
From: lijianglin <lijianglin2@huawei.com>
Date: Tue, 27 Jun 2023 20:15:49 +0800
Subject: add GB18030-2022 charmap and test the entire GB18030 charmap [BZ
#30243]
support GB18030-2022 after add and change some transcoding relationship
of GB18030-2022.Details are as follows:
add 25 transcoding relationship
UE81E 0x82359037
UE826 0x82359038
UE82B 0x82359039
UE82C 0x82359130
UE832 0x82359131
UE843 0x82359132
UE854 0x82359133
UE864 0x82359134
UE78D 0x84318236
UE78F 0x84318237
UE78E 0x84318238
UE790 0x84318239
UE791 0x84318330
UE792 0x84318331
UE793 0x84318332
UE794 0x84318333
UE795 0x84318334
UE796 0x84318335
UE816 0xfe51
UE817 0xfe52
UE818 0xfe53
UE831 0xfe6c
UE83B 0xfe76
UE855 0xfe91
change 6 transcoding relationship
U20087 0x95329031
U20089 0x95329033
U200CC 0x95329730
U215D7 0x9536b937
U2298F 0x9630ba35
U241FE 0x9635b630
Test the entire GB18030 charmap, not only the Unicode BMP part.
Co-authored-by: yangyanchao <yangyanchao6@huawei.com>
Co-authored-by: liqingqing <liqingqing3@huawei.com>
Co-authored-by: Bruno Haible <bruno@clisp.org>
Reviewed-by: Andreas Schwab <schwab@suse.de>
Reviewed-by: Mike FABIAN <mfabian@redhat.com>
diff --git a/iconvdata/gb18030.c b/iconvdata/gb18030.c
index 9996a59eaf..be6cfe652c 100644
--- a/iconvdata/gb18030.c
+++ b/iconvdata/gb18030.c
@@ -6009,49 +6009,50 @@ static const uint16_t __twobyte_to_ucs[] =
[0x5dc2] = 0xfa0e, [0x5dc3] = 0xfa0f, [0x5dc4] = 0xfa11, [0x5dc5] = 0xfa13,
[0x5dc6] = 0xfa14, [0x5dc7] = 0xfa18, [0x5dc8] = 0xfa1f, [0x5dc9] = 0xfa20,
[0x5dca] = 0xfa21, [0x5dcb] = 0xfa23, [0x5dcc] = 0xfa24, [0x5dcd] = 0xfa27,
- [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0x2e81, [0x5dd4] = 0x2e84,
- [0x5dd5] = 0x3473, [0x5dd6] = 0x3447, [0x5dd7] = 0x2e88, [0x5dd8] = 0x2e8b,
- [0x5dd9] = 0x9fb4, [0x5dda] = 0x359e, [0x5ddb] = 0x361a, [0x5ddc] = 0x360e,
- [0x5ddd] = 0x2e8c, [0x5dde] = 0x2e97, [0x5ddf] = 0x396e, [0x5de0] = 0x3918,
- [0x5de1] = 0x9fb5, [0x5de2] = 0x39cf, [0x5de3] = 0x39df, [0x5de4] = 0x3a73,
- [0x5de5] = 0x39d0, [0x5de6] = 0x9fb6, [0x5de7] = 0x9fb7, [0x5de8] = 0x3b4e,
- [0x5de9] = 0x3c6e, [0x5dea] = 0x3ce0, [0x5deb] = 0x2ea7, [0x5ded] = 0x9fb8,
+ [0x5dce] = 0xfa28, [0x5dcf] = 0xfa29, [0x5dd0] = 0x2e81, [0x5dd1] = 0xe816,
+ [0x5dd2] = 0xe817, [0x5dd3] = 0xe818, [0x5dd4] = 0x2e84, [0x5dd5] = 0x3473,
+ [0x5dd6] = 0x3447, [0x5dd7] = 0x2e88, [0x5dd8] = 0x2e8b, [0x5dd9] = 0x9fb4,
+ [0x5dda] = 0x359e, [0x5ddb] = 0x361a, [0x5ddc] = 0x360e, [0x5ddd] = 0x2e8c,
+ [0x5dde] = 0x2e97, [0x5ddf] = 0x396e, [0x5de0] = 0x3918, [0x5de1] = 0x9fb5,
+ [0x5de2] = 0x39cf, [0x5de3] = 0x39df, [0x5de4] = 0x3a73, [0x5de5] = 0x39d0,
+ [0x5de6] = 0x9fb6, [0x5de7] = 0x9fb7, [0x5de8] = 0x3b4e, [0x5de9] = 0x3c6e,
+ [0x5dea] = 0x3ce0, [0x5deb] = 0x2ea7, [0x5dec] = 0xe831, [0x5ded] = 0x9fb8,
[0x5dee] = 0x2eaa, [0x5def] = 0x4056, [0x5df0] = 0x415f, [0x5df1] = 0x2eae,
[0x5df2] = 0x4337, [0x5df3] = 0x2eb3, [0x5df4] = 0x2eb6, [0x5df5] = 0x2eb7,
- [0x5df7] = 0x43b1, [0x5df8] = 0x43ac, [0x5df9] = 0x2ebb, [0x5dfa] = 0x43dd,
- [0x5dfb] = 0x44d6, [0x5dfc] = 0x4661, [0x5dfd] = 0x464c, [0x5dfe] = 0x9fb9,
- [0x5e00] = 0x4723, [0x5e01] = 0x4729, [0x5e02] = 0x477c, [0x5e03] = 0x478d,
- [0x5e04] = 0x2eca, [0x5e05] = 0x4947, [0x5e06] = 0x497a, [0x5e07] = 0x497d,
- [0x5e08] = 0x4982, [0x5e09] = 0x4983, [0x5e0a] = 0x4985, [0x5e0b] = 0x4986,
- [0x5e0c] = 0x499f, [0x5e0d] = 0x499b, [0x5e0e] = 0x49b7, [0x5e0f] = 0x49b6,
- [0x5e10] = 0x9fba, [0x5e12] = 0x4ca3, [0x5e13] = 0x4c9f, [0x5e14] = 0x4ca0,
- [0x5e15] = 0x4ca1, [0x5e16] = 0x4c77, [0x5e17] = 0x4ca2, [0x5e18] = 0x4d13,
- [0x5e19] = 0x4d14, [0x5e1a] = 0x4d15, [0x5e1b] = 0x4d16, [0x5e1c] = 0x4d17,
- [0x5e1d] = 0x4d18, [0x5e1e] = 0x4d19, [0x5e1f] = 0x4dae, [0x5e20] = 0x9fbb,
- [0x5e21] = 0xe468, [0x5e22] = 0xe469, [0x5e23] = 0xe46a, [0x5e24] = 0xe46b,
- [0x5e25] = 0xe46c, [0x5e26] = 0xe46d, [0x5e27] = 0xe46e, [0x5e28] = 0xe46f,
- [0x5e29] = 0xe470, [0x5e2a] = 0xe471, [0x5e2b] = 0xe472, [0x5e2c] = 0xe473,
- [0x5e2d] = 0xe474, [0x5e2e] = 0xe475, [0x5e2f] = 0xe476, [0x5e30] = 0xe477,
- [0x5e31] = 0xe478, [0x5e32] = 0xe479, [0x5e33] = 0xe47a, [0x5e34] = 0xe47b,
- [0x5e35] = 0xe47c, [0x5e36] = 0xe47d, [0x5e37] = 0xe47e, [0x5e38] = 0xe47f,
- [0x5e39] = 0xe480, [0x5e3a] = 0xe481, [0x5e3b] = 0xe482, [0x5e3c] = 0xe483,
- [0x5e3d] = 0xe484, [0x5e3e] = 0xe485, [0x5e3f] = 0xe486, [0x5e40] = 0xe487,
- [0x5e41] = 0xe488, [0x5e42] = 0xe489, [0x5e43] = 0xe48a, [0x5e44] = 0xe48b,
- [0x5e45] = 0xe48c, [0x5e46] = 0xe48d, [0x5e47] = 0xe48e, [0x5e48] = 0xe48f,
- [0x5e49] = 0xe490, [0x5e4a] = 0xe491, [0x5e4b] = 0xe492, [0x5e4c] = 0xe493,
- [0x5e4d] = 0xe494, [0x5e4e] = 0xe495, [0x5e4f] = 0xe496, [0x5e50] = 0xe497,
- [0x5e51] = 0xe498, [0x5e52] = 0xe499, [0x5e53] = 0xe49a, [0x5e54] = 0xe49b,
- [0x5e55] = 0xe49c, [0x5e56] = 0xe49d, [0x5e57] = 0xe49e, [0x5e58] = 0xe49f,
- [0x5e59] = 0xe4a0, [0x5e5a] = 0xe4a1, [0x5e5b] = 0xe4a2, [0x5e5c] = 0xe4a3,
- [0x5e5d] = 0xe4a4, [0x5e5e] = 0xe4a5, [0x5e5f] = 0xe4a6, [0x5e60] = 0xe4a7,
- [0x5e61] = 0xe4a8, [0x5e62] = 0xe4a9, [0x5e63] = 0xe4aa, [0x5e64] = 0xe4ab,
- [0x5e65] = 0xe4ac, [0x5e66] = 0xe4ad, [0x5e67] = 0xe4ae, [0x5e68] = 0xe4af,
- [0x5e69] = 0xe4b0, [0x5e6a] = 0xe4b1, [0x5e6b] = 0xe4b2, [0x5e6c] = 0xe4b3,
- [0x5e6d] = 0xe4b4, [0x5e6e] = 0xe4b5, [0x5e6f] = 0xe4b6, [0x5e70] = 0xe4b7,
- [0x5e71] = 0xe4b8, [0x5e72] = 0xe4b9, [0x5e73] = 0xe4ba, [0x5e74] = 0xe4bb,
- [0x5e75] = 0xe4bc, [0x5e76] = 0xe4bd, [0x5e77] = 0xe4be, [0x5e78] = 0xe4bf,
- [0x5e79] = 0xe4c0, [0x5e7a] = 0xe4c1, [0x5e7b] = 0xe4c2, [0x5e7c] = 0xe4c3,
- [0x5e7d] = 0xe4c4, [0x5e7e] = 0xe4c5,
+ [0x5df6] = 0xe83b, [0x5df7] = 0x43b1, [0x5df8] = 0x43ac, [0x5df9] = 0x2ebb,
+ [0x5dfa] = 0x43dd, [0x5dfb] = 0x44d6, [0x5dfc] = 0x4661, [0x5dfd] = 0x464c,
+ [0x5dfe] = 0x9fb9, [0x5e00] = 0x4723, [0x5e01] = 0x4729, [0x5e02] = 0x477c,
+ [0x5e03] = 0x478d, [0x5e04] = 0x2eca, [0x5e05] = 0x4947, [0x5e06] = 0x497a,
+ [0x5e07] = 0x497d, [0x5e08] = 0x4982, [0x5e09] = 0x4983, [0x5e0a] = 0x4985,
+ [0x5e0b] = 0x4986, [0x5e0c] = 0x499f, [0x5e0d] = 0x499b, [0x5e0e] = 0x49b7,
+ [0x5e0f] = 0x49b6, [0x5e10] = 0x9fba, [0x5e11] = 0xe855, [0x5e12] = 0x4ca3,
+ [0x5e13] = 0x4c9f, [0x5e14] = 0x4ca0, [0x5e15] = 0x4ca1, [0x5e16] = 0x4c77,
+ [0x5e17] = 0x4ca2, [0x5e18] = 0x4d13, [0x5e19] = 0x4d14, [0x5e1a] = 0x4d15,
+ [0x5e1b] = 0x4d16, [0x5e1c] = 0x4d17, [0x5e1d] = 0x4d18, [0x5e1e] = 0x4d19,
+ [0x5e1f] = 0x4dae, [0x5e20] = 0x9fbb, [0x5e21] = 0xe468, [0x5e22] = 0xe469,
+ [0x5e23] = 0xe46a, [0x5e24] = 0xe46b, [0x5e25] = 0xe46c, [0x5e26] = 0xe46d,
+ [0x5e27] = 0xe46e, [0x5e28] = 0xe46f, [0x5e29] = 0xe470, [0x5e2a] = 0xe471,
+ [0x5e2b] = 0xe472, [0x5e2c] = 0xe473, [0x5e2d] = 0xe474, [0x5e2e] = 0xe475,
+ [0x5e2f] = 0xe476, [0x5e30] = 0xe477, [0x5e31] = 0xe478, [0x5e32] = 0xe479,
+ [0x5e33] = 0xe47a, [0x5e34] = 0xe47b, [0x5e35] = 0xe47c, [0x5e36] = 0xe47d,
+ [0x5e37] = 0xe47e, [0x5e38] = 0xe47f, [0x5e39] = 0xe480, [0x5e3a] = 0xe481,
+ [0x5e3b] = 0xe482, [0x5e3c] = 0xe483, [0x5e3d] = 0xe484, [0x5e3e] = 0xe485,
+ [0x5e3f] = 0xe486, [0x5e40] = 0xe487, [0x5e41] = 0xe488, [0x5e42] = 0xe489,
+ [0x5e43] = 0xe48a, [0x5e44] = 0xe48b, [0x5e45] = 0xe48c, [0x5e46] = 0xe48d,
+ [0x5e47] = 0xe48e, [0x5e48] = 0xe48f, [0x5e49] = 0xe490, [0x5e4a] = 0xe491,
+ [0x5e4b] = 0xe492, [0x5e4c] = 0xe493, [0x5e4d] = 0xe494, [0x5e4e] = 0xe495,
+ [0x5e4f] = 0xe496, [0x5e50] = 0xe497, [0x5e51] = 0xe498, [0x5e52] = 0xe499,
+ [0x5e53] = 0xe49a, [0x5e54] = 0xe49b, [0x5e55] = 0xe49c, [0x5e56] = 0xe49d,
+ [0x5e57] = 0xe49e, [0x5e58] = 0xe49f, [0x5e59] = 0xe4a0, [0x5e5a] = 0xe4a1,
+ [0x5e5b] = 0xe4a2, [0x5e5c] = 0xe4a3, [0x5e5d] = 0xe4a4, [0x5e5e] = 0xe4a5,
+ [0x5e5f] = 0xe4a6, [0x5e60] = 0xe4a7, [0x5e61] = 0xe4a8, [0x5e62] = 0xe4a9,
+ [0x5e63] = 0xe4aa, [0x5e64] = 0xe4ab, [0x5e65] = 0xe4ac, [0x5e66] = 0xe4ad,
+ [0x5e67] = 0xe4ae, [0x5e68] = 0xe4af, [0x5e69] = 0xe4b0, [0x5e6a] = 0xe4b1,
+ [0x5e6b] = 0xe4b2, [0x5e6c] = 0xe4b3, [0x5e6d] = 0xe4b4, [0x5e6e] = 0xe4b5,
+ [0x5e6f] = 0xe4b6, [0x5e70] = 0xe4b7, [0x5e71] = 0xe4b8, [0x5e72] = 0xe4b9,
+ [0x5e73] = 0xe4ba, [0x5e74] = 0xe4bb, [0x5e75] = 0xe4bc, [0x5e76] = 0xe4bd,
+ [0x5e77] = 0xe4be, [0x5e78] = 0xe4bf, [0x5e79] = 0xe4c0, [0x5e7a] = 0xe4c1,
+ [0x5e7b] = 0xe4c2, [0x5e7c] = 0xe4c3, [0x5e7d] = 0xe4c4, [0x5e7e] = 0xe4c5,
};
/* Table for GB18030 -> UCS-4, containing the four-byte characters only,
@@ -8680,7 +8681,9 @@ static const uint16_t __fourbyte_to_ucs[0x99e2 - 6637 - 2110 - 14404 - 4295] =
[0x2838] = 0x9fa6, [0x2839] = 0x9fa7, [0x283a] = 0x9fa8, [0x283b] = 0x9fa9,
[0x283c] = 0x9faa, [0x283d] = 0x9fab, [0x283e] = 0x9fac, [0x283f] = 0x9fad,
[0x2840] = 0x9fae, [0x2841] = 0x9faf, [0x2842] = 0x9fb0, [0x2843] = 0x9fb1,
- [0x2844] = 0x9fb2, [0x2845] = 0x9fb3, [0x284e] = 0xe76c, [0x284f] = 0xe7c8,
+ [0x2844] = 0x9fb2, [0x2845] = 0x9fb3, [0x2846] = 0xe81e, [0x2847] = 0xe826,
+ [0x2848] = 0xe82b, [0x2849] = 0xe82c, [0x284a] = 0xe832, [0x284b] = 0xe843,
+ [0x284c] = 0xe854, [0x284d] = 0xe864, [0x284e] = 0xe76c, [0x284f] = 0xe7c8,
[0x2850] = 0xe7e7, [0x2851] = 0xe7e8, [0x2852] = 0xe7e9, [0x2853] = 0xe7ea,
[0x2854] = 0xe7eb, [0x2855] = 0xe7ec, [0x2856] = 0xe7ed, [0x2857] = 0xe7ee,
[0x2858] = 0xe7ef, [0x2859] = 0xe7f0, [0x285a] = 0xe7f1, [0x285b] = 0xe7f2,
@@ -9008,84 +9011,86 @@ static const uint16_t __fourbyte_to_ucs[0x99e2 - 6637 - 2110 - 14404 - 4295] =
[0x2d60] = 0xfe02, [0x2d61] = 0xfe03, [0x2d62] = 0xfe04, [0x2d63] = 0xfe05,
[0x2d64] = 0xfe06, [0x2d65] = 0xfe07, [0x2d66] = 0xfe08, [0x2d67] = 0xfe09,
[0x2d68] = 0xfe0a, [0x2d69] = 0xfe0b, [0x2d6a] = 0xfe0c, [0x2d6b] = 0xfe0d,
- [0x2d6c] = 0xfe0e, [0x2d6d] = 0xfe0f, [0x2d78] = 0xfe1a, [0x2d79] = 0xfe1b,
- [0x2d7a] = 0xfe1c, [0x2d7b] = 0xfe1d, [0x2d7c] = 0xfe1e, [0x2d7d] = 0xfe1f,
- [0x2d7e] = 0xfe20, [0x2d7f] = 0xfe21, [0x2d80] = 0xfe22, [0x2d81] = 0xfe23,
- [0x2d82] = 0xfe24, [0x2d83] = 0xfe25, [0x2d84] = 0xfe26, [0x2d85] = 0xfe27,
- [0x2d86] = 0xfe28, [0x2d87] = 0xfe29, [0x2d88] = 0xfe2a, [0x2d89] = 0xfe2b,
- [0x2d8a] = 0xfe2c, [0x2d8b] = 0xfe2d, [0x2d8c] = 0xfe2e, [0x2d8d] = 0xfe2f,
- [0x2d8e] = 0xfe32, [0x2d8f] = 0xfe45, [0x2d90] = 0xfe46, [0x2d91] = 0xfe47,
- [0x2d92] = 0xfe48, [0x2d93] = 0xfe53, [0x2d94] = 0xfe58, [0x2d95] = 0xfe67,
- [0x2d96] = 0xfe6c, [0x2d97] = 0xfe6d, [0x2d98] = 0xfe6e, [0x2d99] = 0xfe6f,
- [0x2d9a] = 0xfe70, [0x2d9b] = 0xfe71, [0x2d9c] = 0xfe72, [0x2d9d] = 0xfe73,
- [0x2d9e] = 0xfe74, [0x2d9f] = 0xfe75, [0x2da0] = 0xfe76, [0x2da1] = 0xfe77,
- [0x2da2] = 0xfe78, [0x2da3] = 0xfe79, [0x2da4] = 0xfe7a, [0x2da5] = 0xfe7b,
- [0x2da6] = 0xfe7c, [0x2da7] = 0xfe7d, [0x2da8] = 0xfe7e, [0x2da9] = 0xfe7f,
- [0x2daa] = 0xfe80, [0x2dab] = 0xfe81, [0x2dac] = 0xfe82, [0x2dad] = 0xfe83,
- [0x2dae] = 0xfe84, [0x2daf] = 0xfe85, [0x2db0] = 0xfe86, [0x2db1] = 0xfe87,
- [0x2db2] = 0xfe88, [0x2db3] = 0xfe89, [0x2db4] = 0xfe8a, [0x2db5] = 0xfe8b,
- [0x2db6] = 0xfe8c, [0x2db7] = 0xfe8d, [0x2db8] = 0xfe8e, [0x2db9] = 0xfe8f,
- [0x2dba] = 0xfe90, [0x2dbb] = 0xfe91, [0x2dbc] = 0xfe92, [0x2dbd] = 0xfe93,
- [0x2dbe] = 0xfe94, [0x2dbf] = 0xfe95, [0x2dc0] = 0xfe96, [0x2dc1] = 0xfe97,
- [0x2dc2] = 0xfe98, [0x2dc3] = 0xfe99, [0x2dc4] = 0xfe9a, [0x2dc5] = 0xfe9b,
- [0x2dc6] = 0xfe9c, [0x2dc7] = 0xfe9d, [0x2dc8] = 0xfe9e, [0x2dc9] = 0xfe9f,
- [0x2dca] = 0xfea0, [0x2dcb] = 0xfea1, [0x2dcc] = 0xfea2, [0x2dcd] = 0xfea3,
- [0x2dce] = 0xfea4, [0x2dcf] = 0xfea5, [0x2dd0] = 0xfea6, [0x2dd1] = 0xfea7,
- [0x2dd2] = 0xfea8, [0x2dd3] = 0xfea9, [0x2dd4] = 0xfeaa, [0x2dd5] = 0xfeab,
- [0x2dd6] = 0xfeac, [0x2dd7] = 0xfead, [0x2dd8] = 0xfeae, [0x2dd9] = 0xfeaf,
- [0x2dda] = 0xfeb0, [0x2ddb] = 0xfeb1, [0x2ddc] = 0xfeb2, [0x2ddd] = 0xfeb3,
- [0x2dde] = 0xfeb4, [0x2ddf] = 0xfeb5, [0x2de0] = 0xfeb6, [0x2de1] = 0xfeb7,
- [0x2de2] = 0xfeb8, [0x2de3] = 0xfeb9, [0x2de4] = 0xfeba, [0x2de5] = 0xfebb,
- [0x2de6] = 0xfebc, [0x2de7] = 0xfebd, [0x2de8] = 0xfebe, [0x2de9] = 0xfebf,
- [0x2dea] = 0xfec0, [0x2deb] = 0xfec1, [0x2dec] = 0xfec2, [0x2ded] = 0xfec3,
- [0x2dee] = 0xfec4, [0x2def] = 0xfec5, [0x2df0] = 0xfec6, [0x2df1] = 0xfec7,
- [0x2df2] = 0xfec8, [0x2df3] = 0xfec9, [0x2df4] = 0xfeca, [0x2df5] = 0xfecb,
- [0x2df6] = 0xfecc, [0x2df7] = 0xfecd, [0x2df8] = 0xfece, [0x2df9] = 0xfecf,
- [0x2dfa] = 0xfed0, [0x2dfb] = 0xfed1, [0x2dfc] = 0xfed2, [0x2dfd] = 0xfed3,
- [0x2dfe] = 0xfed4, [0x2dff] = 0xfed5, [0x2e00] = 0xfed6, [0x2e01] = 0xfed7,
- [0x2e02] = 0xfed8, [0x2e03] = 0xfed9, [0x2e04] = 0xfeda, [0x2e05] = 0xfedb,
- [0x2e06] = 0xfedc, [0x2e07] = 0xfedd, [0x2e08] = 0xfede, [0x2e09] = 0xfedf,
- [0x2e0a] = 0xfee0, [0x2e0b] = 0xfee1, [0x2e0c] = 0xfee2, [0x2e0d] = 0xfee3,
- [0x2e0e] = 0xfee4, [0x2e0f] = 0xfee5, [0x2e10] = 0xfee6, [0x2e11] = 0xfee7,
- [0x2e12] = 0xfee8, [0x2e13] = 0xfee9, [0x2e14] = 0xfeea, [0x2e15] = 0xfeeb,
- [0x2e16] = 0xfeec, [0x2e17] = 0xfeed, [0x2e18] = 0xfeee, [0x2e19] = 0xfeef,
- [0x2e1a] = 0xfef0, [0x2e1b] = 0xfef1, [0x2e1c] = 0xfef2, [0x2e1d] = 0xfef3,
- [0x2e1e] = 0xfef4, [0x2e1f] = 0xfef5, [0x2e20] = 0xfef6, [0x2e21] = 0xfef7,
- [0x2e22] = 0xfef8, [0x2e23] = 0xfef9, [0x2e24] = 0xfefa, [0x2e25] = 0xfefb,
- [0x2e26] = 0xfefc, [0x2e27] = 0xfefd, [0x2e28] = 0xfefe, [0x2e29] = 0xfeff,
- [0x2e2a] = 0xff00, [0x2e2b] = 0xff5f, [0x2e2c] = 0xff60, [0x2e2d] = 0xff61,
- [0x2e2e] = 0xff62, [0x2e2f] = 0xff63, [0x2e30] = 0xff64, [0x2e31] = 0xff65,
- [0x2e32] = 0xff66, [0x2e33] = 0xff67, [0x2e34] = 0xff68, [0x2e35] = 0xff69,
- [0x2e36] = 0xff6a, [0x2e37] = 0xff6b, [0x2e38] = 0xff6c, [0x2e39] = 0xff6d,
- [0x2e3a] = 0xff6e, [0x2e3b] = 0xff6f, [0x2e3c] = 0xff70, [0x2e3d] = 0xff71,
- [0x2e3e] = 0xff72, [0x2e3f] = 0xff73, [0x2e40] = 0xff74, [0x2e41] = 0xff75,
- [0x2e42] = 0xff76, [0x2e43] = 0xff77, [0x2e44] = 0xff78, [0x2e45] = 0xff79,
- [0x2e46] = 0xff7a, [0x2e47] = 0xff7b, [0x2e48] = 0xff7c, [0x2e49] = 0xff7d,
- [0x2e4a] = 0xff7e, [0x2e4b] = 0xff7f, [0x2e4c] = 0xff80, [0x2e4d] = 0xff81,
- [0x2e4e] = 0xff82, [0x2e4f] = 0xff83, [0x2e50] = 0xff84, [0x2e51] = 0xff85,
- [0x2e52] = 0xff86, [0x2e53] = 0xff87, [0x2e54] = 0xff88, [0x2e55] = 0xff89,
- [0x2e56] = 0xff8a, [0x2e57] = 0xff8b, [0x2e58] = 0xff8c, [0x2e59] = 0xff8d,
- [0x2e5a] = 0xff8e, [0x2e5b] = 0xff8f, [0x2e5c] = 0xff90, [0x2e5d] = 0xff91,
- [0x2e5e] = 0xff92, [0x2e5f] = 0xff93, [0x2e60] = 0xff94, [0x2e61] = 0xff95,
- [0x2e62] = 0xff96, [0x2e63] = 0xff97, [0x2e64] = 0xff98, [0x2e65] = 0xff99,
- [0x2e66] = 0xff9a, [0x2e67] = 0xff9b, [0x2e68] = 0xff9c, [0x2e69] = 0xff9d,
- [0x2e6a] = 0xff9e, [0x2e6b] = 0xff9f, [0x2e6c] = 0xffa0, [0x2e6d] = 0xffa1,
- [0x2e6e] = 0xffa2, [0x2e6f] = 0xffa3, [0x2e70] = 0xffa4, [0x2e71] = 0xffa5,
- [0x2e72] = 0xffa6, [0x2e73] = 0xffa7, [0x2e74] = 0xffa8, [0x2e75] = 0xffa9,
- [0x2e76] = 0xffaa, [0x2e77] = 0xffab, [0x2e78] = 0xffac, [0x2e79] = 0xffad,
- [0x2e7a] = 0xffae, [0x2e7b] = 0xffaf, [0x2e7c] = 0xffb0, [0x2e7d] = 0xffb1,
- [0x2e7e] = 0xffb2, [0x2e7f] = 0xffb3, [0x2e80] = 0xffb4, [0x2e81] = 0xffb5,
- [0x2e82] = 0xffb6, [0x2e83] = 0xffb7, [0x2e84] = 0xffb8, [0x2e85] = 0xffb9,
- [0x2e86] = 0xffba, [0x2e87] = 0xffbb, [0x2e88] = 0xffbc, [0x2e89] = 0xffbd,
- [0x2e8a] = 0xffbe, [0x2e8b] = 0xffbf, [0x2e8c] = 0xffc0, [0x2e8d] = 0xffc1,
- [0x2e8e] = 0xffc2, [0x2e8f] = 0xffc3, [0x2e90] = 0xffc4, [0x2e91] = 0xffc5,
- [0x2e92] = 0xffc6, [0x2e93] = 0xffc7, [0x2e94] = 0xffc8, [0x2e95] = 0xffc9,
- [0x2e96] = 0xffca, [0x2e97] = 0xffcb, [0x2e98] = 0xffcc, [0x2e99] = 0xffcd,
- [0x2e9a] = 0xffce, [0x2e9b] = 0xffcf, [0x2e9c] = 0xffd0, [0x2e9d] = 0xffd1,
- [0x2e9e] = 0xffd2, [0x2e9f] = 0xffd3, [0x2ea0] = 0xffd4, [0x2ea1] = 0xffd5,
- [0x2ea2] = 0xffd6, [0x2ea3] = 0xffd7, [0x2ea4] = 0xffd8, [0x2ea5] = 0xffd9,
- [0x2ea6] = 0xffda, [0x2ea7] = 0xffdb, [0x2ea8] = 0xffdc, [0x2ea9] = 0xffdd,
- [0x2eaa] = 0xffde, [0x2eab] = 0xffdf,
+ [0x2d6c] = 0xfe0e, [0x2d6d] = 0xfe0f, [0x2d6e] = 0xe78d, [0x2d6f] = 0xe78f,
+ [0x2d70] = 0xe78e, [0x2d71] = 0xe790, [0x2d72] = 0xe791, [0x2d73] = 0xe792,
+ [0x2d74] = 0xe793, [0x2d75] = 0xe794, [0x2d76] = 0xe795, [0x2d77] = 0xe796,
+ [0x2d78] = 0xfe1a, [0x2d79] = 0xfe1b, [0x2d7a] = 0xfe1c, [0x2d7b] = 0xfe1d,
+ [0x2d7c] = 0xfe1e, [0x2d7d] = 0xfe1f, [0x2d7e] = 0xfe20, [0x2d7f] = 0xfe21,
+ [0x2d80] = 0xfe22, [0x2d81] = 0xfe23, [0x2d82] = 0xfe24, [0x2d83] = 0xfe25,
+ [0x2d84] = 0xfe26, [0x2d85] = 0xfe27, [0x2d86] = 0xfe28, [0x2d87] = 0xfe29,
+ [0x2d88] = 0xfe2a, [0x2d89] = 0xfe2b, [0x2d8a] = 0xfe2c, [0x2d8b] = 0xfe2d,
+ [0x2d8c] = 0xfe2e, [0x2d8d] = 0xfe2f, [0x2d8e] = 0xfe32, [0x2d8f] = 0xfe45,
+ [0x2d90] = 0xfe46, [0x2d91] = 0xfe47, [0x2d92] = 0xfe48, [0x2d93] = 0xfe53,
+ [0x2d94] = 0xfe58, [0x2d95] = 0xfe67, [0x2d96] = 0xfe6c, [0x2d97] = 0xfe6d,
+ [0x2d98] = 0xfe6e, [0x2d99] = 0xfe6f, [0x2d9a] = 0xfe70, [0x2d9b] = 0xfe71,
+ [0x2d9c] = 0xfe72, [0x2d9d] = 0xfe73, [0x2d9e] = 0xfe74, [0x2d9f] = 0xfe75,
+ [0x2da0] = 0xfe76, [0x2da1] = 0xfe77, [0x2da2] = 0xfe78, [0x2da3] = 0xfe79,
+ [0x2da4] = 0xfe7a, [0x2da5] = 0xfe7b, [0x2da6] = 0xfe7c, [0x2da7] = 0xfe7d,
+ [0x2da8] = 0xfe7e, [0x2da9] = 0xfe7f, [0x2daa] = 0xfe80, [0x2dab] = 0xfe81,
+ [0x2dac] = 0xfe82, [0x2dad] = 0xfe83, [0x2dae] = 0xfe84, [0x2daf] = 0xfe85,
+ [0x2db0] = 0xfe86, [0x2db1] = 0xfe87, [0x2db2] = 0xfe88, [0x2db3] = 0xfe89,
+ [0x2db4] = 0xfe8a, [0x2db5] = 0xfe8b, [0x2db6] = 0xfe8c, [0x2db7] = 0xfe8d,
+ [0x2db8] = 0xfe8e, [0x2db9] = 0xfe8f, [0x2dba] = 0xfe90, [0x2dbb] = 0xfe91,
+ [0x2dbc] = 0xfe92, [0x2dbd] = 0xfe93, [0x2dbe] = 0xfe94, [0x2dbf] = 0xfe95,
+ [0x2dc0] = 0xfe96, [0x2dc1] = 0xfe97, [0x2dc2] = 0xfe98, [0x2dc3] = 0xfe99,
+ [0x2dc4] = 0xfe9a, [0x2dc5] = 0xfe9b, [0x2dc6] = 0xfe9c, [0x2dc7] = 0xfe9d,
+ [0x2dc8] = 0xfe9e, [0x2dc9] = 0xfe9f, [0x2dca] = 0xfea0, [0x2dcb] = 0xfea1,
+ [0x2dcc] = 0xfea2, [0x2dcd] = 0xfea3, [0x2dce] = 0xfea4, [0x2dcf] = 0xfea5,
+ [0x2dd0] = 0xfea6, [0x2dd1] = 0xfea7, [0x2dd2] = 0xfea8, [0x2dd3] = 0xfea9,
+ [0x2dd4] = 0xfeaa, [0x2dd5] = 0xfeab, [0x2dd6] = 0xfeac, [0x2dd7] = 0xfead,
+ [0x2dd8] = 0xfeae, [0x2dd9] = 0xfeaf, [0x2dda] = 0xfeb0, [0x2ddb] = 0xfeb1,
+ [0x2ddc] = 0xfeb2, [0x2ddd] = 0xfeb3, [0x2dde] = 0xfeb4, [0x2ddf] = 0xfeb5,
+ [0x2de0] = 0xfeb6, [0x2de1] = 0xfeb7, [0x2de2] = 0xfeb8, [0x2de3] = 0xfeb9,
+ [0x2de4] = 0xfeba, [0x2de5] = 0xfebb, [0x2de6] = 0xfebc, [0x2de7] = 0xfebd,
+ [0x2de8] = 0xfebe, [0x2de9] = 0xfebf, [0x2dea] = 0xfec0, [0x2deb] = 0xfec1,
+ [0x2dec] = 0xfec2, [0x2ded] = 0xfec3, [0x2dee] = 0xfec4, [0x2def] = 0xfec5,
+ [0x2df0] = 0xfec6, [0x2df1] = 0xfec7, [0x2df2] = 0xfec8, [0x2df3] = 0xfec9,
+ [0x2df4] = 0xfeca, [0x2df5] = 0xfecb, [0x2df6] = 0xfecc, [0x2df7] = 0xfecd,
+ [0x2df8] = 0xfece, [0x2df9] = 0xfecf, [0x2dfa] = 0xfed0, [0x2dfb] = 0xfed1,
+ [0x2dfc] = 0xfed2, [0x2dfd] = 0xfed3, [0x2dfe] = 0xfed4, [0x2dff] = 0xfed5,
+ [0x2e00] = 0xfed6, [0x2e01] = 0xfed7, [0x2e02] = 0xfed8, [0x2e03] = 0xfed9,
+ [0x2e04] = 0xfeda, [0x2e05] = 0xfedb, [0x2e06] = 0xfedc, [0x2e07] = 0xfedd,
+ [0x2e08] = 0xfede, [0x2e09] = 0xfedf, [0x2e0a] = 0xfee0, [0x2e0b] = 0xfee1,
+ [0x2e0c] = 0xfee2, [0x2e0d] = 0xfee3, [0x2e0e] = 0xfee4, [0x2e0f] = 0xfee5,
+ [0x2e10] = 0xfee6, [0x2e11] = 0xfee7, [0x2e12] = 0xfee8, [0x2e13] = 0xfee9,
+ [0x2e14] = 0xfeea, [0x2e15] = 0xfeeb, [0x2e16] = 0xfeec, [0x2e17] = 0xfeed,
+ [0x2e18] = 0xfeee, [0x2e19] = 0xfeef, [0x2e1a] = 0xfef0, [0x2e1b] = 0xfef1,
+ [0x2e1c] = 0xfef2, [0x2e1d] = 0xfef3, [0x2e1e] = 0xfef4, [0x2e1f] = 0xfef5,
+ [0x2e20] = 0xfef6, [0x2e21] = 0xfef7, [0x2e22] = 0xfef8, [0x2e23] = 0xfef9,
+ [0x2e24] = 0xfefa, [0x2e25] = 0xfefb, [0x2e26] = 0xfefc, [0x2e27] = 0xfefd,
+ [0x2e28] = 0xfefe, [0x2e29] = 0xfeff, [0x2e2a] = 0xff00, [0x2e2b] = 0xff5f,
+ [0x2e2c] = 0xff60, [0x2e2d] = 0xff61, [0x2e2e] = 0xff62, [0x2e2f] = 0xff63,
+ [0x2e30] = 0xff64, [0x2e31] = 0xff65, [0x2e32] = 0xff66, [0x2e33] = 0xff67,
+ [0x2e34] = 0xff68, [0x2e35] = 0xff69, [0x2e36] = 0xff6a, [0x2e37] = 0xff6b,
+ [0x2e38] = 0xff6c, [0x2e39] = 0xff6d, [0x2e3a] = 0xff6e, [0x2e3b] = 0xff6f,
+ [0x2e3c] = 0xff70, [0x2e3d] = 0xff71, [0x2e3e] = 0xff72, [0x2e3f] = 0xff73,
+ [0x2e40] = 0xff74, [0x2e41] = 0xff75, [0x2e42] = 0xff76, [0x2e43] = 0xff77,
+ [0x2e44] = 0xff78, [0x2e45] = 0xff79, [0x2e46] = 0xff7a, [0x2e47] = 0xff7b,
+ [0x2e48] = 0xff7c, [0x2e49] = 0xff7d, [0x2e4a] = 0xff7e, [0x2e4b] = 0xff7f,
+ [0x2e4c] = 0xff80, [0x2e4d] = 0xff81, [0x2e4e] = 0xff82, [0x2e4f] = 0xff83,
+ [0x2e50] = 0xff84, [0x2e51] = 0xff85, [0x2e52] = 0xff86, [0x2e53] = 0xff87,
+ [0x2e54] = 0xff88, [0x2e55] = 0xff89, [0x2e56] = 0xff8a, [0x2e57] = 0xff8b,
+ [0x2e58] = 0xff8c, [0x2e59] = 0xff8d, [0x2e5a] = 0xff8e, [0x2e5b] = 0xff8f,
+ [0x2e5c] = 0xff90, [0x2e5d] = 0xff91, [0x2e5e] = 0xff92, [0x2e5f] = 0xff93,
+ [0x2e60] = 0xff94, [0x2e61] = 0xff95, [0x2e62] = 0xff96, [0x2e63] = 0xff97,
+ [0x2e64] = 0xff98, [0x2e65] = 0xff99, [0x2e66] = 0xff9a, [0x2e67] = 0xff9b,
+ [0x2e68] = 0xff9c, [0x2e69] = 0xff9d, [0x2e6a] = 0xff9e, [0x2e6b] = 0xff9f,
+ [0x2e6c] = 0xffa0, [0x2e6d] = 0xffa1, [0x2e6e] = 0xffa2, [0x2e6f] = 0xffa3,
+ [0x2e70] = 0xffa4, [0x2e71] = 0xffa5, [0x2e72] = 0xffa6, [0x2e73] = 0xffa7,
+ [0x2e74] = 0xffa8, [0x2e75] = 0xffa9, [0x2e76] = 0xffaa, [0x2e77] = 0xffab,
+ [0x2e78] = 0xffac, [0x2e79] = 0xffad, [0x2e7a] = 0xffae, [0x2e7b] = 0xffaf,
+ [0x2e7c] = 0xffb0, [0x2e7d] = 0xffb1, [0x2e7e] = 0xffb2, [0x2e7f] = 0xffb3,
+ [0x2e80] = 0xffb4, [0x2e81] = 0xffb5, [0x2e82] = 0xffb6, [0x2e83] = 0xffb7,
+ [0x2e84] = 0xffb8, [0x2e85] = 0xffb9, [0x2e86] = 0xffba, [0x2e87] = 0xffbb,
+ [0x2e88] = 0xffbc, [0x2e89] = 0xffbd, [0x2e8a] = 0xffbe, [0x2e8b] = 0xffbf,
+ [0x2e8c] = 0xffc0, [0x2e8d] = 0xffc1, [0x2e8e] = 0xffc2, [0x2e8f] = 0xffc3,
+ [0x2e90] = 0xffc4, [0x2e91] = 0xffc5, [0x2e92] = 0xffc6, [0x2e93] = 0xffc7,
+ [0x2e94] = 0xffc8, [0x2e95] = 0xffc9, [0x2e96] = 0xffca, [0x2e97] = 0xffcb,
+ [0x2e98] = 0xffcc, [0x2e99] = 0xffcd, [0x2e9a] = 0xffce, [0x2e9b] = 0xffcf,
+ [0x2e9c] = 0xffd0, [0x2e9d] = 0xffd1, [0x2e9e] = 0xffd2, [0x2e9f] = 0xffd3,
+ [0x2ea0] = 0xffd4, [0x2ea1] = 0xffd5, [0x2ea2] = 0xffd6, [0x2ea3] = 0xffd7,
+ [0x2ea4] = 0xffd8, [0x2ea5] = 0xffd9, [0x2ea6] = 0xffda, [0x2ea7] = 0xffdb,
+ [0x2ea8] = 0xffdc, [0x2ea9] = 0xffdd, [0x2eaa] = 0xffde, [0x2eab] = 0xffdf,
};
/* Table for UCS-4 -> GB18030, for the range U+0080..U+9FBB.
@@ -23437,71 +23442,79 @@ static const unsigned char __ucs_to_gb18030_tab2[][2] =
[0x0783] = "\xa5\xfd", [0x0784] = "\xa5\xfe", [0x0785] = "\xa6\xb9",
[0x0786] = "\xa6\xba", [0x0787] = "\xa6\xbb", [0x0788] = "\xa6\xbc",
[0x0789] = "\xa6\xbd", [0x078a] = "\xa6\xbe", [0x078b] = "\xa6\xbf",
- [0x078c] = "\xa6\xc0", [0x0797] = "\xa6\xf6", [0x0798] = "\xa6\xf7",
- [0x0799] = "\xa6\xf8", [0x079a] = "\xa6\xf9", [0x079b] = "\xa6\xfa",
- [0x079c] = "\xa6\xfb", [0x079d] = "\xa6\xfc", [0x079e] = "\xa6\xfd",
- [0x079f] = "\xa6\xfe", [0x07a0] = "\xa7\xc2", [0x07a1] = "\xa7\xc3",
- [0x07a2] = "\xa7\xc4", [0x07a3] = "\xa7\xc5", [0x07a4] = "\xa7\xc6",
- [0x07a5] = "\xa7\xc7", [0x07a6] = "\xa7\xc8", [0x07a7] = "\xa7\xc9",
- [0x07a8] = "\xa7\xca", [0x07a9] = "\xa7\xcb", [0x07aa] = "\xa7\xcc",
- [0x07ab] = "\xa7\xcd", [0x07ac] = "\xa7\xce", [0x07ad] = "\xa7\xcf",
- [0x07ae] = "\xa7\xd0", [0x07af] = "\xa7\xf2", [0x07b0] = "\xa7\xf3",
- [0x07b1] = "\xa7\xf4", [0x07b2] = "\xa7\xf5", [0x07b3] = "\xa7\xf6",
- [0x07b4] = "\xa7\xf7", [0x07b5] = "\xa7\xf8", [0x07b6] = "\xa7\xf9",
- [0x07b7] = "\xa7\xfa", [0x07b8] = "\xa7\xfb", [0x07b9] = "\xa7\xfc",
- [0x07ba] = "\xa7\xfd", [0x07bb] = "\xa7\xfe", [0x07bc] = "\xa8\x96",
- [0x07bd] = "\xa8\x97", [0x07be] = "\xa8\x98", [0x07bf] = "\xa8\x99",
- [0x07c0] = "\xa8\x9a", [0x07c1] = "\xa8\x9b", [0x07c2] = "\xa8\x9c",
- [0x07c3] = "\xa8\x9d", [0x07c4] = "\xa8\x9e", [0x07c5] = "\xa8\x9f",
- [0x07c6] = "\xa8\xa0", [0x07c7] = "\x00\x01", [0x07c8] = "\x65\x9e",
- [0x07c9] = "\xa8\xc1", [0x07ca] = "\xa8\xc2", [0x07cb] = "\xa8\xc3",
- [0x07cc] = "\xa8\xc4", [0x07cd] = "\xa8\xea", [0x07ce] = "\xa8\xeb",
- [0x07cf] = "\xa8\xec", [0x07d0] = "\xa8\xed", [0x07d1] = "\xa8\xee",
- [0x07d2] = "\xa8\xef", [0x07d3] = "\xa8\xf0", [0x07d4] = "\xa8\xf1",
- [0x07d5] = "\xa8\xf2", [0x07d6] = "\xa8\xf3", [0x07d7] = "\xa8\xf4",
- [0x07d8] = "\xa8\xf5", [0x07d9] = "\xa8\xf6", [0x07da] = "\xa8\xf7",
- [0x07db] = "\xa8\xf8", [0x07dc] = "\xa8\xf9", [0x07dd] = "\xa8\xfa",
- [0x07de] = "\xa8\xfb", [0x07df] = "\xa8\xfc", [0x07e0] = "\xa8\xfd",
- [0x07e1] = "\xa8\xfe", [0x07e2] = "\xa9\x58", [0x07e3] = "\xa9\x5b",
- [0x07e4] = "\xa9\x5d", [0x07e5] = "\xa9\x5e", [0x07e6] = "\xa9\x5f",
- [0x07e7] = "\x65\x9f", [0x07e8] = "\x65\xa0", [0x07e9] = "\x65\xa1",
- [0x07ea] = "\x65\xa2", [0x07eb] = "\x65\xa3", [0x07ec] = "\x65\xa4",
- [0x07ed] = "\x65\xa5", [0x07ee] = "\x65\xa6", [0x07ef] = "\x65\xa7",
- [0x07f0] = "\x65\xa8", [0x07f1] = "\x65\xa9", [0x07f2] = "\x65\xaa",
- [0x07f3] = "\x65\xab", [0x07f4] = "\xa9\x97", [0x07f5] = "\xa9\x98",
- [0x07f6] = "\xa9\x99", [0x07f7] = "\xa9\x9a", [0x07f8] = "\xa9\x9b",
- [0x07f9] = "\xa9\x9c", [0x07fa] = "\xa9\x9d", [0x07fb] = "\xa9\x9e",
- [0x07fc] = "\xa9\x9f", [0x07fd] = "\xa9\xa0", [0x07fe] = "\xa9\xa1",
- [0x07ff] = "\xa9\xa2", [0x0800] = "\xa9\xa3", [0x0801] = "\xa9\xf0",
- [0x0802] = "\xa9\xf1", [0x0803] = "\xa9\xf2", [0x0804] = "\xa9\xf3",
- [0x0805] = "\xa9\xf4", [0x0806] = "\xa9\xf5", [0x0807] = "\xa9\xf6",
- [0x0808] = "\xa9\xf7", [0x0809] = "\xa9\xf8", [0x080a] = "\xa9\xf9",
- [0x080b] = "\xa9\xfa", [0x080c] = "\xa9\xfb", [0x080d] = "\xa9\xfc",
- [0x080e] = "\xa9\xfd", [0x080f] = "\xa9\xfe", [0x0810] = "\xd7\xfa",
- [0x0811] = "\xd7\xfb", [0x0812] = "\xd7\xfc", [0x0813] = "\xd7\xfd",
- [0x0814] = "\xd7\xfe", [0x0815] = "\x65\xac", [0x0819] = "\x65\xad",
- [0x081a] = "\x65\xae", [0x081b] = "\x65\xaf", [0x081c] = "\x65\xb0",
- [0x081d] = "\x65\xb1", [0x081f] = "\x65\xb2", [0x0820] = "\x65\xb3",
- [0x0821] = "\x65\xb4", [0x0822] = "\x65\xb5", [0x0823] = "\x65\xb6",
- [0x0824] = "\x65\xb7", [0x0825] = "\x65\xb8", [0x0827] = "\x65\xb9",
+ [0x078c] = "\xa6\xc0", [0x078d] = "\x7b\x84", [0x078e] = "\x7b\x86",
+ [0x078f] = "\x7b\x85", [0x0790] = "\x7b\x87", [0x0791] = "\x7b\x88",
+ [0x0792] = "\x7b\x89", [0x0793] = "\x7b\x8a", [0x0794] = "\x7b\x8b",
+ [0x0795] = "\x7b\x8c", [0x0796] = "\x7b\x8d", [0x0797] = "\xa6\xf6",
+ [0x0798] = "\xa6\xf7", [0x0799] = "\xa6\xf8", [0x079a] = "\xa6\xf9",
+ [0x079b] = "\xa6\xfa", [0x079c] = "\xa6\xfb", [0x079d] = "\xa6\xfc",
+ [0x079e] = "\xa6\xfd", [0x079f] = "\xa6\xfe", [0x07a0] = "\xa7\xc2",
+ [0x07a1] = "\xa7\xc3", [0x07a2] = "\xa7\xc4", [0x07a3] = "\xa7\xc5",
+ [0x07a4] = "\xa7\xc6", [0x07a5] = "\xa7\xc7", [0x07a6] = "\xa7\xc8",
+ [0x07a7] = "\xa7\xc9", [0x07a8] = "\xa7\xca", [0x07a9] = "\xa7\xcb",
+ [0x07aa] = "\xa7\xcc", [0x07ab] = "\xa7\xcd", [0x07ac] = "\xa7\xce",
+ [0x07ad] = "\xa7\xcf", [0x07ae] = "\xa7\xd0", [0x07af] = "\xa7\xf2",
+ [0x07b0] = "\xa7\xf3", [0x07b1] = "\xa7\xf4", [0x07b2] = "\xa7\xf5",
+ [0x07b3] = "\xa7\xf6", [0x07b4] = "\xa7\xf7", [0x07b5] = "\xa7\xf8",
+ [0x07b6] = "\xa7\xf9", [0x07b7] = "\xa7\xfa", [0x07b8] = "\xa7\xfb",
+ [0x07b9] = "\xa7\xfc", [0x07ba] = "\xa7\xfd", [0x07bb] = "\xa7\xfe",
+ [0x07bc] = "\xa8\x96", [0x07bd] = "\xa8\x97", [0x07be] = "\xa8\x98",
+ [0x07bf] = "\xa8\x99", [0x07c0] = "\xa8\x9a", [0x07c1] = "\xa8\x9b",
+ [0x07c2] = "\xa8\x9c", [0x07c3] = "\xa8\x9d", [0x07c4] = "\xa8\x9e",
+ [0x07c5] = "\xa8\x9f", [0x07c6] = "\xa8\xa0", [0x07c7] = "\x00\x01",
+ [0x07c8] = "\x65\x9e", [0x07c9] = "\xa8\xc1", [0x07ca] = "\xa8\xc2",
+ [0x07cb] = "\xa8\xc3", [0x07cc] = "\xa8\xc4", [0x07cd] = "\xa8\xea",
+ [0x07ce] = "\xa8\xeb", [0x07cf] = "\xa8\xec", [0x07d0] = "\xa8\xed",
+ [0x07d1] = "\xa8\xee", [0x07d2] = "\xa8\xef", [0x07d3] = "\xa8\xf0",
+ [0x07d4] = "\xa8\xf1", [0x07d5] = "\xa8\xf2", [0x07d6] = "\xa8\xf3",
+ [0x07d7] = "\xa8\xf4", [0x07d8] = "\xa8\xf5", [0x07d9] = "\xa8\xf6",
+ [0x07da] = "\xa8\xf7", [0x07db] = "\xa8\xf8", [0x07dc] = "\xa8\xf9",
+ [0x07dd] = "\xa8\xfa", [0x07de] = "\xa8\xfb", [0x07df] = "\xa8\xfc",
+ [0x07e0] = "\xa8\xfd", [0x07e1] = "\xa8\xfe", [0x07e2] = "\xa9\x58",
+ [0x07e3] = "\xa9\x5b", [0x07e4] = "\xa9\x5d", [0x07e5] = "\xa9\x5e",
+ [0x07e6] = "\xa9\x5f", [0x07e7] = "\x65\x9f", [0x07e8] = "\x65\xa0",
+ [0x07e9] = "\x65\xa1", [0x07ea] = "\x65\xa2", [0x07eb] = "\x65\xa3",
+ [0x07ec] = "\x65\xa4", [0x07ed] = "\x65\xa5", [0x07ee] = "\x65\xa6",
+ [0x07ef] = "\x65\xa7", [0x07f0] = "\x65\xa8", [0x07f1] = "\x65\xa9",
+ [0x07f2] = "\x65\xaa", [0x07f3] = "\x65\xab", [0x07f4] = "\xa9\x97",
+ [0x07f5] = "\xa9\x98", [0x07f6] = "\xa9\x99", [0x07f7] = "\xa9\x9a",
+ [0x07f8] = "\xa9\x9b", [0x07f9] = "\xa9\x9c", [0x07fa] = "\xa9\x9d",
+ [0x07fb] = "\xa9\x9e", [0x07fc] = "\xa9\x9f", [0x07fd] = "\xa9\xa0",
+ [0x07fe] = "\xa9\xa1", [0x07ff] = "\xa9\xa2", [0x0800] = "\xa9\xa3",
+ [0x0801] = "\xa9\xf0", [0x0802] = "\xa9\xf1", [0x0803] = "\xa9\xf2",
+ [0x0804] = "\xa9\xf3", [0x0805] = "\xa9\xf4", [0x0806] = "\xa9\xf5",
+ [0x0807] = "\xa9\xf6", [0x0808] = "\xa9\xf7", [0x0809] = "\xa9\xf8",
+ [0x080a] = "\xa9\xf9", [0x080b] = "\xa9\xfa", [0x080c] = "\xa9\xfb",
+ [0x080d] = "\xa9\xfc", [0x080e] = "\xa9\xfd", [0x080f] = "\xa9\xfe",
+ [0x0810] = "\xd7\xfa", [0x0811] = "\xd7\xfb", [0x0812] = "\xd7\xfc",
+ [0x0813] = "\xd7\xfd", [0x0814] = "\xd7\xfe", [0x0815] = "\x65\xac",
+ [0x0816] = "\xfe\x51", [0x0817] = "\xfe\x52", [0x0818] = "\xfe\x53",
+ [0x0819] = "\x65\xad", [0x081a] = "\x65\xae", [0x081b] = "\x65\xaf",
+ [0x081c] = "\x65\xb0", [0x081d] = "\x65\xb1", [0x081e] = "\x2d\x51",
+ [0x081f] = "\x65\xb2", [0x0820] = "\x65\xb3", [0x0821] = "\x65\xb4",
+ [0x0822] = "\x65\xb5", [0x0823] = "\x65\xb6", [0x0824] = "\x65\xb7",
+ [0x0825] = "\x65\xb8", [0x0826] = "\x2d\x52", [0x0827] = "\x65\xb9",
[0x0828] = "\x65\xba", [0x0829] = "\x65\xbb", [0x082a] = "\x65\xbc",
- [0x082d] = "\x65\xbd", [0x082e] = "\x65\xbe", [0x082f] = "\x65\xbf",
- [0x0830] = "\x65\xc0", [0x0833] = "\x65\xc1", [0x0834] = "\x65\xc2",
- [0x0835] = "\x65\xc3", [0x0836] = "\x65\xc4", [0x0837] = "\x65\xc5",
- [0x0838] = "\x65\xc6", [0x0839] = "\x65\xc7", [0x083a] = "\x65\xc8",
- [0x083c] = "\x65\xc9", [0x083d] = "\x65\xca", [0x083e] = "\x65\xcb",
- [0x083f] = "\x65\xcc", [0x0840] = "\x65\xcd", [0x0841] = "\x65\xce",
- [0x0842] = "\x65\xcf", [0x0844] = "\x65\xd0", [0x0845] = "\x65\xd1",
+ [0x082b] = "\x2d\x53", [0x082c] = "\x2d\x54", [0x082d] = "\x65\xbd",
+ [0x082e] = "\x65\xbe", [0x082f] = "\x65\xbf", [0x0830] = "\x65\xc0",
+ [0x0831] = "\xfe\x6c", [0x0832] = "\x2d\x55", [0x0833] = "\x65\xc1",
+ [0x0834] = "\x65\xc2", [0x0835] = "\x65\xc3", [0x0836] = "\x65\xc4",
+ [0x0837] = "\x65\xc5", [0x0838] = "\x65\xc6", [0x0839] = "\x65\xc7",
+ [0x083a] = "\x65\xc8", [0x083b] = "\xfe\x76", [0x083c] = "\x65\xc9",
+ [0x083d] = "\x65\xca", [0x083e] = "\x65\xcb", [0x083f] = "\x65\xcc",
+ [0x0840] = "\x65\xcd", [0x0841] = "\x65\xce", [0x0842] = "\x65\xcf",
+ [0x0843] = "\x2d\x56", [0x0844] = "\x65\xd0", [0x0845] = "\x65\xd1",
[0x0846] = "\x65\xd2", [0x0847] = "\x65\xd3", [0x0848] = "\x65\xd4",
[0x0849] = "\x65\xd5", [0x084a] = "\x65\xd6", [0x084b] = "\x65\xd7",
[0x084c] = "\x65\xd8", [0x084d] = "\x65\xd9", [0x084e] = "\x65\xda",
[0x084f] = "\x65\xdb", [0x0850] = "\x65\xdc", [0x0851] = "\x65\xdd",
- [0x0852] = "\x65\xde", [0x0853] = "\x65\xdf", [0x0856] = "\x65\xe0",
- [0x0857] = "\x65\xe1", [0x0858] = "\x65\xe2", [0x0859] = "\x65\xe3",
- [0x085a] = "\x65\xe4", [0x085b] = "\x65\xe5", [0x085c] = "\x65\xe6",
- [0x085d] = "\x65\xe7", [0x085e] = "\x65\xe8", [0x085f] = "\x65\xe9",
- [0x0860] = "\x65\xea", [0x0861] = "\x65\xeb", [0x0862] = "\x65\xec",
- [0x0863] = "\x65\xed", [0x0865] = "\xfd\x9c", [0x0866] = "\x76\xb5",
+ [0x0852] = "\x65\xde", [0x0853] = "\x65\xdf", [0x0854] = "\x2d\x57",
+ [0x0855] = "\xfe\x91", [0x0856] = "\x65\xe0", [0x0857] = "\x65\xe1",
+ [0x0858] = "\x65\xe2", [0x0859] = "\x65\xe3", [0x085a] = "\x65\xe4",
+ [0x085b] = "\x65\xe5", [0x085c] = "\x65\xe6", [0x085d] = "\x65\xe7",
+ [0x085e] = "\x65\xe8", [0x085f] = "\x65\xe9", [0x0860] = "\x65\xea",
+ [0x0861] = "\x65\xeb", [0x0862] = "\x65\xec", [0x0863] = "\x65\xed",
+ [0x0864] = "\x2d\x58", [0x0865] = "\xfd\x9c", [0x0866] = "\x76\xb5",
[0x0867] = "\x76\xb6", [0x0868] = "\x76\xb7", [0x0869] = "\x76\xb8",
[0x086a] = "\x76\xb9", [0x086b] = "\x76\xba", [0x086c] = "\x76\xbb",
[0x086d] = "\x76\xbc", [0x086e] = "\x76\xbd", [0x086f] = "\x76\xbe",
@@ -24211,24 +24224,8 @@ static const unsigned char __ucs_to_gb18030_tab2[][2] =
|| (ch = __twobyte_to_ucs[idx], \
ch == 0 && *inptr != '\0')) \
{ \
- /* Handle a few special cases. */ \
- if (idx == 0x5dd1) \
- ch = 0x20087; \
- else if (idx == 0x5dd2) \
- ch = 0x20089; \
- else if (idx == 0x5dd3) \
- ch = 0x200cc; \
- else if (idx == 0x5dec) \
- ch = 0x215D7; \
- else if (idx == 0x5df6) \
- ch = 0x2298F; \
- else if (idx == 0x5e11) \
- ch = 0x241FE; \
- else \
- { \
- /* This is an illegal character. */ \
- STANDARD_FROM_LOOP_ERR_HANDLER (2); \
- } \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
} \
\
inptr += 2; \
@@ -24320,17 +24317,35 @@ static const unsigned char __ucs_to_gb18030_tab2[][2] =
len = 4; \
} \
else if (ch == 0x20087) \
- cp = (const unsigned char *) "\xfe\x51"; \
+ { \
+ idx = 0x3E2CF; \
+ len = 4; \
+ } \
else if (ch == 0x20089) \
- cp = (const unsigned char *) "\xfe\x52"; \
+ { \
+ idx = 0x3E2D1; \
+ len = 4; \
+ } \
else if (ch == 0x200CC) \
- cp = (const unsigned char *) "\xfe\x53"; \
+ { \
+ idx = 0x3E314; \
+ len = 4; \
+ } \
else if (ch == 0x215d7) \
- cp = (const unsigned char *) "\xfe\x6c"; \
+ { \
+ idx = 0x3F81F; \
+ len = 4; \
+ } \
else if (ch == 0x2298F) \
- cp = (const unsigned char *) "\xfe\x76"; \
+ { \
+ idx = 0x40BD7; \
+ len = 4; \
+ } \
else if (ch == 0x241FE) \
- cp = (const unsigned char *) "\xfe\x91"; \
+ { \
+ idx = 0x42446; \
+ len = 4; \
+ } \
else if (ch >= 0x10000 && ch <= 0x10FFFF) \
{ \
idx = ch + 0x1E248; \
diff --git a/iconvdata/tst-table-from.c b/iconvdata/tst-table-from.c
index 09aaaf0942..55a7113d8c 100644
--- a/iconvdata/tst-table-from.c
+++ b/iconvdata/tst-table-from.c
@@ -194,10 +194,9 @@ main (int argc, char *argv[])
exit (1);
}
- /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
+ /* When testing UTF-8, stop at 0x10000, otherwise the output
file gets too big. */
- bmp_only = (strcmp (charset, "UTF-8") == 0
- || strcmp (charset, "GB18030") == 0);
+ bmp_only = (strcmp (charset, "UTF-8") == 0);
search_depth = (strcmp (charset, "UTF-8") == 0 ? 3 : 4);
{
diff --git a/iconvdata/tst-table-to.c b/iconvdata/tst-table-to.c
index 4dec4acad1..2b75f0c6e8 100644
--- a/iconvdata/tst-table-to.c
+++ b/iconvdata/tst-table-to.c
@@ -32,6 +32,7 @@ main (int argc, char *argv[])
const char *charset;
iconv_t cd;
int bmp_only;
+ int no_tags;
if (argc != 2)
{
@@ -47,16 +48,19 @@ main (int argc, char *argv[])
return 1;
}
- /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
+ /* When testing UTF-8, stop at 0x10000, otherwise the output
file gets too big. */
- bmp_only = (strcmp (charset, "UTF-8") == 0
+ bmp_only = (strcmp (charset, "UTF-8") == 0);
+ /* When testing any encoding other than UTF-8 or GB18030, stop at 0xE0000,
+ because the conversion drops Unicode tag characters (range
+ U+E0000..U+E007F). */
+ no_tags = !(strcmp (charset, "UTF-8") == 0
|| strcmp (charset, "GB18030") == 0);
{
unsigned int i;
unsigned char buf[10];
-
- for (i = 0; i < (bmp_only ? 0x10000 : 0x30000); i++)
+ for (i = 0; i < (bmp_only ? 0x10000 : no_tags ? 0xE0000 : 0x110000); i++)
{
unsigned char in[6];
unsigned int incount =
diff --git a/iconvdata/tst-table.sh b/iconvdata/tst-table.sh
index bc6f542b24..7ba15bbf5c 100755
--- a/iconvdata/tst-table.sh
+++ b/iconvdata/tst-table.sh
@@ -37,7 +37,8 @@ set -e
< ../localedata/charmaps/${charmap:-$charset} \
> ${objpfx}tst-${charset}.charmap.table
# When the charset is GB18030, truncate this table because for this encoding,
-# the tst-table-from and tst-table-to programs scan the Unicode BMP only.
+# the charmap contains ranges (<Unnnn>..<Ummmm> notation), which the
+# tst-table-charmap.sh script does not grok.
if test ${charset} = GB18030; then
grep '0x....$' < ${objpfx}tst-${charset}.charmap.table \
> ${objpfx}tst-${charset}.truncated.table
@@ -73,25 +74,42 @@ diff ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.inverse.table
# Check 1: charmap and iconv forward should be identical, except for
# precomposed characters.
-if test -f ${precomposed}; then
- cat ${objpfx}tst-${charset}.table ${precomposed} | sort | uniq -u \
- > ${objpfx}tst-${charset}.tmp.table
- cmp -s ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.tmp.table ||
+{ if test -f ${precomposed}; then
+ cat ${objpfx}tst-${charset}.table ${precomposed} | sort | uniq -u
+ else
+ cat ${objpfx}tst-${charset}.table
+ fi
+} | { if test ${charset} = GB18030; then grep '0x....$'; else cat; fi; } \
+ > ${objpfx}tst-${charset}.tmp1.table
+cmp -s ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.tmp1.table ||
exit 1
-else
- cmp -s ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.table ||
- exit 1
-fi
# Check 2: the difference between the charmap and iconv backward.
-if test -f ${irreversible}; then
- cat ${objpfx}tst-${charset}.charmap.table ${irreversible} | sort | uniq -u \
- > ${objpfx}tst-${charset}.tmp.table
- cmp -s ${objpfx}tst-${charset}.tmp.table ${objpfx}tst-${charset}.inverse.table ||
- exit 1
-else
- cmp -s ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.inverse.table ||
+{ if test -f ${irreversible}; then
+ cat ${objpfx}tst-${charset}.charmap.table ${irreversible} | sort | uniq -u
+ else
+ cat ${objpfx}tst-${charset}.charmap.table
+ fi
+} | { if test ${charset} = GB18030; then grep '0x....$'; else cat; fi; } \
+ > ${objpfx}tst-${charset}.tmp2c.table
+cat ${objpfx}tst-${charset}.inverse.table \
+ | { if test ${charset} = GB18030; then grep '0x....$'; else cat; fi; } \
+ > ${objpfx}tst-${charset}.tmp2i.table
+cmp -s ${objpfx}tst-${charset}.tmp2c.table ${objpfx}tst-${charset}.tmp2i.table ||
exit 1
+
+# Check 3: the difference between iconv forward and iconv backward. This is
+# necessary only for GB18030, because ${objpfx}tst-${charset}.charmap.table
+# is truncated for this encoding (see above).
+if test ${charset} = GB18030; then
+ { if test -f ${irreversible}; then
+ cat ${objpfx}tst-${charset}.table ${irreversible} | sort | uniq -u
+ else
+ cat ${objpfx}tst-${charset}.table
+ fi
+ } > ${objpfx}tst-${charset}.tmp3.table
+ cmp -s ${objpfx}tst-${charset}.tmp3.table ${objpfx}tst-${charset}.inverse.table ||
+ exit 1
fi
exit 0
diff --git a/localedata/charmaps/GB18030 b/localedata/charmaps/GB18030
index ad6728c5bd..fc3b1d2d40 100644
--- a/localedata/charmaps/GB18030
+++ b/localedata/charmaps/GB18030
@@ -57234,32 +57234,16 @@ CHARMAP
<UE78A> /xa6/xbe <Private Use>
<UE78B> /xa6/xbf <Private Use>
<UE78C> /xa6/xc0 <Private Use>
-% The newest GB 18030-2005 standard still uses some private use area
-% code points. Any implementation which has Unicode 4.1 or newer
-% support should not use these PUA code points, and instead should
-% map these entries to their equivalent non-PUA code points. There
-% are 24 idiograms in GB 18030-2005 which have non-PUA equivalents.
-% In glibc we only support roundtrip code points, and so must choose
-% between supporting the old PUA code points, or using the newer
-% non-PUA code points. We choose to use the non-PUA code points to
-% be compatible with ICU's similar choice. In choosing the non-PUA
-% code points we can no longer convert the old PUA code points back
-% to GB-18030-2005 (technically only fixable if we added support
-% for non-roundtrip code points e.g. ICU's "fallback mapping").
-% The recommendation to use the non-PUA code points, where available,
-% is based on "CJKV Information Processing" 2nd Ed. by Dr. Ken Lunde.
-%
-% These 10 PUA mappings use equivalents from <UFE10> to <UFE19>.
-% <UE78D> /xa6/xd9 <Private Use>
-% <UE78E> /xa6/xda <Private Use>
-% <UE78F> /xa6/xdb <Private Use>
-% <UE790> /xa6/xdc <Private Use>
-% <UE791> /xa6/xdd <Private Use>
-% <UE792> /xa6/xde <Private Use>
-% <UE793> /xa6/xdf <Private Use>
-% <UE794> /xa6/xec <Private Use>
-% <UE795> /xa6/xed <Private Use>
-% <UE796> /xa6/xf3 <Private Use>
+<UE78D> /x84/x31/x82/x36 <Private Use>
+<UE78E> /x84/x31/x82/x38 <Private Use>
+<UE78F> /x84/x31/x82/x37 <Private Use>
+<UE790> /x84/x31/x82/x39 <Private Use>
+<UE791> /x84/x31/x83/x30 <Private Use>
+<UE792> /x84/x31/x83/x31 <Private Use>
+<UE793> /x84/x31/x83/x32 <Private Use>
+<UE794> /x84/x31/x83/x33 <Private Use>
+<UE795> /x84/x31/x83/x34 <Private Use>
+<UE796> /x84/x31/x83/x35 <Private Use>
<UE797> /xa6/xf6 <Private Use>
<UE798> /xa6/xf7 <Private Use>
<UE799> /xa6/xf8 <Private Use>
@@ -57387,17 +57371,15 @@ CHARMAP
<UE813> /xd7/xfd <Private Use>
<UE814> /xd7/xfe <Private Use>
<UE815> /x83/x36/xc9/x34 <Private Use>
-% These 3 PUA mappings use equivalents <U20087>, <U20089> and <U200CC>.
-% <UE816> /xfe/x51 <Private Use>
-% <UE817> /xfe/x52 <Private Use>
-% <UE818> /xfe/x53 <Private Use>
+<UE816> /xfe/x51 <Private Use>
+<UE817> /xfe/x52 <Private Use>
+<UE818> /xfe/x53 <Private Use>
<UE819> /x83/x36/xc9/x35 <Private Use>
<UE81A> /x83/x36/xc9/x36 <Private Use>
<UE81B> /x83/x36/xc9/x37 <Private Use>
<UE81C> /x83/x36/xc9/x38 <Private Use>
<UE81D> /x83/x36/xc9/x39 <Private Use>
-% This 1 PUA mapping uses the equivalent <U9FB4>.
-% <UE81E> /xfe/x59 <Private Use>
+<UE81E> /x82/x35/x90/x37 <Private Use>
<UE81F> /x83/x36/xca/x30 <Private Use>
<UE820> /x83/x36/xca/x31 <Private Use>
<UE821> /x83/x36/xca/x32 <Private Use>
@@ -57405,22 +57387,19 @@ CHARMAP
<UE823> /x83/x36/xca/x34 <Private Use>
<UE824> /x83/x36/xca/x35 <Private Use>
<UE825> /x83/x36/xca/x36 <Private Use>
-% This 1 PUA mapping uses the equivalent <U9FB5>.
-% <UE826> /xfe/x61 <Private Use>
+<UE826> /x82/x35/x90/x38 <Private Use>
<UE827> /x83/x36/xca/x37 <Private Use>
<UE828> /x83/x36/xca/x38 <Private Use>
<UE829> /x83/x36/xca/x39 <Private Use>
<UE82A> /x83/x36/xcb/x30 <Private Use>
-% These 2 PUA mappings use the equivalents <U9FB6> and <U9FB7>.
-% <UE82B> /xfe/x66 <Private Use>
-% <UE82C> /xfe/x67 <Private Use>
+<UE82B> /x82/x35/x90/x39 <Private Use>
+<UE82C> /x82/x35/x91/x30 <Private Use>
<UE82D> /x83/x36/xcb/x31 <Private Use>
<UE82E> /x83/x36/xcb/x32 <Private Use>
<UE82F> /x83/x36/xcb/x33 <Private Use>
<UE830> /x83/x36/xcb/x34 <Private Use>
-% These 2 PUA mappings use the equivalents <U215D7> and <U9FB8>.
-% <UE831> /xfe/x6c <Private Use>
-% <UE832> /xfe/x6d <Private Use>
+<UE831> /xfe/x6c <Private Use>
+<UE832> /x82/x35/x91/x31 <Private Use>
<UE833> /x83/x36/xcb/x35 <Private Use>
<UE834> /x83/x36/xcb/x36 <Private Use>
<UE835> /x83/x36/xcb/x37 <Private Use>
@@ -57429,8 +57408,7 @@ CHARMAP
<UE838> /x83/x36/xcc/x30 <Private Use>
<UE839> /x83/x36/xcc/x31 <Private Use>
<UE83A> /x83/x36/xcc/x32 <Private Use>
-% This 1 PUA mapping uses the equivalent <U2298F>.
-% <UE83B> /xfe/x76 <Private Use>
+<UE83B> /xfe/x76 <Private Use>
<UE83C> /x83/x36/xcc/x33 <Private Use>
<UE83D> /x83/x36/xcc/x34 <Private Use>
<UE83E> /x83/x36/xcc/x35 <Private Use>
@@ -57438,8 +57416,7 @@ CHARMAP
<UE840> /x83/x36/xcc/x37 <Private Use>
<UE841> /x83/x36/xcc/x38 <Private Use>
<UE842> /x83/x36/xcc/x39 <Private Use>
-% This 1 PUA mapping uses the equivalent <U9FB9>.
-% <UE843> /xfe/x7e <Private Use>
+<UE843> /x82/x35/x91/x32 <Private Use>
<UE844> /x83/x36/xcd/x30 <Private Use>
<UE845> /x83/x36/xcd/x31 <Private Use>
<UE846> /x83/x36/xcd/x32 <Private Use>
@@ -57456,9 +57433,8 @@ CHARMAP
<UE851> /x83/x36/xce/x33 <Private Use>
<UE852> /x83/x36/xce/x34 <Private Use>
<UE853> /x83/x36/xce/x35 <Private Use>
-% These 2 PUA mappings use the equivalents <U9FBA> and <U241FE>.
-% <UE854> /xfe/x90 <Private Use>
-% <UE855> /xfe/x91 <Private Use>
+<UE854> /x82/x35/x91/x33 <Private Use>
+<UE855> /xfe/x91 <Private Use>
<UE856> /x83/x36/xce/x36 <Private Use>
<UE857> /x83/x36/xce/x37 <Private Use>
<UE858> /x83/x36/xce/x38 <Private Use>
@@ -57473,8 +57449,7 @@ CHARMAP
<UE861> /x83/x36/xcf/x37 <Private Use>
<UE862> /x83/x36/xcf/x38 <Private Use>
<UE863> /x83/x36/xcf/x39 <Private Use>
-% This 1 PUA mapping uses the equivalent <U9FBB>.
-% <UE864> /xfe/xa0 <Private Use>
+<UE864> /x82/x35/x91/x34 <Private Use>
<UE865> /x83/x36/xd0/x30 <Private Use>
<UE866> /x83/x36/xd0/x31 <Private Use>
<UE867> /x83/x36/xd0/x32 <Private Use>
@@ -70447,19 +70422,14 @@ CHARMAP
<U00020068>..<U00020071> /x95/x32/x8d/x30 <CJK>
<U00020072>..<U0002007B> /x95/x32/x8e/x30 <CJK>
<U0002007C>..<U00020085> /x95/x32/x8f/x30 <CJK>
-<U00020086> /x95/x32/x90/x30 <CJK>
-<U00020087> /xfe/x51 <CJK>
-<U00020088> /x95/x32/x90/x32 <CJK>
-<U00020089> /xfe/x52 <CJK>
-<U0002008A>..<U0002008F> /x95/x32/x90/x34 <CJK>
+<U00020086>..<U0002008F> /x95/x32/x90/x30 <CJK>
<U00020090>..<U00020099> /x95/x32/x91/x30 <CJK>
<U0002009A>..<U000200A3> /x95/x32/x92/x30 <CJK>
<U000200A4>..<U000200AD> /x95/x32/x93/x30 <CJK>
<U000200AE>..<U000200B7> /x95/x32/x94/x30 <CJK>
<U000200B8>..<U000200C1> /x95/x32/x95/x30 <CJK>
<U000200C2>..<U000200CB> /x95/x32/x96/x30 <CJK>
-<U000200CC> /xfe/x53 <CJK>
-<U000200CD>..<U000200D5> /x95/x32/x97/x31 <CJK>
+<U000200CC>..<U000200D5> /x95/x32/x97/x30 <CJK>
<U000200D6>..<U000200DF> /x95/x32/x98/x30 <CJK>
<U000200E0>..<U000200E9> /x95/x32/x99/x30 <CJK>
<U000200EA>..<U000200F3> /x95/x32/x9a/x30 <CJK>
@@ -70998,8 +70968,7 @@ CHARMAP
<U000215BC>..<U000215C5> /x95/x36/xb7/x30 <CJK>
<U000215C6>..<U000215CF> /x95/x36/xb8/x30 <CJK>
<U000215D0>..<U000215D6> /x95/x36/xb9/x30 <CJK>
-<U000215D7> /xfe/x6c <CJK>
-<U000215D8>..<U000215D9> /x95/x36/xb9/x38 <CJK>
+<U000215D7>..<U000215D9> /x95/x36/xb9/x37 <CJK>
<U000215DA>..<U000215E3> /x95/x36/xba/x30 <CJK>
<U000215E4>..<U000215ED> /x95/x36/xbb/x30 <CJK>
<U000215EE>..<U000215F7> /x95/x36/xbc/x30 <CJK>
@@ -71505,8 +71474,7 @@ CHARMAP
<U00022976>..<U0002297F> /x96/x30/xb8/x30 <CJK>
<U00022980>..<U00022989> /x96/x30/xb9/x30 <CJK>
<U0002298A>..<U0002298E> /x96/x30/xba/x30 <CJK>
-<U0002298F> /xfe/x76 <CJK>
-<U00022990>..<U00022993> /x96/x30/xba/x36 <CJK>
+<U0002298F>..<U00022993> /x96/x30/xba/x35 <CJK>
<U00022994>..<U0002299D> /x96/x30/xbb/x30 <CJK>
<U0002299E>..<U000229A7> /x96/x30/xbc/x30 <CJK>
<U000229A8>..<U000229B1> /x96/x30/xbd/x30 <CJK>
@@ -72132,8 +72100,7 @@ CHARMAP
<U000241E0>..<U000241E9> /x96/x35/xb3/x30 <CJK>
<U000241EA>..<U000241F3> /x96/x35/xb4/x30 <CJK>
<U000241F4>..<U000241FD> /x96/x35/xb5/x30 <CJK>
-<U000241FE> /xfe/x91 <CJK>
-<U000241FF>..<U00024207> /x96/x35/xb6/x31 <CJK>
+<U000241FE>..<U00024207> /x96/x35/xb6/x30 <CJK>
<U00024208>..<U00024211> /x96/x35/xb7/x30 <CJK>
<U00024212>..<U0002421B> /x96/x35/xb8/x30 <CJK>
<U0002421C>..<U00024225> /x96/x35/xb9/x30 <CJK>

View File

@ -0,0 +1,463 @@
commit 1db84775f831a1494993ce9c118deaf9537cc50a
Author: Frank Barrus <frankbarrus_sw@shaggy.cc>
Date: Wed Dec 4 07:55:02 2024 -0500
pthreads NPTL: lost wakeup fix 2
This fixes the lost wakeup (from a bug in signal stealing) with a change
in the usage of g_signals[] in the condition variable internal state.
It also completely eliminates the concept and handling of signal stealing,
as well as the need for signalers to block to wait for waiters to wake
up every time there is a G1/G2 switch. This greatly reduces the average
and maximum latency for pthread_cond_signal.
The g_signals[] field now contains a signal count that is relative to
the current g1_start value. Since it is a 32-bit field, and the LSB is
still reserved (though not currently used anymore), it has a 31-bit value
that corresponds to the low 31 bits of the sequence number in g1_start.
(since g1_start also has an LSB flag, this means bits 31:1 in g_signals
correspond to bits 31:1 in g1_start, plus the current signal count)
By making the signal count relative to g1_start, there is no longer
any ambiguity or A/B/A issue, and thus any checks before blocking,
including the futex call itself, are guaranteed not to block if the G1/G2
switch occurs, even if the signal count remains the same. This allows
initially safely blocking in G2 until the switch to G1 occurs, and
then transitioning from G1 to a new G1 or G2, and always being able to
distinguish the state change. This removes the race condition and A/B/A
problems that otherwise ocurred if a late (pre-empted) waiter were to
resume just as the futex call attempted to block on g_signal since
otherwise there was no last opportunity to re-check things like whether
the current G1 group was already closed.
By fixing these issues, the signal stealing code can be eliminated,
since there is no concept of signal stealing anymore. The code to block
for all waiters to exit g_refs can also be removed, since any waiters
that are still in the g_refs region can be guaranteed to safely wake
up and exit. If there are still any left at this time, they are all
sent one final futex wakeup to ensure that they are not blocked any
longer, but there is no need for the signaller to block and wait for
them to wake up and exit the g_refs region.
The signal count is then effectively "zeroed" but since it is now
relative to g1_start, this is done by advancing it to a new value that
can be observed by any pending blocking waiters. Any late waiters can
always tell the difference, and can thus just cleanly exit if they are
in a stale G1 or G2. They can never steal a signal from the current
G1 if they are not in the current G1, since the signal value that has
to match in the cmpxchg has the low 31 bits of the g1_start value
contained in it, and that's first checked, and then it won't match if
there's a G1/G2 change.
Note: the 31-bit sequence number used in g_signals is designed to
handle wrap-around when checking the signal count, but if the entire
31-bit wraparound (2 billion signals) occurs while there is still a
late waiter that has not yet resumed, and it happens to then match
the current g1_start low bits, and the pre-emption occurs after the
normal "closed group" checks (which are 64-bit) but then hits the
futex syscall and signal consuming code, then an A/B/A issue could
still result and cause an incorrect assumption about whether it
should block. This particular scenario seems unlikely in practice.
Note that once awake from the futex, the waiter would notice the
closed group before consuming the signal (since that's still a 64-bit
check that would not be aliased in the wrap-around in g_signals),
so the biggest impact would be blocking on the futex until the next
full wakeup from a G1/G2 switch.
Signed-off-by: Frank Barrus <frankbarrus_sw@shaggy.cc>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
# Conflicts:
# nptl/pthread_cond_common.c (timed wait refactor)
# nptl/pthread_cond_wait.c (textual conflicts)
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index 479e54febb417675..9175e6779ebff244 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -341,7 +341,6 @@ static bool __attribute__ ((unused))
__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
unsigned int *g1index, int private)
{
- const unsigned int maxspin = 0;
unsigned int g1 = *g1index;
/* If there is no waiter in G2, we don't do anything. The expression may
@@ -362,84 +361,46 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
* New waiters arriving concurrently with the group switching will all go
into G2 until we atomically make the switch. Waiters existing in G2
are not affected.
- * Waiters in G1 will be closed out immediately by setting a flag in
- __g_signals, which will prevent waiters from blocking using a futex on
- __g_signals and also notifies them that the group is closed. As a
- result, they will eventually remove their group reference, allowing us
- to close switch group roles. */
-
- /* First, set the closed flag on __g_signals. This tells waiters that are
- about to wait that they shouldn't do that anymore. This basically
- serves as an advance notificaton of the upcoming change to __g1_start;
- waiters interpret it as if __g1_start was larger than their waiter
- sequence position. This allows us to change __g1_start after waiting
- for all existing waiters with group references to leave, which in turn
- makes recovery after stealing a signal simpler because it then can be
- skipped if __g1_start indicates that the group is closed (otherwise,
- we would have to recover always because waiters don't know how big their
- groups are). Relaxed MO is fine. */
- atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1);
-
- /* Wait until there are no group references anymore. The fetch-or operation
- injects us into the modification order of __g_refs; release MO ensures
- that waiters incrementing __g_refs after our fetch-or see the previous
- changes to __g_signals and to __g1_start that had to happen before we can
- switch this G1 and alias with an older group (we have two groups, so
- aliasing requires switching group roles twice). Note that nobody else
- can have set the wake-request flag, so we do not have to act upon it.
-
- Also note that it is harmless if older waiters or waiters from this G1
- get a group reference after we have quiesced the group because it will
- remain closed for them either because of the closed flag in __g_signals
- or the later update to __g1_start. New waiters will never arrive here
- but instead continue to go into the still current G2. */
- unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0);
- while ((r >> 1) > 0)
- {
- for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--)
- {
- /* TODO Back off. */
- r = atomic_load_relaxed (cond->__data.__g_refs + g1);
- }
- if ((r >> 1) > 0)
- {
- /* There is still a waiter after spinning. Set the wake-request
- flag and block. Relaxed MO is fine because this is just about
- this futex word.
-
- Update r to include the set wake-request flag so that the upcoming
- futex_wait only blocks if the flag is still set (otherwise, we'd
- violate the basic client-side futex protocol). */
- r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1) | 1;
-
- if ((r >> 1) > 0)
- futex_wait_simple (cond->__data.__g_refs + g1, r, private);
- /* Reload here so we eventually see the most recent value even if we
- do not spin. */
- r = atomic_load_relaxed (cond->__data.__g_refs + g1);
- }
- }
- /* Acquire MO so that we synchronize with the release operation that waiters
- use to decrement __g_refs and thus happen after the waiters we waited
- for. */
- atomic_thread_fence_acquire ();
+ * Waiters in G1 will be closed out immediately by the advancing of
+ __g_signals to the next "lowseq" (low 31 bits of the new g1_start),
+ which will prevent waiters from blocking using a futex on
+ __g_signals since it provides enough signals for all possible
+ remaining waiters. As a result, they can each consume a signal
+ and they will eventually remove their group reference. */
/* Update __g1_start, which finishes closing this group. The value we add
will never be negative because old_orig_size can only be zero when we
switch groups the first time after a condvar was initialized, in which
- case G1 will be at index 1 and we will add a value of 1. See above for
- why this takes place after waiting for quiescence of the group.
+ case G1 will be at index 1 and we will add a value of 1.
Relaxed MO is fine because the change comes with no additional
constraints that others would have to observe. */
__condvar_add_g1_start_relaxed (cond,
(old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
- /* Now reopen the group, thus enabling waiters to again block using the
- futex controlled by __g_signals. Release MO so that observers that see
- no signals (and thus can block) also see the write __g1_start and thus
- that this is now a new group (see __pthread_cond_wait_common for the
- matching acquire MO loads). */
- atomic_store_release (cond->__data.__g_signals + g1, 0);
+ unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U;
+
+ /* If any waiters still hold group references (and thus could be blocked),
+ then wake them all up now and prevent any running ones from blocking.
+ This is effectively a catch-all for any possible current or future
+ bugs that can allow the group size to reach 0 before all G1 waiters
+ have been awakened or at least given signals to consume, or any
+ other case that can leave blocked (or about to block) older waiters.. */
+ if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0)
+ {
+ /* First advance signals to the end of the group (i.e. enough signals
+ for the entire G1 group) to ensure that waiters which have not
+ yet blocked in the futex will not block.
+ Note that in the vast majority of cases, this should never
+ actually be necessary, since __g_signals will have enough
+ signals for the remaining g_refs waiters. As an optimization,
+ we could check this first before proceeding, although that
+ could still leave the potential for futex lost wakeup bugs
+ if the signal count was non-zero but the futex wakeup
+ was somehow lost. */
+ atomic_store_release (cond->__data.__g_signals + g1, lowseq);
+
+ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
+ }
/* At this point, the old G1 is now a valid new G2 (but not in use yet).
No old waiter can neither grab a signal nor acquire a reference without
@@ -451,6 +412,10 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
g1 ^= 1;
*g1index ^= 1;
+ /* Now advance the new G1 g_signals to the new lowseq, giving it
+ an effective signal count of 0 to start. */
+ atomic_store_release (cond->__data.__g_signals + g1, lowseq);
+
/* These values are just observed by signalers, and thus protected by the
lock. */
unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index ebf07ca82d87de7d..4fb22b28a7a20ecd 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -239,9 +239,7 @@ __condvar_cleanup_waiting (void *arg)
signaled), and a reference count.
The group reference count is used to maintain the number of waiters that
- are using the group's futex. Before a group can change its role, the
- reference count must show that no waiters are using the futex anymore; this
- prevents ABA issues on the futex word.
+ are using the group's futex.
To represent which intervals in the waiter sequence the groups cover (and
thus also which group slot contains G1 or G2), we use a 64b counter to
@@ -301,11 +299,12 @@ __condvar_cleanup_waiting (void *arg)
last reference.
* Reference count used by waiters concurrently with signalers that have
acquired the condvar-internal lock.
- __g_signals: The number of signals that can still be consumed.
+ __g_signals: The number of signals that can still be consumed, relative to
+ the current g1_start. (i.e. bits 31 to 1 of __g_signals are bits
+ 31 to 1 of g1_start with the signal count added)
* Used as a futex word by waiters. Used concurrently by waiters and
signalers.
- * LSB is true iff this group has been completely signaled (i.e., it is
- closed).
+ * LSB is currently reserved and 0.
__g_size: Waiters remaining in this group (i.e., which have not been
signaled yet.
* Accessed by signalers and waiters that cancel waiting (both do so only
@@ -329,18 +328,6 @@ __condvar_cleanup_waiting (void *arg)
sufficient because if a waiter can see a sufficiently large value, it could
have also consume a signal in the waiters group.
- Waiters try to grab a signal from __g_signals without holding a reference
- count, which can lead to stealing a signal from a more recent group after
- their own group was already closed. They cannot always detect whether they
- in fact did because they do not know when they stole, but they can
- conservatively add a signal back to the group they stole from; if they
- did so unnecessarily, all that happens is a spurious wake-up. To make this
- even less likely, __g1_start contains the index of the current g2 too,
- which allows waiters to check if there aliasing on the group slots; if
- there wasn't, they didn't steal from the current G1, which means that the
- G1 they stole from must have been already closed and they do not need to
- fix anything.
-
It is essential that the last field in pthread_cond_t is __g_signals[1]:
The previous condvar used a pointer-sized field in pthread_cond_t, so a
PTHREAD_COND_INITIALIZER from that condvar implementation might only
@@ -431,6 +418,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
{
while (1)
{
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+
/* Spin-wait first.
Note that spinning first without checking whether a timeout
passed might lead to what looks like a spurious wake-up even
@@ -442,35 +432,45 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
having to compare against the current time seems to be the right
choice from a performance perspective for most use cases. */
unsigned int spin = maxspin;
- while (signals == 0 && spin > 0)
+ while (spin > 0 && ((int)(signals - lowseq) < 2))
{
/* Check that we are not spinning on a group that's already
closed. */
- if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
- goto done;
+ if (seq < (g1_start >> 1))
+ break;
/* TODO Back off. */
/* Reload signals. See above for MO. */
signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ g1_start = __condvar_load_g1_start_relaxed (cond);
+ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
spin--;
}
- /* If our group will be closed as indicated by the flag on signals,
- don't bother grabbing a signal. */
- if (signals & 1)
- goto done;
-
- /* If there is an available signal, don't block. */
- if (signals != 0)
+ if (seq < (g1_start >> 1))
+ {
+ /* If the group is closed already,
+ then this waiter originally had enough extra signals to
+ consume, up until the time its group was closed. */
+ goto done;
+ }
+
+ /* If there is an available signal, don't block.
+ If __g1_start has advanced at all, then we must be in G1
+ by now, perhaps in the process of switching back to an older
+ G2, but in either case we're allowed to consume the available
+ signal and should not block anymore. */
+ if ((int)(signals - lowseq) >= 2)
break;
/* No signals available after spinning, so prepare to block.
We first acquire a group reference and use acquire MO for that so
that we synchronize with the dummy read-modify-write in
__condvar_quiesce_and_switch_g1 if we read from that. In turn,
- in this case this will make us see the closed flag on __g_signals
- that designates a concurrent attempt to reuse the group's slot.
+ in this case this will make us see the advancement of __g_signals
+ to the upcoming new g1_start that occurs with a concurrent
+ attempt to reuse the group's slot.
We use acquire MO for the __g_signals check to make the
__g1_start check work (see spinning above).
Note that the group reference acquisition will not mask the
@@ -478,15 +478,24 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
an atomic read-modify-write operation and thus extend the release
sequence. */
atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
- if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
- || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
+ signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ g1_start = __condvar_load_g1_start_relaxed (cond);
+ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+
+ if (seq < (g1_start >> 1))
{
- /* Our group is closed. Wake up any signalers that might be
- waiting. */
+ /* group is closed already, so don't block */
__condvar_dec_grefs (cond, g, private);
goto done;
}
+ if ((int)(signals - lowseq) >= 2)
+ {
+ /* a signal showed up or G1/G2 switched after we grabbed the refcount */
+ __condvar_dec_grefs (cond, g, private);
+ break;
+ }
+
// Now block.
struct _pthread_cleanup_buffer buffer;
struct _condvar_cleanup_buffer cbuffer;
@@ -500,7 +509,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
{
/* Block without a timeout. */
err = futex_wait_cancelable (
- cond->__data.__g_signals + g, 0, private);
+ cond->__data.__g_signals + g, signals, private);
}
else
{
@@ -531,13 +540,13 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
err = ETIMEDOUT;
else
err = futex_reltimed_wait_cancelable
- (cond->__data.__g_signals + g, 0, &rt, private);
+ (cond->__data.__g_signals + g, signals, &rt, private);
}
else
{
/* Use CLOCK_REALTIME. */
err = futex_abstimed_wait_cancelable
- (cond->__data.__g_signals + g, 0, abstime, private);
+ (cond->__data.__g_signals + g, signals, abstime, private);
}
}
@@ -562,6 +571,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
signals = atomic_load_acquire (cond->__data.__g_signals + g);
}
+ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
+ goto done;
}
/* Try to grab a signal. Use acquire MO so that we see an up-to-date value
of __g1_start below (see spinning above for a similar case). In
@@ -570,69 +581,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
&signals, signals - 2));
- /* We consumed a signal but we could have consumed from a more recent group
- that aliased with ours due to being in the same group slot. If this
- might be the case our group must be closed as visible through
- __g1_start. */
- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
- if (seq < (g1_start >> 1))
- {
- /* We potentially stole a signal from a more recent group but we do not
- know which group we really consumed from.
- We do not care about groups older than current G1 because they are
- closed; we could have stolen from these, but then we just add a
- spurious wake-up for the current groups.
- We will never steal a signal from current G2 that was really intended
- for G2 because G2 never receives signals (until it becomes G1). We
- could have stolen a signal from G2 that was conservatively added by a
- previous waiter that also thought it stole a signal -- but given that
- that signal was added unnecessarily, it's not a problem if we steal
- it.
- Thus, the remaining case is that we could have stolen from the current
- G1, where "current" means the __g1_start value we observed. However,
- if the current G1 does not have the same slot index as we do, we did
- not steal from it and do not need to undo that. This is the reason
- for putting a bit with G2's index into__g1_start as well. */
- if (((g1_start & 1) ^ 1) == g)
- {
- /* We have to conservatively undo our potential mistake of stealing
- a signal. We can stop trying to do that when the current G1
- changes because other spinning waiters will notice this too and
- __condvar_quiesce_and_switch_g1 has checked that there are no
- futex waiters anymore before switching G1.
- Relaxed MO is fine for the __g1_start load because we need to
- merely be able to observe this fact and not have to observe
- something else as well.
- ??? Would it help to spin for a little while to see whether the
- current G1 gets closed? This might be worthwhile if the group is
- small or close to being closed. */
- unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
- while (__condvar_load_g1_start_relaxed (cond) == g1_start)
- {
- /* Try to add a signal. We don't need to acquire the lock
- because at worst we can cause a spurious wake-up. If the
- group is in the process of being closed (LSB is true), this
- has an effect similar to us adding a signal. */
- if (((s & 1) != 0)
- || atomic_compare_exchange_weak_relaxed
- (cond->__data.__g_signals + g, &s, s + 2))
- {
- /* If we added a signal, we also need to add a wake-up on
- the futex. We also need to do that if we skipped adding
- a signal because the group is being closed because
- while __condvar_quiesce_and_switch_g1 could have closed
- the group, it might stil be waiting for futex waiters to
- leave (and one of those waiters might be the one we stole
- the signal from, which cause it to block using the
- futex). */
- futex_wake (cond->__data.__g_signals + g, 1, private);
- break;
- }
- /* TODO Back off. */
- }
- }
- }
-
done:
/* Confirm that we have been woken. We do that before acquiring the mutex

View File

@ -0,0 +1,39 @@
Partial revert of commit c36fc50781995e6758cae2b6927839d0157f213c
to restore the layout of pthread_cond_t and avoid a downstream
rpminspect and abidiff (libabigail tooling) spurious warning
about internal ABI changes. Without this change all RHEL developers
using pthread_cond_t would have to audit and waive the warning.
The alternative is to update the supression lists used in abidiff,
propagate that to the rpminspect service, and wait for that to
complete before doing the update. The more conservative position
is the partial revert of the layout change.
This is a downstream-only change and is not required upstream.
diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h
index dcb799b130178f3f..798e0de31680065b 100644
--- a/sysdeps/nptl/bits/thread-shared-types.h
+++ b/sysdeps/nptl/bits/thread-shared-types.h
@@ -188,7 +188,8 @@ struct __pthread_cond_s
unsigned int __high;
} __g1_start32;
};
- unsigned int __g_size[2] __LOCK_ALIGNMENT;
+ unsigned int __glibc_unused___g_refs[2] __LOCK_ALIGNMENT;
+ unsigned int __g_size[2];
unsigned int __g1_orig_size;
unsigned int __wrefs;
unsigned int __g_signals[2];
diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h
index 4f7adccdab1d6e9e..df049abf74d47522 100644
--- a/sysdeps/nptl/pthread.h
+++ b/sysdeps/nptl/pthread.h
@@ -184,7 +184,7 @@ enum
/* Conditional variable handling. */
-#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0} } }
+#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } }
/* Cleanup buffers */

View File

@ -0,0 +1,133 @@
commit 0cc973160c23bb67f895bc887dd6942d29f8fee3
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 07:55:22 2024 -0500
nptl: Update comments and indentation for new condvar implementation
Some comments were wrong after the most recent commit. This fixes that.
Also fixing indentation where it was using spaces instead of tabs.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index 9175e6779ebff244..36ec30a103390b3e 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -361,8 +361,9 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
* New waiters arriving concurrently with the group switching will all go
into G2 until we atomically make the switch. Waiters existing in G2
are not affected.
- * Waiters in G1 will be closed out immediately by the advancing of
- __g_signals to the next "lowseq" (low 31 bits of the new g1_start),
+ * Waiters in G1 have already received a signal and been woken. If they
+ haven't woken yet, they will be closed out immediately by the advancing
+ of __g_signals to the next "lowseq" (low 31 bits of the new g1_start),
which will prevent waiters from blocking using a futex on
__g_signals since it provides enough signals for all possible
remaining waiters. As a result, they can each consume a signal
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 4fb22b28a7a20ecd..2964c2d1be046b8a 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -250,7 +250,7 @@ __condvar_cleanup_waiting (void *arg)
figure out whether they are in a group that has already been completely
signaled (i.e., if the current G1 starts at a later position that the
waiter's position). Waiters cannot determine whether they are currently
- in G2 or G1 -- but they do not have too because all they are interested in
+ in G2 or G1 -- but they do not have to because all they are interested in
is whether there are available signals, and they always start in G2 (whose
group slot they know because of the bit in the waiter sequence. Signalers
will simply fill the right group until it is completely signaled and can
@@ -408,7 +408,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
}
/* Now wait until a signal is available in our group or it is closed.
- Acquire MO so that if we observe a value of zero written after group
+ Acquire MO so that if we observe (signals == lowseq) after group
switching in __condvar_quiesce_and_switch_g1, we synchronize with that
store and will see the prior update of __g1_start done while switching
groups too. */
@@ -418,8 +418,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
{
while (1)
{
- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
- unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
/* Spin-wait first.
Note that spinning first without checking whether a timeout
@@ -443,21 +443,21 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
/* Reload signals. See above for MO. */
signals = atomic_load_acquire (cond->__data.__g_signals + g);
- g1_start = __condvar_load_g1_start_relaxed (cond);
- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+ g1_start = __condvar_load_g1_start_relaxed (cond);
+ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
spin--;
}
- if (seq < (g1_start >> 1))
+ if (seq < (g1_start >> 1))
{
- /* If the group is closed already,
+ /* If the group is closed already,
then this waiter originally had enough extra signals to
consume, up until the time its group was closed. */
goto done;
- }
+ }
/* If there is an available signal, don't block.
- If __g1_start has advanced at all, then we must be in G1
+ If __g1_start has advanced at all, then we must be in G1
by now, perhaps in the process of switching back to an older
G2, but in either case we're allowed to consume the available
signal and should not block anymore. */
@@ -479,22 +479,23 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
sequence. */
atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
signals = atomic_load_acquire (cond->__data.__g_signals + g);
- g1_start = __condvar_load_g1_start_relaxed (cond);
- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+ g1_start = __condvar_load_g1_start_relaxed (cond);
+ lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
- if (seq < (g1_start >> 1))
+ if (seq < (g1_start >> 1))
{
- /* group is closed already, so don't block */
+ /* group is closed already, so don't block */
__condvar_dec_grefs (cond, g, private);
goto done;
}
if ((int)(signals - lowseq) >= 2)
{
- /* a signal showed up or G1/G2 switched after we grabbed the refcount */
+ /* a signal showed up or G1/G2 switched after we grabbed the
+ refcount */
__condvar_dec_grefs (cond, g, private);
break;
- }
+ }
// Now block.
struct _pthread_cleanup_buffer buffer;
@@ -574,10 +575,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
goto done;
}
- /* Try to grab a signal. Use acquire MO so that we see an up-to-date value
- of __g1_start below (see spinning above for a similar case). In
- particular, if we steal from a more recent group, we will also see a
- more recent __g1_start below. */
+ /* Try to grab a signal. See above for MO. (if we do another loop
+ iteration we need to see the correct value of g1_start) */
while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
&signals, signals - 2));

View File

@ -0,0 +1,67 @@
commit b42cc6af11062c260c7dfa91f1c89891366fed3e
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 07:55:50 2024 -0500
nptl: Remove unnecessary catch-all-wake in condvar group switch
This wake is unnecessary. We only switch groups after every sleeper in a group
has been woken. Sure, they may take a while to actually wake up and may still
hold a reference, but waking them a second time doesn't speed that up. Instead
this just makes the code more complicated and may hide problems.
In particular this safety wake wouldn't even have helped with the bug that was
fixed by Barrus' patch: The bug there was that pthread_cond_signal would not
switch g1 when it should, so we wouldn't even have entered this code path.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index 36ec30a103390b3e..f6d8c72b7f30ecff 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -361,13 +361,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
* New waiters arriving concurrently with the group switching will all go
into G2 until we atomically make the switch. Waiters existing in G2
are not affected.
- * Waiters in G1 have already received a signal and been woken. If they
- haven't woken yet, they will be closed out immediately by the advancing
- of __g_signals to the next "lowseq" (low 31 bits of the new g1_start),
- which will prevent waiters from blocking using a futex on
- __g_signals since it provides enough signals for all possible
- remaining waiters. As a result, they can each consume a signal
- and they will eventually remove their group reference. */
+ * Waiters in G1 have already received a signal and been woken. */
/* Update __g1_start, which finishes closing this group. The value we add
will never be negative because old_orig_size can only be zero when we
@@ -380,29 +374,6 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U;
- /* If any waiters still hold group references (and thus could be blocked),
- then wake them all up now and prevent any running ones from blocking.
- This is effectively a catch-all for any possible current or future
- bugs that can allow the group size to reach 0 before all G1 waiters
- have been awakened or at least given signals to consume, or any
- other case that can leave blocked (or about to block) older waiters.. */
- if ((atomic_fetch_or_release (cond->__data.__g_refs + g1, 0) >> 1) > 0)
- {
- /* First advance signals to the end of the group (i.e. enough signals
- for the entire G1 group) to ensure that waiters which have not
- yet blocked in the futex will not block.
- Note that in the vast majority of cases, this should never
- actually be necessary, since __g_signals will have enough
- signals for the remaining g_refs waiters. As an optimization,
- we could check this first before proceeding, although that
- could still leave the potential for futex lost wakeup bugs
- if the signal count was non-zero but the futex wakeup
- was somehow lost. */
- atomic_store_release (cond->__data.__g_signals + g1, lowseq);
-
- futex_wake (cond->__data.__g_signals + g1, INT_MAX, private);
- }
-
/* At this point, the old G1 is now a valid new G2 (but not in use yet).
No old waiter can neither grab a signal nor acquire a reference without
noticing that __g1_start is larger.

View File

@ -0,0 +1,107 @@
commit 4f7b051f8ee3feff1b53b27a906f245afaa9cee1
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 07:56:13 2024 -0500
nptl: Remove unnecessary quadruple check in pthread_cond_wait
pthread_cond_wait was checking whether it was in a closed group no less than
four times. Checking once is enough. Here are the four checks:
1. While spin-waiting. This was dead code: maxspin is set to 0 and has been
for years.
2. Before deciding to go to sleep, and before incrementing grefs: I kept this
3. After incrementing grefs. There is no reason to think that the group would
close while we do an atomic increment. Obviously it could close at any
point, but that doesn't mean we have to recheck after every step. This
check was equally good as check 2, except it has to do more work.
4. When we find ourselves in a group that has a signal. We only get here after
we check that we're not in a closed group. There is no need to check again.
The check would only have helped in cases where the compare_exchange in the
next line would also have failed. Relying on the compare_exchange is fine.
Removing the duplicate checks clarifies the code.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 2964c2d1be046b8a..8358784867f6074a 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -367,7 +367,6 @@ static __always_inline int
__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
const struct timespec *abstime)
{
- const int maxspin = 0;
int err;
int result = 0;
@@ -421,33 +420,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
- /* Spin-wait first.
- Note that spinning first without checking whether a timeout
- passed might lead to what looks like a spurious wake-up even
- though we should return ETIMEDOUT (e.g., if the caller provides
- an absolute timeout that is clearly in the past). However,
- (1) spurious wake-ups are allowed, (2) it seems unlikely that a
- user will (ab)use pthread_cond_wait as a check for whether a
- point in time is in the past, and (3) spinning first without
- having to compare against the current time seems to be the right
- choice from a performance perspective for most use cases. */
- unsigned int spin = maxspin;
- while (spin > 0 && ((int)(signals - lowseq) < 2))
- {
- /* Check that we are not spinning on a group that's already
- closed. */
- if (seq < (g1_start >> 1))
- break;
-
- /* TODO Back off. */
-
- /* Reload signals. See above for MO. */
- signals = atomic_load_acquire (cond->__data.__g_signals + g);
- g1_start = __condvar_load_g1_start_relaxed (cond);
- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
- spin--;
- }
-
if (seq < (g1_start >> 1))
{
/* If the group is closed already,
@@ -478,24 +450,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
an atomic read-modify-write operation and thus extend the release
sequence. */
atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
- signals = atomic_load_acquire (cond->__data.__g_signals + g);
- g1_start = __condvar_load_g1_start_relaxed (cond);
- lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
-
- if (seq < (g1_start >> 1))
- {
- /* group is closed already, so don't block */
- __condvar_dec_grefs (cond, g, private);
- goto done;
- }
-
- if ((int)(signals - lowseq) >= 2)
- {
- /* a signal showed up or G1/G2 switched after we grabbed the
- refcount */
- __condvar_dec_grefs (cond, g, private);
- break;
- }
// Now block.
struct _pthread_cleanup_buffer buffer;
@@ -571,9 +525,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
/* Reload signals. See above for MO. */
signals = atomic_load_acquire (cond->__data.__g_signals + g);
}
-
- if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
- goto done;
}
/* Try to grab a signal. See above for MO. (if we do another loop
iteration we need to see the correct value of g1_start) */

View File

@ -0,0 +1,172 @@
commit c36fc50781995e6758cae2b6927839d0157f213c
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 07:56:38 2024 -0500
nptl: Remove g_refs from condition variables
This variable used to be needed to wait in group switching until all sleepers
have confirmed that they have woken. This is no longer needed. Nothing waits
on this variable so there is no need to track how many threads are currently
asleep in each group.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
# Conflicts:
# nptl/tst-cond22.c (64-bit atomic counter refactor missing)
# sysdeps/nptl/bits/thread-shared-types.h (Likewise)
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 8358784867f6074a..b2bf3bd0a7af43e8 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -144,23 +144,6 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
}
}
-/* Wake up any signalers that might be waiting. */
-static void
-__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
-{
- /* Release MO to synchronize-with the acquire load in
- __condvar_quiesce_and_switch_g1. */
- if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
- {
- /* Clear the wake-up request flag before waking up. We do not need more
- than relaxed MO and it doesn't matter if we apply this for an aliased
- group because we wake all futex waiters right after clearing the
- flag. */
- atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
- futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
- }
-}
-
/* Clean-up for cancellation of waiters waiting for normal signals. We cancel
our registration as a waiter, confirm we have woken up, and re-acquire the
mutex. */
@@ -172,8 +155,6 @@ __condvar_cleanup_waiting (void *arg)
pthread_cond_t *cond = cbuffer->cond;
unsigned g = cbuffer->wseq & 1;
- __condvar_dec_grefs (cond, g, cbuffer->private);
-
__condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private);
/* FIXME With the current cancellation implementation, it is possible that
a thread is cancelled after it has returned from a syscall. This could
@@ -328,15 +309,6 @@ __condvar_cleanup_waiting (void *arg)
sufficient because if a waiter can see a sufficiently large value, it could
have also consume a signal in the waiters group.
- It is essential that the last field in pthread_cond_t is __g_signals[1]:
- The previous condvar used a pointer-sized field in pthread_cond_t, so a
- PTHREAD_COND_INITIALIZER from that condvar implementation might only
- initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
- in total instead of the 48 we need). __g_signals[1] is not accessed before
- the first group switch (G2 starts at index 0), which will set its value to
- zero after a harmless fetch-or whose return value is ignored. This
- effectively completes initialization.
-
Limitations:
* This condvar isn't designed to allow for more than
@@ -436,21 +408,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
if ((int)(signals - lowseq) >= 2)
break;
- /* No signals available after spinning, so prepare to block.
- We first acquire a group reference and use acquire MO for that so
- that we synchronize with the dummy read-modify-write in
- __condvar_quiesce_and_switch_g1 if we read from that. In turn,
- in this case this will make us see the advancement of __g_signals
- to the upcoming new g1_start that occurs with a concurrent
- attempt to reuse the group's slot.
- We use acquire MO for the __g_signals check to make the
- __g1_start check work (see spinning above).
- Note that the group reference acquisition will not mask the
- release MO when decrementing the reference count because we use
- an atomic read-modify-write operation and thus extend the release
- sequence. */
- atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2);
-
// Now block.
struct _pthread_cleanup_buffer buffer;
struct _condvar_cleanup_buffer cbuffer;
@@ -509,18 +466,11 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
if (__glibc_unlikely (err == ETIMEDOUT))
{
- __condvar_dec_grefs (cond, g, private);
- /* If we timed out, we effectively cancel waiting. Note that
- we have decremented __g_refs before cancellation, so that a
- deadlock between waiting for quiescence of our group in
- __condvar_quiesce_and_switch_g1 and us trying to acquire
- the lock during cancellation is not possible. */
+ /* If we timed out, we effectively cancel waiting. */
__condvar_cancel_waiting (cond, seq, g, private);
result = ETIMEDOUT;
goto done;
}
- else
- __condvar_dec_grefs (cond, g, private);
/* Reload signals. See above for MO. */
signals = atomic_load_acquire (cond->__data.__g_signals + g);
diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c
index 64f19ea0a55af057..ebeeeaf666070076 100644
--- a/nptl/tst-cond22.c
+++ b/nptl/tst-cond22.c
@@ -106,10 +106,10 @@ do_test (void)
status = 1;
}
- printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
+ printf ("cond = { %llu, %llu, %u/%u, %u/%u, %u, %u }\n",
c.__data.__wseq, c.__data.__g1_start,
- c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
- c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
+ c.__data.__g_signals[0], c.__data.__g_size[0],
+ c.__data.__g_signals[1], c.__data.__g_size[1],
c.__data.__g1_orig_size, c.__data.__wrefs);
if (pthread_create (&th, NULL, tf, (void *) 1l) != 0)
@@ -149,10 +149,10 @@ do_test (void)
status = 1;
}
- printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n",
+ printf ("cond = { %llu, %llu, %u/%u, %u/%u, %u, %u }\n",
c.__data.__wseq, c.__data.__g1_start,
- c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0],
- c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1],
+ c.__data.__g_signals[0], c.__data.__g_size[0],
+ c.__data.__g_signals[1], c.__data.__g_size[1],
c.__data.__g1_orig_size, c.__data.__wrefs);
return status;
diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h
index 05c94e7a710c0eb9..dcb799b130178f3f 100644
--- a/sysdeps/nptl/bits/thread-shared-types.h
+++ b/sysdeps/nptl/bits/thread-shared-types.h
@@ -188,8 +188,7 @@ struct __pthread_cond_s
unsigned int __high;
} __g1_start32;
};
- unsigned int __g_refs[2] __LOCK_ALIGNMENT;
- unsigned int __g_size[2];
+ unsigned int __g_size[2] __LOCK_ALIGNMENT;
unsigned int __g1_orig_size;
unsigned int __wrefs;
unsigned int __g_signals[2];
diff --git a/sysdeps/nptl/pthread.h b/sysdeps/nptl/pthread.h
index df049abf74d47522..4f7adccdab1d6e9e 100644
--- a/sysdeps/nptl/pthread.h
+++ b/sysdeps/nptl/pthread.h
@@ -184,7 +184,7 @@ enum
/* Conditional variable handling. */
-#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } }
+#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, 0, 0, {0, 0} } }
/* Cleanup buffers */

View File

@ -0,0 +1,94 @@
commit 929a4764ac90382616b6a21f099192b2475da674
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 08:03:44 2024 -0500
nptl: Use a single loop in pthread_cond_wait instaed of a nested loop
The loop was a little more complicated than necessary. There was only one
break statement out of the inner loop, and the outer loop was nearly empty.
So just remove the outer loop, moving its code to the one break statement in
the inner loop. This allows us to replace all gotos with break statements.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
# Conflicts:
# nptl/pthread_cond_wait.c (Missing EOVERFLOW checks for y2038)
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index b2bf3bd0a7af43e8..8f12fc4ee288cf4a 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -378,17 +378,15 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
return err;
}
- /* Now wait until a signal is available in our group or it is closed.
- Acquire MO so that if we observe (signals == lowseq) after group
- switching in __condvar_quiesce_and_switch_g1, we synchronize with that
- store and will see the prior update of __g1_start done while switching
- groups too. */
- unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
-
- do
- {
+
while (1)
{
+ /* Now wait until a signal is available in our group or it is closed.
+ Acquire MO so that if we observe (signals == lowseq) after group
+ switching in __condvar_quiesce_and_switch_g1, we synchronize with that
+ store and will see the prior update of __g1_start done while switching
+ groups too. */
+ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
@@ -397,7 +395,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
/* If the group is closed already,
then this waiter originally had enough extra signals to
consume, up until the time its group was closed. */
- goto done;
+ break;
}
/* If there is an available signal, don't block.
@@ -406,7 +404,16 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
G2, but in either case we're allowed to consume the available
signal and should not block anymore. */
if ((int)(signals - lowseq) >= 2)
- break;
+ {
+ /* Try to grab a signal. See above for MO. (if we do another loop
+ iteration we need to see the correct value of g1_start) */
+ if (atomic_compare_exchange_weak_acquire (
+ cond->__data.__g_signals + g,
+ &signals, signals - 2))
+ break;
+ else
+ continue;
+ }
// Now block.
struct _pthread_cleanup_buffer buffer;
@@ -469,19 +476,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
/* If we timed out, we effectively cancel waiting. */
__condvar_cancel_waiting (cond, seq, g, private);
result = ETIMEDOUT;
- goto done;
+ break;
}
-
- /* Reload signals. See above for MO. */
- signals = atomic_load_acquire (cond->__data.__g_signals + g);
}
- }
- /* Try to grab a signal. See above for MO. (if we do another loop
- iteration we need to see the correct value of g1_start) */
- while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
- &signals, signals - 2));
-
- done:
/* Confirm that we have been woken. We do that before acquiring the mutex
to allow for execution of pthread_cond_destroy while having acquired the

View File

@ -0,0 +1,218 @@
commit ee6c14ed59d480720721aaacc5fb03213dc153da
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 08:04:10 2024 -0500
nptl: Fix indentation
In my previous change I turned a nested loop into a simple loop. I'm doing
the resulting indentation changes in a separate commit to make the diff on
the previous commit easier to review.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
# Conflicts:
# nptl/pthread_cond_wait.c (Missing futex_wait_cancelable cleanup)
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 8f12fc4ee288cf4a..964591449dc57758 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -379,107 +379,108 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
}
- while (1)
- {
- /* Now wait until a signal is available in our group or it is closed.
- Acquire MO so that if we observe (signals == lowseq) after group
- switching in __condvar_quiesce_and_switch_g1, we synchronize with that
- store and will see the prior update of __g1_start done while switching
- groups too. */
- unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
- unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
-
- if (seq < (g1_start >> 1))
- {
- /* If the group is closed already,
- then this waiter originally had enough extra signals to
- consume, up until the time its group was closed. */
- break;
- }
-
- /* If there is an available signal, don't block.
- If __g1_start has advanced at all, then we must be in G1
- by now, perhaps in the process of switching back to an older
- G2, but in either case we're allowed to consume the available
- signal and should not block anymore. */
- if ((int)(signals - lowseq) >= 2)
- {
- /* Try to grab a signal. See above for MO. (if we do another loop
- iteration we need to see the correct value of g1_start) */
- if (atomic_compare_exchange_weak_acquire (
- cond->__data.__g_signals + g,
+ while (1)
+ {
+ /* Now wait until a signal is available in our group or it is closed.
+ Acquire MO so that if we observe (signals == lowseq) after group
+ switching in __condvar_quiesce_and_switch_g1, we synchronize with that
+ store and will see the prior update of __g1_start done while switching
+ groups too. */
+ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
+ unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
+
+ if (seq < (g1_start >> 1))
+ {
+ /* If the group is closed already,
+ then this waiter originally had enough extra signals to
+ consume, up until the time its group was closed. */
+ break;
+ }
+
+ /* If there is an available signal, don't block.
+ If __g1_start has advanced at all, then we must be in G1
+ by now, perhaps in the process of switching back to an older
+ G2, but in either case we're allowed to consume the available
+ signal and should not block anymore. */
+ if ((int)(signals - lowseq) >= 2)
+ {
+ /* Try to grab a signal. See above for MO. (if we do another loop
+ iteration we need to see the correct value of g1_start) */
+ if (atomic_compare_exchange_weak_acquire (
+ cond->__data.__g_signals + g,
&signals, signals - 2))
- break;
- else
- continue;
- }
+ break;
+ else
+ continue;
+ }
- // Now block.
- struct _pthread_cleanup_buffer buffer;
- struct _condvar_cleanup_buffer cbuffer;
- cbuffer.wseq = wseq;
- cbuffer.cond = cond;
- cbuffer.mutex = mutex;
- cbuffer.private = private;
- __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
+ // Now block.
+ struct _pthread_cleanup_buffer buffer;
+ struct _condvar_cleanup_buffer cbuffer;
+ cbuffer.wseq = wseq;
+ cbuffer.cond = cond;
+ cbuffer.mutex = mutex;
+ cbuffer.private = private;
+ __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
- if (abstime == NULL)
- {
- /* Block without a timeout. */
- err = futex_wait_cancelable (
- cond->__data.__g_signals + g, signals, private);
- }
- else
+ if (abstime == NULL)
+ {
+ /* Block without a timeout. */
+ err = futex_wait_cancelable
+ (cond->__data.__g_signals + g, signals, private);
+ }
+ else
+ {
+ /* Block, but with a timeout.
+ Work around the fact that the kernel rejects negative timeout
+ values despite them being valid. */
+ if (__glibc_unlikely (abstime->tv_sec < 0))
+ err = ETIMEDOUT;
+ else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
{
- /* Block, but with a timeout.
- Work around the fact that the kernel rejects negative timeout
- values despite them being valid. */
- if (__glibc_unlikely (abstime->tv_sec < 0))
- err = ETIMEDOUT;
-
- else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0)
+ /* CLOCK_MONOTONIC is requested. */
+ struct timespec rt;
+ if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
+ __libc_fatal ("clock_gettime does not support "
+ "CLOCK_MONOTONIC\n");
+ /* Convert the absolute timeout value to a relative
+ timeout. */
+ rt.tv_sec = abstime->tv_sec - rt.tv_sec;
+ rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
+ if (rt.tv_nsec < 0)
{
- /* CLOCK_MONOTONIC is requested. */
- struct timespec rt;
- if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0)
- __libc_fatal ("clock_gettime does not support "
- "CLOCK_MONOTONIC\n");
- /* Convert the absolute timeout value to a relative
- timeout. */
- rt.tv_sec = abstime->tv_sec - rt.tv_sec;
- rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
- if (rt.tv_nsec < 0)
- {
- rt.tv_nsec += 1000000000;
- --rt.tv_sec;
- }
- /* Did we already time out? */
- if (__glibc_unlikely (rt.tv_sec < 0))
- err = ETIMEDOUT;
- else
- err = futex_reltimed_wait_cancelable
- (cond->__data.__g_signals + g, signals, &rt, private);
+ rt.tv_nsec += 1000000000;
+ --rt.tv_sec;
}
+ /* Did we already time out? */
+ if (__glibc_unlikely (rt.tv_sec < 0))
+ err = ETIMEDOUT;
else
- {
- /* Use CLOCK_REALTIME. */
- err = futex_abstimed_wait_cancelable
- (cond->__data.__g_signals + g, signals, abstime, private);
- }
+ err = futex_reltimed_wait_cancelable
+ (cond->__data.__g_signals + g, signals,
+ &rt, private);
}
-
- __pthread_cleanup_pop (&buffer, 0);
-
- if (__glibc_unlikely (err == ETIMEDOUT))
+ else
{
- /* If we timed out, we effectively cancel waiting. */
- __condvar_cancel_waiting (cond, seq, g, private);
- result = ETIMEDOUT;
- break;
+ /* Use CLOCK_REALTIME. */
+ err = futex_abstimed_wait_cancelable
+ (cond->__data.__g_signals + g, signals,
+ abstime, private);
}
}
+ __pthread_cleanup_pop (&buffer, 0);
+
+ if (__glibc_unlikely (err == ETIMEDOUT))
+ {
+ /* If we timed out, we effectively cancel waiting. */
+ __condvar_cancel_waiting (cond, seq, g, private);
+ result = ETIMEDOUT;
+ break;
+ }
+ }
+
/* Confirm that we have been woken. We do that before acquiring the mutex
to allow for execution of pthread_cond_destroy while having acquired the
mutex. */

View File

@ -0,0 +1,147 @@
commit 4b79e27a5073c02f6bff9aa8f4791230a0ab1867
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 08:04:54 2024 -0500
nptl: rename __condvar_quiesce_and_switch_g1
This function no longer waits for threads to leave g1, so rename it to
__condvar_switch_g1
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c
index e6bcb9b61b0055a5..1ec746ec3df51c4f 100644
--- a/nptl/pthread_cond_broadcast.c
+++ b/nptl/pthread_cond_broadcast.c
@@ -61,7 +61,7 @@ __pthread_cond_broadcast (pthread_cond_t *cond)
cond->__data.__g_size[g1] << 1);
cond->__data.__g_size[g1] = 0;
- /* We need to wake G1 waiters before we quiesce G1 below. */
+ /* We need to wake G1 waiters before we switch G1 below. */
/* TODO Only set it if there are indeed futex waiters. We could
also try to move this out of the critical section in cases when
G2 is empty (and we don't need to quiesce). */
@@ -70,7 +70,7 @@ __pthread_cond_broadcast (pthread_cond_t *cond)
/* G1 is complete. Step (2) is next unless there are no waiters in G2, in
which case we can stop. */
- if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
+ if (__condvar_switch_g1 (cond, wseq, &g1, private))
{
/* Step (3): Send signals to all waiters in the old G2 / new G1. */
atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index f6d8c72b7f30ecff..770f8e7c5d347f6b 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -329,16 +329,15 @@ __condvar_get_private (int flags)
return FUTEX_SHARED;
}
-/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to
- leave G1, converts G1 into a fresh G2, and then switches group roles so that
- the former G2 becomes the new G1 ending at the current __wseq value when we
- eventually make the switch (WSEQ is just an observation of __wseq by the
- signaler).
+/* This closes G1 (whose index is in G1INDEX), converts G1 into a fresh G2,
+ and then switches group roles so that the former G2 becomes the new G1
+ ending at the current __wseq value when we eventually make the switch
+ (WSEQ is just an observation of __wseq by the signaler).
If G2 is empty, it will not switch groups because then it would create an
empty G1 which would require switching groups again on the next signal.
Returns false iff groups were not switched because G2 was empty. */
static bool __attribute__ ((unused))
-__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
+__condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
unsigned int *g1index, int private)
{
unsigned int g1 = *g1index;
@@ -354,8 +353,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
+ cond->__data.__g_size[g1 ^ 1]) == 0)
return false;
- /* Now try to close and quiesce G1. We have to consider the following kinds
- of waiters:
+ /* We have to consider the following kinds of waiters:
* Waiters from less recent groups than G1 are not affected because
nothing will change for them apart from __g1_start getting larger.
* New waiters arriving concurrently with the group switching will all go
@@ -363,12 +361,12 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
are not affected.
* Waiters in G1 have already received a signal and been woken. */
- /* Update __g1_start, which finishes closing this group. The value we add
- will never be negative because old_orig_size can only be zero when we
- switch groups the first time after a condvar was initialized, in which
- case G1 will be at index 1 and we will add a value of 1.
- Relaxed MO is fine because the change comes with no additional
- constraints that others would have to observe. */
+ /* Update __g1_start, which closes this group. The value we add will never
+ be negative because old_orig_size can only be zero when we switch groups
+ the first time after a condvar was initialized, in which case G1 will be
+ at index 1 and we will add a value of 1. Relaxed MO is fine because the
+ change comes with no additional constraints that others would have to
+ observe. */
__condvar_add_g1_start_relaxed (cond,
(old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c
index 3db3d1fbeb165ea4..24c9d813e7c0ada6 100644
--- a/nptl/pthread_cond_signal.c
+++ b/nptl/pthread_cond_signal.c
@@ -70,18 +70,17 @@ __pthread_cond_signal (pthread_cond_t *cond)
bool do_futex_wake = false;
/* If G1 is still receiving signals, we put the signal there. If not, we
- check if G2 has waiters, and if so, quiesce and switch G1 to the former
- G2; if this results in a new G1 with waiters (G2 might have cancellations
- already, see __condvar_quiesce_and_switch_g1), we put the signal in the
- new G1. */
+ check if G2 has waiters, and if so, switch G1 to the former G2; if this
+ results in a new G1 with waiters (G2 might have cancellations already,
+ see __condvar_switch_g1), we put the signal in the new G1. */
if ((cond->__data.__g_size[g1] != 0)
- || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private))
+ || __condvar_switch_g1 (cond, wseq, &g1, private))
{
/* Add a signal. Relaxed MO is fine because signaling does not need to
- establish a happens-before relation (see above). We do not mask the
- release-MO store when initializing a group in
- __condvar_quiesce_and_switch_g1 because we use an atomic
- read-modify-write and thus extend that store's release sequence. */
+ establish a happens-before relation (see above). We do not mask the
+ release-MO store when initializing a group in __condvar_switch_g1
+ because we use an atomic read-modify-write and thus extend that
+ store's release sequence. */
atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2);
cond->__data.__g_size[g1]--;
/* TODO Only set it if there are indeed futex waiters. */
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index c97b5d22cb31ca6b..5b82ce639367e0c0 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -350,8 +350,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
because we do not need to establish any happens-before relation with
signalers (see __pthread_cond_signal); modification order alone
establishes a total order of waiters/signals. We do need acquire MO
- to synchronize with group reinitialization in
- __condvar_quiesce_and_switch_g1. */
+ to synchronize with group reinitialization in __condvar_switch_g1. */
uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2);
/* Find our group's index. We always go into what was G2 when we acquired
our position. */
@@ -383,9 +382,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
{
/* Now wait until a signal is available in our group or it is closed.
Acquire MO so that if we observe (signals == lowseq) after group
- switching in __condvar_quiesce_and_switch_g1, we synchronize with that
- store and will see the prior update of __g1_start done while switching
- groups too. */
+ switching in __condvar_switch_g1, we synchronize with that store and
+ will see the prior update of __g1_start done while switching groups
+ too. */
unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;

View File

@ -0,0 +1,179 @@
commit 91bb902f58264a2fd50fbce8f39a9a290dd23706
Author: Malte Skarupke <malteskarupke@fastmail.fm>
Date: Wed Dec 4 08:05:40 2024 -0500
nptl: Use all of g1_start and g_signals
The LSB of g_signals was unused. The LSB of g1_start was used to indicate
which group is G2. This was used to always go to sleep in pthread_cond_wait
if a waiter is in G2. A comment earlier in the file says that this is not
correct to do:
"Waiters cannot determine whether they are currently in G2 or G1 -- but they
do not have to because all they are interested in is whether there are
available signals"
I either would have had to update the comment, or get rid of the check. I
chose to get rid of the check. In fact I don't quite know why it was there.
There will never be available signals for group G2, so we didn't need the
special case. Even if there were, this would just be a spurious wake. This
might have caught some cases where the count has wrapped around, but it
wouldn't reliably do that, (and even if it did, why would you want to force a
sleep in that case?) and we don't support that many concurrent waiters
anyway. Getting rid of it allows us to use one more bit, making us more
robust to wraparound.
Signed-off-by: Malte Skarupke <malteskarupke@fastmail.fm>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/nptl/pthread_cond_broadcast.c b/nptl/pthread_cond_broadcast.c
index 1ec746ec3df51c4f..14d3e533ad4b24d7 100644
--- a/nptl/pthread_cond_broadcast.c
+++ b/nptl/pthread_cond_broadcast.c
@@ -58,7 +58,7 @@ __pthread_cond_broadcast (pthread_cond_t *cond)
{
/* Add as many signals as the remaining size of the group. */
atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
- cond->__data.__g_size[g1] << 1);
+ cond->__data.__g_size[g1]);
cond->__data.__g_size[g1] = 0;
/* We need to wake G1 waiters before we switch G1 below. */
@@ -74,7 +74,7 @@ __pthread_cond_broadcast (pthread_cond_t *cond)
{
/* Step (3): Send signals to all waiters in the old G2 / new G1. */
atomic_fetch_add_relaxed (cond->__data.__g_signals + g1,
- cond->__data.__g_size[g1] << 1);
+ cond->__data.__g_size[g1]);
cond->__data.__g_size[g1] = 0;
/* TODO Only set it if there are indeed futex waiters. */
do_futex_wake = true;
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index 770f8e7c5d347f6b..1fe0448d8705c326 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -348,9 +348,9 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
behavior.
Note that this works correctly for a zero-initialized condvar too. */
unsigned int old_orig_size = __condvar_get_orig_size (cond);
- uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
- if (((unsigned) (wseq - old_g1_start - old_orig_size)
- + cond->__data.__g_size[g1 ^ 1]) == 0)
+ uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond);
+ uint64_t new_g1_start = old_g1_start + old_orig_size;
+ if (((unsigned) (wseq - new_g1_start) + cond->__data.__g_size[g1 ^ 1]) == 0)
return false;
/* We have to consider the following kinds of waiters:
@@ -361,16 +361,10 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
are not affected.
* Waiters in G1 have already received a signal and been woken. */
- /* Update __g1_start, which closes this group. The value we add will never
- be negative because old_orig_size can only be zero when we switch groups
- the first time after a condvar was initialized, in which case G1 will be
- at index 1 and we will add a value of 1. Relaxed MO is fine because the
- change comes with no additional constraints that others would have to
- observe. */
- __condvar_add_g1_start_relaxed (cond,
- (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
-
- unsigned int lowseq = ((old_g1_start + old_orig_size) << 1) & ~1U;
+ /* Update __g1_start, which closes this group. Relaxed MO is fine because
+ the change comes with no additional constraints that others would have
+ to observe. */
+ __condvar_add_g1_start_relaxed (cond, old_orig_size);
/* At this point, the old G1 is now a valid new G2 (but not in use yet).
No old waiter can neither grab a signal nor acquire a reference without
@@ -382,13 +376,13 @@ __condvar_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
g1 ^= 1;
*g1index ^= 1;
- /* Now advance the new G1 g_signals to the new lowseq, giving it
+ /* Now advance the new G1 g_signals to the new g1_start, giving it
an effective signal count of 0 to start. */
- atomic_store_release (cond->__data.__g_signals + g1, lowseq);
+ atomic_store_release (cond->__data.__g_signals + g1, (unsigned)new_g1_start);
/* These values are just observed by signalers, and thus protected by the
lock. */
- unsigned int orig_size = wseq - (old_g1_start + old_orig_size);
+ unsigned int orig_size = wseq - new_g1_start;
__condvar_set_orig_size (cond, orig_size);
/* Use and addition to not loose track of cancellations in what was
previously G2. */
diff --git a/nptl/pthread_cond_signal.c b/nptl/pthread_cond_signal.c
index 24c9d813e7c0ada6..9f04833119fd3f59 100644
--- a/nptl/pthread_cond_signal.c
+++ b/nptl/pthread_cond_signal.c
@@ -81,7 +81,7 @@ __pthread_cond_signal (pthread_cond_t *cond)
release-MO store when initializing a group in __condvar_switch_g1
because we use an atomic read-modify-write and thus extend that
store's release sequence. */
- atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2);
+ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 1);
cond->__data.__g_size[g1]--;
/* TODO Only set it if there are indeed futex waiters. */
do_futex_wake = true;
diff --git a/nptl/pthread_cond_wait.c b/nptl/pthread_cond_wait.c
index 5b82ce639367e0c0..031ec717ca64f66f 100644
--- a/nptl/pthread_cond_wait.c
+++ b/nptl/pthread_cond_wait.c
@@ -85,7 +85,7 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
not hold a reference on the group. */
__condvar_acquire_lock (cond, private);
- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
if (g1_start > seq)
{
/* Our group is closed, so someone provided enough signals for it.
@@ -260,7 +260,6 @@ __condvar_cleanup_waiting (void *arg)
* Waiters fetch-add while having acquire the mutex associated with the
condvar. Signalers load it and fetch-xor it concurrently.
__g1_start: Starting position of G1 (inclusive)
- * LSB is index of current G2.
* Modified by signalers while having acquired the condvar-internal lock
and observed concurrently by waiters.
__g1_orig_size: Initial size of G1
@@ -281,11 +280,9 @@ __condvar_cleanup_waiting (void *arg)
* Reference count used by waiters concurrently with signalers that have
acquired the condvar-internal lock.
__g_signals: The number of signals that can still be consumed, relative to
- the current g1_start. (i.e. bits 31 to 1 of __g_signals are bits
- 31 to 1 of g1_start with the signal count added)
+ the current g1_start. (i.e. g1_start with the signal count added)
* Used as a futex word by waiters. Used concurrently by waiters and
signalers.
- * LSB is currently reserved and 0.
__g_size: Waiters remaining in this group (i.e., which have not been
signaled yet.
* Accessed by signalers and waiters that cancel waiting (both do so only
@@ -387,9 +384,8 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
too. */
unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
- unsigned int lowseq = (g1_start & 1) == g ? signals : g1_start & ~1U;
- if (seq < (g1_start >> 1))
+ if (seq < g1_start)
{
/* If the group is closed already,
then this waiter originally had enough extra signals to
@@ -402,13 +398,13 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
by now, perhaps in the process of switching back to an older
G2, but in either case we're allowed to consume the available
signal and should not block anymore. */
- if ((int)(signals - lowseq) >= 2)
+ if ((int)(signals - (unsigned int)g1_start) > 0)
{
/* Try to grab a signal. See above for MO. (if we do another loop
iteration we need to see the correct value of g1_start) */
if (atomic_compare_exchange_weak_acquire (
cond->__data.__g_signals + g,
- &signals, signals - 2))
+ &signals, signals - 1))
break;
else
continue;

File diff suppressed because it is too large Load Diff