commit 8bd336a00a5311bf7a9e99b3b0e9f01ff5faa74b
Author: Florian Weimer <fweimer@redhat.com>
Date:   Wed Nov 17 12:20:13 2021 +0100

    nptl: Extract <bits/atomic_wide_counter.h> from pthread_cond_common.c

    And make it an installed header.  This addresses a few aliasing
    violations (which do not seem to result in miscompilation due to
    the use of atomics), and also enables use of wide counters in other
    parts of the library.

    The debug output in nptl/tst-cond22 has been adjusted to print
    the 32-bit values instead because it avoids a big-endian/little-endian
    difference.

    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

    Conflicts:
        nptl/Makefile
          Adapt to different overall layout
        nptl/tst-cond22.c
          Updated for context due to
          c36fc50781995e6758cae2b6927839d0157f213c
          being already backported
        sysdeps/nptl/bits/thread-shared-types.h
          Updated for context due to
          c36fc50781995e6758cae2b6927839d0157f213c
          being already backported
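The commit message notes that installing this header "enables use of wide counters in other parts of the library."  As a rough illustration of what such a consumer could look like, here is a hedged sketch; it is not part of the commit, and the counter name and the two wrapper functions are invented for the example, while the __atomic_wide_counter type and the __atomic_wide_counter_* operations are the ones the patch adds.

/* Illustrative only: a hypothetical glibc-internal, single-writer
   event counter built on the new interfaces.  */
#include <atomic_wide_counter.h>   /* internal wrapper around <bits/atomic_wide_counter.h> */
#include <stdint.h>

static __atomic_wide_counter __example_events;   /* invented name */

static void
__example_note_event (void)
{
  /* Single writer; each increment must stay well below 2**31.  */
  __atomic_wide_counter_add_relaxed (&__example_events, 1);
}

static uint64_t
__example_events_snapshot (void)
{
  /* Any number of readers can take a consistent (at least 62-bit)
     snapshot, even on targets without 64-bit atomics.  */
  return __atomic_wide_counter_load_relaxed (&__example_events);
}

On targets with 64-bit atomics these calls reduce to plain atomic loads and adds; otherwise they fall back to the split 32-bit implementation added in misc/atomic_wide_counter.c below.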
diff --git a/bits/atomic_wide_counter.h b/bits/atomic_wide_counter.h
new file mode 100644
index 0000000000000000..0687eb554e5051a1
--- /dev/null
+++ b/bits/atomic_wide_counter.h
@@ -0,0 +1,35 @@
+/* Monotonically increasing wide counters (at least 62 bits).
+   Copyright (C) 2016-2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _BITS_ATOMIC_WIDE_COUNTER_H
+#define _BITS_ATOMIC_WIDE_COUNTER_H
+
+/* Counter that is monotonically increasing (by less than 2**31 per
+   increment), with a single writer, and an arbitrary number of
+   readers.  */
+typedef union
+{
+  __extension__ unsigned long long int __value64;
+  struct
+  {
+    unsigned int __low;
+    unsigned int __high;
+  } __value32;
+} __atomic_wide_counter;
+
+#endif /* _BITS_ATOMIC_WIDE_COUNTER_H */
diff --git a/include/atomic_wide_counter.h b/include/atomic_wide_counter.h
new file mode 100644
index 0000000000000000..31f009d5e66cb45f
--- /dev/null
+++ b/include/atomic_wide_counter.h
@@ -0,0 +1,89 @@
+/* Monotonically increasing wide counters (at least 62 bits).
+   Copyright (C) 2016-2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _ATOMIC_WIDE_COUNTER_H
+#define _ATOMIC_WIDE_COUNTER_H
+
+#include <atomic.h>
+#include <bits/atomic_wide_counter.h>
+
+#if __HAVE_64B_ATOMICS
+
+static inline uint64_t
+__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
+{
+  return atomic_load_relaxed (&c->__value64);
+}
+
+static inline uint64_t
+__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
+                                         unsigned int val)
+{
+  return atomic_fetch_add_relaxed (&c->__value64, val);
+}
+
+static inline uint64_t
+__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c,
+                                         unsigned int val)
+{
+  return atomic_fetch_add_acquire (&c->__value64, val);
+}
+
+static inline void
+__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c,
+                                   unsigned int val)
+{
+  atomic_store_relaxed (&c->__value64,
+                        atomic_load_relaxed (&c->__value64) + val);
+}
+
+static uint64_t __attribute__ ((unused))
+__atomic_wide_counter_fetch_xor_release (__atomic_wide_counter *c,
+                                         unsigned int val)
+{
+  return atomic_fetch_xor_release (&c->__value64, val);
+}
+
+#else /* !__HAVE_64B_ATOMICS */
+
+uint64_t __atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
+  attribute_hidden;
+
+uint64_t __atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
+                                                  unsigned int op)
+  attribute_hidden;
+
+static inline uint64_t
+__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c,
+                                         unsigned int val)
+{
+  uint64_t r = __atomic_wide_counter_fetch_add_relaxed (c, val);
+  atomic_thread_fence_acquire ();
+  return r;
+}
+
+static inline void
+__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c,
+                                   unsigned int val)
+{
+  __atomic_wide_counter_fetch_add_relaxed (c, val);
+}
+
+#endif /* !__HAVE_64B_ATOMICS */
+
+#endif /* _ATOMIC_WIDE_COUNTER_H */
diff --git a/include/bits/atomic_wide_counter.h b/include/bits/atomic_wide_counter.h
new file mode 100644
index 0000000000000000..8fb09a529104cc1d
--- /dev/null
+++ b/include/bits/atomic_wide_counter.h
@@ -0,0 +1 @@
+#include_next <bits/atomic_wide_counter.h>
diff --git a/misc/Makefile b/misc/Makefile
index 5d6fc0f6824a734f..6e8725309cf73293 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -73,7 +73,8 @@ routines := brk sbrk sstk ioctl \
             fgetxattr flistxattr fremovexattr fsetxattr getxattr \
             listxattr lgetxattr llistxattr lremovexattr lsetxattr \
             removexattr setxattr getauxval ifunc-impl-list makedev \
-            allocate_once fd_to_filename single_threaded unwind-link
+            allocate_once fd_to_filename single_threaded unwind-link \
+            atomic_wide_counter

 generated += tst-error1.mtrace tst-error1-mem.out \
              tst-allocate_once.mtrace tst-allocate_once-mem.out
diff --git a/misc/atomic_wide_counter.c b/misc/atomic_wide_counter.c
new file mode 100644
index 0000000000000000..56d898192573193e
--- /dev/null
+++ b/misc/atomic_wide_counter.c
@@ -0,0 +1,127 @@
+/* Monotonically increasing wide counters (at least 62 bits).
+   Copyright (C) 2016-2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <atomic_wide_counter.h>
+
+#if !__HAVE_64B_ATOMICS
+
+/* Values we add or xor are less than or equal to 1<<31, so we only
+   have to make overflow-and-addition atomic wrt. to concurrent load
+   operations and xor operations.  To do that, we split each counter
+   into two 32b values of which we reserve the MSB of each to
+   represent an overflow from the lower-order half to the higher-order
+   half.
+
+   In the common case, the state is (higher-order / lower-order half, and . is
+   basically concatenation of the bits):
+   0.h     / 0.l  = h.l
+
+   When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
+   following steps S1-S4 (the values these represent are on the right-hand
+   side):
+   S1:  0.h     / 1.L == (h+1).L
+   S2:  1.(h+1) / 1.L == (h+1).L
+   S3:  1.(h+1) / 0.L == (h+1).L
+   S4:  0.(h+1) / 0.L == (h+1).L
+   If the LSB of the higher-order half is set, readers will ignore the
+   overflow bit in the lower-order half.
+
+   To get an atomic snapshot in load operations, we exploit that the
+   higher-order half is monotonically increasing; if we load a value V from
+   it, then read the lower-order half, and then read the higher-order half
+   again and see the same value V, we know that both halves have existed in
+   the sequence of values the full counter had.  This is similar to the
+   validated reads in the time-based STMs in GCC's libitm (e.g.,
+   method_ml_wt).
+
+   One benefit of this scheme is that this makes load operations
+   obstruction-free because unlike if we would just lock the counter, readers
+   can almost always interpret a snapshot of each halves.  Readers can be
+   forced to read a new snapshot when the read is concurrent with an overflow.
+   However, overflows will happen infrequently, so load operations are
+   practically lock-free.  */
+
+uint64_t
+__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c,
+                                         unsigned int op)
+{
+  /* S1.  Note that this is an atomic read-modify-write so it extends the
+     release sequence of release MO store at S3.  */
+  unsigned int l = atomic_fetch_add_relaxed (&c->__value32.__low, op);
+  unsigned int h = atomic_load_relaxed (&c->__value32.__high);
+  uint64_t result = ((uint64_t) h << 31) | l;
+  l += op;
+  if ((l >> 31) > 0)
+    {
+      /* Overflow.  Need to increment higher-order half.  Note that all
+         add operations are ordered in happens-before.  */
+      h++;
+      /* S2.  Release MO to synchronize with the loads of the higher-order half
+         in the load operation.  See __condvar_load_64_relaxed.  */
+      atomic_store_release (&c->__value32.__high,
+                            h | ((unsigned int) 1 << 31));
+      l ^= (unsigned int) 1 << 31;
+      /* S3.  See __condvar_load_64_relaxed.  */
+      atomic_store_release (&c->__value32.__low, l);
+      /* S4.  Likewise.  */
+      atomic_store_release (&c->__value32.__high, h);
+    }
+  return result;
+}
+
+uint64_t
+__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c)
+{
+  unsigned int h, l, h2;
+  do
+    {
+      /* This load and the second one below to the same location read from the
+         stores in the overflow handling of the add operation or the
+         initializing stores (which is a simple special case because
+         initialization always completely happens before further use).
+         Because no two stores to the higher-order half write the same value,
+         the loop ensures that if we continue to use the snapshot, this load
+         and the second one read from the same store operation.  All candidate
+         store operations have release MO.
+         If we read from S2 in the first load, then we will see the value of
+         S1 on the next load (because we synchronize with S2), or a value
+         later in modification order.  We correctly ignore the lower-half's
+         overflow bit in this case.  If we read from S4, then we will see the
+         value of S3 in the next load (or a later value), which does not have
+         the overflow bit set anymore.
+          */
+      h = atomic_load_acquire (&c->__value32.__high);
+      /* This will read from the release sequence of S3 (i.e, either the S3
+         store or the read-modify-writes at S1 following S3 in modification
+         order).  Thus, the read synchronizes with S3, and the following load
+         of the higher-order half will read from the matching S2 (or a later
+         value).
+         Thus, if we read a lower-half value here that already overflowed and
+         belongs to an increased higher-order half value, we will see the
+         latter and h and h2 will not be equal.  */
+      l = atomic_load_acquire (&c->__value32.__low);
+      /* See above.  */
+      h2 = atomic_load_relaxed (&c->__value32.__high);
+    }
+  while (h != h2);
+  if (((l >> 31) > 0) && ((h >> 31) > 0))
+    l ^= (unsigned int) 1 << 31;
+  return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
+}
+
+#endif /* !__HAVE_64B_ATOMICS */
diff --git a/nptl/Makefile b/nptl/Makefile
index cac75eb8f5b68320..80c7587f0086677b 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -22,8 +22,14 @@ subdir := nptl

 include ../Makeconfig

-headers := pthread.h semaphore.h bits/semaphore.h \
-           bits/struct_mutex.h bits/struct_rwlock.h
+headers := \
+  bits/atomic_wide_counter.h \
+  bits/semaphore.h \
+  bits/struct_mutex.h \
+  bits/struct_rwlock.h \
+  pthread.h \
+  semaphore.h \
+  # headers

 extra-libs := libpthread
 extra-libs-others := $(extra-libs)
@@ -277,7 +283,6 @@ tests = \
   tst-cancel7 \
   tst-cancel17 \
   tst-cancel24 \
-  tst-cond22 \
   tst-cond26 \
   tst-context1 \
   tst-default-attr \
@@ -368,6 +373,7 @@ tests-container = tst-pthread-getattr

 tests-internal := \
   tst-barrier5 \
+  tst-cond22 \
   tst-mutex8 \
   tst-mutex8-static \
   tst-mutexpi8 \
diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index 485aca4076a372d7..9e00ebd73d4d2fba 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -17,79 +17,52 @@
    <https://www.gnu.org/licenses/>.  */

 #include <atomic.h>
+#include <atomic_wide_counter.h>
 #include <stdint.h>
 #include <pthread.h>

-/* We need 3 least-significant bits on __wrefs for something else. */
+/* We need 3 least-significant bits on __wrefs for something else.
+   This also matches __atomic_wide_counter requirements: The highest
+   value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to __g1_start
+   (the two extra bits are for the lock in the two LSBs of
+   __g1_start).  */
 #define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29)

-#if __HAVE_64B_ATOMICS == 1
-
-static uint64_t __attribute__ ((unused))
+static inline uint64_t
 __condvar_load_wseq_relaxed (pthread_cond_t *cond)
 {
-  return atomic_load_relaxed (&cond->__data.__wseq);
+  return __atomic_wide_counter_load_relaxed (&cond->__data.__wseq);
 }

-static uint64_t __attribute__ ((unused))
+static inline uint64_t
 __condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
 {
-  return atomic_fetch_add_acquire (&cond->__data.__wseq, val);
+  return __atomic_wide_counter_fetch_add_acquire (&cond->__data.__wseq, val);
 }

-static uint64_t __attribute__ ((unused))
-__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
-{
-  return atomic_fetch_xor_release (&cond->__data.__wseq, val);
-}
-
-static uint64_t __attribute__ ((unused))
+static inline uint64_t
 __condvar_load_g1_start_relaxed (pthread_cond_t *cond)
 {
-  return atomic_load_relaxed (&cond->__data.__g1_start);
+  return __atomic_wide_counter_load_relaxed (&cond->__data.__g1_start);
 }

-static void __attribute__ ((unused))
+static inline void
 __condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
 {
-  atomic_store_relaxed (&cond->__data.__g1_start,
-                        atomic_load_relaxed (&cond->__data.__g1_start) + val);
+  __atomic_wide_counter_add_relaxed (&cond->__data.__g1_start, val);
 }

-#else
-
-/* We use two 64b counters: __wseq and __g1_start.  They are monotonically
-   increasing and single-writer-multiple-readers counters, so we can implement
-   load, fetch-and-add, and fetch-and-xor operations even when we just have
-   32b atomics.  Values we add or xor are less than or equal to 1<<31 (*),
-   so we only have to make overflow-and-addition atomic wrt. to concurrent
-   load operations and xor operations.  To do that, we split each counter into
-   two 32b values of which we reserve the MSB of each to represent an
-   overflow from the lower-order half to the higher-order half.
-
-   In the common case, the state is (higher-order / lower-order half, and . is
-   basically concatenation of the bits):
-   0.h     / 0.l  = h.l
+#if __HAVE_64B_ATOMICS == 1

-   When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the
-   following steps S1-S4 (the values these represent are on the right-hand
-   side):
-   S1:  0.h     / 1.L == (h+1).L
-   S2:  1.(h+1) / 1.L == (h+1).L
-   S3:  1.(h+1) / 0.L == (h+1).L
-   S4:  0.(h+1) / 0.L == (h+1).L
-   If the LSB of the higher-order half is set, readers will ignore the
-   overflow bit in the lower-order half.
+static inline uint64_t
+__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
+{
+  return atomic_fetch_xor_release (&cond->__data.__wseq.__value64, val);
+}

-   To get an atomic snapshot in load operations, we exploit that the
-   higher-order half is monotonically increasing; if we load a value V from
-   it, then read the lower-order half, and then read the higher-order half
-   again and see the same value V, we know that both halves have existed in
-   the sequence of values the full counter had.  This is similar to the
-   validated reads in the time-based STMs in GCC's libitm (e.g.,
-   method_ml_wt).
+#else /* !__HAVE_64B_ATOMICS */

-   The xor operation needs to be an atomic read-modify-write.  The write
+/* The xor operation needs to be an atomic read-modify-write.  The write
    itself is not an issue as it affects just the lower-order half but not bits
    used in the add operation.  To make the full fetch-and-xor atomic, we
    exploit that concurrently, the value can increase by at most 1<<31 (*): The
@@ -97,117 +70,18 @@ __condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
    than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus
    increment __wseq.  Therefore, if the xor operation observes a value of
    __wseq, then the value it applies the modification to later on can be
-   derived (see below).
-
-   One benefit of this scheme is that this makes load operations
-   obstruction-free because unlike if we would just lock the counter, readers
-   can almost always interpret a snapshot of each halves.  Readers can be
-   forced to read a new snapshot when the read is concurrent with an overflow.
-   However, overflows will happen infrequently, so load operations are
-   practically lock-free.
-
-   (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to
-   __g1_start (the two extra bits are for the lock in the two LSBs of
-   __g1_start).  */
-
-typedef struct
-{
-  unsigned int low;
-  unsigned int high;
-} _condvar_lohi;
-
-static uint64_t
-__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op)
-{
-  /* S1.  Note that this is an atomic read-modify-write so it extends the
-     release sequence of release MO store at S3.  */
-  unsigned int l = atomic_fetch_add_relaxed (&lh->low, op);
-  unsigned int h = atomic_load_relaxed (&lh->high);
-  uint64_t result = ((uint64_t) h << 31) | l;
-  l += op;
-  if ((l >> 31) > 0)
-    {
-      /* Overflow.  Need to increment higher-order half.  Note that all
-         add operations are ordered in happens-before.  */
-      h++;
-      /* S2.  Release MO to synchronize with the loads of the higher-order half
-         in the load operation.  See __condvar_load_64_relaxed.  */
-      atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31));
-      l ^= (unsigned int) 1 << 31;
-      /* S3.  See __condvar_load_64_relaxed.  */
-      atomic_store_release (&lh->low, l);
-      /* S4.  Likewise.  */
-      atomic_store_release (&lh->high, h);
-    }
-  return result;
-}
-
-static uint64_t
-__condvar_load_64_relaxed (_condvar_lohi *lh)
-{
-  unsigned int h, l, h2;
-  do
-    {
-      /* This load and the second one below to the same location read from the
-         stores in the overflow handling of the add operation or the
-         initializing stores (which is a simple special case because
-         initialization always completely happens before further use).
-         Because no two stores to the higher-order half write the same value,
-         the loop ensures that if we continue to use the snapshot, this load
-         and the second one read from the same store operation.  All candidate
-         store operations have release MO.
-         If we read from S2 in the first load, then we will see the value of
-         S1 on the next load (because we synchronize with S2), or a value
-         later in modification order.  We correctly ignore the lower-half's
-         overflow bit in this case.  If we read from S4, then we will see the
-         value of S3 in the next load (or a later value), which does not have
-         the overflow bit set anymore.
-          */
-      h = atomic_load_acquire (&lh->high);
-      /* This will read from the release sequence of S3 (i.e, either the S3
-         store or the read-modify-writes at S1 following S3 in modification
-         order).  Thus, the read synchronizes with S3, and the following load
-         of the higher-order half will read from the matching S2 (or a later
-         value).
-         Thus, if we read a lower-half value here that already overflowed and
-         belongs to an increased higher-order half value, we will see the
-         latter and h and h2 will not be equal.  */
-      l = atomic_load_acquire (&lh->low);
-      /* See above.  */
-      h2 = atomic_load_relaxed (&lh->high);
-    }
-  while (h != h2);
-  if (((l >> 31) > 0) && ((h >> 31) > 0))
-    l ^= (unsigned int) 1 << 31;
-  return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l;
-}
-
-static uint64_t __attribute__ ((unused))
-__condvar_load_wseq_relaxed (pthread_cond_t *cond)
-{
-  return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32);
-}
-
-static uint64_t __attribute__ ((unused))
-__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val)
-{
-  uint64_t r = __condvar_fetch_add_64_relaxed
-    ((_condvar_lohi *) &cond->__data.__wseq32, val);
-  atomic_thread_fence_acquire ();
-  return r;
-}
+   derived.  */

 static uint64_t __attribute__ ((unused))
 __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
 {
-  _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32;
   /* First, get the current value.  See __condvar_load_64_relaxed.  */
   unsigned int h, l, h2;
   do
     {
-      h = atomic_load_acquire (&lh->high);
-      l = atomic_load_acquire (&lh->low);
-      h2 = atomic_load_relaxed (&lh->high);
+      h = atomic_load_acquire (&cond->__data.__wseq.__value32.__high);
+      l = atomic_load_acquire (&cond->__data.__wseq.__value32.__low);
+      h2 = atomic_load_relaxed (&cond->__data.__wseq.__value32.__high);
     }
   while (h != h2);
   if (((l >> 31) > 0) && ((h >> 31) == 0))
@@ -219,8 +93,9 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
      earlier in modification order than the following fetch-xor.
      This uses release MO to make the full operation have release semantics
      (all other operations access the lower-order half).  */
-  unsigned int l2 = atomic_fetch_xor_release (&lh->low, val)
-                    & ~((unsigned int) 1 << 31);
+  unsigned int l2
+    = (atomic_fetch_xor_release (&cond->__data.__wseq.__value32.__low, val)
+       & ~((unsigned int) 1 << 31));
   if (l2 < l)
     /* The lower-order half overflowed in the meantime.  This happened exactly
        once due to the limit on concurrent waiters (see above).  */
@@ -228,22 +103,7 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
   return ((uint64_t) h << 31) + l2;
 }

-static uint64_t __attribute__ ((unused))
-__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
-{
-  return __condvar_load_64_relaxed
-    ((_condvar_lohi *) &cond->__data.__g1_start32);
-}
-
-static void __attribute__ ((unused))
-__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
-{
-  ignore_value (__condvar_fetch_add_64_relaxed
-                ((_condvar_lohi *) &cond->__data.__g1_start32, val));
-}
-
-#endif /* !__HAVE_64B_ATOMICS */
-
+#endif /* !__HAVE_64B_ATOMICS */

 /* The lock that signalers use.  See pthread_cond_wait_common for uses.
    The lock is our normal three-state lock: not acquired (0) / acquired (1) /
diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c
index ebeeeaf666070076..bdcb45c53674a5fd 100644
--- a/nptl/tst-cond22.c
+++ b/nptl/tst-cond22.c
@@ -106,8 +106,11 @@ do_test (void)
       status = 1;
     }

-  printf ("cond = { %llu, %llu, %u/%u, %u/%u, %u, %u }\n",
-          c.__data.__wseq, c.__data.__g1_start,
+  printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n",
+          c.__data.__wseq.__value32.__high,
+          c.__data.__wseq.__value32.__low,
+          c.__data.__g1_start.__value32.__high,
+          c.__data.__g1_start.__value32.__low,
           c.__data.__g_signals[0], c.__data.__g_size[0],
           c.__data.__g_signals[1], c.__data.__g_size[1],
           c.__data.__g1_orig_size, c.__data.__wrefs);
@@ -149,8 +152,11 @@ do_test (void)
       status = 1;
     }

-  printf ("cond = { %llu, %llu, %u/%u, %u/%u, %u, %u }\n",
-          c.__data.__wseq, c.__data.__g1_start,
+  printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u, %u/%u, %u, %u }\n",
+          c.__data.__wseq.__value32.__high,
+          c.__data.__wseq.__value32.__low,
+          c.__data.__g1_start.__value32.__high,
+          c.__data.__g1_start.__value32.__low,
           c.__data.__g_signals[0], c.__data.__g_size[0],
           c.__data.__g_signals[1], c.__data.__g_size[1],
           c.__data.__g1_orig_size, c.__data.__wrefs);
diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h
index 5644472323fe5424..2143281eabc7016c 100644
--- a/sysdeps/nptl/bits/thread-shared-types.h
+++ b/sysdeps/nptl/bits/thread-shared-types.h
@@ -43,6 +43,8 @@

 #include <bits/pthreadtypes-arch.h>

+#include <bits/atomic_wide_counter.h>
+

 /* Common definition of pthread_mutex_t. */

@@ -91,24 +93,8 @@ typedef struct __pthread_internal_slist

 struct __pthread_cond_s
 {
-  __extension__ union
-  {
-    __extension__ unsigned long long int __wseq;
-    struct
-    {
-      unsigned int __low;
-      unsigned int __high;
-    } __wseq32;
-  };
-  __extension__ union
-  {
-    __extension__ unsigned long long int __g1_start;
-    struct
-    {
-      unsigned int __low;
-      unsigned int __high;
-    } __g1_start32;
-  };
+  __atomic_wide_counter __wseq;
+  __atomic_wide_counter __g1_start;
   unsigned int __glibc_unused___g_refs[2] __LOCK_ALIGNMENT;
   unsigned int __g_size[2];
   unsigned int __g1_orig_size;