From 8084be9f8c5b8428d081d899d0d15eaaa22a8816 Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Wed, 13 Jan 2016 21:35:53 -0500 Subject: [PATCH] New pthread_barrier algorithm with improved standards compliance. --- .gitignore | 4 - glibc-pthread-barrier.patch | 987 ++++++++++++++++++++++++++++++++++++ glibc.spec | 9 +- series | 9 +- 4 files changed, 997 insertions(+), 12 deletions(-) create mode 100644 glibc-pthread-barrier.patch diff --git a/.gitignore b/.gitignore index 339be6d..e69de29 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +0,0 @@ -/.build* -/src -/glibc-*.tar.gz -/glibc-*.tar.xz diff --git a/glibc-pthread-barrier.patch b/glibc-pthread-barrier.patch new file mode 100644 index 0000000..4071e59 --- /dev/null +++ b/glibc-pthread-barrier.patch @@ -0,0 +1,987 @@ +Short description: New pthread_barrier algorithm to fulfill barrier destruction requirements. +Author(s): Torvald Riegel +Origin: PATCH +Bug-RHEL: NA +Bug-Fedora: NA +Bug-Upstream: #13065 +Upstream status: http://patchwork.sourceware.org/patch/10062/ +# commit d6533b39f004789e0de4b7d58a29f8282ee95f7b +# Author: Torvald Riegel +# Date: Wed Jun 24 14:37:32 2015 +0200 +# +# New pthread_barrier algorithm to fulfill barrier destruction requirements. +# +# The previous barrier implementation did not fulfill the POSIX requirements +# for when a barrier can be destroyed. Specifically, it was possible that +# threads that haven't noticed yet that their round is complete still access +# the barrier's memory, and that those accesses can happen after the barrier +# has been legally destroyed. +# The new algorithm does not have this issue, and it avoids using a lock +# internally. +# +Index: glibc-2.22-621-g90c400b/nptl/Makefile +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/Makefile ++++ glibc-2.22-621-g90c400b/nptl/Makefile +@@ -243,7 +243,7 @@ tests = tst-typesizes \ + tst-sem1 tst-sem2 tst-sem3 tst-sem4 tst-sem5 tst-sem6 tst-sem7 \ + tst-sem8 tst-sem9 tst-sem10 tst-sem11 tst-sem12 tst-sem13 tst-sem14 \ + tst-sem15 \ +- tst-barrier1 tst-barrier2 tst-barrier3 tst-barrier4 \ ++ tst-barrier1 tst-barrier2 tst-barrier3 tst-barrier4 tst-barrier5 \ + tst-align tst-align3 \ + tst-basic1 tst-basic2 tst-basic3 tst-basic4 tst-basic5 tst-basic6 \ + tst-basic7 \ +@@ -304,7 +304,7 @@ tests-nolibpthread = tst-unload + + gen-as-const-headers = pthread-errnos.sym \ + lowlevelcond.sym lowlevelrwlock.sym \ +- lowlevelbarrier.sym unwindbuf.sym \ ++ unwindbuf.sym \ + lowlevelrobustlock.sym pthread-pi-defines.sym + + +Index: glibc-2.22-621-g90c400b/nptl/lowlevelbarrier.sym +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/lowlevelbarrier.sym ++++ /dev/null +@@ -1,12 +0,0 @@ +-#include +-#include +-#include +-#include "internaltypes.h" +- +--- +- +-CURR_EVENT offsetof (struct pthread_barrier, curr_event) +-MUTEX offsetof (struct pthread_barrier, lock) +-LEFT offsetof (struct pthread_barrier, left) +-INIT_COUNT offsetof (struct pthread_barrier, init_count) +-PRIVATE offsetof (struct pthread_barrier, private) +Index: glibc-2.22-621-g90c400b/nptl/pthread_barrier_destroy.c +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/pthread_barrier_destroy.c ++++ glibc-2.22-621-g90c400b/nptl/pthread_barrier_destroy.c +@@ -18,25 +18,44 @@ + + #include + #include "pthreadP.h" +-#include ++#include ++#include + + + int + pthread_barrier_destroy (pthread_barrier_t *barrier) + { +- struct 
pthread_barrier *ibarrier; +- int result = EBUSY; ++ struct pthread_barrier *bar = (struct pthread_barrier *) barrier; + +- ibarrier = (struct pthread_barrier *) barrier; ++ /* Destroying a barrier is only allowed if no thread is blocked on it. ++ Thus, there is no unfinished round, and all modifications to IN will ++ have happened before us (either because the calling thread took part ++ in the most recent round and thus synchronized-with all other threads ++ entering, or the program ensured this through other synchronization). ++ We must wait until all threads that entered so far have confirmed that ++ they have exited as well. To get the notification, pretend that we have ++ reached the reset threshold. */ ++ unsigned int count = bar->count; ++ unsigned int max_in_before_reset = BARRIER_IN_THRESHOLD ++ - BARRIER_IN_THRESHOLD % count; ++ /* Relaxed MO sufficient because the program must have ensured that all ++ modifications happen-before this load (see above). */ ++ unsigned int in = atomic_load_relaxed (&bar->in); ++ /* Trigger reset. The required acquire MO is below. */ ++ if (atomic_fetch_add_relaxed (&bar->out, max_in_before_reset - in) < in) ++ { ++ /* Not all threads confirmed yet that they have exited, so another ++ thread will perform a reset. Wait until that has happened. */ ++ while (in != 0) ++ { ++ futex_wait_simple (&bar->in, in, bar->shared); ++ in = atomic_load_relaxed (&bar->in); ++ } ++ } ++ /* We must ensure that memory reuse happens after all prior use of the ++ barrier (specifically, synchronize-with the reset of the barrier or the ++ confirmation of threads leaving the barrier). */ ++ atomic_thread_fence_acquire (); + +- lll_lock (ibarrier->lock, ibarrier->private ^ FUTEX_PRIVATE_FLAG); +- +- if (__glibc_likely (ibarrier->left == ibarrier->init_count)) +- /* The barrier is not used anymore. */ +- result = 0; +- else +- /* Still used, return with an error. */ +- lll_unlock (ibarrier->lock, ibarrier->private ^ FUTEX_PRIVATE_FLAG); +- +- return result; ++ return 0; + } +Index: glibc-2.22-621-g90c400b/nptl/pthread_barrier_init.c +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/pthread_barrier_init.c ++++ glibc-2.22-621-g90c400b/nptl/pthread_barrier_init.c +@@ -18,7 +18,7 @@ + + #include + #include "pthreadP.h" +-#include ++#include + #include + + +@@ -34,8 +34,10 @@ __pthread_barrier_init (pthread_barrier_ + { + struct pthread_barrier *ibarrier; + +- /* XXX EINVAL is not specified by POSIX as a possible error code. */ +- if (__glibc_unlikely (count == 0)) ++ /* XXX EINVAL is not specified by POSIX as a possible error code. See ++ pthread_barrier_wait for the reason for the comparison with ++ BARRIER_IN_THRESHOLD. */ ++ if (__glibc_unlikely (count == 0 || count >= BARRIER_IN_THRESHOLD)) + return EINVAL; + + const struct pthread_barrierattr *iattr +@@ -46,15 +48,12 @@ __pthread_barrier_init (pthread_barrier_ + ibarrier = (struct pthread_barrier *) barrier; + + /* Initialize the individual fields. */ +- ibarrier->lock = LLL_LOCK_INITIALIZER; +- ibarrier->left = count; +- ibarrier->init_count = count; +- ibarrier->curr_event = 0; +- +- /* XXX Don't use FUTEX_SHARED or FUTEX_PRIVATE as long as there are still +- assembly implementations that expect the value determined below. */ +- ibarrier->private = (iattr->pshared != PTHREAD_PROCESS_PRIVATE +- ? 
0 : FUTEX_PRIVATE_FLAG); ++ ibarrier->in = 0; ++ ibarrier->out = 0; ++ ibarrier->count = count; ++ ibarrier->current_round = 0; ++ ibarrier->shared = (iattr->pshared == PTHREAD_PROCESS_PRIVATE ++ ? FUTEX_PRIVATE : FUTEX_SHARED); + + return 0; + } +Index: glibc-2.22-621-g90c400b/nptl/pthread_barrier_wait.c +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/pthread_barrier_wait.c ++++ glibc-2.22-621-g90c400b/nptl/pthread_barrier_wait.c +@@ -18,63 +18,206 @@ + + #include + #include +-#include + #include + #include + + +-/* Wait on barrier. */ ++/* Wait on the barrier. ++ ++ In each round, we wait for a fixed number of threads to enter the barrier ++ (COUNT). Once that has happened, exactly these threads are allowed to ++ leave the barrier. Note that POSIX does not require that only COUNT ++ threads can attempt to block using the barrier concurrently. ++ ++ We count the number of threads that have entered (IN). Each thread ++ increments IN when entering, thus getting a position in the sequence of ++ threads that are or have been waiting (starting with 1, so the position ++ is the number of threads that have entered so far including the current ++ thread). ++ CURRENT_ROUND designates the most recent thread whose round has been ++ detected as complete. When a thread detects that enough threads have ++ entered to make a round complete, it finishes this round by effectively ++ adding COUNT to CURRENT_ROUND atomically. Threads that believe that their ++ round is not complete yet wait until CURRENT_ROUND is not smaller than ++ their position anymore. ++ ++ A barrier can be destroyed as soon as no threads are blocked on the ++ barrier. This is already the case if just one thread from the last round ++ has stopped waiting and returned to the caller; the assumption is that ++ all threads from the round are unblocked atomically, even though they may ++ return at different times from the respective calls to ++ pthread_barrier_wait). Thus, a valid call to pthread_barrier_destroy can ++ be concurrent with other threads still figuring out that their round has ++ been completed. Therefore, threads need to confirm that they have left ++ the barrier by incrementing OUT, and pthread_barrier_destroy needs to wait ++ until OUT equals IN. ++ ++ To avoid an ABA issue for futex_wait on CURRENT_ROUND and for archs with ++ 32b-only atomics, we additionally reset the barrier when IN reaches ++ a threshold to avoid overflow. We assume that the total number of threads ++ is less than INT_MAX/2, and set the threshold accordingly so that we can ++ use a simple atomic_fetch_add on IN instead of a CAS when entering. The ++ threshold is always set to the end of a round, so all threads that have ++ entered are either pre-reset threads or post-reset threads (i.e., have a ++ position larger than the threshold). ++ Pre-reset threads just run the algorithm explained above. Post-reset ++ threads wait until IN is reset to a pre-threshold value. ++ When the last pre-reset thread leaves the barrier (i.e., OUT equals the ++ threshold), it resets the barrier to its initial state. Other (post-reset) ++ threads wait for the reset to have finished by waiting until IN is less ++ than the threshold and then restart by trying to enter the barrier again. 
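++
++   [Editorial illustration, not upstream comment text.]  The threshold
++   arithmetic can be checked with small numbers: with COUNT == 20,
++
++     max_in_before_reset = BARRIER_IN_THRESHOLD
++                           - BARRIER_IN_THRESHOLD % count;
++
++   rounds the threshold down to a multiple of COUNT, so every position
++   at or below it belongs to a complete pre-reset round; and a thread
++   that entered at position i == 47 may complete rounds up to
++   newcr == i - i % count == 40, i.e., the first two full rounds of 20.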
++ ++ We reuse the reset mechanism in pthread_barrier_destroy to get notified ++ when all threads have left the barrier: We trigger an artificial reset and ++ wait for the last pre-reset thread to finish reset, thus notifying the ++ thread that is about to destroy the barrier. ++ ++ Blocking using futexes is straightforward: pre-reset threads wait for ++ completion of their round using CURRENT_ROUND as futex word, and post-reset ++ threads and pthread_barrier_destroy use IN as futex word. ++ ++ Further notes: ++ * It is not simple to let some of the post-reset threads help with the ++ reset because of the ABA issues that arise; therefore, we simply make ++ the last thread to leave responsible for the reset. ++ * POSIX leaves it unspecified whether a signal handler running in a thread ++ that has been unblocked (because its round is complete) can stall all ++ other threads and prevent them from returning from the barrier. In this ++ implementation, other threads will return. However, ++ pthread_barrier_destroy will of course wait for the signal handler thread ++ to confirm that it left the barrier. ++ ++ TODO We should add spinning with back-off. Once we do that, we could also ++ try to avoid the futex_wake syscall when a round is detected as finished. ++ If we do not spin, it is quite likely that at least some other threads will ++ have called futex_wait already. */ + int + __pthread_barrier_wait (pthread_barrier_t *barrier) + { +- struct pthread_barrier *ibarrier = (struct pthread_barrier *) barrier; +- int result = 0; +- int lll_private = ibarrier->private ^ FUTEX_PRIVATE_FLAG; +- int futex_private = (lll_private == LLL_PRIVATE +- ? FUTEX_PRIVATE : FUTEX_SHARED); ++ struct pthread_barrier *bar = (struct pthread_barrier *) barrier; + +- /* Make sure we are alone. */ +- lll_lock (ibarrier->lock, lll_private); ++ /* How many threads entered so far, including ourself. */ ++ unsigned int i; + +- /* One more arrival. */ +- --ibarrier->left; ++ reset_restart: ++ /* Try to enter the barrier. We need acquire MO to (1) ensure that if we ++ observe that our round can be completed (see below for our attempt to do ++ so), all pre-barrier-entry effects of all threads in our round happen ++ before us completing the round, and (2) to make our use of the barrier ++ happen after a potential reset. We need release MO to make sure that our ++ pre-barrier-entry effects happen before threads in this round leaving the ++ barrier. */ ++ i = atomic_fetch_add_acq_rel (&bar->in, 1) + 1; ++ /* These loads are after the fetch_add so that we're less likely to first ++ pull in the cache line as shared. */ ++ unsigned int count = bar->count; ++ /* This is the number of threads that can enter before we need to reset. ++ Always at the end of a round. */ ++ unsigned int max_in_before_reset = BARRIER_IN_THRESHOLD ++ - BARRIER_IN_THRESHOLD % count; + +- /* Are these all? */ +- if (ibarrier->left == 0) ++ if (i > max_in_before_reset) + { +- /* Yes. Increment the event counter to avoid invalid wake-ups and +- tell the current waiters that it is their turn. */ +- ++ibarrier->curr_event; +- +- /* Wake up everybody. */ +- futex_wake (&ibarrier->curr_event, INT_MAX, futex_private); ++ /* We're in a reset round. Just wait for a reset to finish; do not ++ help finishing previous rounds because this could happen ++ concurrently with a reset. 
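++	 [Editorial illustration, not upstream comment text.]  The reset we
++	 wait for here is performed by the last pre-reset thread to leave
++	 (see the reset code further below) and consists of, in this order:
++	   current_round = 0;  out = 0;  in = 0;
++	 with IN written last (release MO) so that post-reset threads
++	 spinning on IN at this point cannot restart while the other
++	 fields are still stale.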
*/ ++ while (i > max_in_before_reset) ++ { ++ futex_wait_simple (&bar->in, i, bar->shared); ++ /* Relaxed MO is fine here because we just need an indication for ++ when we should retry to enter (which will use acquire MO, see ++ above). */ ++ i = atomic_load_relaxed (&bar->in); ++ } ++ goto reset_restart; ++ } + +- /* This is the thread which finished the serialization. */ +- result = PTHREAD_BARRIER_SERIAL_THREAD; ++ /* Look at the current round. At this point, we are just interested in ++ whether we can complete rounds, based on the information we obtained ++ through our acquire-MO load of IN. Nonetheless, if we notice that ++ our round has been completed using this load, we use the acquire-MO ++ fence below to make sure that all pre-barrier-entry effects of all ++ threads in our round happen before us leaving the barrier. Therefore, ++ relaxed MO is sufficient. */ ++ unsigned cr = atomic_load_relaxed (&bar->current_round); ++ ++ /* Try to finish previous rounds and/or the current round. We simply ++ consider just our position here and do not try to do the work of threads ++ that entered more recently. */ ++ while (cr + count <= i) ++ { ++ /* Calculate the new current round based on how many threads entered. ++ NEWCR must be larger than CR because CR+COUNT ends a round. */ ++ unsigned int newcr = i - i % count; ++ /* Try to complete previous and/or the current round. We need release ++ MO to propagate the happens-before that we observed through reading ++ with acquire MO from IN to other threads. If the CAS fails, it ++ is like the relaxed-MO load of CURRENT_ROUND above. */ ++ if (atomic_compare_exchange_weak_release (&bar->current_round, &cr, ++ newcr)) ++ { ++ /* Update CR with the modification we just did. */ ++ cr = newcr; ++ /* Wake threads belonging to the rounds we just finished. We may ++ wake more threads than necessary if more than COUNT threads try ++ to block concurrently on the barrier, but this is not a typical ++ use of barriers. ++ Note that we can still access SHARED because we haven't yet ++ confirmed to have left the barrier. */ ++ futex_wake (&bar->current_round, INT_MAX, bar->shared); ++ /* We did as much as we could based on our position. If we advanced ++ the current round to a round sufficient for us, do not wait for ++ that to happen and skip the acquire fence (we already ++ synchronize-with all other threads in our round through the ++ initial acquire MO fetch_add of IN. */ ++ if (i <= cr) ++ goto ready_to_leave; ++ else ++ break; ++ } + } +- else ++ ++ /* Wait until the current round is more recent than the round we are in. */ ++ while (i > cr) + { +- /* The number of the event we are waiting for. The barrier's event +- number must be bumped before we continue. */ +- unsigned int event = ibarrier->curr_event; +- +- /* Before suspending, make the barrier available to others. */ +- lll_unlock (ibarrier->lock, lll_private); +- +- /* Wait for the event counter of the barrier to change. */ +- do +- futex_wait_simple (&ibarrier->curr_event, event, futex_private); +- while (event == ibarrier->curr_event); ++ /* Wait for the current round to finish. */ ++ futex_wait_simple (&bar->current_round, cr, bar->shared); ++ /* See the fence below. */ ++ cr = atomic_load_relaxed (&bar->current_round); + } + +- /* Make sure the init_count is stored locally or in a register. */ +- unsigned int init_count = ibarrier->init_count; ++ /* Our round finished. 
Use the acquire MO fence to synchronize-with the ++ thread that finished the round, either through the initial load of ++ CURRENT_ROUND above or a failed CAS in the loop above. */ ++ atomic_thread_fence_acquire (); ++ ++ /* Now signal that we left. */ ++ unsigned int o; ++ ready_to_leave: ++ /* We need release MO here so that our use of the barrier happens before ++ reset or memory reuse after pthread_barrier_destroy. */ ++ o = atomic_fetch_add_release (&bar->out, 1) + 1; ++ if (o == max_in_before_reset) ++ { ++ /* Perform a reset if we are the last pre-reset thread leaving. All ++ other threads accessing the barrier are post-reset threads and are ++ incrementing or spinning on IN. Thus, resetting IN as the last step ++ of reset ensures that the reset is not concurrent with actual use of ++ the barrier. We need the acquire MO fence so that the reset happens ++ after use of the barrier by all earlier pre-reset threads. */ ++ atomic_thread_fence_acquire (); ++ atomic_store_relaxed (&bar->current_round, 0); ++ atomic_store_relaxed (&bar->out, 0); ++ /* When destroying the barrier, we wait for a reset to happen. Thus, ++ we must load SHARED now so that this happens before the barrier is ++ destroyed. */ ++ int shared = bar->shared; ++ atomic_store_release (&bar->in, 0); ++ futex_wake (&bar->in, INT_MAX, shared); + +- /* If this was the last woken thread, unlock. */ +- if (atomic_increment_val (&ibarrier->left) == init_count) +- /* We are done. */ +- lll_unlock (ibarrier->lock, lll_private); ++ } + +- return result; ++ /* Return a special value for exactly one thread per round. */ ++ return i % count == 0 ? PTHREAD_BARRIER_SERIAL_THREAD : 0; + } + weak_alias (__pthread_barrier_wait, pthread_barrier_wait) +Index: glibc-2.22-621-g90c400b/nptl/pthread_barrierattr_setpshared.c +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/pthread_barrierattr_setpshared.c ++++ glibc-2.22-621-g90c400b/nptl/pthread_barrierattr_setpshared.c +@@ -24,15 +24,11 @@ + int + pthread_barrierattr_setpshared (pthread_barrierattr_t *attr, int pshared) + { +- struct pthread_barrierattr *iattr; +- + int err = futex_supports_pshared (pshared); + if (err != 0) + return err; + +- iattr = (struct pthread_barrierattr *) attr; +- +- iattr->pshared = pshared; ++ ((struct pthread_barrierattr *) attr)->pshared = pshared; + + return 0; + } +Index: glibc-2.22-621-g90c400b/nptl/tst-barrier4.c +=================================================================== +--- glibc-2.22-621-g90c400b.orig/nptl/tst-barrier4.c ++++ glibc-2.22-621-g90c400b/nptl/tst-barrier4.c +@@ -16,7 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This is a test for behavior not guaranteed by POSIX. */ ++/* This tests destruction of a barrier right after waiting on it. */ + #include + #include + #include +Index: glibc-2.22-621-g90c400b/nptl/tst-barrier5.c +=================================================================== +--- /dev/null ++++ glibc-2.22-621-g90c400b/nptl/tst-barrier5.c +@@ -0,0 +1,145 @@ ++/* Copyright (C) 2004-2015 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This tests the barrier reset mechanism. */ ++#include ++#include ++#include ++#include ++#include ++ ++ ++static pthread_barrier_t b1; ++static pthread_barrier_t b2; ++ ++ ++#define N 20 ++#define ROUNDS_PER_RUN 20 ++#define START ((BARRIER_IN_THRESHOLD / N - ROUNDS_PER_RUN / 2) * N) ++ ++static void * ++tf (void *arg) ++{ ++ int runs = 0; ++ ++ while (runs++ < 30) ++ { ++ /* In each run, we execute a number of rounds and initialize the barrier ++ so that we will go over the reset threshold with those rounds. */ ++ for (int rounds = 0; rounds < ROUNDS_PER_RUN; rounds++) ++ pthread_barrier_wait (&b1); ++ ++ if (pthread_barrier_wait (&b1) == PTHREAD_BARRIER_SERIAL_THREAD) ++ { ++ pthread_barrier_destroy (&b1); ++ if (pthread_barrier_init (&b1, NULL, N) != 0) ++ { ++ puts ("tf: 1st barrier_init failed"); ++ exit (1); ++ } ++ puts ("b1 reinitialized"); ++ /* Trigger a reset. */ ++ struct pthread_barrier *bar = (struct pthread_barrier *) &b1; ++ bar->in = START; ++ bar->out = START; ++ /* We deliberately don't set bar->current_round so that we also ++ test whether the helping for the updates of current_round ++ works correctly. */ ++ } ++ ++ /* Same as above, just for b2. */ ++ for (int rounds = 0; rounds < ROUNDS_PER_RUN; rounds++) ++ pthread_barrier_wait (&b2); ++ ++ if (pthread_barrier_wait (&b2) == PTHREAD_BARRIER_SERIAL_THREAD) ++ { ++ pthread_barrier_destroy (&b2); ++ if (pthread_barrier_init (&b2, NULL, N) != 0) ++ { ++ puts ("tf: 2nd barrier_init failed"); ++ exit (1); ++ } ++ puts ("b2 reinitialized"); ++ /* Trigger a reset. See above. */ ++ struct pthread_barrier *bar = (struct pthread_barrier *) &b2; ++ bar->in = START; ++ bar->out = START; ++ } ++ } ++ ++ return NULL; ++} ++ ++ ++static int ++do_test (void) ++{ ++ pthread_attr_t at; ++ int cnt; ++ ++ if (pthread_attr_init (&at) != 0) ++ { ++ puts ("attr_init failed"); ++ return 1; ++ } ++ ++ if (pthread_attr_setstacksize (&at, 1 * 1024 * 1024) != 0) ++ { ++ puts ("attr_setstacksize failed"); ++ return 1; ++ } ++ ++ if (pthread_barrier_init (&b1, NULL, N) != 0) ++ { ++ puts ("1st barrier_init failed"); ++ return 1; ++ } ++ ++ if (pthread_barrier_init (&b2, NULL, N) != 0) ++ { ++ puts ("2nd barrier_init failed"); ++ return 1; ++ } ++ ++ pthread_t th[N - 1]; ++ for (cnt = 0; cnt < N - 1; ++cnt) ++ if (pthread_create (&th[cnt], &at, tf, NULL) != 0) ++ { ++ puts ("pthread_create failed"); ++ return 1; ++ } ++ ++ if (pthread_attr_destroy (&at) != 0) ++ { ++ puts ("attr_destroy failed"); ++ return 1; ++ } ++ ++ tf (NULL); ++ ++ for (cnt = 0; cnt < N - 1; ++cnt) ++ if (pthread_join (th[cnt], NULL) != 0) ++ { ++ puts ("pthread_join failed"); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION do_test () ++#include "../test-skeleton.c" +Index: glibc-2.22-621-g90c400b/sysdeps/nptl/internaltypes.h +=================================================================== +--- glibc-2.22-621-g90c400b.orig/sysdeps/nptl/internaltypes.h ++++ glibc-2.22-621-g90c400b/sysdeps/nptl/internaltypes.h +@@ -95,12 +95,13 @@ struct pthread_rwlockattr + /* Barrier data structure. 
*/ + struct pthread_barrier + { +- unsigned int curr_event; +- int lock; +- unsigned int left; +- unsigned int init_count; +- int private; ++ unsigned int in; ++ unsigned int current_round; ++ unsigned int count; ++ int shared; ++ unsigned int out; + }; ++#define BARRIER_IN_THRESHOLD (UINT_MAX/2) + + + /* Barrier variable attribute data structure. */ +Index: glibc-2.22-621-g90c400b/sysdeps/unix/sysv/linux/i386/pthread_barrier_wait.S +=================================================================== +--- glibc-2.22-621-g90c400b.orig/sysdeps/unix/sysv/linux/i386/pthread_barrier_wait.S ++++ /dev/null +@@ -1,187 +0,0 @@ +-/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +- +- .text +- +- .globl __pthread_barrier_wait +- .type __pthread_barrier_wait,@function +- .align 16 +-__pthread_barrier_wait: +- cfi_startproc +- pushl %ebx +- cfi_adjust_cfa_offset(4) +- cfi_offset(%ebx, -8) +- +- movl 8(%esp), %ebx +- +- /* Get the mutex. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +- cmpxchgl %edx, MUTEX(%ebx) +- jnz 1f +- +- /* One less waiter. If this was the last one needed wake +- everybody. */ +-2: subl $1, LEFT(%ebx) +- je 3f +- +- /* There are more threads to come. */ +- pushl %esi +- cfi_adjust_cfa_offset(4) +- cfi_offset(%esi, -12) +- +-#if CURR_EVENT == 0 +- movl (%ebx), %edx +-#else +- movl CURR_EVENT(%ebx), %edx +-#endif +- +- /* Release the mutex. */ +- LOCK +- subl $1, MUTEX(%ebx) +- jne 6f +- +- /* Wait for the remaining threads. The call will return immediately +- if the CURR_EVENT memory has meanwhile been changed. */ +-7: +-#if FUTEX_WAIT == 0 +- movl PRIVATE(%ebx), %ecx +-#else +- movl $FUTEX_WAIT, %ecx +- orl PRIVATE(%ebx), %ecx +-#endif +- xorl %esi, %esi +-8: movl $SYS_futex, %eax +- ENTER_KERNEL +- +- /* Don't return on spurious wakeups. The syscall does not change +- any register except %eax so there is no need to reload any of +- them. */ +-#if CURR_EVENT == 0 +- cmpl %edx, (%ebx) +-#else +- cmpl %edx, CURR_EVENT(%ebx) +-#endif +- je 8b +- +- /* Increment LEFT. If this brings the count back to the +- initial count unlock the object. */ +- movl $1, %edx +- movl INIT_COUNT(%ebx), %ecx +- LOCK +- xaddl %edx, LEFT(%ebx) +- subl $1, %ecx +- cmpl %ecx, %edx +- jne 10f +- +- /* Release the mutex. We cannot release the lock before +- waking the waiting threads since otherwise a new thread might +- arrive and gets waken up, too. */ +- LOCK +- subl $1, MUTEX(%ebx) +- jne 9f +- +- /* Note: %esi is still zero. */ +-10: movl %esi, %eax /* != PTHREAD_BARRIER_SERIAL_THREAD */ +- +- popl %esi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%esi) +- popl %ebx +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebx) +- ret +- +- cfi_adjust_cfa_offset(4) +- cfi_offset(%ebx, -8) +- +- /* The necessary number of threads arrived. 
*/ +-3: +-#if CURR_EVENT == 0 +- addl $1, (%ebx) +-#else +- addl $1, CURR_EVENT(%ebx) +-#endif +- +- /* Wake up all waiters. The count is a signed number in the kernel +- so 0x7fffffff is the highest value. */ +- movl $0x7fffffff, %edx +- movl $FUTEX_WAKE, %ecx +- orl PRIVATE(%ebx), %ecx +- movl $SYS_futex, %eax +- ENTER_KERNEL +- +- /* Increment LEFT. If this brings the count back to the +- initial count unlock the object. */ +- movl $1, %edx +- movl INIT_COUNT(%ebx), %ecx +- LOCK +- xaddl %edx, LEFT(%ebx) +- subl $1, %ecx +- cmpl %ecx, %edx +- jne 5f +- +- /* Release the mutex. We cannot release the lock before +- waking the waiting threads since otherwise a new thread might +- arrive and gets waken up, too. */ +- LOCK +- subl $1, MUTEX(%ebx) +- jne 4f +- +-5: orl $-1, %eax /* == PTHREAD_BARRIER_SERIAL_THREAD */ +- +- popl %ebx +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebx) +- ret +- +- cfi_adjust_cfa_offset(4) +- cfi_offset(%ebx, -8) +-1: movl PRIVATE(%ebx), %ecx +- leal MUTEX(%ebx), %edx +- xorl $LLL_SHARED, %ecx +- call __lll_lock_wait +- jmp 2b +- +-4: movl PRIVATE(%ebx), %ecx +- leal MUTEX(%ebx), %eax +- xorl $LLL_SHARED, %ecx +- call __lll_unlock_wake +- jmp 5b +- +- cfi_adjust_cfa_offset(4) +- cfi_offset(%esi, -12) +-6: movl PRIVATE(%ebx), %ecx +- leal MUTEX(%ebx), %eax +- xorl $LLL_SHARED, %ecx +- call __lll_unlock_wake +- jmp 7b +- +-9: movl PRIVATE(%ebx), %ecx +- leal MUTEX(%ebx), %eax +- xorl $LLL_SHARED, %ecx +- call __lll_unlock_wake +- jmp 10b +- cfi_endproc +- .size __pthread_barrier_wait,.-__pthread_barrier_wait +-weak_alias (__pthread_barrier_wait, pthread_barrier_wait) +Index: glibc-2.22-621-g90c400b/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S +=================================================================== +--- glibc-2.22-621-g90c400b.orig/sysdeps/unix/sysv/linux/x86_64/pthread_barrier_wait.S ++++ /dev/null +@@ -1,161 +0,0 @@ +-/* Copyright (C) 2002-2015 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +- +- +- .text +- +- .globl __pthread_barrier_wait +- .type __pthread_barrier_wait,@function +- .align 16 +-__pthread_barrier_wait: +- /* Get the mutex. */ +- xorl %eax, %eax +- movl $1, %esi +- LOCK +- cmpxchgl %esi, MUTEX(%rdi) +- jnz 1f +- +- /* One less waiter. If this was the last one needed wake +- everybody. */ +-2: decl LEFT(%rdi) +- je 3f +- +- /* There are more threads to come. */ +-#if CURR_EVENT == 0 +- movl (%rdi), %edx +-#else +- movl CURR_EVENT(%rdi), %edx +-#endif +- +- /* Release the mutex. */ +- LOCK +- decl MUTEX(%rdi) +- jne 6f +- +- /* Wait for the remaining threads. The call will return immediately +- if the CURR_EVENT memory has meanwhile been changed. 
*/ +-7: +-#if FUTEX_WAIT == 0 +- movl PRIVATE(%rdi), %esi +-#else +- movl $FUTEX_WAIT, %esi +- orl PRIVATE(%rdi), %esi +-#endif +- xorq %r10, %r10 +-8: movl $SYS_futex, %eax +- syscall +- +- /* Don't return on spurious wakeups. The syscall does not change +- any register except %eax so there is no need to reload any of +- them. */ +-#if CURR_EVENT == 0 +- cmpl %edx, (%rdi) +-#else +- cmpl %edx, CURR_EVENT(%rdi) +-#endif +- je 8b +- +- /* Increment LEFT. If this brings the count back to the +- initial count unlock the object. */ +- movl $1, %edx +- movl INIT_COUNT(%rdi), %eax +- LOCK +- xaddl %edx, LEFT(%rdi) +- subl $1, %eax +- cmpl %eax, %edx +- jne,pt 10f +- +- /* Release the mutex. We cannot release the lock before +- waking the waiting threads since otherwise a new thread might +- arrive and gets waken up, too. */ +- LOCK +- decl MUTEX(%rdi) +- jne 9f +- +-10: xorl %eax, %eax /* != PTHREAD_BARRIER_SERIAL_THREAD */ +- +- retq +- +- /* The necessary number of threads arrived. */ +-3: +-#if CURR_EVENT == 0 +- incl (%rdi) +-#else +- incl CURR_EVENT(%rdi) +-#endif +- +- /* Wake up all waiters. The count is a signed number in the kernel +- so 0x7fffffff is the highest value. */ +- movl $0x7fffffff, %edx +- movl $FUTEX_WAKE, %esi +- orl PRIVATE(%rdi), %esi +- movl $SYS_futex, %eax +- syscall +- +- /* Increment LEFT. If this brings the count back to the +- initial count unlock the object. */ +- movl $1, %edx +- movl INIT_COUNT(%rdi), %eax +- LOCK +- xaddl %edx, LEFT(%rdi) +- subl $1, %eax +- cmpl %eax, %edx +- jne,pt 5f +- +- /* Release the mutex. We cannot release the lock before +- waking the waiting threads since otherwise a new thread might +- arrive and gets waken up, too. */ +- LOCK +- decl MUTEX(%rdi) +- jne 4f +- +-5: orl $-1, %eax /* == PTHREAD_BARRIER_SERIAL_THREAD */ +- +- retq +- +-1: movl PRIVATE(%rdi), %esi +- addq $MUTEX, %rdi +- xorl $LLL_SHARED, %esi +- callq __lll_lock_wait +- subq $MUTEX, %rdi +- jmp 2b +- +-4: movl PRIVATE(%rdi), %esi +- addq $MUTEX, %rdi +- xorl $LLL_SHARED, %esi +- callq __lll_unlock_wake +- jmp 5b +- +-6: movl PRIVATE(%rdi), %esi +- addq $MUTEX, %rdi +- xorl $LLL_SHARED, %esi +- callq __lll_unlock_wake +- subq $MUTEX, %rdi +- jmp 7b +- +-9: movl PRIVATE(%rdi), %esi +- addq $MUTEX, %rdi +- xorl $LLL_SHARED, %esi +- callq __lll_unlock_wake +- jmp 10b +- .size __pthread_barrier_wait,.-__pthread_barrier_wait +-weak_alias (__pthread_barrier_wait, pthread_barrier_wait) diff --git a/glibc.spec b/glibc.spec index 5512d2c..8fc1617 100644 --- a/glibc.spec +++ b/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.22-621-g90c400b %define glibcversion 2.22.90 -%define glibcrelease 28%{?dist} +%define glibcrelease 29%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -291,6 +291,9 @@ Patch2034: glibc-aarch64-workaround-nzcv-clobber-in-tlsdesc.patch # Group Merge Patch: Patch2035: glibc-nsswitch-Add-group-merging-support.patch +# New pthread_barrier algorithm: +Patch2036: glibc-pthread-barrier.patch + ############################################################################## # # Benchmark comparison patches. @@ -683,6 +686,7 @@ cat /proc/meminfo %patch0058 -p1 %patch0059 -p1 %patch2035 -p1 +%patch2036 -p1 ############################################################################## # %%prep - Additional prep required... @@ -1941,6 +1945,9 @@ rm -f *.filelist* %endif %changelog +* Wed Jan 13 2016 Carlos O'Donell - 2.22.90-29 +- New pthread_barrier algorithm with improved standards compliance. 
+ * Wed Jan 13 2016 Carlos O'Donell - 2.22.90-28 - Add group merging support for distributed management (#1146822). diff --git a/series b/series index c2bfbfc..900d53b 100644 --- a/series +++ b/series @@ -20,7 +20,6 @@ glibc-fedora-localedata-rh61908.patch -p1 --fuzz=0 glibc-fedora-uname-getrlimit.patch -p1 --fuzz=0 glibc-fedora-__libc_multiple_libcs.patch -p1 --fuzz=0 glibc-fedora-elf-ORIGIN.patch -p1 --fuzz=0 -glibc-fedora-elf-init-hidden_undef.patch -p1 --fuzz=0 glibc-rh952799.patch -p1 --fuzz=0 glibc-rh1009145.patch -p1 --fuzz=0 glibc-rh1013801.patch -p1 --fuzz=0 @@ -31,15 +30,11 @@ glibc-aarch64-workaround-nzcv-clobber-in-tlsdesc.patch -p1 --fuzz=0 glibc-disable-rwlock-elision.patch -p1 --fuzz=0 glibc-cs-path.patch -p1 --fuzz=0 glibc-bench-build.patch -p1 --fuzz=0 -glibc-new-condvar.patch -p1 --fuzz=0 -glibc-rh1238412-remove-duplicate-transliterations.patch -p1 --fuzz=0 -glibc-rh1238412-addition-and-fixes-for-translit_neutral.patch -p1 --fuzz=0 -glibc-rh1238412-update-the-translit-files-to-unicode-7.0.0.patch -p1 --fuzz=0 -glibc-rh1238412-add-translit-rules-for-da-nb-nn-sv-locales.patch -p1 --fuzz=0 -glibc-rh1238412-unicode-8.0.0-update.patch -p1 --fuzz=0 glibc-res-hconf-gcc5.patch -p1 --fuzz=0 glibc-ld-ctype-gcc5.patch -p1 --fuzz=0 glibc-gethnamaddr-gcc5.patch -p1 --fuzz=0 glibc-dns-host-gcc5.patch -p1 --fuzz=0 glibc-bug-regex-gcc5.patch -p1 --fuzz=0 glibc-c-utf8-locale.patch -p1 --fuzz=0 +glibc-nsswitch-Add-group-merging-support.patch -p1 --fuzz=0 +glibc-pthread-barrier.patch --fuzz=0
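
As an editorial illustration (not part of the commit), the destruction
requirement this patch exists to satisfy can be seen in a minimal program in
the style of tst-barrier4: the thread that receives
PTHREAD_BARRIER_SERIAL_THREAD destroys the barrier immediately, while its
siblings may still be inside pthread_barrier_wait.  Under the old
implementation, those late threads could still touch the barrier's memory
after a legal destruction (upstream bug 13065); the new algorithm makes
pthread_barrier_destroy wait until every thread has confirmed, via the OUT
counter, that it left.  The program below is an illustrative sketch, not test
code from the patch.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define N 4

static pthread_barrier_t b;

static void *
tf (void *arg)
{
  /* All N threads meet here; exactly one gets the serial return value.  */
  int r = pthread_barrier_wait (&b);
  if (r != 0 && r != PTHREAD_BARRIER_SERIAL_THREAD)
    {
      puts ("barrier_wait failed");
      exit (1);
    }

  /* POSIX permits destroying a barrier as soon as no thread is blocked
     on it, so the serial thread may destroy it even though the other
     threads might not have returned from pthread_barrier_wait yet.  */
  if (r == PTHREAD_BARRIER_SERIAL_THREAD
      && pthread_barrier_destroy (&b) != 0)
    {
      puts ("barrier_destroy failed");
      exit (1);
    }

  return NULL;
}

int
main (void)
{
  pthread_t th[N - 1];

  if (pthread_barrier_init (&b, NULL, N) != 0)
    {
      puts ("barrier_init failed");
      return 1;
    }

  for (int i = 0; i < N - 1; ++i)
    if (pthread_create (&th[i], NULL, tf, NULL) != 0)
      {
        puts ("pthread_create failed");
        return 1;
      }

  tf (NULL);

  for (int i = 0; i < N - 1; ++i)
    if (pthread_join (th[i], NULL) != 0)
      {
        puts ("pthread_join failed");
        return 1;
      }

  return 0;
}

Built with "gcc -pthread", this runs cleanly on the new implementation.
Under the old lock-based implementation, the late waiters' final accesses to
the lock and LEFT fields could race with destruction and reuse of the
barrier's memory, which is exactly the behavior the new IN/OUT accounting
rules out.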