commit acbaad31e8ea10fce8b9c0aef58afb388bf7489d
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Jan 7 13:21:57 2022 +0100

elf: Fix fences in _dl_find_object_update (bug 28745)

As explained in Hans Boehm, Can Seqlocks Get Along with Programming
Language Memory Models?, an acquire fence is needed in
_dlfo_read_success.  The lack of a fence resulted in an observable
bug on powerpc64le due to compile-time load reordering.

The fence in _dlfo_mappings_begin_update has been reordered, turning
the fence/store sequence into a release MO store equivalent.

Relaxed MO loads are used on the reader side, and relaxed MO stores
on the writer side for the shared data, to avoid formal data races.
This is just to be conservative; it should not actually be necessary
given how the data is used.

This commit also fixes the test run time.  The intent was to run it
for 3 seconds, but 0.3 seconds was enough to uncover the bug very
occasionally (while 3 seconds did not reliably show the bug on every
test run).

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
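
As background, the corrected protocol can be expressed as a standalone
seqlock in C11 atomics.  This is an illustrative sketch, not glibc
code: the names (seq, payload, writer_update, reader_snapshot) are
invented, it uses the classic single-copy seqlock with an odd/even
check, and it substitutes C11 operations for the internal atomic.h
wrappers.  The actual implementation differs in that it keeps two
copies of the data (with a single writer under the loader lock) and
therefore needs no odd-version check.  The acquire fence before the
version re-check is the detail this commit adds:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t seq;       /* Even: stable.  Odd: update in progress.  */
static _Atomic uintptr_t payload;  /* Stand-in for the shared TM data.  */

/* Single writer, as in glibc (the loader lock serializes updates).
   The release fences order the data store relative to the two
   version updates.  */
static void
writer_update (uintptr_t value)
{
  atomic_fetch_add_explicit (&seq, 1, memory_order_relaxed);  /* Now odd.  */
  atomic_thread_fence (memory_order_release);
  atomic_store_explicit (&payload, value, memory_order_relaxed);
  atomic_thread_fence (memory_order_release);
  atomic_fetch_add_explicit (&seq, 1, memory_order_relaxed);  /* Even again.  */
}

/* Reader: relaxed data loads bracketed by version checks.  */
static bool
reader_snapshot (uintptr_t *out)
{
  uint64_t v1 = atomic_load_explicit (&seq, memory_order_acquire);
  if (v1 & 1)
    return false;               /* Update in progress; caller retries.  */
  uintptr_t value = atomic_load_explicit (&payload, memory_order_relaxed);
  atomic_thread_fence (memory_order_acquire);   /* Boehm, Section 4.  */
  if (atomic_load_explicit (&seq, memory_order_relaxed) != v1)
    return false;               /* Concurrent update; caller retries.  */
  *out = value;
  return true;
}

int
main (void)
{
  writer_update (42);
  uintptr_t value;
  while (!reader_snapshot (&value))
    ;
  printf ("payload: %ju\n", (uintmax_t) value);
  return 0;
}

Without that acquire fence, the relaxed payload load and the version
re-check are unordered, so the compiler (as observed on powerpc64le)
or the CPU can hoist the re-check above the data load, and a torn
read passes the consistency check.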
diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
index 324f40742d59b4dc..b23bcaf0a15e82cd 100644
--- a/elf/dl-find_object.c
+++ b/elf/dl-find_object.c
@@ -17,6 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <atomic.h>
 #include <atomic_wide_counter.h>
 #include <dl-find_object.h>
 #include <dlfcn.h>
@@ -80,13 +81,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
    over all segments, even though the data is not stored in one
    contiguous array.
 
-   During updates, the segments are overwritten in place, and a
-   software transactional memory construct (involving the
+   During updates, the segments are overwritten in place.  A software
+   transactional memory construct (involving the
    _dlfo_loaded_mappings_version variable) is used to detect
-   concurrent modification, and retry as necessary.  The memory
-   allocations are never deallocated, but slots used for objects that
-   have been dlclose'd can be reused by dlopen.  The memory can live
-   in the regular C malloc heap.
+   concurrent modification, and retry as necessary.  (This approach is
+   similar to seqlocks, except that two copies are used, and there is
+   only one writer, ever, due to the loader lock.)  Technically,
+   relaxed MO loads and stores need to be used for the shared TM data,
+   to avoid data races.
+
+   The memory allocations are never deallocated, but slots used for
+   objects that have been dlclose'd can be reused by dlopen.  The
+   memory can live in the regular C malloc heap.
 
    The segments are populated from the start of the list, with the
    mappings with the highest address.  Only if this segment is full,
@@ -101,17 +107,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
    needed.  */
 struct dlfo_mappings_segment
 {
-  /* The previous segment has lower base addresses.  */
+  /* The previous segment has lower base addresses.  Constant after
+     initialization; read in the TM region.  */
   struct dlfo_mappings_segment *previous;
 
   /* Used by __libc_freeres to deallocate malloc'ed memory.  */
   void *to_free;
 
   /* Count of array elements in use and allocated.  */
-  size_t size;
+  size_t size;                  /* Read in the TM region.  */
   size_t allocated;
 
-  struct dl_find_object_internal objects[];
+  struct dl_find_object_internal objects[]; /* Read in the TM region.  */
 };
 
 /* To achieve async-signal-safety, two copies of the data structure
@@ -240,7 +247,8 @@ static inline uint64_t
 _dlfo_read_start_version (void)
 {
   /* Acquire MO load synchronizes with the fences at the beginning and
-     end of the TM update region.  */
+     end of the TM update region in _dlfo_mappings_begin_update,
+     _dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.  */
   return __atomic_wide_counter_load_acquire (&_dlfo_loaded_mappings_version);
 }
 
@@ -258,34 +266,30 @@ _dlfo_read_version_locked (void)
 static inline unsigned int
 _dlfo_mappings_begin_update (void)
 {
-  unsigned int v
-    = __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                               2);
-  /* Subsequent stores to the TM data must not be reordered before the
-     store above with the version update.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  return v & 1;
+  return __atomic_wide_counter_fetch_add_relaxed
+    (&_dlfo_loaded_mappings_version, 2);
 }
 
 /* Installs the just-updated version as the active version.  */
 static inline void
 _dlfo_mappings_end_update (void)
 {
-  /* The previous writes to the TM data must not be reordered after
-     the version update below.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                           1);
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 1);
 }
 /* Completes an in-place update without switching versions.  */
 static inline void
 _dlfo_mappings_end_update_no_switch (void)
 {
-  /* The previous writes to the TM data must not be reordered after
-     the version update below.  */
+  /* The store synchronizes with loads in _dlfo_read_start_version
+     (also called from _dlfo_read_success).  */
   atomic_thread_fence_release ();
-  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version,
-                                           2);
+  __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 2);
 }
 
 /* Return true if the read was successful, given the start
@@ -293,6 +297,19 @@ _dlfo_mappings_end_update_no_switch (void)
 static inline bool
 _dlfo_read_success (uint64_t start_version)
 {
+  /* See Hans Boehm, Can Seqlocks Get Along with Programming Language
+     Memory Models?, Section 4.  This is necessary so that loads in
+     the TM region are not ordered past the version check below.  */
+  atomic_thread_fence_acquire ();
+
+  /* Synchronizes with stores in _dlfo_mappings_begin_update,
+     _dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.
+     It is important that all stores from the last update have been
+     visible, and stores from the next updates are not.
+
+     Unlike with seqlocks, there is no check for odd versions here
+     because we have read the unmodified copy (confirmed to be
+     unmodified by the unchanged version).  */
   return _dlfo_read_start_version () == start_version;
 }
 
@@ -318,7 +335,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
     {
       size_t half = size >> 1;
       struct dl_find_object_internal *middle = first + half;
-      if (middle->map_start < pc)
+      if (atomic_load_relaxed (&middle->map_start) < pc)
         {
           first = middle + 1;
           size -= half + 1;
@@ -327,9 +344,9 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
         size = half;
     }
 
-  if (first != end && pc == first->map_start)
+  if (first != end && pc == atomic_load_relaxed (&first->map_start))
     {
-      if (pc < first->map_end)
+      if (pc < atomic_load_relaxed (&first->map_end))
         return first;
       else
         /* Zero-length mapping after dlclose.  */
@@ -339,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
     {
       /* Check to see if PC is in the previous mapping.  */
       --first;
-      if (pc < first->map_end)
+      if (pc < atomic_load_relaxed (&first->map_end))
         /* pc >= first->map_start implied by the search above.  */
         return first;
       else
@@ -408,39 +425,47 @@ _dl_find_object (void *pc1, struct dl_find_object *result)
         size on earlier unused segments.  */
      for (struct dlfo_mappings_segment *seg
             = _dlfo_mappings_active_segment (start_version);
-           seg != NULL && seg->size > 0; seg = seg->previous)
-        if (pc >= seg->objects[0].map_start)
-          {
-            /* PC may lie within this segment.  If it is less than the
-               segment start address, it can only lie in a previous
-               segment, due to the base address sorting.  */
-            struct dl_find_object_internal *obj
-              = _dlfo_lookup (pc, seg->objects, seg->size);
+           seg != NULL;
+           seg = atomic_load_acquire (&seg->previous))
+        {
+          size_t seg_size = atomic_load_relaxed (&seg->size);
+          if (seg_size == 0)
+            break;
 
-            if (obj != NULL)
-              {
-                /* Found the right mapping.  Copy out the data prior to
-                   checking if the read transaction was successful.  */
-                struct dl_find_object_internal copy = *obj;
-                if (_dlfo_read_success (start_version))
-                  {
-                    _dl_find_object_to_external (&copy, result);
-                    return 0;
-                  }
-                else
-                  /* Read transaction failure.  */
-                  goto retry;
-              }
-            else
-              {
-                /* PC is not covered by this mapping.  */
-                if (_dlfo_read_success (start_version))
-                  return -1;
-                else
-                  /* Read transaction failure.  */
-                  goto retry;
-              }
-          } /* if: PC might lie within the current seg.  */
+          if (pc >= atomic_load_relaxed (&seg->objects[0].map_start))
+            {
+              /* PC may lie within this segment.  If it is less than the
+                 segment start address, it can only lie in a previous
+                 segment, due to the base address sorting.  */
+              struct dl_find_object_internal *obj
+                = _dlfo_lookup (pc, seg->objects, seg_size);
+
+              if (obj != NULL)
+                {
+                  /* Found the right mapping.  Copy out the data prior to
+                     checking if the read transaction was successful.  */
+                  struct dl_find_object_internal copy;
+                  _dl_find_object_internal_copy (obj, &copy);
+                  if (_dlfo_read_success (start_version))
+                    {
+                      _dl_find_object_to_external (&copy, result);
+                      return 0;
+                    }
+                  else
+                    /* Read transaction failure.  */
+                    goto retry;
+                }
+              else
+                {
+                  /* PC is not covered by this mapping.  */
+                  if (_dlfo_read_success (start_version))
+                    return -1;
+                  else
+                    /* Read transaction failure.  */
+                    goto retry;
+                }
+            } /* if: PC might lie within the current seg.  */
+        }
 
       /* PC is not covered by any segment.  */
       if (_dlfo_read_success (start_version))
@@ -619,15 +644,19 @@ static inline size_t
 _dlfo_update_init_seg (struct dlfo_mappings_segment *seg,
                        size_t remaining_to_add)
 {
+  size_t new_seg_size;
   if (remaining_to_add < seg->allocated)
     /* Partially filled segment.  */
-    seg->size = remaining_to_add;
+    new_seg_size = remaining_to_add;
   else
-    seg->size = seg->allocated;
-  return seg->size;
+    new_seg_size = seg->allocated;
+  atomic_store_relaxed (&seg->size, new_seg_size);
+  return new_seg_size;
 }
 
-/* Invoked from _dl_find_object_update after sorting.  */
+/* Invoked from _dl_find_object_update after sorting.  Stores to the
+   shared data need to use relaxed MO.  But plain loads can be used
+   because the loader lock prevents concurrent stores.  */
 static bool
 _dl_find_object_update_1 (struct link_map **loaded, size_t count)
 {
@@ -727,7 +756,8 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
         {
           /* Prefer mapping in current_seg.  */
           assert (current_seg_index1 > 0);
-          *dlfo = current_seg->objects[current_seg_index1 - 1];
+          _dl_find_object_internal_copy
+            (&current_seg->objects[current_seg_index1 - 1], dlfo);
           --current_seg_index1;
         }
       else
@@ -753,7 +783,7 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
 
   /* Prevent searching further into unused segments.  */
   if (target_seg->previous != NULL)
-    target_seg->previous->size = 0;
+    atomic_store_relaxed (&target_seg->previous->size, 0);
 
   _dlfo_mappings_end_update ();
   return true;
diff --git a/elf/dl-find_object.h b/elf/dl-find_object.h
index f899905e09427a0d..11569efc9b7daf9c 100644
--- a/elf/dl-find_object.h
+++ b/elf/dl-find_object.h
@@ -20,6 +20,7 @@
 #define _DL_FIND_EH_FRAME_H
 
 #include <assert.h>
+#include <atomic.h>
 #include <dlfcn.h>
 #include <ldsodefs.h>
 #include <stdbool.h>
@@ -44,6 +45,30 @@ struct dl_find_object_internal
 #endif
 };
 
+/* Create a copy of *SOURCE in *COPY using relaxed MO loads and
+   stores.  */
+static inline void
+_dl_find_object_internal_copy (const struct dl_find_object_internal *source,
+                               struct dl_find_object_internal *copy)
+{
+  atomic_store_relaxed (&copy->map_start,
+                        atomic_load_relaxed (&source->map_start));
+  atomic_store_relaxed (&copy->map_end,
+                        atomic_load_relaxed (&source->map_end));
+  atomic_store_relaxed (&copy->map,
+                        atomic_load_relaxed (&source->map));
+  atomic_store_relaxed (&copy->eh_frame,
+                        atomic_load_relaxed (&source->eh_frame));
+#if DLFO_STRUCT_HAS_EH_DBASE
+  atomic_store_relaxed (&copy->eh_dbase,
+                        atomic_load_relaxed (&source->eh_dbase));
+#endif
+#if DLFO_STRUCT_HAS_EH_COUNT
+  atomic_store_relaxed (&copy->eh_count,
+                        atomic_load_relaxed (&source->eh_count));
+#endif
+}
+
 static inline void
 _dl_find_object_to_external (struct dl_find_object_internal *internal,
                              struct dl_find_object *external)
@@ -62,34 +87,35 @@ _dl_find_object_to_external (struct dl_find_object_internal *internal,
 }
 
 /* Extract the object location data from a link map and writes it to
-   *RESULT.  */
+   *RESULT using relaxed MO stores.  */
 static void __attribute__ ((unused))
 _dl_find_object_from_map (struct link_map *l,
                           struct dl_find_object_internal *result)
 {
-  result->map_start = (uintptr_t) l->l_map_start;
-  result->map_end = (uintptr_t) l->l_map_end;
-  result->map = l;
+  atomic_store_relaxed (&result->map_start, (uintptr_t) l->l_map_start);
+  atomic_store_relaxed (&result->map_end, (uintptr_t) l->l_map_end);
+  atomic_store_relaxed (&result->map, l);
 
 #if DLFO_STRUCT_HAS_EH_DBASE
-  result->eh_dbase = (void *) l->l_info[DT_PLTGOT];
+  atomic_store_relaxed (&result->eh_dbase, (void *) l->l_info[DT_PLTGOT]);
 #endif
 
   for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum;
        ph < ph_end; ++ph)
     if (ph->p_type == DLFO_EH_SEGMENT_TYPE)
       {
-        result->eh_frame = (void *) (ph->p_vaddr + l->l_addr);
+        atomic_store_relaxed (&result->eh_frame,
+                              (void *) (ph->p_vaddr + l->l_addr));
 #if DLFO_STRUCT_HAS_EH_COUNT
-        result->eh_count = ph->p_memsz / 8;
+        atomic_store_relaxed (&result->eh_count, ph->p_memsz / 8);
 #endif
         return;
       }
 
   /* Object has no exception handling segment.  */
-  result->eh_frame = NULL;
+  atomic_store_relaxed (&result->eh_frame, NULL);
 #if DLFO_STRUCT_HAS_EH_COUNT
-  result->eh_count = 0;
+  atomic_store_relaxed (&result->eh_count, 0);
 #endif
 }
 
diff --git a/elf/tst-dl_find_object-threads.c b/elf/tst-dl_find_object-threads.c
index 472deeec57da0560..453ba684024b5069 100644
--- a/elf/tst-dl_find_object-threads.c
+++ b/elf/tst-dl_find_object-threads.c
@@ -138,12 +138,12 @@ check (void *address, struct dl_find_object *expected, int line)
 #endif
 }
 
-/* Request process termination after 3 seconds.  */
+/* Request process termination after 0.3 seconds.  */
 static bool exit_requested;
 static void *
exit_thread (void *ignored)
 {
-  usleep (3 * 100 * 1000);
+  usleep (300 * 1000);
   __atomic_store_n (&exit_requested, true, __ATOMIC_RELAXED);
   return NULL;
 }
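
A closing note on the claim that moving the fence in
_dlfo_mappings_begin_update turns the fence/store sequence into a
release MO store equivalent: in C11 terms, a release fence followed
by a relaxed atomic store (or read-modify-write) synchronizes with
any acquire operation that reads the written value, just like a
release store would, and the fence form is in fact slightly stronger,
since it orders prior memory operations against every later atomic
store.  A minimal sketch of the two forms, assuming the glibc
wrappers correspond to the C11 operations in the obvious way (the
function names here are invented for illustration):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t version;  /* Stand-in for _dlfo_loaded_mappings_version.  */

/* Form used by the patched _dlfo_mappings_begin_update: a release
   fence, then a relaxed increment.  */
static uint64_t
begin_update_fence_form (void)
{
  atomic_thread_fence (memory_order_release);
  return atomic_fetch_add_explicit (&version, 2, memory_order_relaxed);
}

/* The release-MO increment it is equivalent to (the fence form is at
   least as strong, per the C11 fence-atomic synchronization rules).  */
static uint64_t
begin_update_release_form (void)
{
  return atomic_fetch_add_explicit (&version, 2, memory_order_release);
}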
|