877 lines
27 KiB
Diff
877 lines
27 KiB
Diff
commit 709fbd3ec3595f2d1076b4fec09a739327459288
|
|
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
|
Date: Mon Jan 15 11:07:21 2024 -0300
|
|
|
|
stdlib: Reinstate stable mergesort implementation on qsort
|
|
|
|
The mergesort removal from qsort implementation (commit 03bf8357e8)
|
|
had the side-effect of making sorting nonstable. Although neither
|
|
POSIX nor C standard specify that qsort should be stable, it seems
|
|
that it has become an instance of Hyrum's law where multiple programs
|
|
expect it.
|
|
|
|
Also, the resulting introsort implementation is not faster than
|
|
the previous mergesort (which makes the change even less appealing).
|
|
|
|
This patch restores the previous mergesort implementation, with the
|
|
exception of machinery that checks the resulting allocation against
|
|
the _SC_PHYS_PAGES (it only adds complexity and the heuristic not
|
|
always make sense depending on the system configuration and load).
|
|
The alloca usage was replaced with a fixed-size buffer.
|
|
|
|
For the fallback mechanism, the implementation uses heapsort. It is
|
|
simpler than quicksort, and it does not suffer from adversarial
|
|
inputs. With memory overcommit, it should be rarely triggered.
|
|
|
|
The drawback is mergesort requires O(n) extra space, and since it is
|
|
allocated with malloc the function is AS-signal-unsafe. It should be
|
|
feasible to change it to use mmap, although I am not sure how urgent
|
|
it is. The heapsort is also nonstable, so programs that require a
|
|
stable sort would still be subject to this latent issue.
|
|
|
|
The tst-qsort5 is removed since it will not create quicksort adversarial
|
|
inputs with the current qsort_r implementation.
|
|
|
|
Checked on x86_64-linux-gnu and aarch64-linux-gnu.
|
|
Reviewed-by: Florian Weimer <fweimer@redhat.com>
|
|
|
|
Conflicts:
|
|
stdlib/tst-qsort5.c: Deletion had conflicts due to copyright update.
|
|
|
|
diff --git a/manual/argp.texi b/manual/argp.texi
|
|
index b77ad68285ecb732..0023441812d4e584 100644
|
|
--- a/manual/argp.texi
|
|
+++ b/manual/argp.texi
|
|
@@ -735,7 +735,7 @@ for options, bad phase of the moon, etc.
|
|
@c hol_set_group ok
|
|
@c hol_find_entry ok
|
|
@c hol_sort @mtslocale @acucorrupt
|
|
-@c qsort dup
|
|
+@c qsort dup @acucorrupt
|
|
@c hol_entry_qcmp @mtslocale
|
|
@c hol_entry_cmp @mtslocale
|
|
@c group_cmp ok
|
|
diff --git a/manual/locale.texi b/manual/locale.texi
|
|
index f6afa5dc44a2a016..1b3f97839bb5d068 100644
|
|
--- a/manual/locale.texi
|
|
+++ b/manual/locale.texi
|
|
@@ -253,7 +253,7 @@ The symbols in this section are defined in the header file @file{locale.h}.
|
|
@c calculate_head_size ok
|
|
@c __munmap ok
|
|
@c compute_hashval ok
|
|
-@c qsort dup
|
|
+@c qsort dup @acucorrupt
|
|
@c rangecmp ok
|
|
@c malloc @ascuheap @acsmem
|
|
@c strdup @ascuheap @acsmem
|
|
diff --git a/manual/search.texi b/manual/search.texi
|
|
index a550858478f7fc83..ffaadc46f51b18f9 100644
|
|
--- a/manual/search.texi
|
|
+++ b/manual/search.texi
|
|
@@ -159,7 +159,7 @@ To sort an array using an arbitrary comparison function, use the
|
|
|
|
@deftypefun void qsort (void *@var{array}, size_t @var{count}, size_t @var{size}, comparison_fn_t @var{compare})
|
|
@standards{ISO, stdlib.h}
|
|
-@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
|
|
+@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}}
|
|
The @code{qsort} function sorts the array @var{array}. The array
|
|
contains @var{count} elements, each of which is of size @var{size}.
|
|
|
|
@@ -199,8 +199,9 @@ Functions}):
|
|
The @code{qsort} function derives its name from the fact that it was
|
|
originally implemented using the ``quick sort'' algorithm.
|
|
|
|
-The implementation of @code{qsort} in this library is an in-place sort
|
|
-and uses a constant extra space (allocated on the stack).
|
|
+The implementation of @code{qsort} attempts to allocate auxiliary storage
|
|
+and use the merge sort algorithm, without violating C standard requirement
|
|
+that arguments passed to the comparison function point within the array.
|
|
@end deftypefun
|
|
|
|
@node Search/Sort Example
|
|
diff --git a/stdlib/Makefile b/stdlib/Makefile
|
|
index 82d9d909890853b7..a9d91a57c08ac506 100644
|
|
--- a/stdlib/Makefile
|
|
+++ b/stdlib/Makefile
|
|
@@ -212,7 +212,6 @@ tests := \
|
|
tst-qsort \
|
|
tst-qsort2 \
|
|
tst-qsort3 \
|
|
- tst-qsort5 \
|
|
tst-qsort6 \
|
|
tst-quick_exit \
|
|
tst-rand48 \
|
|
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
|
|
index 3d5405705862ddf0..b95889047ba31193 100644
|
|
--- a/stdlib/qsort.c
|
|
+++ b/stdlib/qsort.c
|
|
@@ -20,6 +20,7 @@
|
|
Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
|
|
Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
|
|
|
|
+#include <errno.h>
|
|
#include <limits.h>
|
|
#include <memswap.h>
|
|
#include <stdlib.h>
|
|
@@ -33,9 +34,13 @@ enum swap_type_t
|
|
{
|
|
SWAP_WORDS_64,
|
|
SWAP_WORDS_32,
|
|
+ SWAP_VOID_ARG,
|
|
SWAP_BYTES
|
|
};
|
|
|
|
+typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
|
|
+typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
|
|
+
|
|
/* If this function returns true, elements can be safely copied using word
|
|
loads and stores. Otherwise, it might not be safe. BASE (as an integer)
|
|
must be a multiple of the word alignment. SIZE must be a multiple of
|
|
@@ -52,7 +57,6 @@ is_aligned (const void *base, size_t size, size_t wordsize)
|
|
static inline void
|
|
swap_words_64 (void * restrict a, void * restrict b, size_t n)
|
|
{
|
|
- typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
|
|
do
|
|
{
|
|
n -= 8;
|
|
@@ -65,7 +69,6 @@ swap_words_64 (void * restrict a, void * restrict b, size_t n)
|
|
static inline void
|
|
swap_words_32 (void * restrict a, void * restrict b, size_t n)
|
|
{
|
|
- typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
|
|
do
|
|
{
|
|
n -= 4;
|
|
@@ -89,43 +92,6 @@ do_swap (void * restrict a, void * restrict b, size_t size,
|
|
__memswap (a, b, size);
|
|
}
|
|
|
|
-/* Discontinue quicksort algorithm when partition gets below this size.
|
|
- This particular magic number was chosen to work best on a Sun 4/260. */
|
|
-#define MAX_THRESH 4
|
|
-
|
|
-/* Stack node declarations used to store unfulfilled partition obligations. */
|
|
-typedef struct
|
|
- {
|
|
- char *lo;
|
|
- char *hi;
|
|
- size_t depth;
|
|
- } stack_node;
|
|
-
|
|
-/* The stack needs log (total_elements) entries (we could even subtract
|
|
- log(MAX_THRESH)). Since total_elements has type size_t, we get as
|
|
- upper bound for log (total_elements):
|
|
- bits per byte (CHAR_BIT) * sizeof(size_t). */
|
|
-enum { STACK_SIZE = CHAR_BIT * sizeof (size_t) };
|
|
-
|
|
-static inline stack_node *
|
|
-push (stack_node *top, char *lo, char *hi, size_t depth)
|
|
-{
|
|
- top->lo = lo;
|
|
- top->hi = hi;
|
|
- top->depth = depth;
|
|
- return ++top;
|
|
-}
|
|
-
|
|
-static inline stack_node *
|
|
-pop (stack_node *top, char **lo, char **hi, size_t *depth)
|
|
-{
|
|
- --top;
|
|
- *lo = top->lo;
|
|
- *hi = top->hi;
|
|
- *depth = top->depth;
|
|
- return top;
|
|
-}
|
|
-
|
|
/* Establish the heap condition at index K, that is, the key at K will
|
|
not be less than either of its children, at 2 * K + 1 and 2 * K + 2
|
|
(if they exist). N is the last valid index. */
|
|
@@ -173,21 +139,35 @@ heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
|
|
}
|
|
}
|
|
|
|
-/* A non-recursive heapsort, used on introsort implementation as a
|
|
- fallback routine with worst-case performance of O(nlog n) and
|
|
- worst-case space complexity of O(1). It sorts the array starting
|
|
- at BASE and ending at END (inclusive), with each element of SIZE
|
|
- bytes. The SWAP_TYPE is the callback function used to swap
|
|
- elements, and CMP is the function used to compare elements. */
|
|
+static enum swap_type_t
|
|
+get_swap_type (void *const pbase, size_t size)
|
|
+{
|
|
+ if ((size & (sizeof (uint32_t) - 1)) == 0
|
|
+ && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0)
|
|
+ {
|
|
+ if (size == sizeof (uint32_t))
|
|
+ return SWAP_WORDS_32;
|
|
+ else if (size == sizeof (uint64_t)
|
|
+ && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0)
|
|
+ return SWAP_WORDS_64;
|
|
+ }
|
|
+ return SWAP_BYTES;
|
|
+}
|
|
+
|
|
+
|
|
+/* A non-recursive heapsort with worst-case performance of O(nlog n) and
|
|
+ worst-case space complexity of O(1). It sorts the array starting at
|
|
+ BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
|
|
+ function used to swap elements, and CMP is the function used to compare
|
|
+ elements. */
|
|
static void
|
|
-heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type,
|
|
- __compar_d_fn_t cmp, void *arg)
|
|
+heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg)
|
|
{
|
|
- size_t n = ((uintptr_t) end - (uintptr_t) base) / size;
|
|
if (n <= 1)
|
|
- /* Handled by insertion sort. */
|
|
return;
|
|
|
|
+ enum swap_type_t swap_type = get_swap_type (base, size);
|
|
+
|
|
/* Build the binary heap, largest value at the base[0]. */
|
|
heapify (base, size, n, swap_type, cmp, arg);
|
|
|
|
@@ -209,226 +189,226 @@ heapsort_r (void *base, void *end, size_t size, enum swap_type_t swap_type,
|
|
}
|
|
}
|
|
|
|
-static inline void
|
|
-insertion_sort_qsort_partitions (void *const pbase, size_t total_elems,
|
|
- size_t size, enum swap_type_t swap_type,
|
|
- __compar_d_fn_t cmp, void *arg)
|
|
+/* The maximum size in bytes required by mergesort that will be provided
|
|
+ through a buffer allocated in the stack. */
|
|
+#define QSORT_STACK_SIZE 1024
|
|
+
|
|
+/* Elements larger than this value will be sorted through indirect sorting
|
|
+ to minimize the need to memory swap calls. */
|
|
+#define INDIRECT_SORT_SIZE_THRES 32
|
|
+
|
|
+struct msort_param
|
|
{
|
|
- char *base_ptr = (char *) pbase;
|
|
- char *const end_ptr = &base_ptr[size * (total_elems - 1)];
|
|
- char *tmp_ptr = base_ptr;
|
|
-#define min(x, y) ((x) < (y) ? (x) : (y))
|
|
- const size_t max_thresh = MAX_THRESH * size;
|
|
- char *thresh = min(end_ptr, base_ptr + max_thresh);
|
|
- char *run_ptr;
|
|
+ size_t s;
|
|
+ enum swap_type_t var;
|
|
+ __compar_d_fn_t cmp;
|
|
+ void *arg;
|
|
+ char *t;
|
|
+};
|
|
|
|
- /* Find smallest element in first threshold and place it at the
|
|
- array's beginning. This is the smallest array element,
|
|
- and the operation speeds up insertion sort's inner loop. */
|
|
+static void
|
|
+msort_with_tmp (const struct msort_param *p, void *b, size_t n)
|
|
+{
|
|
+ char *b1, *b2;
|
|
+ size_t n1, n2;
|
|
|
|
- for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
|
|
- if (cmp (run_ptr, tmp_ptr, arg) < 0)
|
|
- tmp_ptr = run_ptr;
|
|
+ if (n <= 1)
|
|
+ return;
|
|
|
|
- if (tmp_ptr != base_ptr)
|
|
- do_swap (tmp_ptr, base_ptr, size, swap_type);
|
|
+ n1 = n / 2;
|
|
+ n2 = n - n1;
|
|
+ b1 = b;
|
|
+ b2 = (char *) b + (n1 * p->s);
|
|
|
|
- /* Insertion sort, running from left-hand-side up to right-hand-side. */
|
|
+ msort_with_tmp (p, b1, n1);
|
|
+ msort_with_tmp (p, b2, n2);
|
|
|
|
- run_ptr = base_ptr + size;
|
|
- while ((run_ptr += size) <= end_ptr)
|
|
+ char *tmp = p->t;
|
|
+ const size_t s = p->s;
|
|
+ __compar_d_fn_t cmp = p->cmp;
|
|
+ void *arg = p->arg;
|
|
+ switch (p->var)
|
|
{
|
|
- tmp_ptr = run_ptr - size;
|
|
- while (tmp_ptr != base_ptr && cmp (run_ptr, tmp_ptr, arg) < 0)
|
|
- tmp_ptr -= size;
|
|
-
|
|
- tmp_ptr += size;
|
|
- if (tmp_ptr != run_ptr)
|
|
- {
|
|
- char *trav;
|
|
-
|
|
- trav = run_ptr + size;
|
|
- while (--trav >= run_ptr)
|
|
- {
|
|
- char c = *trav;
|
|
- char *hi, *lo;
|
|
-
|
|
- for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
|
|
- *hi = *lo;
|
|
- *hi = c;
|
|
- }
|
|
- }
|
|
+ case SWAP_WORDS_32:
|
|
+ while (n1 > 0 && n2 > 0)
|
|
+ {
|
|
+ if (cmp (b1, b2, arg) <= 0)
|
|
+ {
|
|
+ *(u32_alias_t *) tmp = *(u32_alias_t *) b1;
|
|
+ b1 += sizeof (u32_alias_t);
|
|
+ --n1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ *(u32_alias_t *) tmp = *(u32_alias_t *) b2;
|
|
+ b2 += sizeof (u32_alias_t);
|
|
+ --n2;
|
|
+ }
|
|
+ tmp += sizeof (u32_alias_t);
|
|
+ }
|
|
+ break;
|
|
+ case SWAP_WORDS_64:
|
|
+ while (n1 > 0 && n2 > 0)
|
|
+ {
|
|
+ if (cmp (b1, b2, arg) <= 0)
|
|
+ {
|
|
+ *(u64_alias_t *) tmp = *(u64_alias_t *) b1;
|
|
+ b1 += sizeof (u64_alias_t);
|
|
+ --n1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ *(u64_alias_t *) tmp = *(u64_alias_t *) b2;
|
|
+ b2 += sizeof (u64_alias_t);
|
|
+ --n2;
|
|
+ }
|
|
+ tmp += sizeof (u64_alias_t);
|
|
+ }
|
|
+ break;
|
|
+ case SWAP_VOID_ARG:
|
|
+ while (n1 > 0 && n2 > 0)
|
|
+ {
|
|
+ if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
|
|
+ {
|
|
+ *(void **) tmp = *(void **) b1;
|
|
+ b1 += sizeof (void *);
|
|
+ --n1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ *(void **) tmp = *(void **) b2;
|
|
+ b2 += sizeof (void *);
|
|
+ --n2;
|
|
+ }
|
|
+ tmp += sizeof (void *);
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ while (n1 > 0 && n2 > 0)
|
|
+ {
|
|
+ if (cmp (b1, b2, arg) <= 0)
|
|
+ {
|
|
+ tmp = (char *) __mempcpy (tmp, b1, s);
|
|
+ b1 += s;
|
|
+ --n1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ tmp = (char *) __mempcpy (tmp, b2, s);
|
|
+ b2 += s;
|
|
+ --n2;
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
}
|
|
-}
|
|
-
|
|
-/* Order size using quicksort. This implementation incorporates
|
|
- four optimizations discussed in Sedgewick:
|
|
|
|
- 1. Non-recursive, using an explicit stack of pointer that store the
|
|
- next array partition to sort. To save time, this maximum amount
|
|
- of space required to store an array of SIZE_MAX is allocated on the
|
|
- stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
|
|
- only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
|
|
- Pretty cheap, actually.
|
|
-
|
|
- 2. Chose the pivot element using a median-of-three decision tree.
|
|
- This reduces the probability of selecting a bad pivot value and
|
|
- eliminates certain extraneous comparisons.
|
|
+ if (n1 > 0)
|
|
+ memcpy (tmp, b1, n1 * s);
|
|
+ memcpy (b, p->t, (n - n2) * s);
|
|
+}
|
|
|
|
- 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
|
|
- insertion sort to order the MAX_THRESH items within each partition.
|
|
- This is a big win, since insertion sort is faster for small, mostly
|
|
- sorted array segments.
|
|
+static void
|
|
+__attribute_used__
|
|
+indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
|
|
+ size_t s)
|
|
+{
|
|
+ /* Indirect sorting. */
|
|
+ char *ip = (char *) b;
|
|
+ void **tp = (void **) (p->t + n * sizeof (void *));
|
|
+ void **t = tp;
|
|
+ void *tmp_storage = (void *) (tp + n);
|
|
|
|
- 4. The larger of the two sub-partitions is always pushed onto the
|
|
- stack first, with the algorithm then concentrating on the
|
|
- smaller partition. This *guarantees* no more than log (total_elems)
|
|
- stack size is needed (actually O(1) in this case)! */
|
|
+ while ((void *) t < tmp_storage)
|
|
+ {
|
|
+ *t++ = ip;
|
|
+ ip += s;
|
|
+ }
|
|
+ msort_with_tmp (p, p->t + n * sizeof (void *), n);
|
|
+
|
|
+ /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
|
|
+ the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
|
|
+ char *kp;
|
|
+ size_t i;
|
|
+ for (i = 0, ip = (char *) b; i < n; i++, ip += s)
|
|
+ if ((kp = tp[i]) != ip)
|
|
+ {
|
|
+ size_t j = i;
|
|
+ char *jp = ip;
|
|
+ memcpy (tmp_storage, ip, s);
|
|
+
|
|
+ do
|
|
+ {
|
|
+ size_t k = (kp - (char *) b) / s;
|
|
+ tp[j] = jp;
|
|
+ memcpy (jp, kp, s);
|
|
+ j = k;
|
|
+ jp = kp;
|
|
+ kp = tp[k];
|
|
+ }
|
|
+ while (kp != ip);
|
|
+
|
|
+ tp[j] = jp;
|
|
+ memcpy (jp, tmp_storage, s);
|
|
+ }
|
|
+}
|
|
|
|
void
|
|
__qsort_r (void *const pbase, size_t total_elems, size_t size,
|
|
__compar_d_fn_t cmp, void *arg)
|
|
{
|
|
- char *base_ptr = (char *) pbase;
|
|
-
|
|
- const size_t max_thresh = MAX_THRESH * size;
|
|
-
|
|
if (total_elems <= 1)
|
|
- /* Avoid lossage with unsigned arithmetic below. */
|
|
return;
|
|
|
|
- enum swap_type_t swap_type;
|
|
- if (is_aligned (pbase, size, 8))
|
|
- swap_type = SWAP_WORDS_64;
|
|
- else if (is_aligned (pbase, size, 4))
|
|
- swap_type = SWAP_WORDS_32;
|
|
- else
|
|
- swap_type = SWAP_BYTES;
|
|
+ /* Align to the maximum size used by the swap optimization. */
|
|
+ _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE];
|
|
+ size_t total_size = total_elems * size;
|
|
+ char *buf;
|
|
|
|
- /* Maximum depth before quicksort switches to heapsort. */
|
|
- size_t depth = 2 * (sizeof (size_t) * CHAR_BIT - 1
|
|
- - __builtin_clzl (total_elems));
|
|
+ if (size > INDIRECT_SORT_SIZE_THRES)
|
|
+ total_size = 2 * total_elems * sizeof (void *) + size;
|
|
|
|
- if (total_elems > MAX_THRESH)
|
|
+ if (total_size < sizeof buf)
|
|
+ buf = tmp;
|
|
+ else
|
|
{
|
|
- char *lo = base_ptr;
|
|
- char *hi = &lo[size * (total_elems - 1)];
|
|
- stack_node stack[STACK_SIZE];
|
|
- stack_node *top = push (stack, NULL, NULL, depth);
|
|
-
|
|
- while (stack < top)
|
|
- {
|
|
- if (depth == 0)
|
|
- {
|
|
- heapsort_r (lo, hi, size, swap_type, cmp, arg);
|
|
- top = pop (top, &lo, &hi, &depth);
|
|
- continue;
|
|
- }
|
|
-
|
|
- char *left_ptr;
|
|
- char *right_ptr;
|
|
-
|
|
- /* Select median value from among LO, MID, and HI. Rearrange
|
|
- LO and HI so the three values are sorted. This lowers the
|
|
- probability of picking a pathological pivot value and
|
|
- skips a comparison for both the LEFT_PTR and RIGHT_PTR in
|
|
- the while loops. */
|
|
-
|
|
- char *mid = lo + size * ((hi - lo) / size >> 1);
|
|
-
|
|
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
|
|
- do_swap (mid, lo, size, swap_type);
|
|
- if ((*cmp) ((void *) hi, (void *) mid, arg) < 0)
|
|
- do_swap (mid, hi, size, swap_type);
|
|
- else
|
|
- goto jump_over;
|
|
- if ((*cmp) ((void *) mid, (void *) lo, arg) < 0)
|
|
- do_swap (mid, lo, size, swap_type);
|
|
- jump_over:;
|
|
-
|
|
- left_ptr = lo + size;
|
|
- right_ptr = hi - size;
|
|
-
|
|
- /* Here's the famous ``collapse the walls'' section of quicksort.
|
|
- Gotta like those tight inner loops! They are the main reason
|
|
- that this algorithm runs much faster than others. */
|
|
- do
|
|
- {
|
|
- while (left_ptr != mid
|
|
- && (*cmp) ((void *) left_ptr, (void *) mid, arg) < 0)
|
|
- left_ptr += size;
|
|
-
|
|
- while (right_ptr != mid
|
|
- && (*cmp) ((void *) mid, (void *) right_ptr, arg) < 0)
|
|
- right_ptr -= size;
|
|
-
|
|
- if (left_ptr < right_ptr)
|
|
- {
|
|
- do_swap (left_ptr, right_ptr, size, swap_type);
|
|
- if (mid == left_ptr)
|
|
- mid = right_ptr;
|
|
- else if (mid == right_ptr)
|
|
- mid = left_ptr;
|
|
- left_ptr += size;
|
|
- right_ptr -= size;
|
|
- }
|
|
- else if (left_ptr == right_ptr)
|
|
- {
|
|
- left_ptr += size;
|
|
- right_ptr -= size;
|
|
- break;
|
|
- }
|
|
- }
|
|
- while (left_ptr <= right_ptr);
|
|
-
|
|
- /* Set up pointers for next iteration. First determine whether
|
|
- left and right partitions are below the threshold size. If so,
|
|
- ignore one or both. Otherwise, push the larger partition's
|
|
- bounds on the stack and continue sorting the smaller one. */
|
|
-
|
|
- if ((size_t) (right_ptr - lo) <= max_thresh)
|
|
- {
|
|
- if ((size_t) (hi - left_ptr) <= max_thresh)
|
|
- /* Ignore both small partitions. */
|
|
- {
|
|
- top = pop (top, &lo, &hi, &depth);
|
|
- --depth;
|
|
- }
|
|
- else
|
|
- {
|
|
- /* Ignore small left partition. */
|
|
- lo = left_ptr;
|
|
- --depth;
|
|
- }
|
|
- }
|
|
- else if ((size_t) (hi - left_ptr) <= max_thresh)
|
|
- /* Ignore small right partition. */
|
|
- {
|
|
- hi = right_ptr;
|
|
- --depth;
|
|
- }
|
|
- else if ((right_ptr - lo) > (hi - left_ptr))
|
|
- {
|
|
- /* Push larger left partition indices. */
|
|
- top = push (top, lo, right_ptr, depth - 1);
|
|
- lo = left_ptr;
|
|
- }
|
|
- else
|
|
- {
|
|
- /* Push larger right partition indices. */
|
|
- top = push (top, left_ptr, hi, depth - 1);
|
|
- hi = right_ptr;
|
|
- }
|
|
- }
|
|
+ int save = errno;
|
|
+ buf = malloc (total_size);
|
|
+ __set_errno (save);
|
|
+ if (buf == NULL)
|
|
+ {
|
|
+ /* Fallback to heapsort in case of memory failure. */
|
|
+ heapsort_r (pbase, total_elems - 1, size, cmp, arg);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (size > INDIRECT_SORT_SIZE_THRES)
|
|
+ {
|
|
+ const struct msort_param msort_param =
|
|
+ {
|
|
+ .s = sizeof (void *),
|
|
+ .cmp = cmp,
|
|
+ .arg = arg,
|
|
+ .var = SWAP_VOID_ARG,
|
|
+ .t = buf,
|
|
+ };
|
|
+ indirect_msort_with_tmp (&msort_param, pbase, total_elems, size);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ const struct msort_param msort_param =
|
|
+ {
|
|
+ .s = size,
|
|
+ .cmp = cmp,
|
|
+ .arg = arg,
|
|
+ .var = get_swap_type (pbase, size),
|
|
+ .t = buf,
|
|
+ };
|
|
+ msort_with_tmp (&msort_param, pbase, total_elems);
|
|
}
|
|
|
|
- /* Once the BASE_PTR array is partially sorted by quicksort the rest
|
|
- is completely sorted using insertion sort, since this is efficient
|
|
- for partitions below MAX_THRESH size. BASE_PTR points to the beginning
|
|
- of the array to sort, and END_PTR points at the very last element in
|
|
- the array (*not* one beyond it!). */
|
|
- insertion_sort_qsort_partitions (pbase, total_elems, size, swap_type, cmp,
|
|
- arg);
|
|
+ if (buf != tmp)
|
|
+ free (buf);
|
|
}
|
|
libc_hidden_def (__qsort_r)
|
|
weak_alias (__qsort_r, qsort_r)
|
|
diff --git a/stdlib/tst-qsort4.c b/stdlib/tst-qsort4.c
|
|
index a7abaa1a37461666..4cf373f22e28fade 100644
|
|
--- a/stdlib/tst-qsort4.c
|
|
+++ b/stdlib/tst-qsort4.c
|
|
@@ -30,35 +30,12 @@ cmp (const void *a1, const void *b1, void *closure)
|
|
return *a - *b;
|
|
}
|
|
|
|
-/* Wrapper around heapsort_r that set ups the required variables. */
|
|
-static void
|
|
-heapsort_wrapper (void *const pbase, size_t total_elems, size_t size,
|
|
- __compar_d_fn_t cmp, void *arg)
|
|
-{
|
|
- char *base_ptr = (char *) pbase;
|
|
- char *lo = base_ptr;
|
|
- char *hi = &lo[size * (total_elems - 1)];
|
|
-
|
|
- if (total_elems <= 1)
|
|
- /* Avoid lossage with unsigned arithmetic below. */
|
|
- return;
|
|
-
|
|
- enum swap_type_t swap_type;
|
|
- if (is_aligned (pbase, size, 8))
|
|
- swap_type = SWAP_WORDS_64;
|
|
- else if (is_aligned (pbase, size, 4))
|
|
- swap_type = SWAP_WORDS_32;
|
|
- else
|
|
- swap_type = SWAP_BYTES;
|
|
- heapsort_r (lo, hi, size, swap_type, cmp, arg);
|
|
-}
|
|
-
|
|
static void
|
|
check_one_sort (signed char *array, int length)
|
|
{
|
|
signed char *copy = xmalloc (length);
|
|
memcpy (copy, array, length);
|
|
- heapsort_wrapper (copy, length, 1, cmp, NULL);
|
|
+ heapsort_r (copy, length - 1, 1, cmp, NULL);
|
|
|
|
/* Verify that the result is sorted. */
|
|
for (int i = 1; i < length; ++i)
|
|
diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c
|
|
deleted file mode 100644
|
|
index d3a88c30f8ffb135..0000000000000000
|
|
--- a/stdlib/tst-qsort5.c
|
|
+++ /dev/null
|
|
@@ -1,171 +0,0 @@
|
|
-/* Adversarial test for qsort_r.
|
|
- Copyright (C) 2023 Free Software Foundation, Inc.
|
|
- This file is part of the GNU C Library.
|
|
-
|
|
- The GNU C Library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Lesser General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2.1 of the License, or (at your option) any later version.
|
|
-
|
|
- The GNU C Library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Lesser General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Lesser General Public
|
|
- License along with the GNU C Library; if not, see
|
|
- <http://www.gnu.org/licenses/>. */
|
|
-
|
|
-/* The approach follows Douglas McIlroy, A Killer Adversary for
|
|
- Quicksort. Software—Practice and Experience 29 (1999) 341-344.
|
|
- Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf>
|
|
- (2023-11-17). */
|
|
-
|
|
-#include <math.h>
|
|
-#include <stdlib.h>
|
|
-#include <stdio.h>
|
|
-#include <support/check.h>
|
|
-#include <support/support.h>
|
|
-
|
|
-struct context
|
|
-{
|
|
- /* Called the gas value in the paper. This value is larger than all
|
|
- other values (length minus one will do), so comparison with any
|
|
- decided value has a known result. */
|
|
- int undecided_value;
|
|
-
|
|
- /* If comparing undecided values, one of them as to be assigned a
|
|
- value to ensure consistency with future comparisons. This is the
|
|
- value that will be used. Starts out at zero. */
|
|
- int next_decided;
|
|
-
|
|
- /* Used to trick pivot selection. Deciding the value for the last
|
|
- seen undcided value in a decided/undecided comparison happens
|
|
- to trick the many qsort implementations. */
|
|
- int last_undecided_index;
|
|
-
|
|
- /* This array contains the actually asigned values. The call to
|
|
- qsort_r sorts a different array that contains indices into this
|
|
- array. */
|
|
- int *decided_values;
|
|
-};
|
|
-
|
|
-static int
|
|
-compare_opponent (const void *l1, const void *r1, void *ctx1)
|
|
-{
|
|
- const int *l = l1;
|
|
- const int *r = r1;
|
|
- struct context *ctx = ctx1;
|
|
- int rvalue = ctx->decided_values[*r];
|
|
- int lvalue = ctx->decided_values[*l];
|
|
-
|
|
- if (lvalue == ctx->undecided_value)
|
|
- {
|
|
- if (rvalue == ctx->undecided_value)
|
|
- {
|
|
- /* Both values are undecided. In this case, make a decision
|
|
- for the last-used undecided value. This is tweak is very
|
|
- specific to quicksort. */
|
|
- if (*l == ctx->last_undecided_index)
|
|
- {
|
|
- ctx->decided_values[*l] = ctx->next_decided;
|
|
- ++ctx->next_decided;
|
|
- /* The undecided value or *r is greater. */
|
|
- return -1;
|
|
- }
|
|
- else
|
|
- {
|
|
- ctx->decided_values[*r] = ctx->next_decided;
|
|
- ++ctx->next_decided;
|
|
- /* The undecided value for *l is greater. */
|
|
- return 1;
|
|
- }
|
|
- }
|
|
- else
|
|
- {
|
|
- ctx->last_undecided_index = *l;
|
|
- return 1;
|
|
- }
|
|
- }
|
|
- else
|
|
- {
|
|
- /* *l is a decided value. */
|
|
- if (rvalue == ctx->undecided_value)
|
|
- {
|
|
- ctx->last_undecided_index = *r;
|
|
- /* The undecided value for *r is greater. */
|
|
- return -1;
|
|
- }
|
|
- else
|
|
- return lvalue - rvalue;
|
|
- }
|
|
-}
|
|
-
|
|
-/* Return a pointer to the adversarial permutation of length N. */
|
|
-static int *
|
|
-create_permutation (size_t n)
|
|
-{
|
|
- struct context ctx =
|
|
- {
|
|
- .undecided_value = n - 1, /* Larger than all other values. */
|
|
- .decided_values = xcalloc (n, sizeof (int)),
|
|
- };
|
|
- for (size_t i = 0; i < n; ++i)
|
|
- ctx.decided_values[i] = ctx.undecided_value;
|
|
- int *scratch = xcalloc (n, sizeof (int));
|
|
- for (size_t i = 0; i < n; ++i)
|
|
- scratch[i] = i;
|
|
- qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx);
|
|
- free (scratch);
|
|
- return ctx.decided_values;
|
|
-}
|
|
-
|
|
-/* Callback function for qsort which counts the number of invocations
|
|
- in *CLOSURE. */
|
|
-static int
|
|
-compare_counter (const void *l1, const void *r1, void *closure)
|
|
-{
|
|
- const int *l = l1;
|
|
- const int *r = r1;
|
|
- unsigned long long int *counter = closure;
|
|
- ++*counter;
|
|
- return *l - *r;
|
|
-}
|
|
-
|
|
-/* Count the comparisons required for an adversarial permutation of
|
|
- length N. */
|
|
-static unsigned long long int
|
|
-count_comparisons (size_t n)
|
|
-{
|
|
- int *array = create_permutation (n);
|
|
- unsigned long long int counter = 0;
|
|
- qsort_r (array, n, sizeof (*array), compare_counter, &counter);
|
|
- free (array);
|
|
- return counter;
|
|
-}
|
|
-
|
|
-/* Check the scaling factor for one adversarial permutation of length
|
|
- N, and report some statistics. */
|
|
-static void
|
|
-check_one_n (size_t n)
|
|
-{
|
|
- unsigned long long int count = count_comparisons (n);
|
|
- double factor = count / (n * log (count));
|
|
- printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
|
|
- n, count, factor);
|
|
- /* This is an arbitrary factor which is true for the current
|
|
- implementation across a wide range of sizes. */
|
|
- TEST_VERIFY (factor <= 4.5);
|
|
-}
|
|
-
|
|
-static int
|
|
-do_test (void)
|
|
-{
|
|
- check_one_n (100);
|
|
- check_one_n (1000);
|
|
- for (int i = 1; i <= 15; ++i)
|
|
- check_one_n (i * 10 * 1000);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-#include <support/test-driver.c>
|