492 lines
14 KiB
Diff
492 lines
14 KiB
Diff
commit 03bf8357e8291857a435afcc3048e0b697b6cc04
|
|
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
|
Date: Tue Oct 3 09:22:50 2023 -0300
|
|
|
|
stdlib: Remove use of mergesort on qsort (BZ 21719)
|
|
|
|
This patch removes the mergesort optimization on qsort implementation
|
|
and uses the introsort instead. The mergesort implementation has some
|
|
issues:
|
|
|
|
- It is as-safe only for certain types sizes (if total size is less
|
|
than 1 KB with large element sizes also forcing memory allocation)
|
|
which contradicts the function documentation. Although not required
|
|
by the C standard, it is preferable and doable to have an O(1) space
|
|
implementation.
|
|
|
|
- The malloc for certain element size and element number adds
|
|
arbitrary latency (might even be worse if malloc is interposed).
|
|
|
|
- To avoid trigger swap from memory allocation the implementation
|
|
relies on system information that might be virtualized (for instance
|
|
VMs with overcommit memory) which might lead to potentially use of
|
|
swap even if system advertise more memory than actually has. The
|
|
check also have the downside of issuing syscalls where none is
|
|
expected (although only once per execution).
|
|
|
|
- The mergesort is suboptimal on an already sorted array (BZ#21719).
|
|
|
|
The introsort implementation is already optimized to use constant extra
|
|
space (due to the limit of total number of elements from maximum VM
|
|
size) and thus can be used to avoid the malloc usage issues.
|
|
|
|
Resulting performance is slower due the usage of qsort, specially in the
|
|
worst-case scenario (partialy or sorted arrays) and due the fact
|
|
mergesort uses a slight improved swap operations.
|
|
|
|
This change also renders the BZ#21719 fix unrequired (since it is meant
|
|
to fix the sorted input performance degradation for mergesort). The
|
|
manual is also updated to indicate the function is now async-cancel
|
|
safe.
|
|
|
|
Checked on x86_64-linux-gnu.
|
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
|
|
|
Conflicts:
|
|
stdlib/msort.c: Deletion had conflicts due to skipped backports.
|
|
|
|
diff --git a/include/stdlib.h b/include/stdlib.h
|
|
index 22c9fb65c3074765..fd108df58ddf8b89 100644
|
|
--- a/include/stdlib.h
|
|
+++ b/include/stdlib.h
|
|
@@ -107,8 +107,6 @@ extern int __posix_openpt (int __oflag) attribute_hidden;
|
|
extern int __add_to_environ (const char *name, const char *value,
|
|
const char *combines, int replace)
|
|
attribute_hidden;
|
|
-extern void _quicksort (void *const pbase, size_t total_elems,
|
|
- size_t size, __compar_d_fn_t cmp, void *arg);
|
|
|
|
extern int __on_exit (void (*__func) (int __status, void *__arg), void *__arg);
|
|
|
|
diff --git a/manual/argp.texi b/manual/argp.texi
|
|
index 0023441812d4e584..b77ad68285ecb732 100644
|
|
--- a/manual/argp.texi
|
|
+++ b/manual/argp.texi
|
|
@@ -735,7 +735,7 @@ for options, bad phase of the moon, etc.
|
|
@c hol_set_group ok
|
|
@c hol_find_entry ok
|
|
@c hol_sort @mtslocale @acucorrupt
|
|
-@c qsort dup @acucorrupt
|
|
+@c qsort dup
|
|
@c hol_entry_qcmp @mtslocale
|
|
@c hol_entry_cmp @mtslocale
|
|
@c group_cmp ok
|
|
diff --git a/manual/locale.texi b/manual/locale.texi
|
|
index 720e0ca952a665bd..f6afa5dc44a2a016 100644
|
|
--- a/manual/locale.texi
|
|
+++ b/manual/locale.texi
|
|
@@ -253,7 +253,7 @@ The symbols in this section are defined in the header file @file{locale.h}.
|
|
@c calculate_head_size ok
|
|
@c __munmap ok
|
|
@c compute_hashval ok
|
|
-@c qsort dup @acucorrupt
|
|
+@c qsort dup
|
|
@c rangecmp ok
|
|
@c malloc @ascuheap @acsmem
|
|
@c strdup @ascuheap @acsmem
|
|
@@ -275,7 +275,6 @@ The symbols in this section are defined in the header file @file{locale.h}.
|
|
@c realloc @ascuheap @acsmem
|
|
@c realloc @ascuheap @acsmem
|
|
@c fclose @ascuheap @asulock @acsmem @acsfd @aculock
|
|
-@c qsort @ascuheap @acsmem
|
|
@c alias_compare dup
|
|
@c libc_lock_unlock @aculock
|
|
@c _nl_explode_name @ascuheap @acsmem
|
|
diff --git a/manual/search.texi b/manual/search.texi
|
|
index 5691bf2f2b2bb861..a550858478f7fc83 100644
|
|
--- a/manual/search.texi
|
|
+++ b/manual/search.texi
|
|
@@ -159,7 +159,7 @@ To sort an array using an arbitrary comparison function, use the
|
|
|
|
@deftypefun void qsort (void *@var{array}, size_t @var{count}, size_t @var{size}, comparison_fn_t @var{compare})
|
|
@standards{ISO, stdlib.h}
|
|
-@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}}
|
|
+@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
|
|
The @code{qsort} function sorts the array @var{array}. The array
|
|
contains @var{count} elements, each of which is of size @var{size}.
|
|
|
|
@@ -199,9 +199,8 @@ Functions}):
|
|
The @code{qsort} function derives its name from the fact that it was
|
|
originally implemented using the ``quick sort'' algorithm.
|
|
|
|
-The implementation of @code{qsort} in this library might not be an
|
|
-in-place sort and might thereby use an extra amount of memory to store
|
|
-the array.
|
|
+The implementation of @code{qsort} in this library is an in-place sort
|
|
+and uses a constant extra space (allocated on the stack).
|
|
@end deftypefun
|
|
|
|
@node Search/Sort Example
|
|
diff --git a/stdlib/Makefile b/stdlib/Makefile
|
|
index 03f8478c64408ed3..3b89bc2aa0307321 100644
|
|
--- a/stdlib/Makefile
|
|
+++ b/stdlib/Makefile
|
|
@@ -94,7 +94,6 @@ routines := \
|
|
mbtowc \
|
|
mrand48 \
|
|
mrand48_r \
|
|
- msort \
|
|
nrand48 \
|
|
nrand48_r \
|
|
old_atexit \
|
|
@@ -370,7 +369,6 @@ generated += \
|
|
# generated
|
|
|
|
CFLAGS-bsearch.c += $(uses-callbacks)
|
|
-CFLAGS-msort.c += $(uses-callbacks)
|
|
CFLAGS-qsort.c += $(uses-callbacks)
|
|
CFLAGS-system.c += -fexceptions
|
|
CFLAGS-system.os = -fomit-frame-pointer
|
|
diff --git a/stdlib/msort.c b/stdlib/msort.c
|
|
deleted file mode 100644
|
|
index 8750cc59db2337cf..0000000000000000
|
|
--- a/stdlib/msort.c
|
|
+++ /dev/null
|
|
@@ -1,310 +0,0 @@
|
|
-/* An alternative to qsort, with an identical interface.
|
|
- This file is part of the GNU C Library.
|
|
- Copyright (C) 1992-2021 Free Software Foundation, Inc.
|
|
- Written by Mike Haertel, September 1988.
|
|
-
|
|
- The GNU C Library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Lesser General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2.1 of the License, or (at your option) any later version.
|
|
-
|
|
- The GNU C Library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Lesser General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Lesser General Public
|
|
- License along with the GNU C Library; if not, see
|
|
- <https://www.gnu.org/licenses/>. */
|
|
-
|
|
-#include <alloca.h>
|
|
-#include <stdint.h>
|
|
-#include <stdlib.h>
|
|
-#include <string.h>
|
|
-#include <unistd.h>
|
|
-#include <memcopy.h>
|
|
-#include <errno.h>
|
|
-#include <atomic.h>
|
|
-
|
|
-struct msort_param
|
|
-{
|
|
- size_t s;
|
|
- size_t var;
|
|
- __compar_d_fn_t cmp;
|
|
- void *arg;
|
|
- char *t;
|
|
-};
|
|
-static void msort_with_tmp (const struct msort_param *p, void *b, size_t n);
|
|
-
|
|
-static void
|
|
-msort_with_tmp (const struct msort_param *p, void *b, size_t n)
|
|
-{
|
|
- char *b1, *b2;
|
|
- size_t n1, n2;
|
|
-
|
|
- if (n <= 1)
|
|
- return;
|
|
-
|
|
- n1 = n / 2;
|
|
- n2 = n - n1;
|
|
- b1 = b;
|
|
- b2 = (char *) b + (n1 * p->s);
|
|
-
|
|
- msort_with_tmp (p, b1, n1);
|
|
- msort_with_tmp (p, b2, n2);
|
|
-
|
|
- char *tmp = p->t;
|
|
- const size_t s = p->s;
|
|
- __compar_d_fn_t cmp = p->cmp;
|
|
- void *arg = p->arg;
|
|
- switch (p->var)
|
|
- {
|
|
- case 0:
|
|
- while (n1 > 0 && n2 > 0)
|
|
- {
|
|
- if ((*cmp) (b1, b2, arg) <= 0)
|
|
- {
|
|
- *(uint32_t *) tmp = *(uint32_t *) b1;
|
|
- b1 += sizeof (uint32_t);
|
|
- --n1;
|
|
- }
|
|
- else
|
|
- {
|
|
- *(uint32_t *) tmp = *(uint32_t *) b2;
|
|
- b2 += sizeof (uint32_t);
|
|
- --n2;
|
|
- }
|
|
- tmp += sizeof (uint32_t);
|
|
- }
|
|
- break;
|
|
- case 1:
|
|
- while (n1 > 0 && n2 > 0)
|
|
- {
|
|
- if ((*cmp) (b1, b2, arg) <= 0)
|
|
- {
|
|
- *(uint64_t *) tmp = *(uint64_t *) b1;
|
|
- b1 += sizeof (uint64_t);
|
|
- --n1;
|
|
- }
|
|
- else
|
|
- {
|
|
- *(uint64_t *) tmp = *(uint64_t *) b2;
|
|
- b2 += sizeof (uint64_t);
|
|
- --n2;
|
|
- }
|
|
- tmp += sizeof (uint64_t);
|
|
- }
|
|
- break;
|
|
- case 2:
|
|
- while (n1 > 0 && n2 > 0)
|
|
- {
|
|
- unsigned long *tmpl = (unsigned long *) tmp;
|
|
- unsigned long *bl;
|
|
-
|
|
- tmp += s;
|
|
- if ((*cmp) (b1, b2, arg) <= 0)
|
|
- {
|
|
- bl = (unsigned long *) b1;
|
|
- b1 += s;
|
|
- --n1;
|
|
- }
|
|
- else
|
|
- {
|
|
- bl = (unsigned long *) b2;
|
|
- b2 += s;
|
|
- --n2;
|
|
- }
|
|
- while (tmpl < (unsigned long *) tmp)
|
|
- *tmpl++ = *bl++;
|
|
- }
|
|
- break;
|
|
- case 3:
|
|
- while (n1 > 0 && n2 > 0)
|
|
- {
|
|
- if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
|
|
- {
|
|
- *(void **) tmp = *(void **) b1;
|
|
- b1 += sizeof (void *);
|
|
- --n1;
|
|
- }
|
|
- else
|
|
- {
|
|
- *(void **) tmp = *(void **) b2;
|
|
- b2 += sizeof (void *);
|
|
- --n2;
|
|
- }
|
|
- tmp += sizeof (void *);
|
|
- }
|
|
- break;
|
|
- default:
|
|
- while (n1 > 0 && n2 > 0)
|
|
- {
|
|
- if ((*cmp) (b1, b2, arg) <= 0)
|
|
- {
|
|
- tmp = (char *) __mempcpy (tmp, b1, s);
|
|
- b1 += s;
|
|
- --n1;
|
|
- }
|
|
- else
|
|
- {
|
|
- tmp = (char *) __mempcpy (tmp, b2, s);
|
|
- b2 += s;
|
|
- --n2;
|
|
- }
|
|
- }
|
|
- break;
|
|
- }
|
|
-
|
|
- if (n1 > 0)
|
|
- memcpy (tmp, b1, n1 * s);
|
|
- memcpy (b, p->t, (n - n2) * s);
|
|
-}
|
|
-
|
|
-
|
|
-void
|
|
-__qsort_r (void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg)
|
|
-{
|
|
- size_t size = n * s;
|
|
- char *tmp = NULL;
|
|
- struct msort_param p;
|
|
-
|
|
- /* For large object sizes use indirect sorting. */
|
|
- if (s > 32)
|
|
- size = 2 * n * sizeof (void *) + s;
|
|
-
|
|
- if (size < 1024)
|
|
- /* The temporary array is small, so put it on the stack. */
|
|
- p.t = __alloca (size);
|
|
- else
|
|
- {
|
|
- /* We should avoid allocating too much memory since this might
|
|
- have to be backed up by swap space. */
|
|
- static long int phys_pages;
|
|
- static int pagesize;
|
|
-
|
|
- if (pagesize == 0)
|
|
- {
|
|
- phys_pages = __sysconf (_SC_PHYS_PAGES);
|
|
-
|
|
- if (phys_pages == -1)
|
|
- /* Error while determining the memory size. So let's
|
|
- assume there is enough memory. Otherwise the
|
|
- implementer should provide a complete implementation of
|
|
- the `sysconf' function. */
|
|
- phys_pages = (long int) (~0ul >> 1);
|
|
-
|
|
- /* The following determines that we will never use more than
|
|
- a quarter of the physical memory. */
|
|
- phys_pages /= 4;
|
|
-
|
|
- /* Make sure phys_pages is written to memory. */
|
|
- atomic_write_barrier ();
|
|
-
|
|
- pagesize = __sysconf (_SC_PAGESIZE);
|
|
- }
|
|
-
|
|
- /* Just a comment here. We cannot compute
|
|
- phys_pages * pagesize
|
|
- and compare the needed amount of memory against this value.
|
|
- The problem is that some systems might have more physical
|
|
- memory then can be represented with a `size_t' value (when
|
|
- measured in bytes. */
|
|
-
|
|
- /* If the memory requirements are too high don't allocate memory. */
|
|
- if (size / pagesize > (size_t) phys_pages)
|
|
- {
|
|
- _quicksort (b, n, s, cmp, arg);
|
|
- return;
|
|
- }
|
|
-
|
|
- /* It's somewhat large, so malloc it. */
|
|
- int save = errno;
|
|
- tmp = malloc (size);
|
|
- __set_errno (save);
|
|
- if (tmp == NULL)
|
|
- {
|
|
- /* Couldn't get space, so use the slower algorithm
|
|
- that doesn't need a temporary array. */
|
|
- _quicksort (b, n, s, cmp, arg);
|
|
- return;
|
|
- }
|
|
- p.t = tmp;
|
|
- }
|
|
-
|
|
- p.s = s;
|
|
- p.var = 4;
|
|
- p.cmp = cmp;
|
|
- p.arg = arg;
|
|
-
|
|
- if (s > 32)
|
|
- {
|
|
- /* Indirect sorting. */
|
|
- char *ip = (char *) b;
|
|
- void **tp = (void **) (p.t + n * sizeof (void *));
|
|
- void **t = tp;
|
|
- void *tmp_storage = (void *) (tp + n);
|
|
-
|
|
- while ((void *) t < tmp_storage)
|
|
- {
|
|
- *t++ = ip;
|
|
- ip += s;
|
|
- }
|
|
- p.s = sizeof (void *);
|
|
- p.var = 3;
|
|
- msort_with_tmp (&p, p.t + n * sizeof (void *), n);
|
|
-
|
|
- /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
|
|
- the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
|
|
- char *kp;
|
|
- size_t i;
|
|
- for (i = 0, ip = (char *) b; i < n; i++, ip += s)
|
|
- if ((kp = tp[i]) != ip)
|
|
- {
|
|
- size_t j = i;
|
|
- char *jp = ip;
|
|
- memcpy (tmp_storage, ip, s);
|
|
-
|
|
- do
|
|
- {
|
|
- size_t k = (kp - (char *) b) / s;
|
|
- tp[j] = jp;
|
|
- memcpy (jp, kp, s);
|
|
- j = k;
|
|
- jp = kp;
|
|
- kp = tp[k];
|
|
- }
|
|
- while (kp != ip);
|
|
-
|
|
- tp[j] = jp;
|
|
- memcpy (jp, tmp_storage, s);
|
|
- }
|
|
- }
|
|
- else
|
|
- {
|
|
- if ((s & (sizeof (uint32_t) - 1)) == 0
|
|
- && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0)
|
|
- {
|
|
- if (s == sizeof (uint32_t))
|
|
- p.var = 0;
|
|
- else if (s == sizeof (uint64_t)
|
|
- && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0)
|
|
- p.var = 1;
|
|
- else if ((s & (sizeof (unsigned long) - 1)) == 0
|
|
- && ((char *) b - (char *) 0)
|
|
- % __alignof__ (unsigned long) == 0)
|
|
- p.var = 2;
|
|
- }
|
|
- msort_with_tmp (&p, b, n);
|
|
- }
|
|
- free (tmp);
|
|
-}
|
|
-libc_hidden_def (__qsort_r)
|
|
-weak_alias (__qsort_r, qsort_r)
|
|
-
|
|
-
|
|
-void
|
|
-qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
|
|
-{
|
|
- return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
|
|
-}
|
|
-libc_hidden_def (qsort)
|
|
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
|
|
index db299eb333cf0302..cb1619aa0ae7de72 100644
|
|
--- a/stdlib/qsort.c
|
|
+++ b/stdlib/qsort.c
|
|
@@ -20,7 +20,6 @@
|
|
Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
|
|
Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
|
|
|
|
-#include <alloca.h>
|
|
#include <limits.h>
|
|
#include <memswap.h>
|
|
#include <stdlib.h>
|
|
@@ -266,8 +265,8 @@ insertion_sort_qsort_partitions (void *const pbase, size_t total_elems,
|
|
stack size is needed (actually O(1) in this case)! */
|
|
|
|
void
|
|
-_quicksort (void *const pbase, size_t total_elems, size_t size,
|
|
- __compar_d_fn_t cmp, void *arg)
|
|
+__qsort_r (void *const pbase, size_t total_elems, size_t size,
|
|
+ __compar_d_fn_t cmp, void *arg)
|
|
{
|
|
char *base_ptr = (char *) pbase;
|
|
|
|
@@ -399,3 +398,12 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
|
|
insertion_sort_qsort_partitions (pbase, total_elems, size, swap_type, cmp,
|
|
arg);
|
|
}
|
|
+libc_hidden_def (__qsort_r)
|
|
+weak_alias (__qsort_r, qsort_r)
|
|
+
|
|
+void
|
|
+qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
|
|
+{
|
|
+ return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
|
|
+}
|
|
+libc_hidden_def (qsort)
|