Limit threadpool size

Resolves: RHEL-132190
This commit is contained in:
Matthias Clasen 2026-06-01 21:55:05 -04:00
parent d818fdd0e0
commit 7eaaf6daab
7 changed files with 553 additions and 0 deletions

View File

@ -0,0 +1,156 @@
From c9d73c1c193acc5ca882b36a07f747ad5ebd4a9c Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Sun, 24 Nov 2024 17:48:43 -0500
Subject: [PATCH] Apply the same chunking to color conversion
When parallelizing color conversion, apply the same chunking that
we do for memory format conversion, to avoid overhead for small
sizes.
---
gdk/gdkmemoryformat.c | 69 ++++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 30 deletions(-)
diff --git a/gdk/gdkmemoryformat.c b/gdk/gdkmemoryformat.c
index 7e45659445..c17be80b46 100644
--- a/gdk/gdkmemoryformat.c
+++ b/gdk/gdkmemoryformat.c
@@ -2279,6 +2279,7 @@
GdkColorState *dest_cs;
gsize width;
gsize height;
+ gsize chunk_size;
/* atomic */ int rows_done;
};
@@ -2390,15 +2391,16 @@
gdk_memory_convert_color_state_srgb_to_srgb_linear (gpointer data)
{
MemoryConvertColorState *mc = data;
- int y;
+ int y0, y;
guint64 before = GDK_PROFILER_CURRENT_TIME;
gsize rows;
- for (y = g_atomic_int_add (&mc->rows_done, 1), rows = 0;
- y < mc->height;
- y = g_atomic_int_add (&mc->rows_done, 1), rows++)
+ for (y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size), rows = 0;
+ y0 < mc->height;
+ y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size))
{
- convert_srgb_to_srgb_linear (mc->data + y * mc->stride, mc->width);
+ for (y = y0; y < MIN (y0 + mc->chunk_size, mc->height); y++, rows++)
+ convert_srgb_to_srgb_linear (mc->data + y * mc->stride, mc->width);
}
ADD_MARK (before,
@@ -2410,15 +2412,16 @@
gdk_memory_convert_color_state_srgb_linear_to_srgb (gpointer data)
{
MemoryConvertColorState *mc = data;
- int y;
+ int y0, y;
guint64 before = GDK_PROFILER_CURRENT_TIME;
gsize rows;
- for (y = g_atomic_int_add (&mc->rows_done, 1), rows = 0;
- y < mc->height;
- y = g_atomic_int_add (&mc->rows_done, 1), rows++)
+ for (y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size), rows = 0;
+ y0 < mc->height;
+ y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size))
{
- convert_srgb_linear_to_srgb (mc->data + y * mc->stride, mc->width);
+ for (y = y0; y < MIN (y0 + mc->chunk_size, mc->height); y++, rows++)
+ convert_srgb_linear_to_srgb (mc->data + y * mc->stride, mc->width);
}
ADD_MARK (before,
@@ -2434,7 +2437,7 @@
GdkFloatColorConvert convert_func = NULL;
GdkFloatColorConvert convert_func2 = NULL;
float (*tmp)[4];
- int y;
+ int y0, y;
guint64 before = GDK_PROFILER_CURRENT_TIME;
gsize rows;
@@ -2454,27 +2457,30 @@
tmp = g_malloc (sizeof (*tmp) * mc->width);
- for (y = g_atomic_int_add (&mc->rows_done, 1), rows = 0;
- y < mc->height;
- y = g_atomic_int_add (&mc->rows_done, 1), rows++)
+ for (y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size), rows = 0;
+ y0 < mc->height;
+ y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size))
{
- guchar *data = mc->data + y * mc->stride;
+ for (y = y0; y < MIN (y0 + mc->chunk_size, mc->height); y++, rows++)
+ {
+ guchar *data = mc->data + y * mc->stride;
- desc->to_float (tmp, data, mc->width);
+ desc->to_float (tmp, data, mc->width);
- if (desc->alpha == GDK_MEMORY_ALPHA_PREMULTIPLIED)
- unpremultiply (tmp, mc->width);
+ if (desc->alpha == GDK_MEMORY_ALPHA_PREMULTIPLIED)
+ unpremultiply (tmp, mc->width);
- if (convert_func)
- convert_func (mc->src_cs, tmp, mc->width);
+ if (convert_func)
+ convert_func (mc->src_cs, tmp, mc->width);
- if (convert_func2)
- convert_func2 (mc->dest_cs, tmp, mc->width);
+ if (convert_func2)
+ convert_func2 (mc->dest_cs, tmp, mc->width);
- if (desc->alpha == GDK_MEMORY_ALPHA_PREMULTIPLIED)
- premultiply (tmp, mc->width);
+ if (desc->alpha == GDK_MEMORY_ALPHA_PREMULTIPLIED)
+ premultiply (tmp, mc->width);
- desc->from_float (data, tmp, mc->width);
+ desc->from_float (data, tmp, mc->width);
+ }
}
g_free (tmp);
@@ -2501,26 +2507,30 @@
.dest_cs = dest_cs,
.width = width,
.height = height,
+ .chunk_size = MAX (1, 512 / width),
};
+ guint n_tasks;
if (gdk_color_state_equal (src_cs, dest_cs))
return;
+ n_tasks = (mc.height + mc.chunk_size - 1) / mc.chunk_size;
+
if (format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED &&
src_cs == GDK_COLOR_STATE_SRGB &&
dest_cs == GDK_COLOR_STATE_SRGB_LINEAR)
{
- gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_to_srgb_linear, &mc, G_MAXUINT);
+ gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_to_srgb_linear, &mc, n_tasks);
}
else if (format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED &&
src_cs == GDK_COLOR_STATE_SRGB_LINEAR &&
dest_cs == GDK_COLOR_STATE_SRGB)
{
- gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_linear_to_srgb, &mc, G_MAXUINT);
+ gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_linear_to_srgb, &mc, n_tasks);
}
else
{
- gdk_parallel_task_run (gdk_memory_convert_color_state_generic, &mc, G_MAXUINT);
+ gdk_parallel_task_run (gdk_memory_convert_color_state_generic, &mc, n_tasks);
}
}

View File

@ -0,0 +1,38 @@
From 981fde3514d8b783615c912e465364dbca110659 Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Fri, 22 Nov 2024 13:08:00 -0500
Subject: [PATCH] Keep our threads alive for a bit
Our non-exclusive threadpool gives threads back to the global
pool, but they die right away there, because the default in
GLib is to only keep 2 threads alive. Bump that number to a
more reasonable value.
---
gdk/gdkparalleltask.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/gdk/gdkparalleltask.c b/gdk/gdkparalleltask.c
index 4b9c49e609..85e7b288c7 100644
--- a/gdk/gdkparalleltask.c
+++ b/gdk/gdkparalleltask.c
@@ -62,11 +62,13 @@ gdk_parallel_task_run (GdkTaskFunc task_func,
if (g_once_init_enter (&pool))
{
+ guint num_threads = CLAMP (2, g_get_num_processors () - 1, 32);
GThreadPool *the_pool = g_thread_pool_new (gdk_parallel_task_thread_func,
- NULL,
- MAX (2, g_get_num_processors ()) - 1,
- FALSE,
- NULL);
+ NULL,
+ num_threads,
+ FALSE,
+ NULL);
+ g_thread_pool_set_max_unused_threads (num_threads);
g_once_init_leave (&pool, the_pool);
}
--
2.53.0

View File

@ -0,0 +1,46 @@
From dc3ad33a43c92243dc757316b7ec4e874dcd2532 Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Sun, 24 Nov 2024 18:54:34 -0500
Subject: [PATCH] Limit parallelizm for small mipmaps too
Apply the same ideas that we use for memory format and color
conversions, to avoid overhead for small sizes.
---
gdk/gdkmemoryformat.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/gdk/gdkmemoryformat.c b/gdk/gdkmemoryformat.c
index c17be80b46..3f91bbac6d 100644
--- a/gdk/gdkmemoryformat.c
+++ b/gdk/gdkmemoryformat.c
@@ -2690,19 +2690,24 @@ gdk_memory_mipmap (guchar *dest,
.linear = linear,
.rows_done = 0,
};
+ gsize chunk_size;
+ guint n_tasks;
g_assert (lod_level > 0);
+ chunk_size = MAX (1, 512 / src_width),
+ n_tasks = (src_height + chunk_size - 1) / chunk_size;
+
if (dest_format == src_format)
{
if (linear)
- gdk_parallel_task_run (gdk_memory_mipmap_same_format_linear, &mipmap, G_MAXUINT);
+ gdk_parallel_task_run (gdk_memory_mipmap_same_format_linear, &mipmap, n_tasks);
else
- gdk_parallel_task_run (gdk_memory_mipmap_same_format_nearest, &mipmap, G_MAXUINT);
+ gdk_parallel_task_run (gdk_memory_mipmap_same_format_nearest, &mipmap, n_tasks);
}
else
{
- gdk_parallel_task_run (gdk_memory_mipmap_generic, &mipmap, G_MAXUINT);
+ gdk_parallel_task_run (gdk_memory_mipmap_generic, &mipmap, n_tasks);
}
}
--
2.53.0

View File

@ -0,0 +1,44 @@
From 5362aa2e796204762abb7a764273ac7fbec19a3d Mon Sep 17 00:00:00 2001
From: Khalid Abu Shawarib <kas@gnome.org>
Date: Thu, 6 Mar 2025 05:58:24 +0300
Subject: [PATCH] gdk/paralleltask: Initialize number of processers once
Reading it on every call seems to be non-trivial due to the required syscalls.
---
gdk/gdkparalleltask.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/gdk/gdkparalleltask.c b/gdk/gdkparalleltask.c
index 683df5b927..3da799d104 100644
--- a/gdk/gdkparalleltask.c
+++ b/gdk/gdkparalleltask.c
@@ -57,6 +57,7 @@ gdk_parallel_task_run (GdkTaskFunc task_func,
guint max_tasks)
{
static GThreadPool *pool;
+ static guint nproc;
TaskData task = {
.task_func = task_func,
.task_data = task_data,
@@ -71,7 +72,8 @@ gdk_parallel_task_run (GdkTaskFunc task_func,
if (g_once_init_enter (&pool))
{
- guint num_threads = CLAMP (2, g_get_num_processors () - 1, 32);
+ nproc = g_get_num_processors ();
+ guint num_threads = CLAMP (2, nproc - 1, 32);
GThreadPool *the_pool = g_thread_pool_new (gdk_parallel_task_thread_func,
NULL,
num_threads,
@@ -81,7 +83,7 @@ gdk_parallel_task_run (GdkTaskFunc task_func,
g_once_init_leave (&pool, the_pool);
}
- n_tasks = MIN (max_tasks, g_get_num_processors ());
+ n_tasks = MIN (max_tasks, nproc);
task.n_running_tasks = n_tasks;
/* Start with 1 because we run 1 task ourselves */
for (i = 1; i < n_tasks; i++)
--
2.53.0

View File

@ -0,0 +1,137 @@
From 33c5f7456c2c5d1d26f748ad1df92223c8f630e9 Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Sun, 24 Nov 2024 10:33:45 -0500
Subject: [PATCH] memoryformat: Use max_tasks
We now grab rows in chunks, and we can't make use of more threads
than the total number of rows, divided by the chunk size. This
helps reduce the threading overhead for small sizes, and makes the
parallel run code competitive with the single-threaded code in those
cases, until the size reaches ~ 2000 pixels, when the threading
starts to be faster.
For data, see
https://gitlab.gnome.org/GNOME/gtk/-/merge_requests/7965
---
gdk/gdkmemoryformat.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/gdk/gdkmemoryformat.c b/gdk/gdkmemoryformat.c
index 05821e31d8..7e45659445 100644
--- a/gdk/gdkmemoryformat.c
+++ b/gdk/gdkmemoryformat.c
@@ -2100,6 +2100,7 @@
GdkColorState *src_cs;
gsize width;
gsize height;
+ gsize chunk_size;
/* atomic */ int rows_done;
};
@@ -2114,7 +2115,7 @@
GdkFloatColorConvert convert_func = NULL;
GdkFloatColorConvert convert_func2 = NULL;
gboolean needs_premultiply, needs_unpremultiply;
- gsize y, n;
+ gsize y0, y;
gint64 before = GDK_PROFILER_CURRENT_TIME;
gsize rows;
@@ -2126,16 +2127,17 @@
if (func != NULL)
{
- n = 1;
-
- for (y = g_atomic_int_add (&mc->rows_done, n);
- y < mc->height;
- y = g_atomic_int_add (&mc->rows_done, n))
+ for (y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size), rows = 0;
+ y0 < mc->height;
+ y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size))
{
- const guchar *src_data = mc->src_data + y * mc->src_stride;
- guchar *dest_data = mc->dest_data + y * mc->dest_stride;
+ for (y = y0; y < MIN (y0 + mc->chunk_size, mc->height); y++, rows++)
+ {
+ const guchar *src_data = mc->src_data + y * mc->src_stride;
+ guchar *dest_data = mc->dest_data + y * mc->dest_stride;
- func (dest_data, src_data, mc->width);
+ func (dest_data, src_data, mc->width);
+ }
}
return;
}
@@ -2167,30 +2169,32 @@
}
tmp = g_malloc (sizeof (*tmp) * mc->width);
- n = 1;
- for (y = g_atomic_int_add (&mc->rows_done, n), rows = 0;
- y < mc->height;
- y = g_atomic_int_add (&mc->rows_done, n), rows++)
+ for (y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size), rows = 0;
+ y0 < mc->height;
+ y0 = g_atomic_int_add (&mc->rows_done, mc->chunk_size), rows++)
{
- const guchar *src_data = mc->src_data + y * mc->src_stride;
- guchar *dest_data = mc->dest_data + y * mc->dest_stride;
+ for (y = y0; y < MIN (y0 + mc->chunk_size, mc->height); y++, rows++)
+ {
+ const guchar *src_data = mc->src_data + y * mc->src_stride;
+ guchar *dest_data = mc->dest_data + y * mc->dest_stride;
- src_desc->to_float (tmp, src_data, mc->width);
+ src_desc->to_float (tmp, src_data, mc->width);
- if (needs_unpremultiply)
- unpremultiply (tmp, mc->width);
+ if (needs_unpremultiply)
+ unpremultiply (tmp, mc->width);
- if (convert_func)
- convert_func (mc->src_cs, tmp, mc->width);
+ if (convert_func)
+ convert_func (mc->src_cs, tmp, mc->width);
- if (convert_func2)
- convert_func2 (mc->dest_cs, tmp, mc->width);
+ if (convert_func2)
+ convert_func2 (mc->dest_cs, tmp, mc->width);
- if (needs_premultiply)
- premultiply (tmp, mc->width);
+ if (needs_premultiply)
+ premultiply (tmp, mc->width);
- dest_desc->from_float (dest_data, tmp, mc->width);
+ dest_desc->from_float (dest_data, tmp, mc->width);
+ }
}
g_free (tmp);
@@ -2223,8 +2227,11 @@
.src_cs = src_cs,
.width = width,
.height = height,
+ .chunk_size = MAX (1, 512 / width),
};
+ guint n_tasks;
+
g_assert (dest_format < GDK_MEMORY_N_FORMATS);
g_assert (src_format < GDK_MEMORY_N_FORMATS);
/* We don't allow overlap here. If you want to do in-place color state conversions,
@@ -2256,7 +2263,9 @@
return;
}
- gdk_parallel_task_run (gdk_memory_convert_generic, &mc, G_MAXUINT);
+ n_tasks = (mc.height + mc.chunk_size - 1) / mc.chunk_size;
+
+ gdk_parallel_task_run (gdk_memory_convert_generic, &mc, n_tasks);
}
typedef struct _MemoryConvertColorState MemoryConvertColorState;

View File

@ -0,0 +1,125 @@
From 3f798a2017581b67001622584612ab9af92e2b96 Mon Sep 17 00:00:00 2001
From: Matthias Clasen <mclasen@redhat.com>
Date: Sun, 24 Nov 2024 10:02:20 -0500
Subject: [PATCH] paralleltask: Let callers limit parallelism
Add a max_tasks argument to gdk_parallel_task_run. This will
help reduce setup overhead in small cases. For now, all callers
pass G_MAXUINT.
---
gdk/gdkmemoryformat.c | 14 +++++++-------
gdk/gdkparalleltask.c | 8 +++++---
gdk/gdkparalleltaskprivate.h | 3 ++-
3 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/gdk/gdkmemoryformat.c b/gdk/gdkmemoryformat.c
index 6473fa1567..05821e31d8 100644
--- a/gdk/gdkmemoryformat.c
+++ b/gdk/gdkmemoryformat.c
@@ -2267,7 +2267,7 @@ gdk_memory_convert (guchar *dest_data,
return;
}
- gdk_parallel_task_run (gdk_memory_convert_generic, &mc);
+ gdk_parallel_task_run (gdk_memory_convert_generic, &mc, G_MAXUINT);
}
typedef struct _MemoryConvertColorState MemoryConvertColorState;
@@ -2512,17 +2512,17 @@ gdk_memory_convert_color_state (guchar *data,
src_cs == GDK_COLOR_STATE_SRGB &&
dest_cs == GDK_COLOR_STATE_SRGB_LINEAR)
{
- gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_to_srgb_linear, &mc);
+ gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_to_srgb_linear, &mc, G_MAXUINT);
}
else if (format == GDK_MEMORY_B8G8R8A8_PREMULTIPLIED &&
src_cs == GDK_COLOR_STATE_SRGB_LINEAR &&
dest_cs == GDK_COLOR_STATE_SRGB)
{
- gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_linear_to_srgb, &mc);
+ gdk_parallel_task_run (gdk_memory_convert_color_state_srgb_linear_to_srgb, &mc, G_MAXUINT);
}
else
{
- gdk_parallel_task_run (gdk_memory_convert_color_state_generic, &mc);
+ gdk_parallel_task_run (gdk_memory_convert_color_state_generic, &mc, G_MAXUINT);
}
}
@@ -2684,13 +2684,13 @@ gdk_memory_mipmap (guchar *dest,
if (dest_format == src_format)
{
if (linear)
- gdk_parallel_task_run (gdk_memory_mipmap_same_format_linear, &mipmap);
+ gdk_parallel_task_run (gdk_memory_mipmap_same_format_linear, &mipmap, G_MAXUINT);
else
- gdk_parallel_task_run (gdk_memory_mipmap_same_format_nearest, &mipmap);
+ gdk_parallel_task_run (gdk_memory_mipmap_same_format_nearest, &mipmap, G_MAXUINT);
}
else
{
- gdk_parallel_task_run (gdk_memory_mipmap_generic, &mipmap);
+ gdk_parallel_task_run (gdk_memory_mipmap_generic, &mipmap, G_MAXUINT);
}
}
diff --git a/gdk/gdkparalleltask.c b/gdk/gdkparalleltask.c
index 0cb4440a87..683df5b927 100644
--- a/gdk/gdkparalleltask.c
+++ b/gdk/gdkparalleltask.c
@@ -45,13 +45,15 @@
* gdk_parallel_task_run:
* @task_func: the function to spawn
* @task_data: data to pass to the function
+ * @max_tasks: maximum number of tasks to spawn
*
* Spawns the given function in many threads.
* Once all functions have exited, this function returns.
**/
void
gdk_parallel_task_run (GdkTaskFunc task_func,
- gpointer task_data)
+ gpointer task_data,
+ guint max_tasks)
{
static GThreadPool *pool;
TaskData task = {
@@ -60,6 +62,12 @@
};
int i, n_tasks;
+ if (max_tasks == 1)
+ {
+ task_func (task_data);
+ return;
+ }
+
if (g_once_init_enter (&pool))
{
guint num_threads = CLAMP (2, g_get_num_processors () - 1, 32);
@@ -72,7 +80,7 @@
g_once_init_leave (&pool, the_pool);
}
- n_tasks = g_get_num_processors ();
+ n_tasks = MIN (max_tasks, g_get_num_processors ());
task.n_running_tasks = n_tasks;
/* Start with 1 because we run 1 task ourselves */
for (i = 1; i < n_tasks; i++)
diff --git a/gdk/gdkparalleltaskprivate.h b/gdk/gdkparalleltaskprivate.h
index a20fb72f95..2e531beb40 100644
--- a/gdk/gdkparalleltaskprivate.h
+++ b/gdk/gdkparalleltaskprivate.h
@@ -26,7 +26,8 @@ G_BEGIN_DECLS
typedef void (* GdkTaskFunc) (gpointer user_data);
void gdk_parallel_task_run (GdkTaskFunc task_func,
- gpointer task_data);
+ gpointer task_data,
+ guint max_tasks);
G_END_DECLS
--
2.53.0

View File

@ -37,6 +37,13 @@ Patch0: gtk4-no-objcopy.patch
Patch1: gtk4-no-emoji-context-menu.patch
Patch2: 0001-Filechooser-Fix-a-focus-mishap.patch
Patch3: 0001-placesview-Use-gtk3-servers-if-available.patch
# The next few patches are about limiting thread pool size, all from 4.18
Patch4: 0001-Keep-our-threads-alive-for-a-bit.patch
Patch5: 0001-paralleltask-Let-callers-limit-parallelism.patch
Patch6: 0001-gdk-paralleltask-Initialize-number-of-processers-onc.patch
Patch7: 0001-memoryformat-Use-max_tasks.patch
Patch8: 0001-Apply-the-same-chunking-to-color-conversion.patch
Patch9: 0001-Limit-parallelizm-for-small-mipmaps-too.patch
BuildRequires: cups-devel
BuildRequires: desktop-file-utils