diff --git a/.gitignore b/.gitignore index 3dd396c..3dbf0e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -libclc-19.1.1.src.tar.xz -mesa-24.2.8.tar.xz -spirv-llvm-translator-90a9764.tar.gz +libclc-20.1.3.src.tar.xz +mesa-25.0.7.tar.xz +spirv-llvm-translator-834db1a.tar.gz diff --git a/0001-Revert-hasvk-elk-stop-turning-load_push_constants-in.patch b/0001-Revert-hasvk-elk-stop-turning-load_push_constants-in.patch new file mode 100644 index 0000000..1c8a598 --- /dev/null +++ b/0001-Revert-hasvk-elk-stop-turning-load_push_constants-in.patch @@ -0,0 +1,88 @@ +From 870b8717b25eceadac8dd653debe8346826227cb Mon Sep 17 00:00:00 2001 +From: Dave Airlie +Date: Mon, 9 Jun 2025 09:19:28 +1000 +Subject: [PATCH] Revert "hasvk/elk: stop turning load_push_constants into + load_uniform" + +This reverts commit b036d2ded2de32e81730ec8ce37c803bad112efa. + +This seems to break gtk4 and other stuff. + +Cc: mesa-stable +(taking ack from Lionel saying we should revert) + +Acked-by: Lionel Landwerlin +Part-of: +--- + src/intel/compiler/elk/elk_fs_nir.cpp | 4 ++-- + src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c | 6 +++--- + src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c | 3 +-- + 3 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp +index 459e2d966b7..6b78d6cf9f8 100644 +--- a/src/intel/compiler/elk/elk_fs_nir.cpp ++++ b/src/intel/compiler/elk/elk_fs_nir.cpp +@@ -4969,8 +4969,7 @@ fs_nir_emit_intrinsic(nir_to_elk_state &ntb, + break; + } + +- case nir_intrinsic_load_uniform: +- case nir_intrinsic_load_push_constant: { ++ case nir_intrinsic_load_uniform: { + /* Offsets are in bytes but they should always aligned to + * the type size + */ +@@ -7058,3 +7057,4 @@ nir_to_elk(elk_fs_visitor *s) + + ralloc_free(ntb.mem_ctx); + } ++ +diff --git a/src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c +index 2bedf9017ed..1c613a9ebe7 100644 +--- a/src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c ++++ b/src/intel/vulkan_hasvk/anv_nir_apply_pipeline_layout.c +@@ -463,9 +463,9 @@ build_buffer_addr_for_res_index(nir_builder *b, + nir_iadd(b, res.dyn_offset_base, res.array_index); + + nir_def *dyn_load = +- nir_load_uniform(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4), +- .base = offsetof(struct anv_push_constants, dynamic_offsets), +- .range = MAX_DYNAMIC_BUFFERS * 4); ++ nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4), ++ .base = offsetof(struct anv_push_constants, dynamic_offsets), ++ .range = MAX_DYNAMIC_BUFFERS * 4); + + nir_def *dynamic_offset = + nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff), +diff --git a/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c b/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c +index ed314af8b98..8a3f83f2c26 100644 +--- a/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c ++++ b/src/intel/vulkan_hasvk/anv_nir_compute_push_layout.c +@@ -55,7 +55,6 @@ anv_nir_compute_push_layout(nir_shader *nir, + has_const_ubo = true; + break; + +- case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: { + unsigned base = nir_intrinsic_base(intrin); + unsigned range = nir_intrinsic_range(intrin); +@@ -132,7 +131,6 @@ anv_nir_compute_push_layout(nir_shader *nir, + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { +- case nir_intrinsic_load_uniform: + case nir_intrinsic_load_push_constant: { + /* With bindless shaders we load uniforms with SEND + * messages. All the push constants are located after the +@@ -141,6 +139,7 @@ anv_nir_compute_push_layout(nir_shader *nir, + * elk_nir_lower_rt_intrinsics.c). + */ + unsigned base_offset = push_start; ++ intrin->intrinsic = nir_intrinsic_load_uniform; + nir_intrinsic_set_base(intrin, + nir_intrinsic_base(intrin) - + base_offset); +-- +2.49.0 + diff --git a/0001-Revert-kopper-Explicitly-choose-zink.patch b/0001-Revert-kopper-Explicitly-choose-zink.patch new file mode 100644 index 0000000..954389e --- /dev/null +++ b/0001-Revert-kopper-Explicitly-choose-zink.patch @@ -0,0 +1,46 @@ +From 24f85c06c28736b73c14208a0ffa6657d2aee9cd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= +Date: Mon, 21 Apr 2025 13:52:14 +0200 +Subject: [PATCH] Revert "kopper: Explicitly choose zink" + +On QEMU (virtio driver), without 3D acceleration enabled and without +mesa-vulkan-drivers installed, this commit prevents Mutter to start. + +This reverts commit c0bc957c5d8c7edd57626284b712dd6ea1e375fc. + +Related: https://bugzilla.redhat.com/show_bug.cgi?id=2360851 +Related: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13009 +--- + .pick_status.json | 2 +- + src/gallium/frontends/dri/kopper.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/.pick_status.json b/.pick_status.json +index 99df2fb30c2..cdf3965678c 100644 +--- a/.pick_status.json ++++ b/.pick_status.json +@@ -16204,7 +16204,7 @@ + "description": "kopper: Explicitly choose zink", + "nominated": false, + "nomination_type": 0, +- "resolution": 1, ++ "resolution": 4, + "main_sha": null, + "because_sha": null, + "notes": null +diff --git a/src/gallium/frontends/dri/kopper.c b/src/gallium/frontends/dri/kopper.c +index a1d7dcb79b4..3bdb56022a9 100644 +--- a/src/gallium/frontends/dri/kopper.c ++++ b/src/gallium/frontends/dri/kopper.c +@@ -73,7 +73,7 @@ kopper_init_screen(struct dri_screen *screen, bool driver_name_is_inferred) + bool success; + #ifdef HAVE_LIBDRM + if (screen->fd != -1) +- success = pipe_loader_drm_probe_fd(&screen->dev, screen->fd, true); ++ success = pipe_loader_drm_probe_fd(&screen->dev, screen->fd, false); + else + success = pipe_loader_vk_probe_dri(&screen->dev); + #else +-- +2.49.0 + diff --git a/0001-gallivm-handle-u8-u16-const-loads-properly-on-big-en.patch b/0001-gallivm-handle-u8-u16-const-loads-properly-on-big-en.patch new file mode 100644 index 0000000..bd942c6 --- /dev/null +++ b/0001-gallivm-handle-u8-u16-const-loads-properly-on-big-en.patch @@ -0,0 +1,16 @@ +diff -up mesa-25.0.7/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c.dma mesa-25.0.7/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +--- mesa-25.0.7/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c.dma 2025-05-29 01:20:23.000000000 +1000 ++++ mesa-25.0.7/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c 2025-09-26 12:32:52.240568901 +1000 +@@ -1321,8 +1321,10 @@ emit_load_const(struct lp_build_nir_cont + + for (unsigned i = 0; i < instr->def.num_components; i++) { + outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, +- bits == 32 ? instr->value[i].u32 +- : instr->value[i].u64); ++ bits == 8 ? instr->value[i].u8 : ++ bits == 16 ? instr->value[i].u16 : ++ bits == 32 ? instr->value[i].u32 : ++ instr->value[i].u64); + } + for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) { + outval[i] = NULL; diff --git a/32886.patch b/32886.patch index 2bea80b..087e062 100644 --- a/32886.patch +++ b/32886.patch @@ -1,43 +1,3 @@ -From 622f7407d7a002030b24ed384532cb6d585b3479 Mon Sep 17 00:00:00 2001 -From: Benjamin ROBIN -Date: Sun, 5 Jan 2025 16:36:12 +0100 -Subject: [PATCH 1/2] util/disk_cache: Do not try to delete old cache if cache - is disabled - -Prevent following warning if not running as a normal user: -Failed to create /home for shader cache (Permission denied)---disabling - -disk_cache_delete_old_cache() is going to create first the cache directory -using disk_cache_generate_cache_dir(). From mkdir_if_needed(), the stat() -of "/home" is failing with "Permission denied" under some circumstances -when using Firefox. - -Fixes: #12168 -Fixes: c3bc6991d27c61b5c1b3 ("util/disk_cache: Delete the old multifile cache if using the default.") - -Signed-off-by: Benjamin ROBIN -Part-of: ---- - src/util/disk_cache.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c -index a6940ee494084..391f8cfdc1da9 100644 ---- a/src/util/disk_cache.c -+++ b/src/util/disk_cache.c -@@ -232,7 +232,7 @@ disk_cache_create(const char *gpu_name, const char *driver_id, - /* Since switching the default cache to , remove the - * old cache folder if it hasn't been modified for more than 7 days. - */ -- if (!getenv("MESA_SHADER_CACHE_DIR") && !getenv("MESA_GLSL_CACHE_DIR")) -+ if (!getenv("MESA_SHADER_CACHE_DIR") && !getenv("MESA_GLSL_CACHE_DIR") && disk_cache_enabled()) - disk_cache_delete_old_cache(); - } - --- -GitLab - - From 023db569e8f7bf325fba86a1fa4ba984026a532c Mon Sep 17 00:00:00 2001 From: Benjamin ROBIN Date: Sun, 5 Jan 2025 17:03:52 +0100 diff --git a/RHEL-53423.patch b/RHEL-53423.patch deleted file mode 100644 index 4a9c256..0000000 --- a/RHEL-53423.patch +++ /dev/null @@ -1,2565 +0,0 @@ -From 9797443fc2d655b8333ab80aec9d17a06706fe53 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Wed, 20 Nov 2024 10:34:27 -0500 -Subject: [PATCH 01/27] ac/surface: adjust HiZ enablement - -Acked-by: Pierre-Eric Pelloux-Prayer -Part-of: ---- - src/amd/common/ac_surface.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c -index 3ecb5d57be4..69f722bc4cc 100644 ---- a/src/amd/common/ac_surface.c -+++ b/src/amd/common/ac_surface.c -@@ -3022,7 +3022,7 @@ static bool gfx12_compute_hiz_his_info(struct ac_addrlib *addrlib, const struct - { - assert(surf_in->flags.depth != surf_in->flags.stencil); - -- if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev == 0)) -+ if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev <= 1)) - return true; - - ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; --- -2.48.1 - - -From 51f92a3545b42d4250da95e9623576b65e8260b9 Mon Sep 17 00:00:00 2001 -From: Pierre-Eric Pelloux-Prayer -Date: Thu, 21 Nov 2024 11:46:02 +0100 -Subject: [PATCH 02/27] radeonsi/gfx12: disable display dcc for front buffer - rendering -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Same logic as other chips, except we need to reallocate the texture -as we can't disable dcc. - -Reviewed-by: Marek Olšák -Part-of: ---- - src/gallium/drivers/radeonsi/si_texture.c | 33 ++++++++++++++++------- - 1 file changed, 24 insertions(+), 9 deletions(-) - -diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c -index ad234b2f018..ae65b725600 100644 ---- a/src/gallium/drivers/radeonsi/si_texture.c -+++ b/src/gallium/drivers/radeonsi/si_texture.c -@@ -797,16 +797,31 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex - assert(tex->surface.tile_swizzle == 0); - } - -- /* Since shader image stores don't support DCC on GFX8, -- * disable it for external clients that want write -- * access. -+ const bool debug_disable_dcc = sscreen->debug_flags & DBG(NO_EXPORTED_DCC); -+ /* Since shader image stores don't support DCC on GFX9 and older, -+ * disable it for external clients that want write access. - */ -- if (sscreen->debug_flags & DBG(NO_EXPORTED_DCC) || -- (usage & PIPE_HANDLE_USAGE_SHADER_WRITE && !tex->is_depth && tex->surface.meta_offset) || -- /* Displayable DCC requires an explicit flush. */ -- (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && -- si_displayable_dcc_needs_explicit_flush(tex))) { -- if (si_texture_disable_dcc(sctx, tex)) { -+ const bool shader_write = sscreen->info.gfx_level <= GFX9 && -+ usage & PIPE_HANDLE_USAGE_SHADER_WRITE && -+ !tex->is_depth && -+ tex->surface.meta_offset; -+ /* Another reason to disable display dcc is front buffer rendering. -+ * This can happens with Xorg. If the ddx driver uses GBM_BO_USE_FRONT_RENDERING, -+ * there's nothing to do because the texture is not using DCC. -+ * If the flag isn't set, we have to infer it to get correct rendering. -+ */ -+ const bool front_buffer_rendering = !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && -+ tex->buffer.b.b.bind & PIPE_BIND_SCANOUT; -+ -+ /* If display dcc requires a retiling step, drop dcc. */ -+ const bool explicit_flush = !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && -+ si_displayable_dcc_needs_explicit_flush(tex); -+ -+ if (debug_disable_dcc || shader_write || front_buffer_rendering || explicit_flush) { -+ if (sscreen->info.gfx_level >= GFX12) { -+ si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_CONST_BW, false); -+ update_metadata = true; -+ } else if (si_texture_disable_dcc(sctx, tex)) { - update_metadata = true; - /* si_texture_disable_dcc flushes the context */ - flush = false; --- -2.48.1 - - -From 89d3f1a550ade706459e6c2f0649608c084387ff Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Mon, 16 Dec 2024 03:48:37 -0500 -Subject: [PATCH 03/27] radeonsi/gfx12: set DB_RENDER_OVERRIDE based on stencil - state - -Acked-by: Pierre-Eric Pelloux-Prayer -Part-of: ---- - src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 + - src/gallium/drivers/radeonsi/si_state.c | 8 ++++++++ - src/gallium/drivers/radeonsi/si_state.h | 2 ++ - 3 files changed, 11 insertions(+) - -diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c -index 9b11eaf678e..31814bc94d2 100644 ---- a/src/gallium/drivers/radeonsi/si_gfx_cs.c -+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c -@@ -318,6 +318,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) - ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0; - -+ ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0; -diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c -index 6bb1037a79b..018be3dbe4f 100644 ---- a/src/gallium/drivers/radeonsi/si_state.c -+++ b/src/gallium/drivers/radeonsi/si_state.c -@@ -1573,6 +1573,12 @@ static void *si_create_dsa_state(struct pipe_context *ctx, - S_028090_TESTMASK_BF(state->stencil[1].valuemask); - dsa->db_stencil_write_mask = S_028094_WRITEMASK(state->stencil[0].writemask) | - S_028094_WRITEMASK_BF(state->stencil[1].writemask); -+ -+ bool force_s_valid = state->stencil[0].zpass_op != state->stencil[0].zfail_op || -+ (state->stencil[1].enabled && -+ state->stencil[1].zpass_op != state->stencil[1].zfail_op); -+ dsa->db_render_override = S_02800C_FORCE_STENCIL_READ(1) | -+ S_02800C_FORCE_STENCIL_VALID(force_s_valid); - } - - bool zfunc_is_ordered = -@@ -1608,6 +1614,8 @@ static void si_pm4_emit_dsa(struct si_context *sctx, unsigned index) - if (sctx->gfx_level >= GFX12) { - radeon_begin(&sctx->gfx_cs); - gfx12_begin_context_regs(); -+ gfx12_opt_set_context_reg(R_02800C_DB_RENDER_OVERRIDE, SI_TRACKED_DB_RENDER_OVERRIDE, -+ state->db_render_override); - gfx12_opt_set_context_reg(R_028070_DB_DEPTH_CONTROL, SI_TRACKED_DB_DEPTH_CONTROL, - state->db_depth_control); - if (state->stencil_enabled) { -diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h -index a984c7d6918..f964fa21606 100644 ---- a/src/gallium/drivers/radeonsi/si_state.h -+++ b/src/gallium/drivers/radeonsi/si_state.h -@@ -130,6 +130,7 @@ struct si_state_dsa { - unsigned spi_shader_user_data_ps_alpha_ref; - unsigned db_stencil_read_mask; - unsigned db_stencil_write_mask; -+ unsigned db_render_override; /* only gfx12 */ - - /* 0 = without stencil buffer, 1 = when both Z and S buffers are present */ - struct si_dsa_order_invariance order_invariance[2]; -@@ -314,6 +315,7 @@ enum si_tracked_reg - SI_TRACKED_SPI_PS_INPUT_ENA, - SI_TRACKED_SPI_PS_INPUT_ADDR, - -+ SI_TRACKED_DB_RENDER_OVERRIDE, - SI_TRACKED_DB_EQAA, - SI_TRACKED_DB_RENDER_OVERRIDE2, - SI_TRACKED_DB_SHADER_CONTROL, --- -2.48.1 - - -From 9a0377096ab33b12c10913ecc2b5a1358da29923 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Tue, 6 Aug 2024 16:23:01 -0400 -Subject: [PATCH 04/27] radeonsi: add a new PM4 helper radeon_event_write - -Reviewed-by: Pierre-Eric Pelloux-Prayer -Part-of: ---- - src/gallium/drivers/radeonsi/si_build_pm4.h | 11 +++ - src/gallium/drivers/radeonsi/si_compute.c | 7 +- - src/gallium/drivers/radeonsi/si_gfx_cs.c | 89 +++++++------------ - src/gallium/drivers/radeonsi/si_perfcounter.c | 21 ++--- - src/gallium/drivers/radeonsi/si_query.c | 3 +- - src/gallium/drivers/radeonsi/si_state.c | 24 +++-- - .../drivers/radeonsi/si_state_draw.cpp | 21 ++--- - .../drivers/radeonsi/si_state_shaders.cpp | 13 +-- - .../drivers/radeonsi/si_state_streamout.c | 3 +- - 9 files changed, 74 insertions(+), 118 deletions(-) - -diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h -index 15259aa329b..630b5885ff3 100644 ---- a/src/gallium/drivers/radeonsi/si_build_pm4.h -+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h -@@ -503,6 +503,17 @@ - } \ - } while (0) - -+/* Other packet helpers. */ -+#define radeon_event_write(event_type) do { \ -+ unsigned __event_type = (event_type); \ -+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \ -+ radeon_emit(EVENT_TYPE(__event_type) | \ -+ EVENT_INDEX(__event_type == V_028A90_VS_PARTIAL_FLUSH || \ -+ __event_type == V_028A90_PS_PARTIAL_FLUSH || \ -+ __event_type == V_028A90_CS_PARTIAL_FLUSH ? 4 : \ -+ __event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \ -+} while (0) -+ - /* This should be evaluated at compile time if all parameters are constants. */ - static ALWAYS_INLINE unsigned - si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess, -diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c -index 727face471e..01aa75d222a 100644 ---- a/src/gallium/drivers/radeonsi/si_compute.c -+++ b/src/gallium/drivers/radeonsi/si_compute.c -@@ -1114,10 +1114,9 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ - radeon_emit(dispatch_initiator); - } - -- if (unlikely(sctx->sqtt_enabled && sctx->gfx_level >= GFX9)) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); -- } -+ if (unlikely(sctx->sqtt_enabled && sctx->gfx_level >= GFX9)) -+ radeon_event_write(V_028A90_THREAD_TRACE_MARKER); -+ - radeon_end(); - } - -diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c -index 31814bc94d2..d8dccd9b008 100644 ---- a/src/gallium/drivers/radeonsi/si_gfx_cs.c -+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c -@@ -160,8 +160,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h - */ - if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->has_tessellation) { - radeon_begin(cs); -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_SQ_NON_EVENT); - radeon_end(); - } - -@@ -797,10 +796,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) - - radeon_begin(cs); - -- if (flags & SI_CONTEXT_VGT_FLUSH) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); -- } -+ if (flags & SI_CONTEXT_VGT_FLUSH) -+ radeon_event_write(V_028A90_VGT_FLUSH); - - if (flags & SI_CONTEXT_INV_ICACHE) - gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL); -@@ -837,19 +834,15 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) - } - - if (flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) { -- if (ctx->gfx_level < GFX12 && flags & SI_CONTEXT_FLUSH_AND_INV_CB) { -- /* Flush CMASK/FMASK/DCC. Will wait for idle later. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); -- } -+ /* Flush CMASK/FMASK/DCC. Will wait for idle later. */ -+ if (ctx->gfx_level < GFX12 && flags & SI_CONTEXT_FLUSH_AND_INV_CB) -+ radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META); - - /* Gfx11 can't flush DB_META and should use a TS event instead. */ -+ /* Flush HTILE. Will wait for idle later. */ - if (ctx->gfx_level < GFX12 && ctx->gfx_level != GFX11 && -- flags & SI_CONTEXT_FLUSH_AND_INV_DB) { -- /* Flush HTILE. Will wait for idle later. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); -- } -+ flags & SI_CONTEXT_FLUSH_AND_INV_DB) -+ radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META); - - /* First flush CB/DB, then L1/L2. */ - gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD); -@@ -870,21 +863,18 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) - } else { - /* Wait for graphics shaders to go idle if requested. */ - if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -+ radeon_event_write(V_028A90_PS_PARTIAL_FLUSH); - /* Only count explicit shader flushes, not implicit ones. */ - ctx->num_vs_flushes++; - ctx->num_ps_flushes++; - } else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -+ radeon_event_write(V_028A90_VS_PARTIAL_FLUSH); - ctx->num_vs_flushes++; - } - } - - if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && ctx->compute_is_busy) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); -+ radeon_event_write(V_028A90_CS_PARTIAL_FLUSH); - ctx->num_cs_flushes++; - ctx->compute_is_busy = false; - } -@@ -1026,12 +1016,10 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs) - } - - if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PIPELINESTAT_START); - ctx->pipeline_stats_enabled = 1; - } else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && ctx->pipeline_stats_enabled != 0) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PIPELINESTAT_STOP); - ctx->pipeline_stats_enabled = 0; - } - radeon_end(); -@@ -1092,16 +1080,13 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs) - - radeon_begin(cs); - -- if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { -- /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); -- } -- if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META)) { -- /* Flush HTILE. SURFACE_SYNC will wait for idle. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); -- } -+ /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */ -+ if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) -+ radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META); -+ -+ /* Flush HTILE. SURFACE_SYNC will wait for idle. */ -+ if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META)) -+ radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META); - - /* Wait for shader engines to go idle. - * VS and PS waits are unnecessary if SURFACE_SYNC is going to wait -@@ -1109,36 +1094,28 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs) - */ - if (!flush_cb_db) { - if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -- /* Only count explicit shader flushes, not implicit ones -- * done by SURFACE_SYNC. -- */ -+ radeon_event_write(V_028A90_PS_PARTIAL_FLUSH); -+ /* Only count explicit shader flushes, not implicit ones done by SURFACE_SYNC. */ - sctx->num_vs_flushes++; - sctx->num_ps_flushes++; - } else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -+ radeon_event_write(V_028A90_VS_PARTIAL_FLUSH); - sctx->num_vs_flushes++; - } - } - - if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && sctx->compute_is_busy) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -+ radeon_event_write(V_028A90_CS_PARTIAL_FLUSH); - sctx->num_cs_flushes++; - sctx->compute_is_busy = false; - } - - /* VGT state synchronization. */ -- if (flags & SI_CONTEXT_VGT_FLUSH) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); -- } -- if (flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); -- } -+ if (flags & SI_CONTEXT_VGT_FLUSH) -+ radeon_event_write(V_028A90_VGT_FLUSH); -+ -+ if (flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) -+ radeon_event_write(V_028A90_VGT_STREAMOUT_SYNC); - - radeon_end(); - -@@ -1267,14 +1244,12 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs) - - if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) { - radeon_begin(cs); -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PIPELINESTAT_START); - radeon_end(); - sctx->pipeline_stats_enabled = 1; - } else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && sctx->pipeline_stats_enabled != 0) { - radeon_begin(cs); -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PIPELINESTAT_STOP); - radeon_end(); - sctx->pipeline_stats_enabled = 0; - } -diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c -index 9cbd08648af..785c98a9dc9 100644 ---- a/src/gallium/drivers/radeonsi/si_perfcounter.c -+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c -@@ -114,8 +114,7 @@ static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer - radeon_begin(cs); - radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET)); -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PERFCOUNTER_STOP); - radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL, - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING)); - radeon_end(); -@@ -132,13 +131,10 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, - si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL); - - radeon_begin(cs); -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PERFCOUNTER_SAMPLE); - -- if (!sctx->screen->info.never_send_perfcounter_stop) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); -- } -+ if (!sctx->screen->info.never_send_perfcounter_stop) -+ radeon_event_write(V_028A90_PERFCOUNTER_STOP); - - radeon_set_uconfig_reg( - R_036020_CP_PERFMON_CNTL, -@@ -158,8 +154,7 @@ void si_pc_emit_spm_start(struct radeon_cmdbuf *cs) - S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | - S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING)); - /* Start windowed performance counters. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_PERFCOUNTER_START); - radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1)); - - radeon_end(); -@@ -171,10 +166,8 @@ void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_count - radeon_begin(cs); - - /* Stop windowed performance counters. */ -- if (!never_send_perfcounter_stop) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); -- } -+ if (!never_send_perfcounter_stop) -+ radeon_event_write(V_028A90_PERFCOUNTER_STOP); - - radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0)); - -diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c -index 18c19b58063..eca647505ae 100644 ---- a/src/gallium/drivers/radeonsi/si_query.c -+++ b/src/gallium/drivers/radeonsi/si_query.c -@@ -980,8 +980,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw - - radeon_begin(cs); - if (sctx->screen->use_ngg && query->flags & SI_QUERY_EMULATE_GS_COUNTERS) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -+ radeon_event_write(V_028A90_VS_PARTIAL_FLUSH); - - if (--sctx->num_pipeline_stat_emulated_queries == 0) { - si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL); -diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c -index 018be3dbe4f..8af8bae58ae 100644 ---- a/src/gallium/drivers/radeonsi/si_state.c -+++ b/src/gallium/drivers/radeonsi/si_state.c -@@ -62,8 +62,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, unsigned index) - sctx->last_cb_target_mask = cb_target_mask; - - radeon_begin(cs); -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_BREAK_BATCH); - radeon_end(); - } - -@@ -3191,10 +3190,9 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index) - S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); - - if (sctx->screen->dpbb_allowed && -- sctx->screen->pbb_context_states_per_bin > 1) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); -- } -+ sctx->screen->pbb_context_states_per_bin > 1) -+ radeon_event_write(V_028A90_BREAK_BATCH); -+ - radeon_end(); - - si_update_display_dcc_dirty(sctx); -@@ -3341,10 +3339,9 @@ static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned - gfx11_end_packed_context_regs(); - - if (sctx->screen->dpbb_allowed && -- sctx->screen->pbb_context_states_per_bin > 1) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); -- } -+ sctx->screen->pbb_context_states_per_bin > 1) -+ radeon_event_write(V_028A90_BREAK_BATCH); -+ - radeon_end(); - - si_update_display_dcc_dirty(sctx); -@@ -3478,10 +3475,9 @@ static void gfx12_emit_framebuffer_state(struct si_context *sctx, unsigned index - gfx12_end_context_regs(); - - if (sctx->screen->dpbb_allowed && -- sctx->screen->pbb_context_states_per_bin > 1) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); -- } -+ sctx->screen->pbb_context_states_per_bin > 1) -+ radeon_event_write(V_028A90_BREAK_BATCH); -+ - radeon_end(); - - sctx->framebuffer.dirty_cbufs = 0; -diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp -index bd2c936306b..81565b7694e 100644 ---- a/src/gallium/drivers/radeonsi/si_state_draw.cpp -+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp -@@ -1212,16 +1212,6 @@ void si_emit_buffered_compute_sh_regs(struct si_context *sctx) - - #endif - --#define EMIT_SQTT_END_DRAW \ -- do { \ -- if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt_enabled)) { \ -- radeon_begin(&sctx->gfx_cs); \ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \ -- radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); \ -- radeon_end(); \ -- } \ -- } while (0) -- - template ALWAYS_INLINE - static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info, -@@ -1638,10 +1628,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(0); - radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); - -- for (unsigned i = 0; i < 3; i++) { -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0)); -- } -+ for (unsigned i = 0; i < 3; i++) -+ radeon_event_write(V_028A90_SQ_NON_EVENT); - } else if (increment_draw_id) { - for (unsigned i = 0; i < num_draws; i++) { - if (i > 0) { -@@ -1675,9 +1663,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - } - } - } -- radeon_end(); - -- EMIT_SQTT_END_DRAW; -+ if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt_enabled)) -+ radeon_event_write(V_028A90_THREAD_TRACE_MARKER); -+ radeon_end(); - } - - /* Return false if not bound. */ -diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp -index 3eb56675313..a3a1b613337 100644 ---- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp -+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp -@@ -4081,12 +4081,10 @@ static void si_emit_vgt_flush(struct radeon_cmdbuf *cs) - radeon_begin(cs); - - /* This is required before VGT_FLUSH. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -+ radeon_event_write(V_028A90_VS_PARTIAL_FLUSH); - - /* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_VGT_FLUSH); - radeon_end(); - } - -@@ -4973,11 +4971,8 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index) - - radeon_begin(&sctx->gfx_cs); - /* Required before writing tessellation config registers. */ -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); -- -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_VS_PARTIAL_FLUSH); -+ radeon_event_write(V_028A90_VGT_FLUSH); - - if (sctx->gfx_level >= GFX7) { - radeon_set_uconfig_reg_seq(R_030938_VGT_TF_RING_SIZE, 3); -diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c -index 8047965ff84..9a92b4a4f3d 100644 ---- a/src/gallium/drivers/radeonsi/si_state_streamout.c -+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c -@@ -259,8 +259,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx) - radeon_set_config_reg(reg_strmout_cntl, 0); - } - -- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); -- radeon_emit(EVENT_TYPE(V_028A90_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); -+ radeon_event_write(V_028A90_SO_VGTSTREAMOUT_FLUSH); - - radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ --- -2.48.1 - - -From eefc15a11186a5b806ed5b2fcfbd2c9e1ad3d8e9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Mon, 16 Dec 2024 03:59:13 -0500 -Subject: [PATCH 05/27] radeonsi/gfx12: adjust HiZ/HiS logic - -Acked-by: Pierre-Eric Pelloux-Prayer -Part-of: ---- - src/amd/common/ac_gpu_info.c | 1 + - src/amd/common/ac_surface.c | 8 ++- - src/gallium/drivers/radeonsi/si_build_pm4.h | 14 +++++ - .../drivers/radeonsi/si_debug_options.h | 1 + - src/gallium/drivers/radeonsi/si_pipe.h | 1 + - src/gallium/drivers/radeonsi/si_state.c | 51 ++++++++++++---- - .../drivers/radeonsi/si_state_draw.cpp | 61 +++++++++++++++---- - 7 files changed, 110 insertions(+), 27 deletions(-) - -diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c -index 0d475cdab63..dd43af8dbb6 100644 ---- a/src/amd/common/ac_gpu_info.c -+++ b/src/amd/common/ac_gpu_info.c -@@ -593,6 +593,7 @@ static void handle_env_var_force_family(struct radeon_info *info) - info->gfx_level = ac_get_gfx_level(i); - info->family_id = ac_get_family_id(i); - info->family_overridden = true; -+ info->chip_rev = 1; - return; - } - } -diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c -index 69f722bc4cc..66d773921c4 100644 ---- a/src/amd/common/ac_surface.c -+++ b/src/amd/common/ac_surface.c -@@ -3022,7 +3022,7 @@ static bool gfx12_compute_hiz_his_info(struct ac_addrlib *addrlib, const struct - { - assert(surf_in->flags.depth != surf_in->flags.stencil); - -- if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev <= 1)) -+ if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev == 0)) - return true; - - ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; -@@ -3079,7 +3079,11 @@ static bool gfx12_compute_miptree(struct ac_addrlib *addrlib, const struct radeo - surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign)); - surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize; - -- return gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in); -+ if (info->chip_rev >= 2 && -+ !gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in)) -+ return false; -+ -+ return true; - } - - surf->u.gfx9.surf_slice_size = out.sliceSize; -diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h -index 630b5885ff3..c792d0fd2f8 100644 ---- a/src/gallium/drivers/radeonsi/si_build_pm4.h -+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h -@@ -514,6 +514,20 @@ - __event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \ - } while (0) - -+#define radeon_emit_alt_hiz_logic() do { \ -+ static_assert(GFX_VERSION == GFX12 || !ALT_HIZ_LOGIC, ""); \ -+ if (GFX_VERSION == GFX12 && ALT_HIZ_LOGIC) { \ -+ radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \ -+ radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); \ -+ radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \ -+ radeon_emit(0); /* ADDRESS_LO */ \ -+ radeon_emit(0); /* ADDRESS_HI */ \ -+ radeon_emit(0); /* DATA_LO */ \ -+ radeon_emit(0); /* DATA_HI */ \ -+ radeon_emit(0); /* INT_CTXID */ \ -+ } \ -+} while (0) -+ - /* This should be evaluated at compile time if all parameters are constants. */ - static ALWAYS_INLINE unsigned - si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess, -diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h -index ba2c2336ee0..a5057c4700e 100644 ---- a/src/gallium/drivers/radeonsi/si_debug_options.h -+++ b/src/gallium/drivers/radeonsi/si_debug_options.h -@@ -23,6 +23,7 @@ OPT_BOOL(zerovram, false, "Zero all VRAM allocations") - OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease performance.") - OPT_BOOL(cache_rb_gl2, false, "Enable GL2 caching for CB and DB.") - OPT_BOOL(optimize_io, true, "Run nir_opt_varyings in the GLSL linker.") -+OPT_BOOL(alt_hiz_logic, false, "Enable alternative HiZ logic") - - #undef OPT_BOOL - #undef OPT_INT -diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h -index a7f5b544051..0002a3056c9 100644 ---- a/src/gallium/drivers/radeonsi/si_pipe.h -+++ b/src/gallium/drivers/radeonsi/si_pipe.h -@@ -398,6 +398,7 @@ struct si_texture { - bool can_sample_z : 1; - bool can_sample_s : 1; - bool need_flush_after_depth_decompression: 1; -+ bool force_disable_hiz_his : 1; - - /* We need to track DCC dirtiness, because st/dri usually calls - * flush_resource twice per frame (not a bug) and we don't wanna -diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c -index 8af8bae58ae..a647defc26c 100644 ---- a/src/gallium/drivers/radeonsi/si_state.c -+++ b/src/gallium/drivers/radeonsi/si_state.c -@@ -1713,6 +1713,20 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state) - si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); - } - -+ struct pipe_surface *zssurf = sctx->framebuffer.state.zsbuf; -+ struct si_texture *zstex = (struct si_texture*)(zssurf ? zssurf->texture : NULL); -+ -+ if (sctx->gfx_level == GFX12 && !sctx->screen->options.alt_hiz_logic && -+ sctx->framebuffer.has_stencil && dsa->stencil_enabled && !zstex->force_disable_hiz_his) { -+ zstex->force_disable_hiz_his = true; -+ si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); -+ -+ if (sctx->framebuffer.has_hiz_his) { -+ sctx->framebuffer.has_hiz_his = false; -+ si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); -+ } -+ } -+ - if (old_dsa->alpha_func != dsa->alpha_func) { - si_ps_key_update_dsa(sctx); - si_update_ps_inputs_read_or_disabled(sctx); -@@ -2814,9 +2828,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, - if (util_format_has_stencil(util_format_description(zstex->buffer.b.b.format))) - sctx->framebuffer.has_stencil = true; - -+ if (sctx->gfx_level == GFX12 && !sctx->screen->options.alt_hiz_logic && -+ sctx->framebuffer.has_stencil && sctx->queued.named.dsa->stencil_enabled) -+ zstex->force_disable_hiz_his = true; -+ - if (sctx->gfx_level >= GFX12) { -- sctx->framebuffer.has_hiz_his = zstex->surface.u.gfx9.zs.hiz.offset || -- zstex->surface.u.gfx9.zs.his.offset; -+ sctx->framebuffer.has_hiz_his = (zstex->surface.u.gfx9.zs.hiz.offset || -+ zstex->surface.u.gfx9.zs.his.offset) && -+ !zstex->force_disable_hiz_his; - } - } - -@@ -3443,18 +3462,24 @@ static void gfx12_emit_framebuffer_state(struct si_context *sctx, unsigned index - gfx12_set_context_reg(R_028034_DB_STENCIL_READ_BASE_HI, zb->ds.db_stencil_base >> 32); - gfx12_set_context_reg(R_028038_DB_STENCIL_WRITE_BASE, zb->ds.db_stencil_base); - gfx12_set_context_reg(R_02803C_DB_STENCIL_WRITE_BASE_HI, zb->ds.db_stencil_base >> 32); -- gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, zb->ds.u.gfx12.hiz_info); -- gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, zb->ds.u.gfx12.his_info); - -- if (zb->ds.u.gfx12.hiz_info) { -- gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, zb->ds.u.gfx12.hiz_base); -- gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, zb->ds.u.gfx12.hiz_base >> 32); -- gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, zb->ds.u.gfx12.hiz_size_xy); -- } -- if (zb->ds.u.gfx12.his_info) { -- gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, zb->ds.u.gfx12.his_base); -- gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, zb->ds.u.gfx12.his_base >> 32); -- gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, zb->ds.u.gfx12.his_size_xy); -+ if (tex->force_disable_hiz_his) { -+ gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, S_028B94_SURFACE_ENABLE(0)); -+ gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, S_028B98_SURFACE_ENABLE(0)); -+ } else { -+ gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, zb->ds.u.gfx12.hiz_info); -+ gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, zb->ds.u.gfx12.his_info); -+ -+ if (zb->ds.u.gfx12.hiz_info) { -+ gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, zb->ds.u.gfx12.hiz_base); -+ gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, zb->ds.u.gfx12.hiz_base >> 32); -+ gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, zb->ds.u.gfx12.hiz_size_xy); -+ } -+ if (zb->ds.u.gfx12.his_info) { -+ gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, zb->ds.u.gfx12.his_base); -+ gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, zb->ds.u.gfx12.his_base >> 32); -+ gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, zb->ds.u.gfx12.his_size_xy); -+ } - } - } else if (sctx->framebuffer.dirty_zsbuf) { - gfx12_set_context_reg(R_028018_DB_Z_INFO, -diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp -index 81565b7694e..bc4cf308936 100644 ---- a/src/gallium/drivers/radeonsi/si_state_draw.cpp -+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp -@@ -823,6 +823,11 @@ enum si_has_sh_pairs_packed { - HAS_SH_PAIRS_PACKED_ON, - }; - -+enum si_alt_hiz_logic { -+ ALT_HIZ_LOGIC_OFF, -+ ALT_HIZ_LOGIC_ON, -+}; -+ - template ALWAYS_INLINE - static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *indirect, - enum mesa_prim prim, -@@ -1213,7 +1218,8 @@ void si_emit_buffered_compute_sh_regs(struct si_context *sctx) - #endif - - template ALWAYS_INLINE -+ si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, -+ si_alt_hiz_logic ALT_HIZ_LOGIC> ALWAYS_INLINE - static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info, - unsigned drawid_base, - const struct pipe_draw_indirect_info *indirect, -@@ -1405,6 +1411,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); - radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); - radeon_emit(di_src_sel); -+ -+ radeon_emit_alt_hiz_logic(); - } else { - uint64_t count_va = 0; - -@@ -1430,6 +1438,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(count_va >> 32); - radeon_emit(indirect->stride); - radeon_emit(di_src_sel); -+ -+ radeon_emit_alt_hiz_logic(); - } - } else { - if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN || -@@ -1549,6 +1559,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(va >> 32); - radeon_emit(draws[i].count); - radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ -+ -+ radeon_emit_alt_hiz_logic(); - } - if (num_draws > 1) { - BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ -@@ -1568,6 +1580,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(va >> 32); - radeon_emit(draws[i].count); - radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ -+ -+ radeon_emit_alt_hiz_logic(); - } - if (num_draws > 1) { - BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */ -@@ -1588,6 +1602,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(va >> 32); - radeon_emit(draws[i].count); - radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ -+ -+ radeon_emit_alt_hiz_logic(); - } - if (num_draws > 1) { - BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ -@@ -1615,6 +1631,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(V_0287F0_DI_SRC_SEL_DMA | - S_0287F0_NOT_EOP(GFX_VERSION >= GFX10 && GFX_VERSION < GFX12 && - i < num_draws - 1)); -+ -+ radeon_emit_alt_hiz_logic(); - } - } - } -@@ -1628,6 +1646,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(0); - radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); - -+ radeon_emit_alt_hiz_logic(); - for (unsigned i = 0; i < 3; i++) - radeon_event_write(V_028A90_SQ_NON_EVENT); - } else if (increment_draw_id) { -@@ -1643,6 +1662,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); - radeon_emit(draws[i].count); - radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); -+ -+ radeon_emit_alt_hiz_logic(); - } - if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) { - BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ -@@ -1656,6 +1677,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw - radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); - radeon_emit(draws[i].count); - radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); -+ -+ radeon_emit_alt_hiz_logic(); - } - if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) { - BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ -@@ -2012,7 +2035,7 @@ static void si_emit_all_states(struct si_context *sctx, uint64_t skip_atom_mask) - - template ALWAYS_INLINE -+ util_popcnt POPCNT, si_alt_hiz_logic ALT_HIZ_LOGIC> ALWAYS_INLINE - static void si_draw(struct pipe_context *ctx, - const struct pipe_draw_info *info, - unsigned drawid_offset, -@@ -2312,7 +2335,8 @@ static void si_draw(struct pipe_context *ctx, - return; - } - -- si_emit_draw_packets -+ si_emit_draw_packets - (sctx, info, drawid_offset, indirect, draws, num_draws, indexbuf, - index_size, index_offset, instance_count); - /* <-- CUs start to get busy here if we waited. */ -@@ -2362,7 +2386,7 @@ static void si_draw(struct pipe_context *ctx, - } - - template -+ si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, si_alt_hiz_logic ALT_HIZ_LOGIC> - static void si_draw_vbo(struct pipe_context *ctx, - const struct pipe_draw_info *info, - unsigned drawid_offset, -@@ -2370,12 +2394,14 @@ static void si_draw_vbo(struct pipe_context *ctx, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws) - { -- si_draw -+ si_draw - (ctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0); - } - - template -+ si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, util_popcnt POPCNT, -+ si_alt_hiz_logic ALT_HIZ_LOGIC> - static void si_draw_vertex_state(struct pipe_context *ctx, - struct pipe_vertex_state *vstate, - uint32_t partial_velem_mask, -@@ -2391,7 +2417,8 @@ static void si_draw_vertex_state(struct pipe_context *ctx, - dinfo.instance_count = 1; - dinfo.index.resource = state->b.input.indexbuf; - -- si_draw -+ si_draw - (ctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask); - - if (info.take_vertex_state_ownership) -@@ -2453,18 +2480,28 @@ static void si_init_draw_vbo(struct si_context *sctx) - if (!NGG && GFX_VERSION >= GFX11) - return; - -- if (GFX_VERSION >= GFX11 && GFX_VERSION < GFX12 && sctx->screen->info.has_set_sh_pairs_packed) { -+ if (GFX_VERSION == GFX12 && sctx->screen->options.alt_hiz_logic) { -+ sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = -+ si_draw_vbo; -+ -+ sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] = -+ si_draw_vertex_state; -+ } else if (GFX_VERSION >= GFX11 && GFX_VERSION < GFX12 && -+ sctx->screen->info.has_set_sh_pairs_packed) { - sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = -- si_draw_vbo; -+ si_draw_vbo; - - sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] = -- si_draw_vertex_state; -+ si_draw_vertex_state; - } else { - sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = -- si_draw_vbo; -+ si_draw_vbo; - - sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] = -- si_draw_vertex_state; -+ si_draw_vertex_state; - } - } - --- -2.48.1 - - -From a4f5130de6d737aa1264d3a981f95b81a9f13d05 Mon Sep 17 00:00:00 2001 -From: Pierre-Eric Pelloux-Prayer -Date: Thu, 21 Nov 2024 13:32:55 +0100 -Subject: [PATCH 06/27] radeonsi: disable DCC for PIPE_BIND_USE_FRONT_RENDERING -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Front rendering and (display) DCC are causing artifacts on screen. - -si_texture_get_handle deals with this problem by disabling dcc, but -we can make it simpler by not allocating DCC at all when this flag -is set. - -Reviewed-by: Marek Olšák -Part-of: ---- - src/gallium/drivers/radeonsi/si_texture.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c -index ae65b725600..9cc11a81669 100644 ---- a/src/gallium/drivers/radeonsi/si_texture.c -+++ b/src/gallium/drivers/radeonsi/si_texture.c -@@ -240,6 +240,8 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac - - if (modifier == DRM_FORMAT_MOD_INVALID && - (ptex->bind & PIPE_BIND_CONST_BW || -+ ptex->bind & PIPE_BIND_PROTECTED || -+ ptex->bind & PIPE_BIND_USE_FRONT_RENDERING || - sscreen->debug_flags & DBG(NO_DCC) || - (ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC)))) - flags |= RADEON_SURF_DISABLE_DCC; -@@ -289,6 +291,9 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac - if (ptex->bind & PIPE_BIND_CONST_BW) - flags |= RADEON_SURF_DISABLE_DCC; - -+ if (ptex->bind & PIPE_BIND_USE_FRONT_RENDERING) -+ flags |= RADEON_SURF_DISABLE_DCC; -+ - switch (sscreen->info.gfx_level) { - case GFX8: - /* Stoney: 128bpp MSAA textures randomly fail piglit tests with DCC. */ -@@ -1638,6 +1643,20 @@ si_modifier_supports_resource(struct pipe_screen *screen, - struct si_screen *sscreen = (struct si_screen *)screen; - uint32_t max_width, max_height; - -+ if (((templ->bind & PIPE_BIND_LINEAR) || sscreen->debug_flags & DBG(NO_TILING)) && -+ modifier != DRM_FORMAT_MOD_LINEAR) -+ return false; -+ -+ if ((templ->bind & PIPE_BIND_USE_FRONT_RENDERING) && ac_modifier_has_dcc(modifier)) -+ return false; -+ -+ /* Protected content doesn't support DCC on GFX12. */ -+ if (sscreen->info.gfx_level >= GFX12 && templ->bind & PIPE_BIND_PROTECTED && -+ IS_AMD_FMT_MOD(modifier) && -+ AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12 && -+ AMD_FMT_MOD_GET(DCC, modifier)) -+ return false; -+ - ac_modifier_max_extent(&sscreen->info, modifier, &max_width, &max_height); - return templ->width0 <= max_width && templ->height0 <= max_height; - } --- -2.48.1 - - -From 35435069c1063180814aff055365dce939d95e60 Mon Sep 17 00:00:00 2001 -From: Pierre-Eric Pelloux-Prayer -Date: Wed, 29 Jan 2025 18:10:12 +0100 -Subject: [PATCH 07/27] radeonsi: update si_need_gfx_cs_space upper bound -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -radeon_emit_alt_hiz_logic can add 8 extra dw per draw. - -Fixes: cdecbee9225 ("radeonsi/gfx12: adjust HiZ/HiS logic") -Reviewed-by: Marek Olšák -Part-of: ---- - src/gallium/drivers/radeonsi/si_compute.c | 2 +- - src/gallium/drivers/radeonsi/si_cp_dma.c | 2 +- - src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +- - src/gallium/drivers/radeonsi/si_pipe.h | 12 ++++++++++-- - src/gallium/drivers/radeonsi/si_query.c | 6 +++--- - src/gallium/drivers/radeonsi/si_state_draw.cpp | 2 +- - 6 files changed, 17 insertions(+), 9 deletions(-) - -diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c -index 01aa75d222a..d2c2bc16500 100644 ---- a/src/gallium/drivers/radeonsi/si_compute.c -+++ b/src/gallium/drivers/radeonsi/si_compute.c -@@ -1218,7 +1218,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info - } - } - -- si_need_gfx_cs_space(sctx, 0); -+ si_need_gfx_cs_space(sctx, 0, 0); - - /* If we're using a secure context, determine if cs must be secure or not */ - if (unlikely(radeon_uses_secure_bos(sctx->ws))) { -diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c -index 89ee80e14f5..f0ba0796377 100644 ---- a/src/gallium/drivers/radeonsi/si_cp_dma.c -+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c -@@ -146,7 +146,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst - bool *is_first, unsigned *packet_flags) - { - if (!(user_flags & SI_OP_CPDMA_SKIP_CHECK_CS_SPACE)) -- si_need_gfx_cs_space(sctx, 0); -+ si_need_gfx_cs_space(sctx, 0, 0); - - /* This must be done after need_cs_space. */ - if (dst) -diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c -index 785c98a9dc9..806a842a08d 100644 ---- a/src/gallium/drivers/radeonsi/si_perfcounter.c -+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c -@@ -277,7 +277,7 @@ static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) - - if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size)) - return; -- si_need_gfx_cs_space(sctx, 0); -+ si_need_gfx_cs_space(sctx, 0, 0); - - if (query->shaders) - si_pc_emit_shaders(&sctx->gfx_cs, query->shaders); -diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h -index 0002a3056c9..f132e182b4f 100644 ---- a/src/gallium/drivers/radeonsi/si_pipe.h -+++ b/src/gallium/drivers/radeonsi/si_pipe.h -@@ -2026,11 +2026,19 @@ static inline bool util_rast_prim_is_lines_or_triangles(unsigned prim) - return ((1 << prim) & (UTIL_ALL_PRIM_LINE_MODES | UTIL_ALL_PRIM_TRIANGLE_MODES)) != 0; - } - --static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) -+static inline void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws, -+ unsigned extra_dw_per_draw) - { - struct radeon_cmdbuf *cs = &ctx->gfx_cs; -+ /* Don't count the needed CS space exactly and just use an upper bound. -+ * -+ * Also reserve space for stopping queries at the end of IB, because -+ * the number of active queries is unlimited in theory. -+ */ -+ unsigned reserve_dw = 2048 + ctx->num_cs_dw_queries_suspend + -+ num_draws * (10 + extra_dw_per_draw); - -- if (!ctx->ws->cs_check_space(cs, si_get_minimum_num_gfx_cs_dwords(ctx, num_draws))) -+ if (!ctx->ws->cs_check_space(cs, reserve_dw)) - si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - } - -diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c -index eca647505ae..67dd732f615 100644 ---- a/src/gallium/drivers/radeonsi/si_query.c -+++ b/src/gallium/drivers/radeonsi/si_query.c -@@ -918,7 +918,7 @@ static void si_query_hw_emit_start(struct si_context *sctx, struct si_query_hw * - si_update_prims_generated_query_state(sctx, query->b.type, 1); - si_update_hw_pipeline_stats(sctx, query->b.type, 1); - -- si_need_gfx_cs_space(sctx, 0); -+ si_need_gfx_cs_space(sctx, 0, 0); - - va = query->buffer.buf->gpu_address + query->buffer.results_end; - si_query_hw_do_emit_start(sctx, query, query->buffer.buf, va); -@@ -1014,7 +1014,7 @@ static void si_query_hw_emit_stop(struct si_context *sctx, struct si_query_hw *q - - /* The queries which need begin already called this in begin_query. */ - if (query->flags & SI_QUERY_HW_FLAG_NO_START) { -- si_need_gfx_cs_space(sctx, 0); -+ si_need_gfx_cs_space(sctx, 0, 0); - if (!si_query_buffer_alloc(sctx, &query->buffer, si_query_hw_prepare_buffer, - query->result_size)) - return; -@@ -1726,7 +1726,7 @@ void si_resume_queries(struct si_context *sctx) - struct si_query *query; - - /* Check CS space here. Resuming must not be interrupted by flushes. */ -- si_need_gfx_cs_space(sctx, 0); -+ si_need_gfx_cs_space(sctx, 0, 0); - - LIST_FOR_EACH_ENTRY (query, &sctx->active_queries, active_list) - query->ops->resume(sctx, query); -diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp -index bc4cf308936..8b57ddb3b70 100644 ---- a/src/gallium/drivers/radeonsi/si_state_draw.cpp -+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp -@@ -2060,7 +2060,7 @@ static void si_draw(struct pipe_context *ctx, - else if (GFX_VERSION < GFX12) - gfx11_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)); - -- si_need_gfx_cs_space(sctx, num_draws); -+ si_need_gfx_cs_space(sctx, num_draws, ALT_HIZ_LOGIC ? 8 : 0); - - if (u_trace_perfetto_active(&sctx->ds.trace_context)) - trace_si_begin_draw(&sctx->trace); --- -2.48.1 - - -From 1e95d1b0fec92e3cba014e32ac731705eb6e8425 Mon Sep 17 00:00:00 2001 -From: Samuel Pitoiset -Date: Wed, 15 Jan 2025 12:00:43 +0100 -Subject: [PATCH 08/27] radv: disable GFX12+ support - -It's mostly broken, but 25.0+ should be good enough. - -Signed-off-by: Samuel Pitoiset -Part-of: ---- - src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c -index 6ed5718e695..b6ce7ba6ac5 100644 ---- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c -+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c -@@ -22,12 +22,25 @@ - #include "vk_drm_syncobj.h" - #include "xf86drm.h" - -+static bool -+radv_is_gpu_supported(const struct radeon_info *info) -+{ -+ /* GFX12 isn't supported. */ -+ if (info->gfx_level >= GFX12) -+ return false; -+ -+ return true; -+} -+ - static bool - do_winsys_init(struct radv_amdgpu_winsys *ws, int fd) - { - if (!ac_query_gpu_info(fd, ws->dev, &ws->info, true)) - return false; - -+ if (!radv_is_gpu_supported(&ws->info)) -+ return false; -+ - /* - * Override the max submits on video queues. - * If you submit multiple session contexts in the same IB sequence the --- -2.48.1 - - -From 317d71daef2588ff76c62aa54a5f1920e717a465 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Wed, 18 Dec 2024 19:18:30 -0500 -Subject: [PATCH 09/27] radeonsi: fix a gfx10.3 regression due to a gfx12 - change - -This fixes: - Assertion `!"BITSET_TEST_RANGE: bit range crosses word boundary"' failed. - -Fixes: e3cef02c245 - radeonsi/gfx12: set DB_RENDER_OVERRIDE based on stencil state - -Reviewed-by: Qiang Yu -Part-of: ---- - src/gallium/drivers/radeonsi/si_gfx_cs.c | 7 +++++-- - src/gallium/drivers/radeonsi/si_state.h | 2 +- - 2 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c -index d8dccd9b008..9782e1afaa1 100644 ---- a/src/gallium/drivers/radeonsi/si_gfx_cs.c -+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c -@@ -317,7 +317,6 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) - ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0; - -- ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0; -@@ -368,7 +367,11 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3] = 0; - -- ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0; -+ if (ctx->gfx_level >= GFX12) -+ ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE] = 0; -+ else -+ ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0; -+ - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_PRIMITIVEID_EN] = 0; - ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0; - -diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h -index f964fa21606..75159c475e6 100644 ---- a/src/gallium/drivers/radeonsi/si_state.h -+++ b/src/gallium/drivers/radeonsi/si_state.h -@@ -315,7 +315,6 @@ enum si_tracked_reg - SI_TRACKED_SPI_PS_INPUT_ENA, - SI_TRACKED_SPI_PS_INPUT_ADDR, - -- SI_TRACKED_DB_RENDER_OVERRIDE, - SI_TRACKED_DB_EQAA, - SI_TRACKED_DB_RENDER_OVERRIDE2, - SI_TRACKED_DB_SHADER_CONTROL, -@@ -371,6 +370,7 @@ enum si_tracked_reg - SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3, /* GFX6-10 (GFX11+ can reuse this slot) */ - - SI_TRACKED_SPI_VS_OUT_CONFIG, /* GFX6-11 */ -+ SI_TRACKED_DB_RENDER_OVERRIDE = SI_TRACKED_SPI_VS_OUT_CONFIG, /* GFX12+ (slot reused) */ - SI_TRACKED_VGT_PRIMITIVEID_EN, /* GFX6-11 */ - SI_TRACKED_CB_DCC_CONTROL, /* GFX8-11 */ - SI_TRACKED_DB_STENCIL_READ_MASK, /* GFX12+ */ --- -2.48.1 - - -From 5e4f6e7f2c644219f998672b90c6de9146ca6952 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Wed, 8 Jan 2025 15:00:50 -0500 -Subject: [PATCH 10/27] radeonsi/gfx12: enable alt_hiz_logic - -Reviewed-by: Pierre-Eric Pelloux-Prayer -Part-of: ---- - src/gallium/drivers/radeonsi/si_debug_options.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h -index a5057c4700e..74b9b59a455 100644 ---- a/src/gallium/drivers/radeonsi/si_debug_options.h -+++ b/src/gallium/drivers/radeonsi/si_debug_options.h -@@ -23,7 +23,7 @@ OPT_BOOL(zerovram, false, "Zero all VRAM allocations") - OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease performance.") - OPT_BOOL(cache_rb_gl2, false, "Enable GL2 caching for CB and DB.") - OPT_BOOL(optimize_io, true, "Run nir_opt_varyings in the GLSL linker.") --OPT_BOOL(alt_hiz_logic, false, "Enable alternative HiZ logic") -+OPT_BOOL(alt_hiz_logic, true, "Enable alternative HiZ logic") - - #undef OPT_BOOL - #undef OPT_INT --- -2.48.1 - - -From 074851b27e9b59bf9b450907e3231824495e589a Mon Sep 17 00:00:00 2001 -From: Pierre-Eric Pelloux-Prayer -Date: Mon, 10 Feb 2025 12:27:48 +0100 -Subject: [PATCH 11/27] radeonsi: disable dcc when external shader stores are - used -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -See comment. - -Fixes: 666a6eb871d ("radeonsi/gfx12: disable display dcc for front buffer rendering") -Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12552 -Reviewed-by: Marek Olšák -Part-of: ---- - src/gallium/drivers/radeonsi/si_texture.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c -index 9cc11a81669..54e2bfe7e53 100644 ---- a/src/gallium/drivers/radeonsi/si_texture.c -+++ b/src/gallium/drivers/radeonsi/si_texture.c -@@ -803,10 +803,12 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex - } - - const bool debug_disable_dcc = sscreen->debug_flags & DBG(NO_EXPORTED_DCC); -- /* Since shader image stores don't support DCC on GFX9 and older, -- * disable it for external clients that want write access. -+ /* Disable DCC for external clients that might use shader image stores. -+ * They don't support DCC on GFX9 and older. GFX10/10.3 is also problematic -+ * if the view formats between clients are incompatible or if DCC clear is -+ * used. - */ -- const bool shader_write = sscreen->info.gfx_level <= GFX9 && -+ const bool shader_write = sscreen->info.gfx_level < GFX11 && - usage & PIPE_HANDLE_USAGE_SHADER_WRITE && - !tex->is_depth && - tex->surface.meta_offset; --- -2.48.1 - - -From ec81fa2026aa8760158dcf4520bc307f627db40f Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Thu, 25 Jul 2024 21:39:25 +0100 -Subject: [PATCH 12/27] aco: split CounterMap off from VGPRCounterMap - -Signed-off-by: Rhys Perry -Reviewed-by: Georg Lehmann -Part-of: ---- - src/amd/compiler/aco_insert_NOPs.cpp | 38 ++++++++++++++++------------ - 1 file changed, 22 insertions(+), 16 deletions(-) - -diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp -index a6290a7f457..3f4b5d6c85f 100644 ---- a/src/amd/compiler/aco_insert_NOPs.cpp -+++ b/src/amd/compiler/aco_insert_NOPs.cpp -@@ -164,14 +164,14 @@ struct NOP_ctx_gfx10 { - } - }; - --template struct VGPRCounterMap { -+template struct CounterMap { - public: - int base = 0; -- BITSET_DECLARE(resident, 256); -- int val[256]; -+ BITSET_DECLARE(resident, Size); -+ int val[Size]; - - /* Initializes all counters to Max. */ -- VGPRCounterMap() { BITSET_ZERO(resident); } -+ CounterMap() { BITSET_ZERO(resident); } - - /* Increase all counters, clamping at Max. */ - void inc() { base++; } -@@ -185,11 +185,12 @@ public: - - void set(PhysReg reg, unsigned bytes) - { -- if (reg.reg() < 256) -+ if (reg.reg() < Start) - return; - -- for (unsigned i = 0; i < DIV_ROUND_UP(bytes, 4); i++) -- set(reg.reg() - 256 + i); -+ unsigned size = MIN2(DIV_ROUND_UP(bytes, 4), Start + Size - reg.reg()); -+ for (unsigned i = 0; i < size; i++) -+ set(reg.reg() - Start + i); - } - - /* Reset all counters to Max. */ -@@ -201,11 +202,12 @@ public: - - void reset(PhysReg reg, unsigned bytes) - { -- if (reg.reg() < 256) -+ if (reg.reg() < Start) - return; - -- for (unsigned i = 0; i < DIV_ROUND_UP(bytes, 4); i++) -- BITSET_CLEAR(resident, reg.reg() - 256 + i); -+ unsigned size = MIN2(DIV_ROUND_UP(bytes, 4), Start + Size - reg.reg()); -+ for (unsigned i = 0; i < size; i++) -+ BITSET_CLEAR(resident, reg.reg() - Start + i); - } - - uint8_t get(unsigned idx) -@@ -215,14 +217,14 @@ public: - - uint8_t get(PhysReg reg, unsigned offset = 0) - { -- assert(reg.reg() >= 256); -- return get(reg.reg() - 256 + offset); -+ assert(reg.reg() >= Start); -+ return get(reg.reg() - Start + offset); - } - -- void join_min(const VGPRCounterMap& other) -+ void join_min(const CounterMap& other) - { - unsigned i; -- BITSET_FOREACH_SET (i, other.resident, 256) { -+ BITSET_FOREACH_SET (i, other.resident, Size) { - if (BITSET_TEST(resident, i)) - val[i] = MIN2(val[i] + base, other.val[i] + other.base) - base; - else -@@ -231,13 +233,13 @@ public: - BITSET_OR(resident, resident, other.resident); - } - -- bool operator==(const VGPRCounterMap& other) const -+ bool operator==(const CounterMap& other) const - { - if (!BITSET_EQUAL(resident, other.resident)) - return false; - - unsigned i; -- BITSET_FOREACH_SET (i, other.resident, 256) { -+ BITSET_FOREACH_SET (i, other.resident, Size) { - if (!BITSET_TEST(resident, i)) - return false; - if (val[i] + base != other.val[i] + other.base) -@@ -245,8 +247,12 @@ public: - } - return true; - } -+ -+ unsigned size() const { return Size; } - }; - -+template using VGPRCounterMap = CounterMap<256, 256, Max>; -+ - struct NOP_ctx_gfx11 { - /* VcmpxPermlaneHazard */ - bool has_Vcmpx = false; --- -2.48.1 - - -From 8cfd8daf2e0ed1783f3aeac6a5274bc70e1d103a Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Thu, 25 Jul 2024 21:36:29 +0100 -Subject: [PATCH 13/27] aco: minor CounterMap::operator== fix - -I don't think this matters for how we use CounterMap::operator==. - -The BITSET_TEST() was unnecessary because of the BITSET_EQUAL above. - -Signed-off-by: Rhys Perry -Reviewed-by: Georg Lehmann -Part-of: ---- - src/amd/compiler/aco_insert_NOPs.cpp | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp -index 3f4b5d6c85f..d6df9190864 100644 ---- a/src/amd/compiler/aco_insert_NOPs.cpp -+++ b/src/amd/compiler/aco_insert_NOPs.cpp -@@ -240,9 +240,7 @@ public: - - unsigned i; - BITSET_FOREACH_SET (i, other.resident, Size) { -- if (!BITSET_TEST(resident, i)) -- return false; -- if (val[i] + base != other.val[i] + other.base) -+ if (MIN2(val[i] + base, Max) != MIN2(other.val[i] + other.base, Max)) - return false; - } - return true; --- -2.48.1 - - -From 62f14df1ca6435f30e13f44e7246fa763706cd9e Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Thu, 25 Jul 2024 21:40:02 +0100 -Subject: [PATCH 14/27] aco: workaround VALUReadSGPRHazard - -fossil-db (gfx1200): -Totals from 65112 (82.01% of 79395) affected shaders: -Instrs: 41732906 -> 42987198 (+3.01%); split: -0.00%, +3.01% -CodeSize: 222451964 -> 226942644 (+2.02%); split: -0.01%, +2.03% -Latency: 290411063 -> 290944688 (+0.18%); split: -0.00%, +0.18% -InvThroughput: 45854913 -> 45910275 (+0.12%); split: -0.00%, +0.12% - -Signed-off-by: Rhys Perry -Reviewed-by: Georg Lehmann -Part-of: ---- - src/amd/compiler/aco_insert_NOPs.cpp | 91 +++++++++++++++++++++++++--- - 1 file changed, 84 insertions(+), 7 deletions(-) - -diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp -index d6df9190864..1bf53050e78 100644 ---- a/src/amd/compiler/aco_insert_NOPs.cpp -+++ b/src/amd/compiler/aco_insert_NOPs.cpp -@@ -273,6 +273,10 @@ struct NOP_ctx_gfx11 { - /* WMMAHazards */ - std::bitset<256> vgpr_written_by_wmma; - -+ /* VALUReadSGPRHazard */ -+ std::bitset sgpr_read_by_valu; /* SGPR pairs, excluding null, exec, m0 and scc */ -+ CounterMap<0, m0.reg(), 11> sgpr_read_by_valu_then_wr_by_salu; -+ - void join(const NOP_ctx_gfx11& other) - { - has_Vcmpx |= other.has_Vcmpx; -@@ -287,6 +291,8 @@ struct NOP_ctx_gfx11 { - sgpr_read_by_valu_as_lanemask_then_wr_by_salu |= - other.sgpr_read_by_valu_as_lanemask_then_wr_by_salu; - vgpr_written_by_wmma |= other.vgpr_written_by_wmma; -+ sgpr_read_by_valu |= other.sgpr_read_by_valu; -+ sgpr_read_by_valu_then_wr_by_salu.join_min(other.sgpr_read_by_valu_then_wr_by_salu); - } - - bool operator==(const NOP_ctx_gfx11& other) -@@ -302,7 +308,9 @@ struct NOP_ctx_gfx11 { - sgpr_read_by_valu_as_lanemask == other.sgpr_read_by_valu_as_lanemask && - sgpr_read_by_valu_as_lanemask_then_wr_by_salu == - other.sgpr_read_by_valu_as_lanemask_then_wr_by_salu && -- vgpr_written_by_wmma == other.vgpr_written_by_wmma; -+ vgpr_written_by_wmma == other.vgpr_written_by_wmma && -+ sgpr_read_by_valu == other.sgpr_read_by_valu && -+ sgpr_read_by_valu_then_wr_by_salu == other.sgpr_read_by_valu_then_wr_by_salu; - } - }; - -@@ -1527,6 +1535,48 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& - } - } - } -+ } else { -+ /* VALUReadSGPRHazard -+ * VALU reads SGPR and later written by SALU cannot safely be read by VALU/SALU. -+ */ -+ if (instr->isVALU() || instr->isSALU()) { -+ unsigned expiry_count = instr->isSALU() ? 10 : 11; -+ for (Operand& op : instr->operands) { -+ if (sa_sdst == 0) -+ break; -+ -+ for (unsigned i = 0; i < op.size(); i++) { -+ unsigned reg = op.physReg() + i; -+ if (reg < ctx.sgpr_read_by_valu_then_wr_by_salu.size() && -+ ctx.sgpr_read_by_valu_then_wr_by_salu.get(reg) < expiry_count) { -+ bld.sopp(aco_opcode::s_waitcnt_depctr, 0xfffe); -+ sa_sdst = 0; -+ break; -+ } -+ } -+ } -+ } -+ -+ if (sa_sdst == 0) -+ ctx.sgpr_read_by_valu_then_wr_by_salu.reset(); -+ else if (instr->isSALU() && !instr->isSOPP()) -+ ctx.sgpr_read_by_valu_then_wr_by_salu.inc(); -+ -+ if (instr->isVALU()) { -+ for (const Operand& op : instr->operands) { -+ for (unsigned i = 0; i < DIV_ROUND_UP(op.size(), 2); i++) { -+ unsigned reg = (op.physReg() / 2) + i; -+ if (reg < ctx.sgpr_read_by_valu.size()) -+ ctx.sgpr_read_by_valu.set(reg); -+ } -+ } -+ } else if (instr->isSALU() && !instr->definitions.empty()) { -+ for (unsigned i = 0; i < instr->definitions[0].size(); i++) { -+ unsigned def_reg = instr->definitions[0].physReg() + i; -+ if ((def_reg / 2) < ctx.sgpr_read_by_valu.size() && ctx.sgpr_read_by_valu[def_reg / 2]) -+ ctx.sgpr_read_by_valu_then_wr_by_salu.set(def_reg); -+ } -+ } - } - - /* LdsDirectVMEMHazard -@@ -1683,6 +1733,15 @@ resolve_all_gfx11(State& state, NOP_ctx_gfx11& ctx, - } - } - -+ /* VALUReadSGPRHazard */ -+ if (state.program->gfx_level >= GFX12) { -+ for (unsigned i = 0; i < ctx.sgpr_read_by_valu_then_wr_by_salu.size(); i++) { -+ if (ctx.sgpr_read_by_valu_then_wr_by_salu.get(i) < 11) -+ waitcnt_depctr &= 0xfffe; -+ } -+ ctx.sgpr_read_by_valu_then_wr_by_salu.reset(); -+ } -+ - /* LdsDirectVMEMHazard */ - if (ctx.vgpr_used_by_vmem_load.any() || ctx.vgpr_used_by_vmem_store.any() || - ctx.vgpr_used_by_ds.any() || ctx.vgpr_used_by_vmem_sample.any() || -@@ -1758,7 +1817,7 @@ handle_block(Program* program, Ctx& ctx, Block& block) - - template Handle, ResolveAll Resolve> - void --mitigate_hazards(Program* program) -+mitigate_hazards(Program* program, Ctx initial_ctx = Ctx()) - { - std::vector all_ctx(program->blocks.size()); - std::stack> loop_header_indices; -@@ -1767,6 +1826,9 @@ mitigate_hazards(Program* program) - Block& block = program->blocks[i]; - Ctx& ctx = all_ctx[i]; - -+ if (i == 0 || (block.kind & block_kind_resume)) -+ ctx = initial_ctx; -+ - if (block.kind & block_kind_loop_header) { - loop_header_indices.push(i); - } else if (block.kind & block_kind_loop_exit) { -@@ -1864,14 +1926,29 @@ required_export_priority(Program* program) - void - insert_NOPs(Program* program) - { -- if (program->gfx_level >= GFX11) -- mitigate_hazards(program); -- else if (program->gfx_level >= GFX10_3) -+ if (program->gfx_level >= GFX11) { -+ NOP_ctx_gfx11 initial_ctx; -+ -+ bool has_previous_part = -+ program->is_epilog || program->info.vs.has_prolog || program->info.ps.has_prolog || -+ (program->info.merged_shader_compiled_separately && program->stage.sw != SWStage::VS && -+ program->stage.sw != SWStage::TES) || program->stage == raytracing_cs; -+ if (program->gfx_level >= GFX12 && has_previous_part) { -+ /* resolve_all_gfx11 can't resolve VALUReadSGPRHazard entirely. We have to assume that any -+ * SGPR might have been read by VALU if there was a previous shader part. -+ */ -+ initial_ctx.sgpr_read_by_valu.flip(); -+ } -+ -+ mitigate_hazards(program, -+ initial_ctx); -+ } else if (program->gfx_level >= GFX10_3) { - ; /* no hazards/bugs to mitigate */ -- else if (program->gfx_level >= GFX10) -+ } else if (program->gfx_level >= GFX10) { - mitigate_hazards(program); -- else -+ } else { - mitigate_hazards(program); -+ } - - if (program->gfx_level == GFX11_5 && (program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER || - program->stage.hw == AC_HW_PIXEL_SHADER)) --- -2.48.1 - - -From d6d58210c9658d759622022c7d196849c8f4e28d Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Tue, 26 Nov 2024 12:00:35 +0000 -Subject: [PATCH 15/27] aco/gfx12: insert wait between VMEM WaW - -https://github.com/llvm/llvm-project/pull/105549 - -fossil-db (gfx1200): -Totals from 1783 (2.25% of 79395) affected shaders: -Instrs: 7398391 -> 7404566 (+0.08%); split: -0.00%, +0.08% -CodeSize: 38862456 -> 38886364 (+0.06%); split: -0.00%, +0.06% -Latency: 83191513 -> 84211504 (+1.23%); split: -0.00%, +1.23% -InvThroughput: 15185936 -> 15345744 (+1.05%); split: -0.01%, +1.06% - -Signed-off-by: Rhys Perry -Part-of: ---- - src/amd/compiler/aco_insert_waitcnt.cpp | 8 ++++++-- - src/amd/compiler/tests/test_insert_waitcnt.cpp | 5 +++++ - 2 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp -index ae079dcd755..b19f49e8571 100644 ---- a/src/amd/compiler/aco_insert_waitcnt.cpp -+++ b/src/amd/compiler/aco_insert_waitcnt.cpp -@@ -385,9 +385,13 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i - - wait_imm reg_imm = it->second.imm; - -- /* Vector Memory reads and writes return in the order they were issued */ -+ /* Vector Memory reads and writes decrease the counter in the order they were issued. -+ * Before GFX12, they also write VGPRs in order if they're of the same type. -+ * TODO: We can do this for GFX12 and different types for GFX11 if we know that the two -+ * VMEM loads do not write the same lanes. Since GFX11, we track VMEM operations on the -+ * linear CFG, so this is difficult */ - uint8_t vmem_type = get_vmem_type(ctx.gfx_level, instr); -- if (vmem_type) { -+ if (vmem_type && ctx.gfx_level < GFX12) { - wait_event event = get_vmem_event(ctx, instr, vmem_type); - wait_type type = (wait_type)(ffs(ctx.info->get_counters_for_event(event)) - 1); - if ((it->second.events & ctx.info->events[type]) == event && -diff --git a/src/amd/compiler/tests/test_insert_waitcnt.cpp b/src/amd/compiler/tests/test_insert_waitcnt.cpp -index edc34bfc7f8..c69772c5d05 100644 ---- a/src/amd/compiler/tests/test_insert_waitcnt.cpp -+++ b/src/amd/compiler/tests/test_insert_waitcnt.cpp -@@ -192,6 +192,7 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types) - - //>> p_unit_test 0 - //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 -+ //~gfx12! s_wait_loadcnt imm:0 - //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); - bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); -@@ -220,6 +221,7 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types) - - //>> p_unit_test 3 - //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d -+ //~gfx12! s_wait_samplecnt imm:0 - //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d - bld.reset(program->create_and_insert_block()); - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); -@@ -249,6 +251,7 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types) - - //>> p_unit_test 6 - //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d -+ //~gfx12! s_wait_bvhcnt imm:0 - //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d - bld.reset(program->create_and_insert_block()); - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); -@@ -300,6 +303,7 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types) - //>> BB11 - //! /* logical preds: BB9, BB10, / linear preds: BB9, BB10, / kind: uniform, */ - //! p_unit_test 9 -+ //~gfx12! s_wait_loadcnt imm:0 - //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9)); - bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); -@@ -326,6 +330,7 @@ BEGIN_TEST(insert_waitcnt.waw.vmem_types) - //! /* logical preds: BB12, BB13, / linear preds: BB12, BB13, / kind: uniform, */ - //! p_unit_test 10 - //~gfx11! s_waitcnt vmcnt(0) -+ //~gfx12! s_wait_loadcnt imm:0 - //~gfx12! s_wait_samplecnt imm:0 - //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10)); --- -2.48.1 - - -From 41edfd3cd9470ab4e54efb0b3af9eab7242f5e5f Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Wed, 27 Nov 2024 14:51:32 +0000 -Subject: [PATCH 16/27] aco: force linear for event_vmem_sample and - event_vmem_bvh - -I don't know if this issue affects GFX12, but workaround it anyway to be -safe. - -fossil-db (gfx1200): -Totals from 3463 (4.36% of 79395) affected shaders: -Instrs: 9794280 -> 9833253 (+0.40%); split: -0.00%, +0.40% -CodeSize: 52306040 -> 52457988 (+0.29%); split: -0.01%, +0.30% -Latency: 90549385 -> 93617517 (+3.39%); split: -0.00%, +3.39% -InvThroughput: 13189030 -> 13602942 (+3.14%); split: -0.00%, +3.14% - -Signed-off-by: Rhys Perry -Reviewed-by: Georg Lehmann -Part-of: ---- - src/amd/compiler/aco_insert_waitcnt.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp -index b19f49e8571..66ce8603ea4 100644 ---- a/src/amd/compiler/aco_insert_waitcnt.cpp -+++ b/src/amd/compiler/aco_insert_waitcnt.cpp -@@ -735,7 +735,8 @@ insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_ - * Also, follow linear control flow for ALU because it's unlikely that the hardware does per-lane - * dependency checks. - */ -- uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat; -+ uint32_t ds_vmem_events = -+ event_lds | event_gds | event_vmem | event_vmem_sample | event_vmem_bvh | event_flat; - uint32_t alu_events = event_trans | event_valu | event_salu; - bool force_linear = ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events)); - --- -2.48.1 - - -From c56875aa56504eab06cc07245b78f9acfb3a1bf6 Mon Sep 17 00:00:00 2001 -From: Georg Lehmann -Date: Thu, 5 Dec 2024 11:33:08 +0100 -Subject: [PATCH 17/27] aco/gfx12+: do not use v_pack_b32_f16 to pack untyped - data -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -GFX12 removed IEEE_MODE, and made its signalling NaN quieting the default. - -Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12251 -Reviewed-by: Daniel Schürmann -Part-of: ---- - src/amd/compiler/aco_lower_to_hw_instr.cpp | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp -index ed955a32a1e..bceaa0d25e9 100644 ---- a/src/amd/compiler/aco_lower_to_hw_instr.cpp -+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp -@@ -1562,9 +1562,15 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera - return; - } - -+ /* v_pack_b32_f16 can be used for bit exact copies if: -+ * - fp16 input denorms are enabled, otherwise they get flushed to zero -+ * - signalling input NaNs are kept, which is the case with IEEE_MODE=0 -+ * GFX12+ always quiets signalling NaNs, IEEE_MODE was removed -+ */ - bool can_use_pack = (ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in) && - (ctx->program->gfx_level >= GFX10 || -- (ctx->program->gfx_level >= GFX9 && !lo.isLiteral() && !hi.isLiteral())); -+ (ctx->program->gfx_level >= GFX9 && !lo.isLiteral() && !hi.isLiteral())) && -+ ctx->program->gfx_level < GFX12; - - if (can_use_pack) { - Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi); --- -2.48.1 - - -From 04d7f9ed9e5b0cd98c32a9d8b35c92698e427dfa Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Wed, 4 Dec 2024 13:36:04 +0000 -Subject: [PATCH 18/27] aco: don't CSE p_shader_cycles_hi_lo_hi - -Signed-off-by: Rhys Perry -Reviewed-by: Georg Lehmann -Fixes: fae2a85d57a4 ("aco/gfx12: implement subgroup shader clock") -Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12243 -Part-of: ---- - src/amd/compiler/aco_opt_value_numbering.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp -index e040221be61..e0c8eeffbbb 100644 ---- a/src/amd/compiler/aco_opt_value_numbering.cpp -+++ b/src/amd/compiler/aco_opt_value_numbering.cpp -@@ -306,6 +306,7 @@ can_eliminate(aco_ptr& instr) - if (instr->definitions.empty() || instr->opcode == aco_opcode::p_phi || - instr->opcode == aco_opcode::p_linear_phi || - instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id || -+ instr->opcode == aco_opcode::p_shader_cycles_hi_lo_hi || - instr->definitions[0].isNoCSE()) - return false; - --- -2.48.1 - - -From 8de2e82ca04d973c70eec66c650809a89000e90d Mon Sep 17 00:00:00 2001 -From: Georg Lehmann -Date: Mon, 9 Dec 2024 13:40:49 +0100 -Subject: [PATCH 19/27] aco/gfx12: don't assume memory operations complete in - order - -Reviewed-by: Rhys Perry -Part-of: ---- - src/amd/compiler/aco_insert_waitcnt.cpp | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp -index 66ce8603ea4..22822d92f24 100644 ---- a/src/amd/compiler/aco_insert_waitcnt.cpp -+++ b/src/amd/compiler/aco_insert_waitcnt.cpp -@@ -449,9 +449,9 @@ perform_barrier(wait_ctx& ctx, wait_imm& imm, memory_sync_info sync, unsigned se - if (bar_scope_lds <= subgroup_scope) - events &= ~event_lds; - -- /* in non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations -+ /* Until GFX12, in non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations - * in-order for the same workgroup */ -- if (!ctx.program->wgp_mode && sync.scope <= scope_workgroup) -+ if (ctx.gfx_level < GFX12 && !ctx.program->wgp_mode && sync.scope <= scope_workgroup) - events &= ~(event_vmem | event_vmem_store | event_smem); - - if (events) --- -2.48.1 - - -From 054163a6be4c130b5a95fb5fa2d39d64ea8ee259 Mon Sep 17 00:00:00 2001 -From: Qiang Yu -Date: Mon, 9 Dec 2024 10:01:21 +0800 -Subject: [PATCH 20/27] aco: enable gfx12 support for radeonsi - -Part-of: ---- - src/amd/compiler/aco_interface.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp -index ab518aa14ef..8508cc0445e 100644 ---- a/src/amd/compiler/aco_interface.cpp -+++ b/src/amd/compiler/aco_interface.cpp -@@ -422,6 +422,7 @@ aco_is_gpu_supported(const struct radeon_info* info) - case GFX10_3: - case GFX11: - case GFX11_5: -+ case GFX12: - return true; - default: - return false; --- -2.48.1 - - -From a3006214823ffacb34383fad792b3184a171c135 Mon Sep 17 00:00:00 2001 -From: Samuel Pitoiset -Date: Fri, 3 Jan 2025 08:29:34 -0800 -Subject: [PATCH 21/27] aco: fix VS prologs on GFX12 - -MTBUF/MUBUF instructions must use zero for SOFFSET, use const_offset -instead. - -Signed-off-by: Samuel Pitoiset -Part-of: ---- - .../compiler/aco_instruction_selection.cpp | 30 +++++++++++++++---- - 1 file changed, 25 insertions(+), 5 deletions(-) - -diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp -index 93a88c74d30..5e4806f608f 100644 ---- a/src/amd/compiler/aco_instruction_selection.cpp -+++ b/src/amd/compiler/aco_instruction_selection.cpp -@@ -12903,8 +12903,16 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index, - } else { - for (unsigned i = 0; i < size; i++) { - Definition def(i ? scratch.advance(i * 4 - 4) : dst, v1); -- bld.mubuf(aco_opcode::buffer_load_ubyte, def, desc, index, Operand::c32(offset + i), 0, -- false, true); -+ unsigned soffset = 0, const_offset = 0; -+ -+ if (bld.program->gfx_level >= GFX12) { -+ const_offset = offset + i; -+ } else { -+ soffset = offset + i; -+ } -+ -+ bld.mubuf(aco_opcode::buffer_load_ubyte, def, desc, index, Operand::c32(soffset), -+ const_offset, false, true); - } - } - -@@ -13082,6 +13090,17 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh - for (unsigned j = 0; j < (vtx_info->chan_byte_size ? vtx_info->num_channels : 1); j++) { - bool post_shuffle = pinfo->post_shuffle & (1u << loc); - unsigned offset = vtx_info->chan_byte_size * (post_shuffle && j < 3 ? 2 - j : j); -+ unsigned soffset = 0, const_offset = 0; -+ -+ /* We need to use soffset on GFX6-7 to avoid being considered -+ * out-of-bounds when offset>=stride. GFX12 doesn't support a -+ * non-zero constant soffset. -+ */ -+ if (program->gfx_level >= GFX12) { -+ const_offset = offset; -+ } else { -+ soffset = offset; -+ } - - if ((pinfo->unaligned_mask & (1u << loc)) && vtx_info->chan_byte_size <= 4) - load_unaligned_vs_attrib(bld, dest.advance(j * 4u), Operand(cur_desc, s4), -@@ -13089,11 +13108,12 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh - else if (vtx_info->chan_byte_size == 8) - bld.mtbuf(aco_opcode::tbuffer_load_format_xy, - Definition(dest.advance(j * 8u), v2), Operand(cur_desc, s4), -- fetch_index, Operand::c32(offset), dfmt, nfmt, 0, false, true); -+ fetch_index, Operand::c32(soffset), dfmt, nfmt, const_offset, false, -+ true); - else - bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(dest.advance(j * 4u), v1), -- Operand(cur_desc, s4), fetch_index, Operand::c32(offset), dfmt, nfmt, -- 0, false, true); -+ Operand(cur_desc, s4), fetch_index, Operand::c32(soffset), dfmt, nfmt, -+ const_offset, false, true); - } - - unsigned slots = vtx_info->chan_byte_size == 8 && vtx_info->num_channels > 2 ? 2 : 1; --- -2.48.1 - - -From 9aeed54b7f1050648612f59844c4cfbedfe1b182 Mon Sep 17 00:00:00 2001 -From: Samuel Pitoiset -Date: Wed, 8 Jan 2025 02:03:09 -0800 -Subject: [PATCH 22/27] aco: always use ds_bpermute for shuffle/rotate on GFX12 - -ds_bpermute supports both 32 and 64 lanes now. - -Signed-off-by: Samuel Pitoiset -Part-of: ---- - src/amd/compiler/aco_instruction_selection.cpp | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - -diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp -index 5e4806f608f..8ab9c18ee2a 100644 ---- a/src/amd/compiler/aco_instruction_selection.cpp -+++ b/src/amd/compiler/aco_instruction_selection.cpp -@@ -189,9 +189,9 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) - - return bld.pseudo(aco_opcode::p_bpermute_readlane, bld.def(v1), bld.def(bld.lm), - bld.def(bld.lm, vcc), index_op, input_data); -- } else if (ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64) { -- -- /* GFX10 wave64 mode: emulate full-wave bpermute */ -+ } else if (ctx->options->gfx_level >= GFX10 && ctx->options->gfx_level <= GFX11_5 && -+ ctx->program->wave_size == 64) { -+ /* GFX10-11.5 wave64 mode: emulate full-wave bpermute */ - Temp index_is_lo = - bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand::c32(31u), index); - Builder::Result index_is_lo_split = -@@ -221,7 +221,7 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) - same_half); - } - } else { -- /* GFX8-9 or GFX10 wave32: bpermute works normally */ -+ /* wave32 or GFX8-9, GFX12+: bpermute works normally */ - Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index); - return bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, data); - } -@@ -8747,8 +8747,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) - Temp tid = emit_mbcnt(ctx, bld.tmp(v1)); - Temp src_lane = bld.vadd32(bld.def(v1), tid, delta); - -- if (ctx->program->gfx_level >= GFX10 && cluster_size == 32) { -- /* ds_bpermute is restricted to 32 lanes on GFX10+. */ -+ if (ctx->program->gfx_level >= GFX10 && ctx->program->gfx_level <= GFX11_5 && -+ cluster_size == 32) { -+ /* ds_bpermute is restricted to 32 lanes on GFX10-GFX11.5. */ - Temp index_x4 = - bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), src_lane); - tmp = bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, src); --- -2.48.1 - - -From 4929c358e0d44cb25cb486cc634ee96536a674db Mon Sep 17 00:00:00 2001 -From: Georg Lehmann -Date: Sun, 19 Jan 2025 10:17:12 +0100 -Subject: [PATCH 23/27] aco: update is_dual_issue_capable for gfx11.5+ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Reviewed-by: Daniel Schürmann -Reviewed-by: Daniel Schürmann -Part-of: ---- - src/amd/compiler/aco_statistics.cpp | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp -index 552bfd2cee4..00934b89ceb 100644 ---- a/src/amd/compiler/aco_statistics.cpp -+++ b/src/amd/compiler/aco_statistics.cpp -@@ -142,7 +142,12 @@ is_dual_issue_capable(const Program& program, const Instruction& instr) - } - return false; - } -- default: return false; -+ default: -+ if (instr.isVINTERP_INREG()) -+ return program.gfx_level >= GFX11_5; -+ if (instr.isVOPC() && instr_info.classes[(int)instr.opcode] == instr_class::valu32) -+ return program.gfx_level == GFX11_5; -+ return false; - } - } - --- -2.48.1 - - -From 9f0671af164968432750d7b1be5da6d838765179 Mon Sep 17 00:00:00 2001 -From: Samuel Pitoiset -Date: Thu, 12 Sep 2024 15:46:29 +0200 -Subject: [PATCH 24/27] aco,radv,radeonsi: move has_epilog to the fragment - shader info - -Signed-off-by: Samuel Pitoiset -Part-of: ---- - src/amd/compiler/aco_assembler.cpp | 3 ++- - src/amd/compiler/aco_instruction_selection.cpp | 8 ++++---- - src/amd/compiler/aco_interface.cpp | 2 +- - src/amd/compiler/aco_shader_info.h | 2 +- - src/amd/vulkan/radv_aco_shader_info.h | 2 +- - src/amd/vulkan/radv_cmd_buffer.c | 6 +++--- - src/amd/vulkan/radv_pipeline.c | 2 +- - src/amd/vulkan/radv_pipeline_graphics.c | 2 +- - src/amd/vulkan/radv_shader_args.c | 2 +- - src/amd/vulkan/radv_shader_info.c | 6 +++--- - src/amd/vulkan/radv_shader_info.h | 3 ++- - src/gallium/drivers/radeonsi/si_shader_aco.c | 2 +- - 12 files changed, 21 insertions(+), 19 deletions(-) - -diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp -index 714648b252c..cf57a7e95c6 100644 ---- a/src/amd/compiler/aco_assembler.cpp -+++ b/src/amd/compiler/aco_assembler.cpp -@@ -1727,7 +1727,8 @@ emit_program(Program* program, std::vector& code, std::vectorinfo.merged_shader_compiled_separately; - - /* Prolog has no exports. */ -- if (!program->is_prolog && !program->info.has_epilog && !is_separately_compiled_ngg_vs_or_es && -+ if (!program->is_prolog && !program->info.ps.has_epilog && -+ !is_separately_compiled_ngg_vs_or_es && - (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER || - program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)) - fix_exports(ctx, code, program); -diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp -index 8ab9c18ee2a..987ba7d22ae 100644 ---- a/src/amd/compiler/aco_instruction_selection.cpp -+++ b/src/amd/compiler/aco_instruction_selection.cpp -@@ -177,7 +177,7 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) - const bool avoid_shared_vgprs = - ctx->options->gfx_level >= GFX10 && ctx->options->gfx_level < GFX11 && - ctx->program->wave_size == 64 && -- (ctx->program->info.has_epilog || ctx->program->info.merged_shader_compiled_separately || -+ (ctx->program->info.ps.has_epilog || ctx->program->info.merged_shader_compiled_separately || - ctx->program->info.vs.has_prolog || ctx->stage == raytracing_cs); - - if (ctx->options->gfx_level <= GFX7 || avoid_shared_vgprs) { -@@ -5611,7 +5611,7 @@ store_output_to_temps(isel_context* ctx, nir_intrinsic_instr* instr) - idx++; - } - -- if (ctx->stage == fragment_fs && ctx->program->info.has_epilog && base >= FRAG_RESULT_DATA0) { -+ if (ctx->stage == fragment_fs && ctx->program->info.ps.has_epilog && base >= FRAG_RESULT_DATA0) { - unsigned index = base - FRAG_RESULT_DATA0; - - if (nir_intrinsic_src_type(instr) == nir_type_float16) { -@@ -11927,7 +11927,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons - nir_function_impl* func = nir_shader_get_entrypoint(nir); - visit_cf_list(&ctx, &func->body); - -- if (ctx.program->info.has_epilog) { -+ if (ctx.program->info.ps.has_epilog) { - if (ctx.stage == fragment_fs) { - if (ctx.options->is_opengl) - create_fs_end_for_epilog(&ctx); -@@ -11965,7 +11965,7 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons - append_logical_end(ctx.block); - ctx.block->kind |= block_kind_uniform; - -- if ((!program->info.has_epilog && !is_first_stage_of_merged_shader) || -+ if ((!program->info.ps.has_epilog && !is_first_stage_of_merged_shader) || - (nir->info.stage == MESA_SHADER_TESS_CTRL && program->gfx_level >= GFX9)) { - Builder(program, ctx.block).sopp(aco_opcode::s_endpgm); - } -diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp -index 8508cc0445e..64396cfbc75 100644 ---- a/src/amd/compiler/aco_interface.cpp -+++ b/src/amd/compiler/aco_interface.cpp -@@ -277,7 +277,7 @@ aco_compile_shader(const struct aco_compiler_options* options, const struct aco_ - /* OpenGL combine multi shader parts into one continous code block, - * so only last part need the s_endpgm instruction. - */ -- bool append_endpgm = !(options->is_opengl && info->has_epilog); -+ bool append_endpgm = !(options->is_opengl && info->ps.has_epilog); - unsigned exec_size = emit_program(program.get(), code, &symbols, append_endpgm); - - if (program->collect_statistics) -diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h -index 1cc2bc11db4..7f6fe7a5e9e 100644 ---- a/src/amd/compiler/aco_shader_info.h -+++ b/src/amd/compiler/aco_shader_info.h -@@ -102,7 +102,6 @@ struct aco_shader_info { - bool has_ngg_early_prim_export; - bool image_2d_view_of_3d; - unsigned workgroup_size; -- bool has_epilog; /* Only for TCS or PS. */ - bool merged_shader_compiled_separately; /* GFX9+ */ - struct ac_arg next_stage_pc; - struct ac_arg epilog_pc; /* Vulkan only */ -@@ -127,6 +126,7 @@ struct aco_shader_info { - uint32_t num_interp; - unsigned spi_ps_input_ena; - unsigned spi_ps_input_addr; -+ bool has_epilog; - - /* OpenGL only */ - struct ac_arg alpha_reference; -diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h -index b9665b2b20f..912d6b6ca4e 100644 ---- a/src/amd/vulkan/radv_aco_shader_info.h -+++ b/src/amd/vulkan/radv_aco_shader_info.h -@@ -31,7 +31,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv - ASSIGN_FIELD(has_ngg_culling); - ASSIGN_FIELD(has_ngg_early_prim_export); - ASSIGN_FIELD(workgroup_size); -- ASSIGN_FIELD(has_epilog); -+ ASSIGN_FIELD(ps.has_epilog); - ASSIGN_FIELD(merged_shader_compiled_separately); - ASSIGN_FIELD(vs.tcs_in_out_eq); - ASSIGN_FIELD(vs.tcs_temp_only_input_mask); -diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c -index d0cf1f1123b..671b35cb972 100644 ---- a/src/amd/vulkan/radv_cmd_buffer.c -+++ b/src/amd/vulkan/radv_cmd_buffer.c -@@ -8096,7 +8096,7 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DB_SHADER_CONTROL; - - /* Re-emit the PS epilog when a new fragment shader is bound. */ -- if (ps->info.has_epilog) -+ if (ps->info.ps.has_epilog) - cmd_buffer->state.emitted_ps_epilog = NULL; - } - -@@ -10703,7 +10703,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r - struct radv_shader_part *ps_epilog = NULL; - - if (cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT] && -- cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]->info.has_epilog) { -+ cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]->info.ps.has_epilog) { - if ((cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline || - ((cmd_buffer->state.dirty & (RADV_CMD_DIRTY_GRAPHICS_SHADERS | RADV_CMD_DIRTY_FRAMEBUFFER)) || - (cmd_buffer->state.dirty_dynamic & -@@ -10921,7 +10921,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) - } - - const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; -- if (ps && !ps->info.has_epilog) { -+ if (ps && !ps->info.ps.has_epilog) { - uint32_t col_format = 0, cb_shader_mask = 0; - if (radv_needs_null_export_workaround(device, ps, 0)) - col_format = V_028714_SPI_SHADER_32_R; -diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c -index e07737cd742..738fc736b2c 100644 ---- a/src/amd/vulkan/radv_pipeline.c -+++ b/src/amd/vulkan/radv_pipeline.c -@@ -551,7 +551,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat - .use_aco = !radv_use_llvm_for_stage(pdev, stage->stage), - .uses_discard = true, - .alpha_func = COMPARE_FUNC_ALWAYS, -- .no_color_export = stage->info.has_epilog, -+ .no_color_export = stage->info.ps.has_epilog, - .no_depth_export = stage->info.ps.exports_mrtz_via_epilog, - - .bc_optimize_for_persp = G_0286CC_PERSP_CENTER_ENA(stage->info.ps.spi_ps_input_ena) && -diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c -index ed380341ba5..daab8ce2ed6 100644 ---- a/src/amd/vulkan/radv_pipeline_graphics.c -+++ b/src/amd/vulkan/radv_pipeline_graphics.c -@@ -3150,7 +3150,7 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv - radv_pipeline_init_dynamic_state(device, pipeline, &gfx_state.vk, pCreateInfo); - - const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; -- if (ps && !ps->info.has_epilog) { -+ if (ps && !ps->info.ps.has_epilog) { - pipeline->spi_shader_col_format = ps->info.ps.spi_shader_col_format; - pipeline->cb_shader_mask = ps->info.ps.cb_shader_mask; - } -diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c -index 10f1ee3ad07..479bc49c803 100644 ---- a/src/amd/vulkan/radv_shader_args.c -+++ b/src/amd/vulkan/radv_shader_args.c -@@ -846,7 +846,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics - case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args); - -- if (info->has_epilog) { -+ if (info->ps.has_epilog) { - add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC); - } - -diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c -index c8c0b77201d..a1050a9ec07 100644 ---- a/src/amd/vulkan/radv_shader_info.c -+++ b/src/amd/vulkan/radv_shader_info.c -@@ -972,9 +972,9 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, - info->ps.spi_ps_input_addr &= C_02865C_COVERAGE_TO_SHADER_SELECT; - } - -- info->has_epilog = gfx_state->ps.has_epilog && info->ps.colors_written; -+ info->ps.has_epilog = gfx_state->ps.has_epilog && info->ps.colors_written; - -- if (!info->has_epilog) { -+ if (!info->ps.has_epilog) { - info->ps.mrt0_is_dual_src = gfx_state->ps.epilog.mrt0_is_dual_src; - info->ps.spi_shader_col_format = gfx_state->ps.epilog.spi_shader_col_format; - -@@ -988,7 +988,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, - (info->ps.color0_written & 0x8) && (info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask); - - info->ps.exports_mrtz_via_epilog = -- info->has_epilog && gfx_state->ps.exports_mrtz_via_epilog && export_alpha_and_mrtz; -+ info->ps.has_epilog && gfx_state->ps.exports_mrtz_via_epilog && export_alpha_and_mrtz; - - if (!info->ps.exports_mrtz_via_epilog) { - info->ps.writes_mrt0_alpha = gfx_state->ms.alpha_to_coverage_via_mrtz && export_alpha_and_mrtz; -diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h -index e072c9ffd5b..0ffa972b078 100644 ---- a/src/amd/vulkan/radv_shader_info.h -+++ b/src/amd/vulkan/radv_shader_info.h -@@ -106,7 +106,6 @@ struct radv_shader_info { - uint32_t user_data_0; - bool inputs_linked; - bool outputs_linked; -- bool has_epilog; /* Only for TCS or PS */ - bool merged_shader_compiled_separately; /* GFX9+ */ - bool force_indirect_desc_sets; - -@@ -213,6 +212,8 @@ struct radv_shader_info { - bool load_provoking_vtx; - bool load_rasterization_prim; - bool force_sample_iter_shading_rate; -+ bool uses_fbfetch_output; -+ bool has_epilog; - } ps; - struct { - bool uses_grid_size; -diff --git a/src/gallium/drivers/radeonsi/si_shader_aco.c b/src/gallium/drivers/radeonsi/si_shader_aco.c -index 67ba140ed67..232104954fb 100644 ---- a/src/gallium/drivers/radeonsi/si_shader_aco.c -+++ b/src/gallium/drivers/radeonsi/si_shader_aco.c -@@ -100,7 +100,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info, - info->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena; - info->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr; - info->ps.alpha_reference = args->alpha_reference; -- info->has_epilog = !shader->is_monolithic; -+ info->ps.has_epilog = !shader->is_monolithic; - break; - default: - break; --- -2.48.1 - - -From 82939c7825d3a45d5e992e10491e43183e511a9d Mon Sep 17 00:00:00 2001 -From: Rhys Perry -Date: Thu, 24 Oct 2024 11:01:46 +0100 -Subject: [PATCH 25/27] aco,radv,radeonsi: add aco_shader_info::ps::has_prolog - -Signed-off-by: Rhys Perry -Reviewed-by: Georg Lehmann -Part-of: ---- - src/amd/compiler/aco_shader_info.h | 1 + - src/amd/vulkan/radv_aco_shader_info.h | 1 + - src/gallium/drivers/radeonsi/si_shader_aco.c | 1 + - 3 files changed, 3 insertions(+) - -diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h -index 7f6fe7a5e9e..ad615e19992 100644 ---- a/src/amd/compiler/aco_shader_info.h -+++ b/src/amd/compiler/aco_shader_info.h -@@ -126,6 +126,7 @@ struct aco_shader_info { - uint32_t num_interp; - unsigned spi_ps_input_ena; - unsigned spi_ps_input_addr; -+ bool has_prolog; - bool has_epilog; - - /* OpenGL only */ -diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h -index 912d6b6ca4e..9a0b4d562e0 100644 ---- a/src/amd/vulkan/radv_aco_shader_info.h -+++ b/src/amd/vulkan/radv_aco_shader_info.h -@@ -41,6 +41,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv - ASSIGN_FIELD(cs.uses_full_subgroups); - aco_info->ps.spi_ps_input_ena = radv->ps.spi_ps_input_ena; - aco_info->ps.spi_ps_input_addr = radv->ps.spi_ps_input_addr; -+ aco_info->ps.has_prolog = false; - aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size; - aco_info->is_trap_handler_shader = radv->type == RADV_SHADER_TYPE_TRAP_HANDLER; - aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d; -diff --git a/src/gallium/drivers/radeonsi/si_shader_aco.c b/src/gallium/drivers/radeonsi/si_shader_aco.c -index 232104954fb..dd0b3759a2b 100644 ---- a/src/gallium/drivers/radeonsi/si_shader_aco.c -+++ b/src/gallium/drivers/radeonsi/si_shader_aco.c -@@ -100,6 +100,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info, - info->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena; - info->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr; - info->ps.alpha_reference = args->alpha_reference; -+ info->ps.has_prolog = !shader->is_monolithic; - info->ps.has_epilog = !shader->is_monolithic; - break; - default: --- -2.48.1 - - -From af418b1dcedd56326872945c18dff6dae79ebfdf Mon Sep 17 00:00:00 2001 -From: Samuel Pitoiset -Date: Thu, 12 Sep 2024 15:44:43 +0200 -Subject: [PATCH 26/27] radv,radeonsi: remove remaining occurrences of TCS - epilog - -TCS epilog has been removed few months ago. - -Signed-off-by: Samuel Pitoiset -Part-of: ---- - src/amd/vulkan/radv_shader_args.c | 8 -------- - src/gallium/drivers/radeonsi/si_shader_aco.c | 1 - - 2 files changed, 9 deletions(-) - -diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c -index 479bc49c803..100797c7389 100644 ---- a/src/amd/vulkan/radv_shader_args.c -+++ b/src/amd/vulkan/radv_shader_args.c -@@ -683,10 +683,6 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics - add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT); - } - -- if (info->has_epilog) { -- add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC); -- } -- - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids); - -@@ -703,10 +699,6 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics - add_ud_arg(args, 1, AC_ARG_INT, &args->tcs_offchip_layout, AC_UD_TCS_OFFCHIP_LAYOUT); - } - -- if (info->has_epilog) { -- add_ud_arg(args, 1, AC_ARG_INT, &args->epilog_pc, AC_UD_EPILOG_PC); -- } -- - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); - if (args->explicit_scratch_args) { -diff --git a/src/gallium/drivers/radeonsi/si_shader_aco.c b/src/gallium/drivers/radeonsi/si_shader_aco.c -index dd0b3759a2b..dacfe15bb5a 100644 ---- a/src/gallium/drivers/radeonsi/si_shader_aco.c -+++ b/src/gallium/drivers/radeonsi/si_shader_aco.c -@@ -88,7 +88,6 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info, - case MESA_SHADER_TESS_CTRL: - info->vs.tcs_in_out_eq = key->ge.opt.same_patch_vertices; - info->vs.tcs_temp_only_input_mask = sel->info.tcs_vgpr_only_inputs; -- info->has_epilog = !shader->is_monolithic; - info->tcs.pass_tessfactors_by_reg = sel->info.tessfactors_are_def_in_all_invocs; - info->tcs.patch_stride = si_get_tcs_out_patch_stride(&sel->info); - info->tcs.tcs_offchip_layout = args->tcs_offchip_layout; --- -2.48.1 - - -From fdd1b46c2683b34bb7171038999e0dc4620a9296 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= -Date: Wed, 8 Jan 2025 15:03:32 -0500 -Subject: [PATCH 27/27] radeonsi/gfx12: use ACO if LLVM is 19 or older - -LLVM 19 is missing a SALU hazard fix. - -(cherry-picked + adapted from 239840556f99aaa1b71c450a3b389bd9be1a24f3) - -Reviewed-by: Pierre-Eric Pelloux-Prayer -Part-of: ---- - src/gallium/drivers/radeonsi/si_pipe.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c -index 4e063c1ce3e..f3f23ab0dd0 100644 ---- a/src/gallium/drivers/radeonsi/si_pipe.c -+++ b/src/gallium/drivers/radeonsi/si_pipe.c -@@ -1198,7 +1198,14 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, - sscreen->info.register_shadowing_required = true; - - #if AMD_LLVM_AVAILABLE -- sscreen->use_aco = (sscreen->debug_flags & DBG(USE_ACO)); -+ /* For GFX11.5, LLVM < 19 is missing a workaround that can cause GPU hangs. ACO is the only -+ * alternative that has the workaround and is always available. Same for GFX12. -+ */ -+ if ((sscreen->info.gfx_level == GFX12 && LLVM_VERSION_MAJOR < 20) || -+ (sscreen->info.gfx_level == GFX11_5 && LLVM_VERSION_MAJOR < 19)) -+ sscreen->use_aco = true; -+ else -+ sscreen->use_aco = sscreen->debug_flags & DBG(USE_ACO); - #else - sscreen->use_aco = true; - #endif --- -2.48.1 - diff --git a/e4eb5e80c316c0af3fff310ca89e1175d81556c1.patch b/e4eb5e80c316c0af3fff310ca89e1175d81556c1.patch new file mode 100644 index 0000000..5cf4073 --- /dev/null +++ b/e4eb5e80c316c0af3fff310ca89e1175d81556c1.patch @@ -0,0 +1,144 @@ +From e4eb5e80c316c0af3fff310ca89e1175d81556c1 Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Thu, 27 Feb 2025 15:44:27 +0100 +Subject: [PATCH] clover: Don't include libclc headers + +Per https://github.com/llvm/llvm-project/issues/119967 these +headers are internal implementation details of libclc and were +never supposed to be installed. They are not available anymore +since LLVM 20. Instead opencl-c.h should be used. + +There already ise a code path for including opencl-c.h, so always +use it. + +This didn't work for me out of the box, because the build system +currently hardcodes the clang resource directory, which is incorrect +for Fedora at least. Fix this by using GetResourcePath + +CLANG_RESOURCE_DIR provided by clang instead. This is basically +the same as what is done in clc_helper.c + +I've still retained the old behavior as a fallback just in case +(e.g. if clang is linked statically?) + +Reviewed-by: Karol Herbst +Part-of: +--- + .../frontends/clover/llvm/invocation.cpp | 53 +++++++++++++------ + src/gallium/frontends/clover/meson.build | 5 +- + 2 files changed, 39 insertions(+), 19 deletions(-) + +diff --git a/src/gallium/frontends/clover/llvm/invocation.cpp b/src/gallium/frontends/clover/llvm/invocation.cpp +index 3cbb05baecf67..ca030b404d791 100644 +--- a/src/gallium/frontends/clover/llvm/invocation.cpp ++++ b/src/gallium/frontends/clover/llvm/invocation.cpp +@@ -24,6 +24,8 @@ + // OTHER DEALINGS IN THE SOFTWARE. + // + ++#include ++ + #include + #include + #include +@@ -39,6 +41,8 @@ + #include + #include + #include ++#include ++#include + + #if LLVM_VERSION_MAJOR >= 20 + #include +@@ -323,6 +327,30 @@ namespace { + return c; + } + ++ std::string getResourceDirectory() { ++ Dl_info info; ++ if (dladdr((void *)clang::CompilerInvocation::CreateFromArgs, &info) == 0) { ++ return FALLBACK_CLANG_RESOURCE_DIR; ++ } ++ ++ char *libclang_path = realpath(info.dli_fname, NULL); ++ if (libclang_path == nullptr) { ++ return FALLBACK_CLANG_RESOURCE_DIR; ++ } ++ ++ // GetResourcePath is a way to retrieve the actual libclang resource dir based on a given ++ // binary or library. ++ std::string clang_resource_dir = ++#if LLVM_VERSION_MAJOR >= 20 ++ clang::driver::Driver::GetResourcesPath(std::string(libclang_path)); ++#else ++ clang::driver::Driver::GetResourcesPath(std::string(libclang_path), CLANG_RESOURCE_DIR); ++#endif ++ free(libclang_path); ++ ++ return clang_resource_dir; ++ } ++ + std::unique_ptr + compile(LLVMContext &ctx, clang::CompilerInstance &c, + const std::string &name, const std::string &source, +@@ -331,25 +359,18 @@ namespace { + c.getFrontendOpts().ProgramAction = clang::frontend::EmitLLVMOnly; + c.getHeaderSearchOpts().UseBuiltinIncludes = true; + c.getHeaderSearchOpts().UseStandardSystemIncludes = true; +- c.getHeaderSearchOpts().ResourceDir = CLANG_RESOURCE_DIR; + +- if (use_libclc) { +- // Add libclc generic search path +- c.getHeaderSearchOpts().AddPath(LIBCLC_INCLUDEDIR, +- clang::frontend::Angled, +- false, false); ++ std::string clang_resource_dir = getResourceDirectory(); ++ c.getHeaderSearchOpts().ResourceDir = clang_resource_dir; + +- // Add libclc include +- c.getPreprocessorOpts().Includes.push_back("clc/clc.h"); +- } else { +- // Add opencl-c generic search path +- c.getHeaderSearchOpts().AddPath(CLANG_RESOURCE_DIR, +- clang::frontend::Angled, +- false, false); ++ // Add opencl-c generic search path ++ std::string clang_include_path = clang_resource_dir + "/include"; ++ c.getHeaderSearchOpts().AddPath(clang_include_path, ++ clang::frontend::Angled, ++ false, false); + +- // Add opencl include +- c.getPreprocessorOpts().Includes.push_back("opencl-c.h"); +- } ++ // Add opencl include ++ c.getPreprocessorOpts().Includes.push_back("opencl-c.h"); + + // Add definition for the OpenCL version + const auto dev_version = dev.device_version(); +diff --git a/src/gallium/frontends/clover/meson.build b/src/gallium/frontends/clover/meson.build +index e569b86a1bea7..56a9894f0dbb1 100644 +--- a/src/gallium/frontends/clover/meson.build ++++ b/src/gallium/frontends/clover/meson.build +@@ -10,7 +10,6 @@ clover_opencl_cpp_args = [ + '-DCL_USE_DEPRECATED_OPENCL_2_0_APIS', + '-DCL_USE_DEPRECATED_OPENCL_2_1_APIS', + '-DCL_USE_DEPRECATED_OPENCL_2_2_APIS', +- '-DLIBCLC_INCLUDEDIR="@0@/"'.format(dep_clc.get_variable(pkgconfig : 'includedir')), + '-DLIBCLC_LIBEXECDIR="@0@/"'.format(dep_clc.get_variable(pkgconfig : 'libexecdir')) + ] + clover_incs = [inc_include, inc_src, inc_gallium, inc_gallium_aux] +@@ -43,9 +42,9 @@ libclllvm = static_library( + cpp_args : [ + clover_cpp_args, + clover_opencl_cpp_args, +- '-DCLANG_RESOURCE_DIR="@0@"'.format(join_paths( ++ '-DFALLBACK_CLANG_RESOURCE_DIR="@0@"'.format(join_paths( + dep_llvm.get_variable(cmake : 'LLVM_LIBRARY_DIR', configtool: 'libdir'), 'clang', +- dep_llvm.version(), 'include', ++ dep_llvm.version() + )), + ], + gnu_symbol_visibility : 'hidden', +-- +GitLab + diff --git a/mesa.spec b/mesa.spec index eb7a4e9..083e701 100644 --- a/mesa.spec +++ b/mesa.spec @@ -2,7 +2,7 @@ ## (rpmautospec version 0.6.5) ## RPMAUTOSPEC: autorelease, autochangelog %define autorelease(e:s:pb:n) %{?-p:0.}%{lua: - release_number = 3; + release_number = 6; base_release_number = tonumber(rpm.expand("%{?-b*}%{!?-b:1}")); print(release_number + base_release_number - 1); }%{?-e:.%{-e*}}%{?-s:.%{-s*}}%{!?-n:%{?dist}}.alma.1 @@ -84,7 +84,7 @@ Name: mesa Summary: Mesa graphics libraries -%global ver 24.2.8 +%global ver 25.0.7 Version: %{lua:ver = string.gsub(rpm.expand("%{ver}"), "-", "~"); print(ver)} Release: %autorelease License: MIT AND BSD-3-Clause AND SGI-B-2.0 @@ -98,7 +98,7 @@ Source1: Mesa-MLAA-License-Clarification-Email.txt # libclc is not available in RHEL but it is required for Intel drivers since # mesa >= 24.1.0 -%global libclc_version 19.1.1 +%global libclc_version 20.1.3 Source3: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{libclc_version}/libclc-%{libclc_version}.src.tar.xz BuildRequires: libedit-devel BuildRequires: clang-devel >= %{libclc_version} @@ -106,8 +106,8 @@ BuildRequires: clang-devel >= %{libclc_version} # BuildRequires: spirv-llvm-translator-tools # spirv-llvm-translator is a dependency of libclc -%global spirv_llvm_trans_ver 19.1.1 -%global spirv_llvm_trans_commit 90a976491d3847657396456e0e94d7dc48d35996 +%global spirv_llvm_trans_ver 20.1.0 +%global spirv_llvm_trans_commit 834db1a1985ac36d5a3e1b4b34dc1ca3f919ad5b %global spirv_llvm_trans_shortcommit %(c=%{spirv_llvm_trans_commit}; echo ${c:0:7}) Source4: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/archive/%{spirv_llvm_trans_commit}/spirv-llvm-translator-%{spirv_llvm_trans_shortcommit}.tar.gz BuildRequires: cmake @@ -119,15 +119,25 @@ BuildRequires: zlib-devel Patch10: gnome-shell-glthread-disable.patch -# AMD Navi4x support: -# Backport fixes for radeonsi and disable GFX12 on radv -# https://issues.redhat.com/browse/RHEL-53423 -Patch11: RHEL-53423.patch +# Backport of https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33805 +# to fix clover with libclc from LLVM 20. +Patch20: e4eb5e80c316c0af3fff310ca89e1175d81556c1.patch -# NV dGFX firefox fix -# Backport fixes for firefox browser issue seen on NV dGFX -# https://issues.redhat.com/browse/RHEL-108121 -Patch12: 32886.patch +# This patch makes Fedora CI fail and causes issues in QEMU. Revert it until +# we find a fix. +# https://bugzilla.redhat.com/show_bug.cgi?id=2360851 +# https://gitlab.freedesktop.org/mesa/mesa/-/issues/13009 +Patch40: 0001-Revert-kopper-Explicitly-choose-zink.patch + +# Upstream revert for gtk corruption on haswell +Patch50: 0001-Revert-hasvk-elk-stop-turning-load_push_constants-in.patch + +# Backport of https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32886 +# to fix firefox browser issue with NV dGFX driver +Patch60: 32886.patch + +# Fix s390 loop counters +Patch70: 0001-gallivm-handle-u8-u16-const-loads-properly-on-big-en.patch BuildRequires: meson BuildRequires: gcc @@ -139,7 +149,7 @@ BuildRequires: kernel-headers # We only check for the minimum version of pkgconfig(libdrm) needed so that the # SRPMs for each arch still have the same build dependencies. See: # https://bugzilla.redhat.com/show_bug.cgi?id=1859515 -BuildRequires: pkgconfig(libdrm) >= 2.4.121 +BuildRequires: pkgconfig(libdrm) >= 2.4.122 %if 0%{?with_libunwind} BuildRequires: pkgconfig(libunwind) %endif @@ -237,16 +247,15 @@ Obsoletes: mesa-omx-drivers < %{?epoch:%{epoch}:}%{version}-%{release} %package libGL Summary: Mesa libGL runtime libraries -Requires: %{name}-libglapi%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} Requires: libglvnd-glx%{?_isa} >= 1:1.3.2 -Recommends: %{name}-dri-drivers%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} +Requires: %{name}-dri-drivers%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} %description libGL %{summary}. %package libGL-devel Summary: Mesa libGL development package -Requires: %{name}-libGL%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} +Requires: (%{name}-libGL%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} if %{name}-libGL%{?_isa}) Requires: libglvnd-devel%{?_isa} >= 1:1.3.2 Provides: libGL-devel Provides: libGL-devel%{?_isa} @@ -259,15 +268,14 @@ Recommends: gl-manpages Summary: Mesa libEGL runtime libraries Requires: libglvnd-egl%{?_isa} >= 1:1.3.2 Requires: %{name}-libgbm%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} -Requires: %{name}-libglapi%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} -Recommends: %{name}-dri-drivers%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} +Requires: %{name}-dri-drivers%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} %description libEGL %{summary}. %package libEGL-devel Summary: Mesa libEGL development package -Requires: %{name}-libEGL%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} +Requires: (%{name}-libEGL%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} if %{name}-libEGL%{?_isa}) Requires: libglvnd-devel%{?_isa} >= 1:1.3.2 Requires: %{name}-khr-devel%{?_isa} Provides: libEGL-devel @@ -279,10 +287,11 @@ Provides: libEGL-devel%{?_isa} %package dri-drivers Summary: Mesa-based DRI drivers Requires: %{name}-filesystem%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} -Requires: %{name}-libglapi%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} %if 0%{?with_va} Recommends: %{name}-va-drivers%{?_isa} %endif +Obsoletes: %{name}-libglapi < 25.0.0~rc2-1 +Provides: %{name}-libglapi >= 25.0.0~rc2-1 %description dri-drivers %{summary}. @@ -308,7 +317,6 @@ Requires: %{name}-filesystem%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{rel %package libOSMesa Summary: Mesa offscreen rendering libraries -Requires: %{name}-libglapi%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} Provides: libOSMesa Provides: libOSMesa%{?_isa} @@ -363,22 +371,10 @@ Provides: libxatracker-devel%{?_isa} %{summary}. %endif -%package libglapi -Summary: Mesa shared glapi -Provides: libglapi -Provides: libglapi%{?_isa} -# If mesa-dri-drivers are installed, they must match in version. This is here to prevent using -# older mesa-dri-drivers together with a newer mesa-libglapi or its dependants. -# See https://bugzilla.redhat.com/show_bug.cgi?id=2193135 . -Requires: (%{name}-dri-drivers%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} if %{name}-dri-drivers%{?_isa}) - -%description libglapi -%{summary}. - %if 0%{?with_opencl} %package libOpenCL Summary: Mesa OpenCL runtime library -Requires: ocl-icd%{?_isa} +Requires: (ocl-icd%{?_isa} or OpenCL-ICD-Loader%{?_isa}) Requires: libclc%{?_isa} Requires: %{name}-libgbm%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release} Requires: opencl-filesystem @@ -497,12 +493,11 @@ export MESON_PACKAGE_CACHE_DIR="%{cargo_registry}/" %meson \ -Dplatforms=x11,wayland \ - -Ddri3=enabled \ -Dosmesa=true \ %if 0%{?with_hardware} - -Dgallium-drivers=swrast,virgl,nouveau%{?with_r300:,r300}%{?with_crocus:,crocus}%{?with_iris:,iris}%{?with_vmware:,svga}%{?with_radeonsi:,radeonsi}%{?with_r600:,r600}%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_v3d:,v3d}%{?with_lima:,lima}%{?with_panfrost:,panfrost}%{?with_vulkan_hw:,zink} \ + -Dgallium-drivers=llvmpipe,virgl,nouveau%{?with_r300:,r300}%{?with_crocus:,crocus}%{?with_i915:,i915}%{?with_iris:,iris}%{?with_vmware:,svga}%{?with_radeonsi:,radeonsi}%{?with_r600:,r600}%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_v3d:,v3d}%{?with_lima:,lima}%{?with_panfrost:,panfrost}%{?with_vulkan_hw:,zink} \ %else - -Dgallium-drivers=swrast,virgl \ + -Dgallium-drivers=llvmpipe,virgl \ %endif -Dgallium-vdpau=%{?with_vdpau:enabled}%{!?with_vdpau:disabled} \ -Dgallium-va=%{?with_va:enabled}%{!?with_va:disabled} \ @@ -592,7 +587,6 @@ popd %dir %{_includedir}/GL/internal %{_includedir}/GL/internal/dri_interface.h %{_libdir}/pkgconfig/dri.pc -%{_libdir}/libglapi.so %files libEGL %{_datadir}/glvnd/egl_vendor.d/50_mesa.json @@ -602,10 +596,6 @@ popd %{_includedir}/EGL/eglext_angle.h %{_includedir}/EGL/eglmesaext.h -%files libglapi -%{_libdir}/libglapi.so.0 -%{_libdir}/libglapi.so.0.* - %files libOSMesa %{_libdir}/libOSMesa.so.8* %files libOSMesa-devel @@ -670,6 +660,7 @@ popd %files dri-drivers %{_datadir}/drirc.d/00-mesa-defaults.conf %{_libdir}/libgallium-*.so +%{_libdir}/gbm/dri_gbm.so %{_libdir}/dri/kms_swrast_dri.so %{_libdir}/dri/libdril_dri.so %{_libdir}/dri/swrast_dri.so @@ -823,13 +814,28 @@ popd %endif %changelog -* Tue Aug 12 2025 Koichiro Iwao - 24.2.8-3.alma.1 +* Tue Nov 11 2025 Koichiro Iwao - 25.0.7-6.alma.1 - Enable vc4 and v3d for Raspberry Pi graphics in AlmaLinux (Resolves: https://github.com/AlmaLinux/raspberry-pi/issues/32) ## START: Generated by rpmautospec -* Thu Aug 07 2025 Anusha Srivatsa - 24.2.8-3 -- Bring in the fix for the firfox crashes seen on NV dGFX driver. +* Thu Oct 23 2025 José Expósito - 25.0.7-6 +- Rebuild for mesa-25.0.7-6.el10_1 + +* Thu Oct 23 2025 José Expósito - 25.0.7-5 +- Rebuild for mesa-25.0.7-5.el10_1 + +* Tue Oct 21 2025 Dave Airlie - 25.0.7-4 +- fix llvmpipe constant loads on s390x + +* Thu Jul 17 2025 Tomas Pelka - 25.0.7-3 +- Rebuild for mesa-25.0.7-3.el10 + +* Tue Jul 15 2025 Anusha Srivatsa - 25.0.7-2 +- backport !32886 + +* Wed Jun 18 2025 José Expósito - 25.0.7-1 +- Update to 25.0.7 * Wed Feb 26 2025 José Expósito - 24.2.8-2 - AMD Navi4x support diff --git a/sources b/sources index e9ebfb6..e18f958 100644 --- a/sources +++ b/sources @@ -1,3 +1,3 @@ -SHA512 (libclc-19.1.1.src.tar.xz) = 41ba80e2ed8f874d79c40cfbb1de89ad9e1ecc3709519e697617c14bc2583b2f8cdb8ca20bd2095b436afcc69144a6d88d4334de1e152d78ef3a19ec14c0733d -SHA512 (mesa-24.2.8.tar.xz) = 3aa1051a72e1428e42f9537d8f6a26f2ebddc78894e0f71d2cdcc9ed555ea4d6489ad8e74d4c59b8cdf7ea1c629fa725ac2fe1e385db5d3a582d8fe8186392d6 -SHA512 (spirv-llvm-translator-90a9764.tar.gz) = c0be7326fa76927f9900a9d91ced0035aeee66cdab35baa9c708b27c5d5e423f4819f148d17c3b7b812d1e8991e1057d71d52c22df2de37efdd72f1c20dcf05e +SHA512 (libclc-20.1.3.src.tar.xz) = ab6fb0dd0250ab9087b84cf6ec253473cdbcf473e24b626509f1aca1893718608ba31902fa6925ec99f64b1b06d60d49fecb2138c72c8aec433c124c57efad57 +SHA512 (mesa-25.0.7.tar.xz) = 825bbd8bc5507de147488519786c0200afacf97dae621c80ead24b2c5dd55c5a442757ac8452698ae611e9344025465080795cf8f2dc4eb7ce07b5cc521b2b5c +SHA512 (spirv-llvm-translator-834db1a.tar.gz) = 4fb522087728a76204d1db9fb782afbe9475e57135d56d4b694d111aa1092febebd829fe42007d2e15e2c9bd1222bab6c48e89181fc5bdae7f3628fdf7ad74ac