From 79d9f41ab8c9610ae78d212f180d09db22974ee7 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 9 Nov 2021 04:59:16 -0500 Subject: [PATCH] import mesa-21.1.5-1.el8 --- .gitignore | 2 +- .mesa.metadata | 2 +- SOURCES/Makefile | 2 +- SOURCES/anv-remove-warning.patch | 13 - SOURCES/cpu-affinity-fixes-20.3.3.patch | 1583 ----------------------- SOURCES/lavapipe-disable-env-var.patch | 16 +- SOURCES/mesa-20.3.3-stable-fixes.patch | 930 ------------- SOURCES/mesa-vk-wsi-sw-fixes.patch | 403 ++++++ SPECS/mesa.spec | 43 +- 9 files changed, 436 insertions(+), 2558 deletions(-) delete mode 100644 SOURCES/anv-remove-warning.patch delete mode 100644 SOURCES/cpu-affinity-fixes-20.3.3.patch delete mode 100644 SOURCES/mesa-20.3.3-stable-fixes.patch create mode 100644 SOURCES/mesa-vk-wsi-sw-fixes.patch diff --git a/.gitignore b/.gitignore index a4e3e90..056c3c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/mesa-20.3.3.tar.xz +SOURCES/mesa-21.1.5.tar.xz diff --git a/.mesa.metadata b/.mesa.metadata index 3d34979..b800049 100644 --- a/.mesa.metadata +++ b/.mesa.metadata @@ -1 +1 @@ -c0e42fada2b306a6d9740376398c0d8b0a130427 SOURCES/mesa-20.3.3.tar.xz +6962198a822b83195065611e253cde98f627e904 SOURCES/mesa-21.1.5.tar.xz diff --git a/SOURCES/Makefile b/SOURCES/Makefile index eea9f33..f17e9fd 100644 --- a/SOURCES/Makefile +++ b/SOURCES/Makefile @@ -1,4 +1,4 @@ -VERSION ?= 20.3.3 +VERSION ?= 21.1.5 SANITIZE ?= 1 DIRNAME = mesa-${VERSION} diff --git a/SOURCES/anv-remove-warning.patch b/SOURCES/anv-remove-warning.patch deleted file mode 100644 index 130a050..0000000 --- a/SOURCES/anv-remove-warning.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff -up mesa-20.3.3/src/intel/vulkan/anv_perf.c.dma mesa-20.3.3/src/intel/vulkan/anv_perf.c ---- mesa-20.3.3/src/intel/vulkan/anv_perf.c.dma 2021-02-16 12:56:09.881084752 +1000 -+++ mesa-20.3.3/src/intel/vulkan/anv_perf.c 2021-02-16 12:56:14.626213956 +1000 -@@ -47,9 +47,6 @@ anv_get_perf(const struct gen_device_inf - gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */); - - if (!perf->n_queries) { -- if (perf->platform_supported) -- mesa_logw("Performance support disabled, " -- "consider sysctl dev.i915.perf_stream_paranoid=0\n"); - goto err; - } - diff --git a/SOURCES/cpu-affinity-fixes-20.3.3.patch b/SOURCES/cpu-affinity-fixes-20.3.3.patch deleted file mode 100644 index d11f5c4..0000000 --- a/SOURCES/cpu-affinity-fixes-20.3.3.patch +++ /dev/null @@ -1,1583 +0,0 @@ -diff --git a/src/amd/compiler/tests/main.cpp b/src/amd/compiler/tests/main.cpp -index cb646e2dd30..eac0a244adf 100644 ---- a/src/amd/compiler/tests/main.cpp -+++ b/src/amd/compiler/tests/main.cpp -@@ -34,6 +34,8 @@ - #include "aco_ir.h" - #include "framework.h" - -+#include "util/u_cpu_detect.h" -+ - static const char *help_message = - "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n" - "\n" -@@ -227,6 +229,8 @@ int main(int argc, char **argv) - return 99; - } - -+ util_cpu_detect(); -+ - if (do_list) { - for (auto test : tests) - printf("%s\n", test.first.c_str()); -diff --git a/src/compiler/glsl/standalone.cpp b/src/compiler/glsl/standalone.cpp -index ca187001186..2714d8b95ed 100644 ---- a/src/compiler/glsl/standalone.cpp -+++ b/src/compiler/glsl/standalone.cpp -@@ -401,6 +401,8 @@ standalone_compile_shader(const struct standalone_options *_options, - int status = EXIT_SUCCESS; - bool glsl_es = false; - -+ util_cpu_detect(); -+ - options = _options; - - switch (options->glsl_version) { -diff --git a/src/compiler/nir/tests/negative_equal_tests.cpp b/src/compiler/nir/tests/negative_equal_tests.cpp -index f83041a4fbf..76472e48309 100644 ---- a/src/compiler/nir/tests/negative_equal_tests.cpp -+++ b/src/compiler/nir/tests/negative_equal_tests.cpp -@@ -36,6 +36,7 @@ protected: - const_value_negative_equal_test() - { - glsl_type_singleton_init_or_ref(); -+ util_cpu_detect(); - - memset(c1, 0, sizeof(c1)); - memset(c2, 0, sizeof(c2)); -@@ -55,6 +56,7 @@ protected: - alu_srcs_negative_equal_test() - { - glsl_type_singleton_init_or_ref(); -+ util_cpu_detect(); - - static const nir_shader_compiler_options options = { }; - nir_builder_init_simple_shader(&bld, NULL, MESA_SHADER_VERTEX, &options); -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c -index 165d73d94fc..33269e528fe 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c -@@ -104,13 +104,13 @@ lp_build_min_simple(struct lp_build_context *bld, - - /* TODO: optimize the constant case */ - -- if (type.floating && util_cpu_caps.has_sse) { -+ if (type.floating && util_get_cpu_caps()->has_sse) { - if (type.width == 32) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse.min.ss"; - intr_size = 128; - } -- else if (type.length <= 4 || !util_cpu_caps.has_avx) { -+ else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse.min.ps"; - intr_size = 128; - } -@@ -119,12 +119,12 @@ lp_build_min_simple(struct lp_build_context *bld, - intr_size = 256; - } - } -- if (type.width == 64 && util_cpu_caps.has_sse2) { -+ if (type.width == 64 && util_get_cpu_caps()->has_sse2) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse2.min.sd"; - intr_size = 128; - } -- else if (type.length == 2 || !util_cpu_caps.has_avx) { -+ else if (type.length == 2 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse2.min.pd"; - intr_size = 128; - } -@@ -134,7 +134,7 @@ lp_build_min_simple(struct lp_build_context *bld, - } - } - } -- else if (type.floating && util_cpu_caps.has_altivec) { -+ else if (type.floating && util_get_cpu_caps()->has_altivec) { - if (nan_behavior == GALLIVM_NAN_RETURN_NAN || - nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { - debug_printf("%s: altivec doesn't support nan return nan behavior\n", -@@ -144,7 +144,7 @@ lp_build_min_simple(struct lp_build_context *bld, - intrinsic = "llvm.ppc.altivec.vminfp"; - intr_size = 128; - } -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intr_size = 128; - if (type.width == 8) { - if (!type.sign) { -@@ -174,7 +174,7 @@ lp_build_min_simple(struct lp_build_context *bld, - * The sse intrinsics return the second operator in case of nan by - * default so we need to special code to handle those. - */ -- if (util_cpu_caps.has_sse && type.floating && -+ if (util_get_cpu_caps()->has_sse && type.floating && - nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN && - nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { -@@ -274,13 +274,13 @@ lp_build_max_simple(struct lp_build_context *bld, - - /* TODO: optimize the constant case */ - -- if (type.floating && util_cpu_caps.has_sse) { -+ if (type.floating && util_get_cpu_caps()->has_sse) { - if (type.width == 32) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse.max.ss"; - intr_size = 128; - } -- else if (type.length <= 4 || !util_cpu_caps.has_avx) { -+ else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse.max.ps"; - intr_size = 128; - } -@@ -289,12 +289,12 @@ lp_build_max_simple(struct lp_build_context *bld, - intr_size = 256; - } - } -- if (type.width == 64 && util_cpu_caps.has_sse2) { -+ if (type.width == 64 && util_get_cpu_caps()->has_sse2) { - if (type.length == 1) { - intrinsic = "llvm.x86.sse2.max.sd"; - intr_size = 128; - } -- else if (type.length == 2 || !util_cpu_caps.has_avx) { -+ else if (type.length == 2 || !util_get_cpu_caps()->has_avx) { - intrinsic = "llvm.x86.sse2.max.pd"; - intr_size = 128; - } -@@ -304,7 +304,7 @@ lp_build_max_simple(struct lp_build_context *bld, - } - } - } -- else if (type.floating && util_cpu_caps.has_altivec) { -+ else if (type.floating && util_get_cpu_caps()->has_altivec) { - if (nan_behavior == GALLIVM_NAN_RETURN_NAN || - nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { - debug_printf("%s: altivec doesn't support nan return nan behavior\n", -@@ -314,7 +314,7 @@ lp_build_max_simple(struct lp_build_context *bld, - intrinsic = "llvm.ppc.altivec.vmaxfp"; - intr_size = 128; - } -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intr_size = 128; - if (type.width == 8) { - if (!type.sign) { -@@ -338,7 +338,7 @@ lp_build_max_simple(struct lp_build_context *bld, - } - - if (intrinsic) { -- if (util_cpu_caps.has_sse && type.floating && -+ if (util_get_cpu_caps()->has_sse && type.floating && - nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && - nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN && - nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { -@@ -472,12 +472,12 @@ lp_build_add(struct lp_build_context *bld, - return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b); - } - if (type.width * type.length == 128) { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if (type.width == 16) -@@ -485,7 +485,7 @@ lp_build_add(struct lp_build_context *bld, - } - } - if (type.width * type.length == 256) { -- if (util_cpu_caps.has_avx2) { -+ if (util_get_cpu_caps()->has_avx2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if (type.width == 16) -@@ -713,11 +713,11 @@ lp_build_hadd_partial4(struct lp_build_context *bld, - tmp[2] = num_vecs > 2 ? vectors[2] : vectors[0]; - tmp[3] = num_vecs > 3 ? vectors[3] : vectors[0]; - -- if (util_cpu_caps.has_sse3 && bld->type.width == 32 && -+ if (util_get_cpu_caps()->has_sse3 && bld->type.width == 32 && - bld->type.length == 4) { - intrinsic = "llvm.x86.sse3.hadd.ps"; - } -- else if (util_cpu_caps.has_avx && bld->type.width == 32 && -+ else if (util_get_cpu_caps()->has_avx && bld->type.width == 32 && - bld->type.length == 8) { - intrinsic = "llvm.x86.avx.hadd.ps.256"; - } -@@ -796,12 +796,12 @@ lp_build_sub(struct lp_build_context *bld, - return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b); - } - if (type.width * type.length == 128) { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if (type.width == 16) - intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; - if (type.width == 16) -@@ -809,7 +809,7 @@ lp_build_sub(struct lp_build_context *bld, - } - } - if (type.width * type.length == 256) { -- if (util_cpu_caps.has_avx2) { -+ if (util_get_cpu_caps()->has_avx2) { - if (type.width == 8) - intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b"; - if (type.width == 16) -@@ -1078,8 +1078,8 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, - */ - if (LLVM_VERSION_MAJOR < 7 && - (bld->type.length == 4 || bld->type.length == 8) && -- ((util_cpu_caps.has_sse2 && (bld->type.sign == 0)) || -- util_cpu_caps.has_sse4_1)) { -+ ((util_get_cpu_caps()->has_sse2 && (bld->type.sign == 0)) || -+ util_get_cpu_caps()->has_sse4_1)) { - const char *intrinsic = NULL; - LLVMValueRef aeven, aodd, beven, bodd, muleven, mulodd; - LLVMValueRef shuf[LP_MAX_VECTOR_WIDTH / 32], shuf_vec; -@@ -1096,7 +1096,7 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, - aodd = LLVMBuildShuffleVector(builder, aeven, bld->undef, shuf_vec, ""); - bodd = LLVMBuildShuffleVector(builder, beven, bld->undef, shuf_vec, ""); - -- if (util_cpu_caps.has_avx2 && bld->type.length == 8) { -+ if (util_get_cpu_caps()->has_avx2 && bld->type.length == 8) { - if (bld->type.sign) { - intrinsic = "llvm.x86.avx2.pmul.dq"; - } else { -@@ -1331,8 +1331,8 @@ lp_build_div(struct lp_build_context *bld, - - /* fast rcp is disabled (just uses div), so makes no sense to try that */ - if(FALSE && -- ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) && -+ ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) || -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) && - type.floating) - return lp_build_mul(bld, a, lp_build_rcp(bld, b)); - -@@ -1745,7 +1745,7 @@ lp_build_abs(struct lp_build_context *bld, - return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a); - } - -- if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && LLVM_VERSION_MAJOR < 6) { -+ if(type.width*type.length == 128 && util_get_cpu_caps()->has_ssse3 && LLVM_VERSION_MAJOR < 6) { - switch(type.width) { - case 8: - return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); -@@ -1755,7 +1755,7 @@ lp_build_abs(struct lp_build_context *bld, - return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); - } - } -- else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && LLVM_VERSION_MAJOR < 6) { -+ else if (type.width*type.length == 256 && util_get_cpu_caps()->has_avx2 && LLVM_VERSION_MAJOR < 6) { - switch(type.width) { - case 8: - return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a); -@@ -1897,15 +1897,15 @@ lp_build_int_to_float(struct lp_build_context *bld, - static boolean - arch_rounding_available(const struct lp_type type) - { -- if ((util_cpu_caps.has_sse4_1 && -+ if ((util_get_cpu_caps()->has_sse4_1 && - (type.length == 1 || type.width*type.length == 128)) || -- (util_cpu_caps.has_avx && type.width*type.length == 256) || -- (util_cpu_caps.has_avx512f && type.width*type.length == 512)) -+ (util_get_cpu_caps()->has_avx && type.width*type.length == 256) || -+ (util_get_cpu_caps()->has_avx512f && type.width*type.length == 512)) - return TRUE; -- else if ((util_cpu_caps.has_altivec && -+ else if ((util_get_cpu_caps()->has_altivec && - (type.width == 32 && type.length == 4))) - return TRUE; -- else if (util_cpu_caps.has_neon) -+ else if (util_get_cpu_caps()->has_neon) - return TRUE; - - return FALSE; -@@ -1935,7 +1935,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, - assert(type.width == 32); - - assert(lp_check_value(type, a)); -- assert(util_cpu_caps.has_sse2); -+ assert(util_get_cpu_caps()->has_sse2); - - /* This is relying on MXCSR rounding mode, which should always be nearest. */ - if (type.length == 1) { -@@ -1961,7 +1961,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, - } - else { - assert(type.width*type.length == 256); -- assert(util_cpu_caps.has_avx); -+ assert(util_get_cpu_caps()->has_avx); - - intrinsic = "llvm.x86.avx.cvt.ps2dq.256"; - } -@@ -1987,7 +1987,7 @@ lp_build_round_altivec(struct lp_build_context *bld, - assert(type.floating); - - assert(lp_check_value(type, a)); -- assert(util_cpu_caps.has_altivec); -+ assert(util_get_cpu_caps()->has_altivec); - - (void)type; - -@@ -2014,7 +2014,7 @@ lp_build_round_arch(struct lp_build_context *bld, - LLVMValueRef a, - enum lp_build_round_mode mode) - { -- if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) { -+ if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon) { - LLVMBuilderRef builder = bld->gallivm->builder; - const struct lp_type type = bld->type; - const char *intrinsic_root; -@@ -2042,7 +2042,7 @@ lp_build_round_arch(struct lp_build_context *bld, - lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type); - return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); - } -- else /* (util_cpu_caps.has_altivec) */ -+ else /* (util_get_cpu_caps()->has_altivec) */ - return lp_build_round_altivec(bld, a, mode); - } - -@@ -2377,9 +2377,9 @@ lp_build_iround(struct lp_build_context *bld, - - assert(lp_check_value(type, a)); - -- if ((util_cpu_caps.has_sse2 && -+ if ((util_get_cpu_caps()->has_sse2 && - ((type.width == 32) && (type.length == 1 || type.length == 4))) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) { -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) { - return lp_build_iround_nearest_sse2(bld, a); - } - if (arch_rounding_available(type)) { -@@ -2664,8 +2664,8 @@ lp_build_rcp(struct lp_build_context *bld, - * particular uses that require less workarounds. - */ - -- if (FALSE && ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8))){ -+ if (FALSE && ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) || -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8))){ - const unsigned num_iterations = 0; - LLVMValueRef res; - unsigned i; -@@ -2784,8 +2784,8 @@ lp_build_fast_rsqrt_available(struct lp_type type) - { - assert(type.floating); - -- if ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) || -- (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) { -+ if ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) || -+ (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) { - return true; - } - return false; -@@ -3694,7 +3694,7 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm, - LLVMValueRef - lp_build_fpstate_get(struct gallivm_state *gallivm) - { -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef mxcsr_ptr = lp_build_alloca( - gallivm, -@@ -3715,7 +3715,7 @@ void - lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, - boolean zero) - { -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - /* turn on DAZ (64) | FTZ (32768) = 32832 if available */ - int daz_ftz = _MM_FLUSH_ZERO_MASK; - -@@ -3724,7 +3724,7 @@ lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm, - LLVMValueRef mxcsr = - LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr"); - -- if (util_cpu_caps.has_daz) { -+ if (util_get_cpu_caps()->has_daz) { - /* Enable denormals are zero mode */ - daz_ftz |= _MM_DENORMALS_ZERO_MASK; - } -@@ -3745,7 +3745,7 @@ void - lp_build_fpstate_set(struct gallivm_state *gallivm, - LLVMValueRef mxcsr_ptr) - { -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - LLVMBuilderRef builder = gallivm->builder; - mxcsr_ptr = LLVMBuildPointerCast(builder, mxcsr_ptr, - LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), ""); -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c -index c68b8850473..af445b00c1a 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c -@@ -101,7 +101,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm, - LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type); - LLVMValueRef h; - -- if (util_cpu_caps.has_f16c && -+ if (util_get_cpu_caps()->has_f16c && - (src_length == 4 || src_length == 8)) { - if (LLVM_VERSION_MAJOR < 11) { - const char *intrinsic = NULL; -@@ -167,7 +167,7 @@ lp_build_float_to_half(struct gallivm_state *gallivm, - * useless. - */ - -- if (util_cpu_caps.has_f16c && -+ if (util_get_cpu_caps()->has_f16c && - (length == 4 || length == 8)) { - struct lp_type i168_type = lp_type_int_vec(16, 16 * 8); - unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */ -@@ -489,7 +489,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm, - - /* Special case 4x4x32 --> 1x16x8 */ - if (src_type.length == 4 && -- (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec)) -+ (util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec)) - { - num_dsts = (num_srcs + 3) / 4; - dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4; -@@ -500,7 +500,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm, - - /* Special case 2x8x32 --> 1x16x8 */ - if (src_type.length == 8 && -- util_cpu_caps.has_avx) -+ util_get_cpu_caps()->has_avx) - { - num_dsts = (num_srcs + 1) / 2; - dst_type->length = num_srcs * 8 >= 16 ? 16 : num_srcs * 8; -@@ -597,7 +597,7 @@ lp_build_conv(struct gallivm_state *gallivm, - ((dst_type.length == 16 && 4 * num_dsts == num_srcs) || - (num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) && - -- (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec)) -+ (util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec)) - { - struct lp_build_context bld; - struct lp_type int16_type, int32_type; -@@ -710,7 +710,7 @@ lp_build_conv(struct gallivm_state *gallivm, - ((dst_type.length == 16 && 2 * num_dsts == num_srcs) || - (num_dsts == 1 && dst_type.length * num_srcs == 8)) && - -- util_cpu_caps.has_avx) { -+ util_get_cpu_caps()->has_avx) { - - struct lp_build_context bld; - struct lp_type int16_type, int32_type; -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c -index 174857e06d9..e17c7881e7d 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c -@@ -642,8 +642,8 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm, - * XXX with sse2 and 16x8 vectors, should use pavgb even when n == 1. - * Much cheaper (but we don't care that much if n == 1). - */ -- if ((util_cpu_caps.has_sse2 && n == 4) || -- (util_cpu_caps.has_avx2 && n == 8)) { -+ if ((util_get_cpu_caps()->has_sse2 && n == 4) || -+ (util_get_cpu_caps()->has_avx2 && n == 8)) { - color2_2 = lp_build_pavgb(&bld8, colors0, colors1); - color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, ""); - } -@@ -1350,7 +1350,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm, - if (is_dxt1_variant) { - LLVMValueRef color23_2, color2_2; - -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - LLVMValueRef intrargs[2]; - intrargs[0] = LLVMBuildBitCast(builder, color01, bld8.vec_type, ""); - /* same interleave as for lerp23 - correct result in 2nd element */ -@@ -1389,7 +1389,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm, - color23 = lp_build_select(&bld32, sel_mask, color23, color23_2); - } - -- if (util_cpu_caps.has_ssse3) { -+ if (util_get_cpu_caps()->has_ssse3) { - /* - * Use pshufb as mini-lut. (Only doable with intrinsics as the - * final shuffles are non-constant. pshufb is awesome!) -@@ -1689,7 +1689,7 @@ s3tc_decode_block_dxt5(struct gallivm_state *gallivm, - type16.sign = FALSE; - sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, ""); - -- if (!util_cpu_caps.has_ssse3) { -+ if (!util_get_cpu_caps()->has_ssse3) { - LLVMValueRef acodeg, mask1, acode0, acode1; - - /* extraction of the 3 bit values into something more useful is HARD */ -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c -index 121452d7596..97deffe1de0 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c -@@ -90,7 +90,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm, - * per element. Didn't measure performance but cuts shader size - * by quite a bit (less difference if cpu has no sse4.1 support). - */ -- if (util_cpu_caps.has_sse2 && n > 1) { -+ if (util_get_cpu_caps()->has_sse2 && n > 1) { - LLVMValueRef sel, tmp, tmp2; - struct lp_build_context bld32; - -@@ -174,7 +174,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm, - * per element. Didn't measure performance but cuts shader size - * by quite a bit (less difference if cpu has no sse4.1 support). - */ -- if (util_cpu_caps.has_sse2 && n > 1) { -+ if (util_get_cpu_caps()->has_sse2 && n > 1) { - LLVMValueRef sel, tmp; - struct lp_build_context bld32; - -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c -index e991b0dc375..42cc17371a0 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c -@@ -488,7 +488,7 @@ lp_build_gather(struct gallivm_state *gallivm, - * 32bit/64bit fetches you're doing it wrong (this is gather, not - * conversion) and it would be awkward for floats. - */ -- } else if (util_cpu_caps.has_avx2 && !need_expansion && -+ } else if (util_get_cpu_caps()->has_avx2 && !need_expansion && - src_width == 32 && (length == 4 || length == 8)) { - return lp_build_gather_avx2(gallivm, length, src_width, dst_type, - base_ptr, offsets); -@@ -500,7 +500,7 @@ lp_build_gather(struct gallivm_state *gallivm, - * (In general, should be more of a win if the fetch is 256bit wide - - * this is true for the 32bit case above too.) - */ -- } else if (0 && util_cpu_caps.has_avx2 && !need_expansion && -+ } else if (0 && util_get_cpu_caps()->has_avx2 && !need_expansion && - src_width == 64 && (length == 2 || length == 4)) { - return lp_build_gather_avx2(gallivm, length, src_width, dst_type, - base_ptr, offsets); -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c -index 685ed0e58aa..dd428242cb9 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_init.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c -@@ -433,6 +433,7 @@ lp_build_init(void) - /* For simulating less capable machines */ - #ifdef DEBUG - if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) { -+ extern struct util_cpu_caps_t util_cpu_caps; - assert(util_cpu_caps.has_sse2); - util_cpu_caps.has_sse3 = 0; - util_cpu_caps.has_ssse3 = 0; -@@ -445,7 +446,7 @@ lp_build_init(void) - } - #endif - -- if (util_cpu_caps.has_avx2 || util_cpu_caps.has_avx) { -+ if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) { - lp_native_vector_width = 256; - } else { - /* Leave it at 128, even when no SIMD extensions are available. -@@ -460,16 +461,16 @@ lp_build_init(void) - #if LLVM_VERSION_MAJOR < 4 - if (lp_native_vector_width <= 128) { - /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by -- * "util_cpu_caps.has_avx" predicate, and lack the -+ * "util_get_cpu_caps()->has_avx" predicate, and lack the - * "lp_native_vector_width > 128" predicate. And also to ensure a more - * consistent behavior, allowing one to test SSE2 on AVX machines. - * XXX: should not play games with util_cpu_caps directly as it might - * get used for other things outside llvm too. - */ -- util_cpu_caps.has_avx = 0; -- util_cpu_caps.has_avx2 = 0; -- util_cpu_caps.has_f16c = 0; -- util_cpu_caps.has_fma = 0; -+ util_get_cpu_caps()->has_avx = 0; -+ util_get_cpu_caps()->has_avx2 = 0; -+ util_get_cpu_caps()->has_f16c = 0; -+ util_get_cpu_caps()->has_fma = 0; - } - #endif - -@@ -482,7 +483,7 @@ lp_build_init(void) - * Right now denorms get explicitly disabled (but elsewhere) for x86, - * whereas ppc64 explicitly enables them... - */ -- if (util_cpu_caps.has_altivec) { -+ if (util_get_cpu_caps()->has_altivec) { - unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF, 0xFFFE, 0xFFFF }; - __asm ( -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c -index 315977ae745..3ed3b5a74b1 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c -@@ -196,7 +196,7 @@ lp_build_compare(struct gallivm_state *gallivm, - - if (!type.floating && !type.sign && - type.width * type.length == 128 && -- util_cpu_caps.has_sse2 && -+ util_get_cpu_caps()->has_sse2 && - (func == PIPE_FUNC_LESS || - func == PIPE_FUNC_LEQUAL || - func == PIPE_FUNC_GREATER || -@@ -348,11 +348,11 @@ lp_build_select(struct lp_build_context *bld, - - res = LLVMBuildSelect(builder, mask, a, b, ""); - } -- else if (((util_cpu_caps.has_sse4_1 && -+ else if (((util_get_cpu_caps()->has_sse4_1 && - type.width * type.length == 128) || -- (util_cpu_caps.has_avx && -+ (util_get_cpu_caps()->has_avx && - type.width * type.length == 256 && type.width >= 32) || -- (util_cpu_caps.has_avx2 && -+ (util_get_cpu_caps()->has_avx2 && - type.width * type.length == 256)) && - !LLVMIsConstant(a) && - !LLVMIsConstant(b) && -@@ -379,7 +379,7 @@ lp_build_select(struct lp_build_context *bld, - intrinsic = "llvm.x86.avx.blendv.ps.256"; - arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); - } else { -- assert(util_cpu_caps.has_avx2); -+ assert(util_get_cpu_caps()->has_avx2); - intrinsic = "llvm.x86.avx2.pblendvb"; - arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32); - } -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp -index 9b75676a4e2..4f3e696816c 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp -+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp -@@ -400,22 +400,22 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - * http://llvm.org/PR19429 - * http://llvm.org/PR16721 - */ -- MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" ); -- MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" ); -- MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" ); -- MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" ); -- MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1"); -- MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2"); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse ? "+sse" : "-sse" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse2 ? "+sse2" : "-sse2" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse3 ? "+sse3" : "-sse3" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_ssse3 ? "+ssse3" : "-ssse3" ); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse4_1 ? "+sse4.1" : "-sse4.1"); -+ MAttrs.push_back(util_get_cpu_caps()->has_sse4_2 ? "+sse4.2" : "-sse4.2"); - /* - * AVX feature is not automatically detected from CPUID by the X86 target - * yet, because the old (yet default) JIT engine is not capable of - * emitting the opcodes. On newer llvm versions it is and at least some - * versions (tested with 3.3) will emit avx opcodes without this anyway. - */ -- MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx"); -- MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c"); -- MAttrs.push_back(util_cpu_caps.has_fma ? "+fma" : "-fma"); -- MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2"); -+ MAttrs.push_back(util_get_cpu_caps()->has_avx ? "+avx" : "-avx"); -+ MAttrs.push_back(util_get_cpu_caps()->has_f16c ? "+f16c" : "-f16c"); -+ MAttrs.push_back(util_get_cpu_caps()->has_fma ? "+fma" : "-fma"); -+ MAttrs.push_back(util_get_cpu_caps()->has_avx2 ? "+avx2" : "-avx2"); - /* disable avx512 and all subvariants */ - MAttrs.push_back("-avx512cd"); - MAttrs.push_back("-avx512er"); -@@ -426,7 +426,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - MAttrs.push_back("-avx512vl"); - #endif - #if defined(PIPE_ARCH_ARM) -- if (!util_cpu_caps.has_neon) { -+ if (!util_get_cpu_caps()->has_neon) { - MAttrs.push_back("-neon"); - MAttrs.push_back("-crypto"); - MAttrs.push_back("-vfp2"); -@@ -434,7 +434,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - #endif - - #if defined(PIPE_ARCH_PPC) -- MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec"); -+ MAttrs.push_back(util_get_cpu_caps()->has_altivec ? "+altivec" : "-altivec"); - #if (LLVM_VERSION_MAJOR < 4) - /* - * Make sure VSX instructions are disabled -@@ -444,7 +444,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0) - * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0) - */ -- if (util_cpu_caps.has_altivec) { -+ if (util_get_cpu_caps()->has_altivec) { - MAttrs.push_back("-vsx"); - } - #else -@@ -458,8 +458,8 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, - * Make sure VSX instructions are ENABLED (if supported), unless - * VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0. - */ -- if (util_cpu_caps.has_altivec) { -- MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx"); -+ if (util_get_cpu_caps()->has_altivec) { -+ MAttrs.push_back(util_get_cpu_caps()->has_vsx ? "+vsx" : "-vsx"); - } - #endif - #endif -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c -index e1f652a9342..76e57c52f80 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c -@@ -322,7 +322,7 @@ lp_build_interleave2(struct gallivm_state *gallivm, - { - LLVMValueRef shuffle; - -- if (type.length == 2 && type.width == 128 && util_cpu_caps.has_avx) { -+ if (type.length == 2 && type.width == 128 && util_get_cpu_caps()->has_avx) { - /* - * XXX: This is a workaround for llvm code generation deficiency. Strangely - * enough, while this needs vinsertf128/vextractf128 instructions (hence -@@ -484,7 +484,7 @@ lp_build_unpack2_native(struct gallivm_state *gallivm, - - /* Interleave bits */ - #if UTIL_ARCH_LITTLE_ENDIAN -- if (src_type.length * src_type.width == 256 && util_cpu_caps.has_avx2) { -+ if (src_type.length * src_type.width == 256 && util_get_cpu_caps()->has_avx2) { - *dst_lo = lp_build_interleave2_half(gallivm, src_type, src, msb, 0); - *dst_hi = lp_build_interleave2_half(gallivm, src_type, src, msb, 1); - } else { -@@ -585,22 +585,22 @@ lp_build_pack2(struct gallivm_state *gallivm, - assert(src_type.length * 2 == dst_type.length); - - /* Check for special cases first */ -- if ((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) && -+ if ((util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec) && - src_type.width * src_type.length >= 128) { - const char *intrinsic = NULL; - boolean swap_intrinsic_operands = FALSE; - - switch(src_type.width) { - case 32: -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - if (dst_type.sign) { - intrinsic = "llvm.x86.sse2.packssdw.128"; - } else { -- if (util_cpu_caps.has_sse4_1) { -+ if (util_get_cpu_caps()->has_sse4_1) { - intrinsic = "llvm.x86.sse41.packusdw"; - } - } -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - if (dst_type.sign) { - intrinsic = "llvm.ppc.altivec.vpkswss"; - } else { -@@ -613,18 +613,18 @@ lp_build_pack2(struct gallivm_state *gallivm, - break; - case 16: - if (dst_type.sign) { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - intrinsic = "llvm.x86.sse2.packsswb.128"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intrinsic = "llvm.ppc.altivec.vpkshss"; - #if UTIL_ARCH_LITTLE_ENDIAN - swap_intrinsic_operands = TRUE; - #endif - } - } else { -- if (util_cpu_caps.has_sse2) { -+ if (util_get_cpu_caps()->has_sse2) { - intrinsic = "llvm.x86.sse2.packuswb.128"; -- } else if (util_cpu_caps.has_altivec) { -+ } else if (util_get_cpu_caps()->has_altivec) { - intrinsic = "llvm.ppc.altivec.vpkshus"; - #if UTIL_ARCH_LITTLE_ENDIAN - swap_intrinsic_operands = TRUE; -@@ -740,7 +740,7 @@ lp_build_pack2_native(struct gallivm_state *gallivm, - - /* At this point only have special case for avx2 */ - if (src_type.length * src_type.width == 256 && -- util_cpu_caps.has_avx2) { -+ util_get_cpu_caps()->has_avx2) { - switch(src_type.width) { - case 32: - if (dst_type.sign) { -@@ -793,7 +793,7 @@ lp_build_packs2(struct gallivm_state *gallivm, - - /* All X86 SSE non-interleaved pack instructions take signed inputs and - * saturate them, so no need to clamp for those cases. */ -- if(util_cpu_caps.has_sse2 && -+ if(util_get_cpu_caps()->has_sse2 && - src_type.width * src_type.length >= 128 && - src_type.sign && - (src_type.width == 32 || src_type.width == 16)) -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c -index 686abc08620..98dcde912b5 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c -@@ -1152,7 +1152,7 @@ lp_build_minify(struct lp_build_context *bld, - LLVMValueRef size; - assert(bld->type.sign); - if (lod_scalar || -- (util_cpu_caps.has_avx2 || !util_cpu_caps.has_sse)) { -+ (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) { - size = LLVMBuildLShr(builder, base_size, level, "minify"); - size = lp_build_max(bld, size, bld->one); - } -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c -index 2b91edd37c7..6e47640e70d 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c -@@ -3234,7 +3234,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, - * as it appears to be a loss with just AVX) - */ - if (num_quads == 1 || !use_aos || -- (util_cpu_caps.has_avx2 && -+ (util_get_cpu_caps()->has_avx2 && - (bld.num_lods == 1 || - derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) { - if (use_aos) { -diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c -index b1c8b990ef1..03b11f914b4 100644 ---- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c -+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c -@@ -35,10 +35,10 @@ - - DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", false); - --static struct util_cpu_caps *get_cpu_caps(void) -+static const struct util_cpu_caps_t *get_cpu_caps(void) - { - util_cpu_detect(); -- return &util_cpu_caps; -+ return util_get_cpu_caps(); - } - - int rtasm_cpu_has_sse(void) -diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c -index ad687f32853..ddd65fb6a08 100644 ---- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c -+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c -@@ -2152,17 +2152,17 @@ static void x86_init_func_common( struct x86_function *p ) - { - util_cpu_detect(); - p->caps = 0; -- if(util_cpu_caps.has_mmx) -+ if(util_get_cpu_caps()->has_mmx) - p->caps |= X86_MMX; -- if(util_cpu_caps.has_mmx2) -+ if(util_get_cpu_caps()->has_mmx2) - p->caps |= X86_MMX2; -- if(util_cpu_caps.has_sse) -+ if(util_get_cpu_caps()->has_sse) - p->caps |= X86_SSE; -- if(util_cpu_caps.has_sse2) -+ if(util_get_cpu_caps()->has_sse2) - p->caps |= X86_SSE2; -- if(util_cpu_caps.has_sse3) -+ if(util_get_cpu_caps()->has_sse3) - p->caps |= X86_SSE3; -- if(util_cpu_caps.has_sse4_1) -+ if(util_get_cpu_caps()->has_sse4_1) - p->caps |= X86_SSE4_1; - p->csr = p->store; - #if defined(PIPE_ARCH_X86) -diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c -index 1eaff77724e..bf56993db09 100644 ---- a/src/gallium/auxiliary/util/u_threaded_context.c -+++ b/src/gallium/auxiliary/util/u_threaded_context.c -@@ -2071,8 +2071,8 @@ tc_set_context_param(struct pipe_context *_pipe, - if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) { - /* Pin the gallium thread as requested. */ - util_set_thread_affinity(tc->queue.threads[0], -- util_cpu_caps.L3_affinity_mask[value], -- NULL, UTIL_MAX_CPUS); -+ util_get_cpu_caps()->L3_affinity_mask[value], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); - - /* Execute this immediately (without enqueuing). - * It's required to be thread-safe. -@@ -2720,7 +2720,7 @@ threaded_context_create(struct pipe_context *pipe, - - util_cpu_detect(); - -- if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1)) -+ if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1)) - return pipe; - - tc = os_malloc_aligned(sizeof(struct threaded_context), 16); -diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c -index 64cf72ae101..913c1bd2462 100644 ---- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c -+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c -@@ -435,7 +435,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, - assert(type.length <= 16); - assert(type.floating); - -- if(util_cpu_caps.has_sse && type.length == 4) { -+ if(util_get_cpu_caps()->has_sse && type.length == 4) { - const char *movmskintr = "llvm.x86.sse.movmsk.ps"; - const char *popcntintr = "llvm.ctpop.i32"; - LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, -@@ -446,7 +446,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, - LLVMInt32TypeInContext(context), bits); - count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); - } -- else if(util_cpu_caps.has_avx && type.length == 8) { -+ else if(util_get_cpu_caps()->has_avx && type.length == 8) { - const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; - const char *popcntintr = "llvm.ctpop.i32"; - LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, -diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c -index f133bbf8a4d..628a4338c1e 100644 ---- a/src/gallium/drivers/llvmpipe/lp_screen.c -+++ b/src/gallium/drivers/llvmpipe/lp_screen.c -@@ -915,7 +915,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys) - - screen->allow_cl = !!getenv("LP_CL"); - screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR); -- screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; -+ screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0; - #ifdef EMBEDDED_DEVICE - screen->num_threads = 0; - #endif -diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c -index 873dcf37fac..725854cc25c 100644 ---- a/src/gallium/drivers/llvmpipe/lp_test_arit.c -+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c -@@ -382,7 +382,7 @@ flush_denorm_to_zero(float val) - fi_val.f = val; - - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - if ((fi_val.ui & 0x7f800000) == 0) { - fi_val.ui &= 0xff800000; - } -@@ -458,7 +458,7 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned - continue; - } - -- if (!util_cpu_caps.has_neon && -+ if (!util_get_cpu_caps()->has_neon && - test->ref == &nearbyintf && length == 2 && - ref != roundf(testval)) { - /* FIXME: The generic (non SSE) path in lp_build_iround, which is -diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c -index 2bf223d66f9..815736166d5 100644 ---- a/src/gallium/drivers/llvmpipe/lp_texture.c -+++ b/src/gallium/drivers/llvmpipe/lp_texture.c -@@ -85,7 +85,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, - * of a block for all formats) though this should not be strictly necessary - * neither. In any case it can only affect compressed or 1d textures. - */ -- unsigned mip_align = MAX2(64, util_cpu_caps.cacheline); -+ unsigned mip_align = MAX2(64, util_get_cpu_caps()->cacheline); - - assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS); - assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS); -@@ -123,7 +123,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, - if (util_format_is_compressed(pt->format)) - lpr->row_stride[level] = nblocksx * block_size; - else -- lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline); -+ lpr->row_stride[level] = align(nblocksx * block_size, util_get_cpu_caps()->cacheline); - - /* if row_stride * height > LP_MAX_TEXTURE_SIZE */ - if ((uint64_t)lpr->row_stride[level] * nblocksy > LP_MAX_TEXTURE_SIZE) { -diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp -index 97db7ca3e8b..d891b6b14e8 100644 ---- a/src/gallium/drivers/swr/swr_loader.cpp -+++ b/src/gallium/drivers/swr/swr_loader.cpp -@@ -91,7 +91,7 @@ swr_create_screen(struct sw_winsys *winsys) - - util_cpu_detect(); - -- if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) { -+ if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512er) { - swr_print_info("SWR detected KNL instruction support "); - #ifndef HAVE_SWR_KNL - swr_print_info("(skipping: not built).\n"); -@@ -103,7 +103,7 @@ swr_create_screen(struct sw_winsys *winsys) - #endif - } - -- if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) { -+ if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512bw) { - swr_print_info("SWR detected SKX instruction support "); - #ifndef HAVE_SWR_SKX - swr_print_info("(skipping not built).\n"); -@@ -113,7 +113,7 @@ swr_create_screen(struct sw_winsys *winsys) - #endif - } - -- if (util_cpu_caps.has_avx2) { -+ if (util_get_cpu_caps()->has_avx2) { - swr_print_info("SWR detected AVX2 instruction support "); - #ifndef HAVE_SWR_AVX2 - swr_print_info("(skipping not built).\n"); -@@ -123,7 +123,7 @@ swr_create_screen(struct sw_winsys *winsys) - #endif - } - -- if (util_cpu_caps.has_avx) { -+ if (util_get_cpu_caps()->has_avx) { - swr_print_info("SWR detected AVX instruction support "); - #ifndef HAVE_SWR_AVX - swr_print_info("(skipping not built).\n"); -diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h -index 66767e7f1f8..5afe32939a8 100644 ---- a/src/gallium/drivers/vc4/vc4_tiling.h -+++ b/src/gallium/drivers/vc4/vc4_tiling.h -@@ -90,7 +90,7 @@ vc4_load_lt_image(void *dst, uint32_t dst_stride, - int cpp, const struct pipe_box *box) - { - #ifdef USE_ARM_ASM -- if (util_cpu_caps.has_neon) { -+ if (util_get_cpu_caps()->has_neon) { - vc4_load_lt_image_neon(dst, dst_stride, src, src_stride, - cpp, box); - return; -@@ -106,7 +106,7 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride, - int cpp, const struct pipe_box *box) - { - #ifdef USE_ARM_ASM -- if (util_cpu_caps.has_neon) { -+ if (util_get_cpu_caps()->has_neon) { - vc4_store_lt_image_neon(dst, dst_stride, src, src_stride, - cpp, box); - return; -diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c -index 4d9c4e27ebf..782f16e7f78 100644 ---- a/src/gallium/tests/unit/translate_test.c -+++ b/src/gallium/tests/unit/translate_test.c -@@ -50,6 +50,7 @@ int main(int argc, char** argv) - { - struct translate *(*create_fn)(const struct translate_key *key) = 0; - -+ extern struct util_cpu_caps_t util_cpu_caps; - struct translate_key key; - unsigned output_format; - unsigned input_format; -@@ -87,7 +88,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse")) - { -- if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE (test with qemu)\n"); - return 2; -@@ -99,7 +100,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse2")) - { -- if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse2 || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE2 (test with qemu)\n"); - return 2; -@@ -110,7 +111,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse3")) - { -- if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse3 || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE3 (test with qemu)\n"); - return 2; -@@ -120,7 +121,7 @@ int main(int argc, char** argv) - } - else if (!strcmp(argv[1], "sse4.1")) - { -- if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse()) -+ if(!util_get_cpu_caps()->has_sse4_1 || !rtasm_cpu_has_sse()) - { - printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n"); - return 2; -diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c -index 7f2eba9382b..4474cfb82b0 100644 ---- a/src/gallium/tests/unit/u_half_test.c -+++ b/src/gallium/tests/unit/u_half_test.c -@@ -36,13 +36,14 @@ test(void) - int - main(int argc, char **argv) - { -- assert(!util_cpu_caps.has_f16c); -+ util_cpu_detect(); - test(); - -- /* Test f16c. */ -- util_cpu_detect(); -- if (util_cpu_caps.has_f16c) -+ /* Test non-f16c. */ -+ if (util_get_cpu_caps()->has_f16c) { -+ ((struct util_cpu_caps_t *)util_get_cpu_caps())->has_f16c = false; - test(); -+ } - - printf("Success!\n"); - return 0; -diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c -index 8a0aedfed64..a18362ce6ea 100644 ---- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c -+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c -@@ -312,8 +312,8 @@ static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws, - struct amdgpu_winsys *ws = amdgpu_winsys(rws); - - util_set_thread_affinity(ws->cs_queue.threads[0], -- util_cpu_caps.L3_affinity_mask[cache], -- NULL, UTIL_MAX_CPUS); -+ util_get_cpu_caps()->L3_affinity_mask[cache], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); - } - - static uint32_t kms_handle_hash(const void *key) -diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c -index f0e1b9f7df3..4430ce50466 100644 ---- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c -+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c -@@ -801,8 +801,8 @@ static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws, - - if (util_queue_is_initialized(&rws->cs_queue)) { - util_set_thread_affinity(rws->cs_queue.threads[0], -- util_cpu_caps.L3_affinity_mask[cache], -- NULL, UTIL_MAX_CPUS); -+ util_get_cpu_caps()->L3_affinity_mask[cache], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); - } - } - -diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c -index eb8eb30cabc..c9dfef541fc 100644 ---- a/src/mesa/main/glthread.c -+++ b/src/mesa/main/glthread.c -@@ -199,19 +199,20 @@ _mesa_glthread_flush_batch(struct gl_context *ctx) - /* Pin threads regularly to the same Zen CCX that the main thread is - * running on. The main thread can move between CCXs. - */ -- if (util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 && -+ if (util_get_cpu_caps()->nr_cpus != util_get_cpu_caps()->cores_per_L3 && - /* driver support */ - ctx->Driver.PinDriverToL3Cache && - ++glthread->pin_thread_counter % 128 == 0) { - int cpu = util_get_current_cpu(); - - if (cpu >= 0) { -- unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu]; -- -- util_set_thread_affinity(glthread->queue.threads[0], -- util_cpu_caps.L3_affinity_mask[L3_cache], -- NULL, UTIL_MAX_CPUS); -- ctx->Driver.PinDriverToL3Cache(ctx, L3_cache); -+ uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu]; -+ if (L3_cache != U_CPU_INVALID_L3) { -+ util_set_thread_affinity(glthread->queue.threads[0], -+ util_get_cpu_caps()->L3_affinity_mask[L3_cache], -+ NULL, util_get_cpu_caps()->num_cpu_mask_bits); -+ ctx->Driver.PinDriverToL3Cache(ctx, L3_cache); -+ } - } - } - -diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c -index 40364296664..f27fa7ff29c 100644 ---- a/src/mesa/state_tracker/st_context.c -+++ b/src/mesa/state_tracker/st_context.c -@@ -815,6 +815,10 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe, - !st->lower_ucp; - st->shader_has_one_variant[MESA_SHADER_COMPUTE] = st->has_shareable_shaders; - -+ if (util_get_cpu_caps()->cores_per_L3 == util_get_cpu_caps()->nr_cpus || -+ !st->pipe->set_context_param) -+ st->pin_thread_counter = ST_L3_PINNING_DISABLED; -+ - st->bitmap.cache.empty = true; - - if (ctx->Const.ForceGLNamesReuse && ctx->Shared->RefCount == 1) { -diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h -index b1fda06ff3e..9ab6969de62 100644 ---- a/src/mesa/state_tracker/st_context.h -+++ b/src/mesa/state_tracker/st_context.h -@@ -55,6 +55,7 @@ struct st_program; - struct st_perf_monitor_group; - struct u_upload_mgr; - -+#define ST_L3_PINNING_DISABLED 0xffffffff - - struct st_bitmap_cache - { -@@ -130,6 +131,9 @@ struct st_context - struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */ - struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */ - struct draw_stage *rastpos_stage; /**< For glRasterPos */ -+ -+ unsigned pin_thread_counter; /* for L3 thread pinning on AMD Zen */ -+ - GLboolean clamp_frag_color_in_shader; - GLboolean clamp_vert_color_in_shader; - boolean clamp_frag_depth_in_shader; -@@ -235,8 +239,6 @@ struct st_context - /** This masks out unused shader resources. Only valid in draw calls. */ - uint64_t active_states; - -- unsigned pin_thread_counter; /* for L3 thread pinning on AMD Zen */ -- - /* If true, further analysis of states is required to know if something - * has changed. Used mainly for shaders. - */ -diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c -index 996d985510c..159d7017b07 100644 ---- a/src/mesa/state_tracker/st_draw.c -+++ b/src/mesa/state_tracker/st_draw.c -@@ -124,26 +124,26 @@ prepare_draw(struct st_context *st, struct gl_context *ctx) - st_validate_state(st, ST_PIPELINE_RENDER); - } - -- struct pipe_context *pipe = st->pipe; -- - /* Pin threads regularly to the same Zen CCX that the main thread is - * running on. The main thread can move between CCXs. - */ -- if (unlikely(/* AMD Zen */ -- util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 && -+ if (unlikely(st->pin_thread_counter != ST_L3_PINNING_DISABLED && - /* no glthread */ - ctx->CurrentClientDispatch != ctx->MarshalExec && -- /* driver support */ -- pipe->set_context_param && - /* do it occasionally */ - ++st->pin_thread_counter % 512 == 0)) { -+ st->pin_thread_counter = 0; -+ - int cpu = util_get_current_cpu(); - if (cpu >= 0) { -- unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu]; -- -- pipe->set_context_param(pipe, -- PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, -- L3_cache); -+ struct pipe_context *pipe = st->pipe; -+ uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu]; -+ -+ if (L3_cache != U_CPU_INVALID_L3) { -+ pipe->set_context_param(pipe, -+ PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, -+ L3_cache); -+ } - } - } - } -diff --git a/src/util/half_float.h b/src/util/half_float.h -index c52bccf8d1e..8f1a1dbf11d 100644 ---- a/src/util/half_float.h -+++ b/src/util/half_float.h -@@ -59,7 +59,7 @@ static inline uint16_t - _mesa_float_to_half(float val) - { - #if defined(USE_X86_64_ASM) -- if (util_cpu_caps.has_f16c) { -+ if (util_get_cpu_caps()->has_f16c) { - __m128 in = {val}; - __m128i out; - -@@ -75,7 +75,7 @@ static inline float - _mesa_half_to_float(uint16_t val) - { - #if defined(USE_X86_64_ASM) -- if (util_cpu_caps.has_f16c) { -+ if (util_get_cpu_caps()->has_f16c) { - __m128i in = {val}; - __m128 out; - -@@ -90,7 +90,7 @@ static inline uint16_t - _mesa_float_to_float16_rtz(float val) - { - #if defined(USE_X86_64_ASM) -- if (util_cpu_caps.has_f16c) { -+ if (util_get_cpu_caps()->has_f16c) { - __m128 in = {val}; - __m128i out; - -diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c -index f4a62a5c6a8..e6473c2bf6d 100644 ---- a/src/util/tests/format/u_format_test.c -+++ b/src/util/tests/format/u_format_test.c -@@ -850,6 +850,8 @@ int main(int argc, char **argv) - { - boolean success; - -+ util_cpu_detect(); -+ - success = test_all(); - - return success ? 0 : 1; -diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c -index 025f2f30156..4a4b06e1bc6 100644 ---- a/src/util/u_cpu_detect.c -+++ b/src/util/u_cpu_detect.c -@@ -90,7 +90,7 @@ - DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false) - - --struct util_cpu_caps util_cpu_caps; -+struct util_cpu_caps_t util_cpu_caps; - - #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - static int has_cpuid(void); -@@ -438,26 +438,22 @@ get_cpu_topology(void) - util_cpu_caps.cores_per_L3 = util_cpu_caps.nr_cpus; - util_cpu_caps.num_L3_caches = 1; - -+ memset(util_cpu_caps.cpu_to_L3, 0xff, sizeof(util_cpu_caps.cpu_to_L3)); -+ - #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - /* AMD Zen */ - if (util_cpu_caps.family >= CPU_AMD_ZEN1_ZEN2 && - util_cpu_caps.family < CPU_AMD_LAST) { - uint32_t regs[4]; - -- /* Query the L3 cache count. */ -- cpuid_count(0x8000001D, 3, regs); -- unsigned cache_level = (regs[0] >> 5) & 0x7; -- unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1; -- -- if (cache_level != 3 || cores_per_L3 == util_cpu_caps.nr_cpus) -- return; -- - uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0}; - uint32_t mask[UTIL_MAX_CPUS / 32] = {0}; -- uint32_t allowed_mask[UTIL_MAX_CPUS / 32] = {0}; -- uint32_t apic_id[UTIL_MAX_CPUS]; - bool saved = false; - -+ uint32_t L3_found[UTIL_MAX_CPUS] = {0}; -+ uint32_t num_L3_caches = 0; -+ util_affinity_mask *L3_affinity_masks = NULL; -+ - /* Query APIC IDs from each CPU core. - * - * An APIC ID is a logical ID of the CPU with respect to the cache -@@ -482,41 +478,60 @@ get_cpu_topology(void) - - if (util_set_current_thread_affinity(mask, - !saved ? saved_mask : NULL, -- UTIL_MAX_CPUS)) { -+ util_cpu_caps.num_cpu_mask_bits)) { - saved = true; -- allowed_mask[i / 32] |= cpu_bit; - - /* Query the APIC ID of the current core. */ - cpuid(0x00000001, regs); -- apic_id[i] = regs[1] >> 24; -+ unsigned apic_id = regs[1] >> 24; -+ -+ /* Query the total core count for the CPU */ -+ uint32_t core_count = 1; -+ if (regs[3] & (1 << 28)) -+ core_count = (regs[1] >> 16) & 0xff; -+ -+ core_count = util_next_power_of_two(core_count); -+ -+ /* Query the L3 cache count. */ -+ cpuid_count(0x8000001D, 3, regs); -+ unsigned cache_level = (regs[0] >> 5) & 0x7; -+ unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1; -+ -+ if (cache_level != 3) -+ continue; -+ -+ unsigned local_core_id = apic_id & (core_count - 1); -+ unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count); -+ unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3); -+#define L3_ID(p, i) (p << 16 | i << 1 | 1); -+ -+ unsigned l3_id = L3_ID(phys_id, local_l3_cache_index); -+ int idx = -1; -+ for (unsigned c = 0; c < num_L3_caches; c++) { -+ if (L3_found[c] == l3_id) { -+ idx = c; -+ break; -+ } -+ } -+ if (idx == -1) { -+ idx = num_L3_caches; -+ L3_found[num_L3_caches++] = l3_id; -+ L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches); -+ if (!L3_affinity_masks) -+ return; -+ memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask)); -+ } -+ util_cpu_caps.cpu_to_L3[i] = idx; -+ L3_affinity_masks[idx][i / 32] |= cpu_bit; -+ - } - mask[i / 32] = 0; - } - -- if (saved) { -- -- /* We succeeded in using at least one CPU. */ -- util_cpu_caps.num_L3_caches = util_cpu_caps.nr_cpus / cores_per_L3; -- util_cpu_caps.cores_per_L3 = cores_per_L3; -- util_cpu_caps.L3_affinity_mask = calloc(sizeof(util_affinity_mask), -- util_cpu_caps.num_L3_caches); -- -- for (unsigned i = 0; i < util_cpu_caps.nr_cpus && i < UTIL_MAX_CPUS; -- i++) { -- uint32_t cpu_bit = 1u << (i % 32); -- -- if (allowed_mask[i / 32] & cpu_bit) { -- /* Each APIC ID bit represents a topology level, so we need -- * to round up to the next power of two. -- */ -- unsigned L3_index = apic_id[i] / -- util_next_power_of_two(cores_per_L3); -- -- util_cpu_caps.L3_affinity_mask[L3_index][i / 32] |= cpu_bit; -- util_cpu_caps.cpu_to_L3[i] = L3_index; -- } -- } -+ util_cpu_caps.num_L3_caches = num_L3_caches; -+ util_cpu_caps.L3_affinity_mask = L3_affinity_masks; - -+ if (saved) { - if (debug_get_option_dump_cpu()) { - fprintf(stderr, "CPU <-> L3 cache mapping:\n"); - for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) { -@@ -528,7 +543,8 @@ get_cpu_topology(void) - } - - /* Restore the original affinity mask. */ -- util_set_current_thread_affinity(saved_mask, NULL, UTIL_MAX_CPUS); -+ util_set_current_thread_affinity(saved_mask, NULL, -+ util_cpu_caps.num_cpu_mask_bits); - } else { - if (debug_get_option_dump_cpu()) - fprintf(stderr, "Cannot set thread affinity for any thread.\n"); -@@ -547,7 +563,7 @@ util_cpu_detect_once(void) - { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); -- util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors; -+ util_cpu_caps.nr_cpus = MAX2(1, system_info.dwNumberOfProcessors); - } - #elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN) - util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -@@ -569,6 +585,8 @@ util_cpu_detect_once(void) - util_cpu_caps.nr_cpus = 1; - #endif - -+ util_cpu_caps.num_cpu_mask_bits = align(util_cpu_caps.nr_cpus, 32); -+ - /* Make the fallback cacheline size nonzero so that it can be - * safely passed to align(). - */ -diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h -index a76fd912910..1c7239b2ec7 100644 ---- a/src/util/u_cpu_detect.h -+++ b/src/util/u_cpu_detect.h -@@ -55,7 +55,7 @@ enum cpu_family { - - typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32]; - --struct util_cpu_caps { -+struct util_cpu_caps_t { - int nr_cpus; - enum cpu_family family; - -@@ -98,14 +98,27 @@ struct util_cpu_caps { - - unsigned num_L3_caches; - unsigned cores_per_L3; -+ unsigned num_cpu_mask_bits; - - uint16_t cpu_to_L3[UTIL_MAX_CPUS]; - /* Affinity masks for each L3 cache. */ - util_affinity_mask *L3_affinity_mask; - }; - --extern struct util_cpu_caps --util_cpu_caps; -+#define U_CPU_INVALID_L3 0xffff -+ -+static inline const struct util_cpu_caps_t * -+util_get_cpu_caps(void) -+{ -+ extern struct util_cpu_caps_t util_cpu_caps; -+ -+ /* If you hit this assert, it means that something is using the -+ * cpu-caps without having first called util_cpu_detect() -+ */ -+ assert(util_cpu_caps.nr_cpus >= 1); -+ -+ return &util_cpu_caps; -+} - - void util_cpu_detect(void); - -diff --git a/src/util/u_math.c b/src/util/u_math.c -index 9a8a9ecbbde..41e7f599eb0 100644 ---- a/src/util/u_math.c -+++ b/src/util/u_math.c -@@ -92,7 +92,7 @@ util_fpstate_get(void) - unsigned mxcsr = 0; - - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - mxcsr = _mm_getcsr(); - } - #endif -@@ -110,10 +110,10 @@ unsigned - util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) - { - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - /* Enable flush to zero mode */ - current_mxcsr |= _MM_FLUSH_ZERO_MASK; -- if (util_cpu_caps.has_daz) { -+ if (util_get_cpu_caps()->has_daz) { - /* Enable denormals are zero mode */ - current_mxcsr |= _MM_DENORMALS_ZERO_MASK; - } -@@ -132,7 +132,7 @@ void - util_fpstate_set(unsigned mxcsr) - { - #if defined(PIPE_ARCH_SSE) -- if (util_cpu_caps.has_sse) { -+ if (util_get_cpu_caps()->has_sse) { - _mm_setcsr(mxcsr); - } - #endif -diff --git a/src/util/u_queue.c b/src/util/u_queue.c -index b11b297a45c..8f21f0667c6 100644 ---- a/src/util/u_queue.c -+++ b/src/util/u_queue.c -@@ -27,7 +27,7 @@ - #include "u_queue.h" - - #include "c11/threads.h" -- -+#include "util/u_cpu_detect.h" - #include "util/os_time.h" - #include "util/u_string.h" - #include "util/u_thread.h" -@@ -258,7 +258,8 @@ util_queue_thread_func(void *input) - uint32_t mask[UTIL_MAX_CPUS / 32]; - - memset(mask, 0xff, sizeof(mask)); -- util_set_current_thread_affinity(mask, NULL, UTIL_MAX_CPUS); -+ util_set_current_thread_affinity(mask, NULL, -+ util_get_cpu_caps()->num_cpu_mask_bits); - } - - #if defined(__linux__) diff --git a/SOURCES/lavapipe-disable-env-var.patch b/SOURCES/lavapipe-disable-env-var.patch index 9b59577..ba50bee 100644 --- a/SOURCES/lavapipe-disable-env-var.patch +++ b/SOURCES/lavapipe-disable-env-var.patch @@ -1,13 +1,13 @@ -diff -up mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c.dma mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c ---- mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c.dma 2020-11-19 15:11:42.483134826 +1000 -+++ mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c 2020-11-19 15:13:08.556425782 +1000 -@@ -118,6 +118,9 @@ VkResult lvp_CreateInstance( - client_version = VK_API_VERSION_1_0; - } +diff -up mesa-21.1.1/src/gallium/frontends/lavapipe/lvp_device.c.dma mesa-21.1.1/src/gallium/frontends/lavapipe/lvp_device.c +--- mesa-21.1.1/src/gallium/frontends/lavapipe/lvp_device.c.dma 2021-05-20 13:08:02.207217380 +1000 ++++ mesa-21.1.1/src/gallium/frontends/lavapipe/lvp_device.c 2021-05-20 13:08:35.868127094 +1000 +@@ -224,6 +224,9 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_Creat + if (pAllocator == NULL) + pAllocator = &default_alloc; + if (!getenv("RH_SW_VULKAN")) + return VK_ERROR_INITIALIZATION_FAILED; + - instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + instance = vk_zalloc(pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!instance) diff --git a/SOURCES/mesa-20.3.3-stable-fixes.patch b/SOURCES/mesa-20.3.3-stable-fixes.patch deleted file mode 100644 index 231e20b..0000000 --- a/SOURCES/mesa-20.3.3-stable-fixes.patch +++ /dev/null @@ -1,930 +0,0 @@ -diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c -index d49bc0f0564..90512d4f276 100644 ---- a/src/amd/vulkan/radv_query.c -+++ b/src/amd/vulkan/radv_query.c -@@ -1679,13 +1679,14 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, - - va += 8 * idx; - -- si_cs_emit_write_event_eop(cs, -- cmd_buffer->device->physical_device->rad_info.chip_class, -- radv_cmd_buffer_uses_mec(cmd_buffer), -- V_028A90_PS_DONE, 0, -- EOP_DST_SEL_TC_L2, -- EOP_DATA_SEL_GDS, -- va, EOP_DATA_GDS(0, 1), 0); -+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); -+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | -+ COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | -+ COPY_DATA_WR_CONFIRM); -+ radeon_emit(cs, 0); -+ radeon_emit(cs, 0); -+ radeon_emit(cs, va); -+ radeon_emit(cs, va >> 32); - - /* Record that the command buffer needs GDS. */ - cmd_buffer->gds_needed = true; -@@ -1769,13 +1770,14 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, - - va += 8 * idx; - -- si_cs_emit_write_event_eop(cs, -- cmd_buffer->device->physical_device->rad_info.chip_class, -- radv_cmd_buffer_uses_mec(cmd_buffer), -- V_028A90_PS_DONE, 0, -- EOP_DST_SEL_TC_L2, -- EOP_DATA_SEL_GDS, -- va, EOP_DATA_GDS(0, 1), 0); -+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); -+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | -+ COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | -+ COPY_DATA_WR_CONFIRM); -+ radeon_emit(cs, 0); -+ radeon_emit(cs, 0); -+ radeon_emit(cs, va); -+ radeon_emit(cs, va >> 32); - - cmd_buffer->state.active_pipeline_gds_queries--; - } -diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h -index 9d9491d4361..2eb3ba4e64e 100644 ---- a/src/amd/vulkan/radv_shader.h -+++ b/src/amd/vulkan/radv_shader.h -@@ -573,9 +573,11 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, - if (chip_class >= GFX7 && family != CHIP_STONEY) - hardware_lds_size = 65536; - -- num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); -+ if (input_patch_size + output_patch_size) -+ num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); - /* Make sure the output data fits in the offchip buffer */ -- num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size); -+ if (output_patch_size) -+ num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size); - /* Not necessary for correctness, but improves performance. The - * specific value is taken from the proprietary driver. - */ -diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c -index 1eef6aac70c..a6a663d97a6 100644 ---- a/src/gallium/auxiliary/cso_cache/cso_context.c -+++ b/src/gallium/auxiliary/cso_cache/cso_context.c -@@ -402,10 +402,13 @@ void cso_destroy_context( struct cso_context *ctx ) - PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); - int maxcb = scr->get_shader_param(scr, sh, - PIPE_SHADER_CAP_MAX_CONST_BUFFERS); -+ int maximg = scr->get_shader_param(scr, sh, -+ PIPE_SHADER_CAP_MAX_SHADER_IMAGES); - assert(maxsam <= PIPE_MAX_SAMPLERS); - assert(maxview <= PIPE_MAX_SHADER_SAMPLER_VIEWS); - assert(maxssbo <= PIPE_MAX_SHADER_BUFFERS); - assert(maxcb <= PIPE_MAX_CONSTANT_BUFFERS); -+ assert(maximg <= PIPE_MAX_SHADER_IMAGES); - if (maxsam > 0) { - ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros); - } -@@ -415,6 +418,9 @@ void cso_destroy_context( struct cso_context *ctx ) - if (maxssbo > 0) { - ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0); - } -+ if (maximg > 0) { -+ ctx->pipe->set_shader_images(ctx->pipe, sh, 0, maximg, NULL); -+ } - for (int i = 0; i < maxcb; i++) { - ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, NULL); - } -diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c -index 8157e921850..971fc80b5ac 100644 ---- a/src/gallium/drivers/iris/iris_program.c -+++ b/src/gallium/drivers/iris/iris_program.c -@@ -2109,8 +2109,8 @@ iris_get_scratch_space(struct iris_context *ice, - * in the base configuration. - */ - unsigned subslice_total = screen->subslice_total; -- if (devinfo->gen >= 12) -- subslice_total = devinfo->num_subslices[0]; -+ if (devinfo->gen == 12) -+ subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2); - else if (devinfo->gen == 11) - subslice_total = 8; - else if (devinfo->gen < 11) -diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c -index 276ad62b1dd..045f43ed8c0 100644 ---- a/src/gallium/drivers/iris/iris_resolve.c -+++ b/src/gallium/drivers/iris/iris_resolve.c -@@ -793,7 +793,9 @@ iris_resource_set_aux_state(struct iris_context *ice, - if (res->aux.state[level][start_layer + a] != aux_state) { - res->aux.state[level][start_layer + a] = aux_state; - /* XXX: Need to track which bindings to make dirty */ -- ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER; -+ ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER | -+ IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES | -+ IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES; - ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; - } - } -diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c -index 8747ef4aa8a..3b34e32cd21 100644 ---- a/src/gallium/drivers/iris/iris_resource.c -+++ b/src/gallium/drivers/iris/iris_resource.c -@@ -1125,6 +1125,20 @@ iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource) - 0, INTEL_REMAINING_LAYERS, - mod ? mod->aux_usage : ISL_AUX_USAGE_NONE, - mod ? mod->supports_clear_color : false); -+ -+ if (!res->mod_info && res->aux.usage != ISL_AUX_USAGE_NONE) { -+ /* flush_resource may be used to prepare an image for sharing external -+ * to the driver (e.g. via eglCreateImage). To account for this, make -+ * sure to get rid of any compression that a consumer wouldn't know how -+ * to handle. -+ */ -+ for (int i = 0; i < IRIS_BATCH_COUNT; i++) { -+ if (iris_batch_references(&ice->batches[i], res->bo)) -+ iris_batch_flush(&ice->batches[i]); -+ } -+ -+ iris_resource_disable_aux(res); -+ } - } - - static void -diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c -index 59a63f7bbab..b9ddb863a16 100644 ---- a/src/gallium/drivers/iris/iris_state.c -+++ b/src/gallium/drivers/iris/iris_state.c -@@ -1666,6 +1666,8 @@ struct iris_rasterizer_state { - bool multisample; - bool force_persample_interp; - bool conservative_rasterization; -+ bool fill_mode_point; -+ bool fill_mode_line; - bool fill_mode_point_or_line; - enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */ - uint16_t sprite_coord_enable; -@@ -1729,11 +1731,15 @@ iris_create_rasterizer_state(struct pipe_context *ctx, - cso->conservative_rasterization = - state->conservative_raster_mode == PIPE_CONSERVATIVE_RASTER_POST_SNAP; - -- cso->fill_mode_point_or_line = -- state->fill_front == PIPE_POLYGON_MODE_LINE || -+ cso->fill_mode_point = - state->fill_front == PIPE_POLYGON_MODE_POINT || -- state->fill_back == PIPE_POLYGON_MODE_LINE || - state->fill_back == PIPE_POLYGON_MODE_POINT; -+ cso->fill_mode_line = -+ state->fill_front == PIPE_POLYGON_MODE_LINE || -+ state->fill_back == PIPE_POLYGON_MODE_LINE; -+ cso->fill_mode_point_or_line = -+ cso->fill_mode_point || -+ cso->fill_mode_line; - - if (state->clip_plane_enable != 0) - cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1; -@@ -4059,6 +4065,28 @@ iris_emit_sbe_swiz(struct iris_batch *batch, - } - } - -+static bool -+iris_is_drawing_points(const struct iris_context *ice) -+{ -+ const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; -+ -+ if (cso_rast->fill_mode_point) { -+ return true; -+ } -+ -+ if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) { -+ const struct brw_gs_prog_data *gs_prog_data = -+ (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data; -+ return gs_prog_data->output_topology == _3DPRIM_POINTLIST; -+ } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) { -+ const struct brw_tes_prog_data *tes_data = -+ (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data; -+ return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT; -+ } else { -+ return ice->state.prim_mode == PIPE_PRIM_POINTS; -+ } -+} -+ - static unsigned - iris_calculate_point_sprite_overrides(const struct brw_wm_prog_data *prog_data, - const struct iris_rasterizer_state *cso) -@@ -4093,7 +4121,8 @@ iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) - &urb_read_offset, &urb_read_length); - - unsigned sprite_coord_overrides = -- iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast); -+ iris_is_drawing_points(ice) ? -+ iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast) : 0; - - iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { - sbe.AttributeSwizzleEnable = true; -diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c -index 8f688fa3650..ef35f86b05f 100644 ---- a/src/gallium/drivers/radeonsi/si_descriptors.c -+++ b/src/gallium/drivers/radeonsi/si_descriptors.c -@@ -1482,11 +1482,12 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) - /* Reset descriptors of buffer resources after \p buf has been invalidated. - * If buf == NULL, reset all descriptors. - */ --static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, -+static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, - unsigned descriptors_idx, uint64_t slot_mask, - struct pipe_resource *buf, enum radeon_bo_priority priority) - { - struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; -+ bool noop = true; - uint64_t mask = buffers->enabled_mask & slot_mask; - - while (mask) { -@@ -1501,8 +1502,10 @@ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_ - sctx, si_resource(buffer), - buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, - priority, true); -+ noop = false; - } - } -+ return !noop; - } - - /* Update all buffer bindings where the buffer is bound, including -@@ -1577,11 +1580,15 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) - } - - if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { -- for (shader = 0; shader < SI_NUM_SHADERS; shader++) -- si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], -- si_const_and_shader_buffer_descriptors_idx(shader), -- u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, -- sctx->const_and_shader_buffers[shader].priority); -+ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { -+ if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], -+ si_const_and_shader_buffer_descriptors_idx(shader), -+ u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, -+ sctx->const_and_shader_buffers[shader].priority) && -+ shader == PIPE_SHADER_COMPUTE) { -+ sctx->compute_shaderbuf_sgprs_dirty = true; -+ } -+ } - } - - if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { -@@ -1633,6 +1640,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) - radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), - RADEON_USAGE_READWRITE, - RADEON_PRIO_SAMPLER_BUFFER, true); -+ -+ if (shader == PIPE_SHADER_COMPUTE) -+ sctx->compute_image_sgprs_dirty = true; - } - } - } -diff --git a/src/gallium/frontends/dri/dri_helpers.c b/src/gallium/frontends/dri/dri_helpers.c -index 01a1fb3d96c..5e87df35a55 100644 ---- a/src/gallium/frontends/dri/dri_helpers.c -+++ b/src/gallium/frontends/dri/dri_helpers.c -@@ -258,7 +258,9 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context, - int renderbuffer, void *loaderPrivate, - unsigned *error) - { -- struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx; -+ struct st_context *st_ctx = (struct st_context *)dri_context(context)->st; -+ struct gl_context *ctx = st_ctx->ctx; -+ struct pipe_context *p_ctx = st_ctx->pipe; - struct gl_renderbuffer *rb; - struct pipe_resource *tex; - __DRIimage *img; -@@ -299,6 +301,13 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context, - - pipe_resource_reference(&img->texture, tex); - -+ /* If the resource supports EGL_MESA_image_dma_buf_export, make sure that -+ * it's in a shareable state. Do this now while we still have the access to -+ * the context. -+ */ -+ if (dri2_get_mapping_by_format(img->dri_format)) -+ p_ctx->flush_resource(p_ctx, tex); -+ - *error = __DRI_IMAGE_ERROR_SUCCESS; - return img; - } -@@ -326,7 +335,9 @@ dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture, - void *loaderPrivate) - { - __DRIimage *img; -- struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx; -+ struct st_context *st_ctx = (struct st_context *)dri_context(context)->st; -+ struct gl_context *ctx = st_ctx->ctx; -+ struct pipe_context *p_ctx = st_ctx->pipe; - struct gl_texture_object *obj; - struct pipe_resource *tex; - GLuint face = 0; -@@ -376,6 +387,13 @@ dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture, - - pipe_resource_reference(&img->texture, tex); - -+ /* If the resource supports EGL_MESA_image_dma_buf_export, make sure that -+ * it's in a shareable state. Do this now while we still have the access to -+ * the context. -+ */ -+ if (dri2_get_mapping_by_format(img->dri_format)) -+ p_ctx->flush_resource(p_ctx, tex); -+ - *error = __DRI_IMAGE_ERROR_SUCCESS; - return img; - } -@@ -547,6 +565,9 @@ dri2_get_mapping_by_fourcc(int fourcc) - const struct dri2_format_mapping * - dri2_get_mapping_by_format(int format) - { -+ if (format == __DRI_IMAGE_FORMAT_NONE) -+ return NULL; -+ - for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) { - if (dri2_format_table[i].dri_format == format) - return &dri2_format_table[i]; -diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c -index 45734f95880..187aecde1f8 100644 ---- a/src/gallium/frontends/lavapipe/lvp_device.c -+++ b/src/gallium/frontends/lavapipe/lvp_device.c -@@ -52,8 +52,6 @@ lvp_physical_device_init(struct lvp_physical_device *device, - if (!device->pscreen) - return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - -- fprintf(stderr, "WARNING: lavapipe is not a conformant vulkan implementation, testing use only.\n"); -- - device->max_images = device->pscreen->get_shader_param(device->pscreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_SHADER_IMAGES); - lvp_physical_device_get_supported_extensions(device, &device->supported_extensions); - result = lvp_init_wsi(device); -@@ -575,6 +573,19 @@ void lvp_GetPhysicalDeviceProperties2( - } - } - -+static void lvp_get_physical_device_queue_family_properties( -+ VkQueueFamilyProperties* pQueueFamilyProperties) -+{ -+ *pQueueFamilyProperties = (VkQueueFamilyProperties) { -+ .queueFlags = VK_QUEUE_GRAPHICS_BIT | -+ VK_QUEUE_COMPUTE_BIT | -+ VK_QUEUE_TRANSFER_BIT, -+ .queueCount = 1, -+ .timestampValidBits = 64, -+ .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, -+ }; -+} -+ - void lvp_GetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, -@@ -586,15 +597,21 @@ void lvp_GetPhysicalDeviceQueueFamilyProperties( - } - - assert(*pCount >= 1); -+ lvp_get_physical_device_queue_family_properties(pQueueFamilyProperties); -+} - -- *pQueueFamilyProperties = (VkQueueFamilyProperties) { -- .queueFlags = VK_QUEUE_GRAPHICS_BIT | -- VK_QUEUE_COMPUTE_BIT | -- VK_QUEUE_TRANSFER_BIT, -- .queueCount = 1, -- .timestampValidBits = 64, -- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, -- }; -+void lvp_GetPhysicalDeviceQueueFamilyProperties2( -+ VkPhysicalDevice physicalDevice, -+ uint32_t* pCount, -+ VkQueueFamilyProperties2 *pQueueFamilyProperties) -+{ -+ if (pQueueFamilyProperties == NULL) { -+ *pCount = 1; -+ return; -+ } -+ -+ assert(*pCount >= 1); -+ lvp_get_physical_device_queue_family_properties(&pQueueFamilyProperties->queueFamilyProperties); - } - - void lvp_GetPhysicalDeviceMemoryProperties( -@@ -617,6 +634,14 @@ void lvp_GetPhysicalDeviceMemoryProperties( - }; - } - -+void lvp_GetPhysicalDeviceMemoryProperties2( -+ VkPhysicalDevice physicalDevice, -+ VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) -+{ -+ lvp_GetPhysicalDeviceMemoryProperties(physicalDevice, -+ &pMemoryProperties->memoryProperties); -+} -+ - PFN_vkVoidFunction lvp_GetInstanceProcAddr( - VkInstance _instance, - const char* pName) -@@ -822,6 +847,8 @@ VkResult lvp_CreateDevice( - const VkAllocationCallbacks* pAllocator, - VkDevice* pDevice) - { -+ fprintf(stderr, "WARNING: lavapipe is not a conformant vulkan implementation, testing use only.\n"); -+ - LVP_FROM_HANDLE(lvp_physical_device, physical_device, physicalDevice); - struct lvp_device *device; - -diff --git a/src/glx/g_glxglvnddispatchfuncs.c b/src/glx/g_glxglvnddispatchfuncs.c -index 0f02ed2d321..e0ea27c0b18 100644 ---- a/src/glx/g_glxglvnddispatchfuncs.c -+++ b/src/glx/g_glxglvnddispatchfuncs.c -@@ -87,6 +87,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = { - __ATTRIB(SelectEventSGIX), - // glXSwapBuffers implemented by libglvnd - __ATTRIB(SwapBuffersMscOML), -+ __ATTRIB(SwapIntervalEXT), - __ATTRIB(SwapIntervalMESA), - __ATTRIB(SwapIntervalSGI), - // glXUseXFont implemented by libglvnd -@@ -893,6 +894,24 @@ static int dispatch_SwapIntervalMESA(unsigned int interval) - - - -+static void dispatch_SwapIntervalEXT(Display *dpy, GLXDrawable drawable, int interval) -+{ -+ PFNGLXSWAPINTERVALEXTPROC pSwapIntervalEXT; -+ __GLXvendorInfo *dd; -+ -+ dd = GetDispatchFromDrawable(dpy, drawable); -+ if (dd == NULL) -+ return; -+ -+ __FETCH_FUNCTION_PTR(SwapIntervalEXT); -+ if (pSwapIntervalEXT == NULL) -+ return; -+ -+ pSwapIntervalEXT(dpy, drawable, interval); -+} -+ -+ -+ - static Bool dispatch_WaitForMscOML(Display *dpy, GLXDrawable drawable, - int64_t target_msc, int64_t divisor, - int64_t remainder, int64_t *ust, -@@ -974,6 +993,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = { - __ATTRIB(ReleaseTexImageEXT), - __ATTRIB(SelectEventSGIX), - __ATTRIB(SwapBuffersMscOML), -+ __ATTRIB(SwapIntervalEXT), - __ATTRIB(SwapIntervalMESA), - __ATTRIB(SwapIntervalSGI), - __ATTRIB(WaitForMscOML), -diff --git a/src/glx/g_glxglvnddispatchindices.h b/src/glx/g_glxglvnddispatchindices.h -index 3ba50a74abb..b65d078098f 100644 ---- a/src/glx/g_glxglvnddispatchindices.h -+++ b/src/glx/g_glxglvnddispatchindices.h -@@ -79,6 +79,7 @@ typedef enum __GLXdispatchIndex { - DI_SelectEventSGIX, - // SwapBuffers implemented by libglvnd - DI_SwapBuffersMscOML, -+ DI_SwapIntervalEXT, - DI_SwapIntervalMESA, - DI_SwapIntervalSGI, - // UseXFont implemented by libglvnd -diff --git a/src/intel/common/gen_mi_builder.h b/src/intel/common/gen_mi_builder.h -index ddd8459ef07..47fb98e99f7 100644 ---- a/src/intel/common/gen_mi_builder.h -+++ b/src/intel/common/gen_mi_builder.h -@@ -932,6 +932,13 @@ gen_mi_store_address(struct gen_mi_builder *b, - static inline void - gen_mi_self_mod_barrier(struct gen_mi_builder *b) - { -+ /* First make sure all the memory writes from previous modifying commands -+ * have landed. We want to do this before going through the CS cache, -+ * otherwise we could be fetching memory that hasn't been written to yet. -+ */ -+ gen_mi_builder_emit(b, GENX(PIPE_CONTROL), pc) { -+ pc.CommandStreamerStallEnable = true; -+ } - /* Documentation says Gen11+ should be able to invalidate the command cache - * but experiment show it doesn't work properly, so for now just get over - * the CS prefetch. -diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp -index 917c3abfe9e..6896987055f 100644 ---- a/src/intel/compiler/brw_fs_copy_propagation.cpp -+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp -@@ -437,6 +437,7 @@ instruction_requires_packed_data(fs_inst *inst) - case FS_OPCODE_DDX_COARSE: - case FS_OPCODE_DDY_FINE: - case FS_OPCODE_DDY_COARSE: -+ case SHADER_OPCODE_QUAD_SWIZZLE: - return true; - default: - return false; -diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h -index 6ba3a6ca97e..3a4acc1834a 100644 ---- a/src/intel/compiler/brw_ir_fs.h -+++ b/src/intel/compiler/brw_ir_fs.h -@@ -451,13 +451,15 @@ regs_written(const fs_inst *inst) - * Return the number of dataflow registers read by the instruction (either - * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / - * register_size)'. The somewhat arbitrary register size unit is 4B for the -- * UNIFORM and IMM files and 32B for all other files. -+ * UNIFORM files and 32B for all other files. - */ - inline unsigned - regs_read(const fs_inst *inst, unsigned i) - { -- const unsigned reg_size = -- inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE; -+ if (inst->src[i].file == IMM) -+ return 1; -+ -+ const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE; - return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + - inst->size_read(i) - - MIN2(inst->size_read(i), reg_padding(inst->src[i])), -diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c -index 9007cd00e85..48811912e95 100644 ---- a/src/intel/vulkan/anv_allocator.c -+++ b/src/intel/vulkan/anv_allocator.c -@@ -1447,8 +1447,8 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool, - * For, Gen11+, scratch space allocation is based on the number of threads - * in the base configuration. - */ -- if (devinfo->gen >= 12) -- subslices = devinfo->num_subslices[0]; -+ if (devinfo->gen == 12) -+ subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2); - else if (devinfo->gen == 11) - subslices = 8; - else if (devinfo->gen >= 9) -diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c -index 0290431f145..80307cd612f 100644 ---- a/src/intel/vulkan/anv_image.c -+++ b/src/intel/vulkan/anv_image.c -@@ -684,6 +684,25 @@ choose_drm_format_mod(const struct anv_physical_device *device, - return NULL; - } - -+static VkImageUsageFlags -+anv_image_create_usage(const VkImageCreateInfo *pCreateInfo, -+ VkImageUsageFlags usage) -+{ -+ /* Add TRANSFER_SRC usage for multisample attachment images. This is -+ * because we might internally use the TRANSFER_SRC layout on them for -+ * blorp operations associated with resolving those into other attachments -+ * at the end of a subpass. -+ * -+ * Without this additional usage, we compute an incorrect AUX state in -+ * anv_layout_to_aux_state(). -+ */ -+ if (pCreateInfo->samples > VK_SAMPLE_COUNT_1_BIT && -+ (usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | -+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) -+ usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; -+ return usage; -+} -+ - VkResult - anv_image_create(VkDevice _device, - const struct anv_image_create_info *create_info, -@@ -732,7 +751,7 @@ anv_image_create(VkDevice _device, - image->levels = pCreateInfo->mipLevels; - image->array_size = pCreateInfo->arrayLayers; - image->samples = pCreateInfo->samples; -- image->usage = pCreateInfo->usage; -+ image->usage = anv_image_create_usage(pCreateInfo, pCreateInfo->usage); - image->create_flags = pCreateInfo->flags; - image->tiling = pCreateInfo->tiling; - image->disjoint = pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT; -@@ -745,8 +764,11 @@ anv_image_create(VkDevice _device, - const VkImageStencilUsageCreateInfoEXT *stencil_usage_info = - vk_find_struct_const(pCreateInfo->pNext, - IMAGE_STENCIL_USAGE_CREATE_INFO_EXT); -- if (stencil_usage_info) -- image->stencil_usage = stencil_usage_info->stencilUsage; -+ if (stencil_usage_info) { -+ image->stencil_usage = -+ anv_image_create_usage(pCreateInfo, -+ stencil_usage_info->stencilUsage); -+ } - } - - /* In case of external format, We don't know format yet, -diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c -index af23b87969d..1818f6c587b 100644 ---- a/src/intel/vulkan/anv_pass.c -+++ b/src/intel/vulkan/anv_pass.c -@@ -23,6 +23,7 @@ - - #include "anv_private.h" - -+#include "vk_format_info.h" - #include "vk_util.h" - - static void -@@ -406,6 +407,70 @@ num_subpass_attachments2(const VkSubpassDescription2KHR *desc) - (ds_resolve && ds_resolve->pDepthStencilResolveAttachment); - } - -+static bool -+vk_image_layout_depth_only(VkImageLayout layout) -+{ -+ switch (layout) { -+ case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: -+ case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2: -+ * -+ * "If layout only specifies the layout of the depth aspect of the -+ * attachment, the layout of the stencil aspect is specified by the -+ * stencilLayout member of a VkAttachmentReferenceStencilLayout structure -+ * included in the pNext chain. Otherwise, layout describes the layout for -+ * all relevant image aspects." -+ */ -+static VkImageLayout -+stencil_ref_layout(const VkAttachmentReference2KHR *att_ref) -+{ -+ if (!vk_image_layout_depth_only(att_ref->layout)) -+ return att_ref->layout; -+ -+ const VkAttachmentReferenceStencilLayoutKHR *stencil_ref = -+ vk_find_struct_const(att_ref->pNext, -+ ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); -+ if (!stencil_ref) -+ return VK_IMAGE_LAYOUT_UNDEFINED; -+ return stencil_ref->stencilLayout; -+} -+ -+/* From the Vulkan Specification 1.2.166 - VkAttachmentDescription2: -+ * -+ * "If format is a depth/stencil format, and initialLayout only specifies -+ * the initial layout of the depth aspect of the attachment, the initial -+ * layout of the stencil aspect is specified by the stencilInitialLayout -+ * member of a VkAttachmentDescriptionStencilLayout structure included in -+ * the pNext chain. Otherwise, initialLayout describes the initial layout -+ * for all relevant image aspects." -+ */ -+static VkImageLayout -+stencil_desc_layout(const VkAttachmentDescription2KHR *att_desc, bool final) -+{ -+ if (!vk_format_has_stencil(att_desc->format)) -+ return VK_IMAGE_LAYOUT_UNDEFINED; -+ -+ const VkImageLayout main_layout = -+ final ? att_desc->finalLayout : att_desc->initialLayout; -+ if (!vk_image_layout_depth_only(main_layout)) -+ return main_layout; -+ -+ const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc = -+ vk_find_struct_const(att_desc->pNext, -+ ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR); -+ assert(stencil_desc); -+ return final ? -+ stencil_desc->stencilFinalLayout : -+ stencil_desc->stencilInitialLayout; -+} -+ - VkResult anv_CreateRenderPass2( - VkDevice _device, - const VkRenderPassCreateInfo2KHR* pCreateInfo, -@@ -450,10 +515,6 @@ VkResult anv_CreateRenderPass2( - pass->subpass_flushes = subpass_flushes; - - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { -- const VkAttachmentDescriptionStencilLayoutKHR *stencil_layout = -- vk_find_struct_const(pCreateInfo->pAttachments[i].pNext, -- ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR); -- - pass->attachments[i] = (struct anv_render_pass_attachment) { - .format = pCreateInfo->pAttachments[i].format, - .samples = pCreateInfo->pAttachments[i].samples, -@@ -463,12 +524,10 @@ VkResult anv_CreateRenderPass2( - .initial_layout = pCreateInfo->pAttachments[i].initialLayout, - .final_layout = pCreateInfo->pAttachments[i].finalLayout, - -- .stencil_initial_layout = (stencil_layout ? -- stencil_layout->stencilInitialLayout : -- pCreateInfo->pAttachments[i].initialLayout), -- .stencil_final_layout = (stencil_layout ? -- stencil_layout->stencilFinalLayout : -- pCreateInfo->pAttachments[i].finalLayout), -+ .stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], -+ false), -+ .stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], -+ true), - }; - } - -@@ -487,17 +546,11 @@ VkResult anv_CreateRenderPass2( - subpass_attachments += desc->inputAttachmentCount; - - for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { -- const VkAttachmentReferenceStencilLayoutKHR *stencil_layout = -- vk_find_struct_const(desc->pInputAttachments[j].pNext, -- ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); -- - subpass->input_attachments[j] = (struct anv_subpass_attachment) { - .usage = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, - .attachment = desc->pInputAttachments[j].attachment, - .layout = desc->pInputAttachments[j].layout, -- .stencil_layout = (stencil_layout ? -- stencil_layout->stencilLayout : -- desc->pInputAttachments[j].layout), -+ .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]), - }; - } - } -@@ -531,17 +584,11 @@ VkResult anv_CreateRenderPass2( - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = subpass_attachments++; - -- const VkAttachmentReferenceStencilLayoutKHR *stencil_attachment = -- vk_find_struct_const(desc->pDepthStencilAttachment->pNext, -- ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); -- - *subpass->depth_stencil_attachment = (struct anv_subpass_attachment) { - .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, - .attachment = desc->pDepthStencilAttachment->attachment, - .layout = desc->pDepthStencilAttachment->layout, -- .stencil_layout = stencil_attachment ? -- stencil_attachment->stencilLayout : -- desc->pDepthStencilAttachment->layout, -+ .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment), - }; - } - -@@ -552,17 +599,11 @@ VkResult anv_CreateRenderPass2( - if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) { - subpass->ds_resolve_attachment = subpass_attachments++; - -- const VkAttachmentReferenceStencilLayoutKHR *stencil_resolve_attachment = -- vk_find_struct_const(ds_resolve->pDepthStencilResolveAttachment->pNext, -- ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR); -- - *subpass->ds_resolve_attachment = (struct anv_subpass_attachment) { - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, - .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment, - .layout = ds_resolve->pDepthStencilResolveAttachment->layout, -- .stencil_layout = stencil_resolve_attachment ? -- stencil_resolve_attachment->stencilLayout : -- ds_resolve->pDepthStencilResolveAttachment->layout, -+ .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment), - }; - subpass->depth_resolve_mode = ds_resolve->depthResolveMode; - subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; -diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c -index a9c49e0f592..e3eb376fa5a 100644 ---- a/src/intel/vulkan/genX_cmd_buffer.c -+++ b/src/intel/vulkan/genX_cmd_buffer.c -@@ -462,8 +462,10 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer, - { - uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - -+ const struct anv_surface *surface = &image->planes[plane].surface; - uint64_t base_address = -- anv_address_physical(image->planes[plane].address); -+ anv_address_physical(anv_address_add(image->planes[plane].address, -+ surface->offset)); - - const struct isl_surf *isl_surf = &image->planes[plane].surface.isl; - uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf); -@@ -1231,6 +1233,17 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, - uint32_t level_layer_count = - MIN2(layer_count, aux_layers - base_layer); - -+ /* If will_full_fast_clear is set, the caller promises to -+ * fast-clear the largest portion of the specified range as it can. -+ * For color images, that means only the first LOD and array slice. -+ */ -+ if (level == 0 && base_layer == 0 && will_full_fast_clear) { -+ base_layer++; -+ level_layer_count--; -+ if (level_layer_count == 0) -+ continue; -+ } -+ - anv_image_ccs_op(cmd_buffer, image, - image->planes[plane].surface.isl.format, - ISL_SWIZZLE_IDENTITY, -@@ -1250,6 +1263,12 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, - "define an MCS buffer."); - } - -+ /* If will_full_fast_clear is set, the caller promises to fast-clear -+ * the largest portion of the specified range as it can. -+ */ -+ if (will_full_fast_clear) -+ return; -+ - assert(base_level == 0 && level_count == 1); - anv_image_mcs_op(cmd_buffer, image, - image->planes[plane].surface.isl.format, -diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c -index 205e8677f19..33f071019b7 100644 ---- a/src/intel/vulkan/genX_pipeline.c -+++ b/src/intel/vulkan/genX_pipeline.c -@@ -1180,7 +1180,22 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline, - #endif - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], -- .ColorBufferBlendEnable = a->blendEnable, -+ /* Vulkan specification 1.2.168, VkLogicOp: -+ * -+ * "Logical operations are controlled by the logicOpEnable and -+ * logicOp members of VkPipelineColorBlendStateCreateInfo. If -+ * logicOpEnable is VK_TRUE, then a logical operation selected by -+ * logicOp is applied between each color attachment and the -+ * fragment’s corresponding output value, and blending of all -+ * attachments is treated as if it were disabled." -+ * -+ * From the Broadwell PRM Volume 2d: Command Reference: Structures: -+ * BLEND_STATE_ENTRY: -+ * -+ * "Enabling LogicOp and Color Buffer Blending at the same time is -+ * UNDEFINED" -+ */ -+ .ColorBufferBlendEnable = !info->logicOpEnable && a->blendEnable, - .ColorClampRange = COLORCLAMP_RTFORMAT, - .PreBlendColorClampEnable = true, - .PostBlendColorClampEnable = true, -diff --git a/src/intel/vulkan/vk_format_info.h b/src/intel/vulkan/vk_format_info.h -index 006e1f4a6ad..4e72c244742 100644 ---- a/src/intel/vulkan/vk_format_info.h -+++ b/src/intel/vulkan/vk_format_info.h -@@ -164,4 +164,11 @@ vk_format_has_depth(VkFormat format) - return aspects & VK_IMAGE_ASPECT_DEPTH_BIT; - } - -+static inline bool -+vk_format_has_stencil(VkFormat format) -+{ -+ const VkImageAspectFlags aspects = vk_format_aspects(format); -+ return aspects & VK_IMAGE_ASPECT_STENCIL_BIT; -+} -+ - #endif /* VK_FORMAT_INFO_H */ -diff --git a/src/mesa/state_tracker/st_pbo.c b/src/mesa/state_tracker/st_pbo.c -index 65a1ce8862a..b03921c1be6 100644 ---- a/src/mesa/state_tracker/st_pbo.c -+++ b/src/mesa/state_tracker/st_pbo.c -@@ -431,16 +431,21 @@ create_fs(struct st_context *st, bool download, - nir_ssa_def *coord = nir_load_var(&b, fragcoord); - - nir_ssa_def *layer = NULL; -- if (st->pbo.layers && need_layer && (!download || target == PIPE_TEXTURE_1D_ARRAY || -- target == PIPE_TEXTURE_2D_ARRAY || -- target == PIPE_TEXTURE_3D || -- target == PIPE_TEXTURE_CUBE || -- target == PIPE_TEXTURE_CUBE_ARRAY)) { -- nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in, -- glsl_int_type(), "gl_Layer"); -- var->data.location = VARYING_SLOT_LAYER; -- var->data.interpolation = INTERP_MODE_FLAT; -- layer = nir_load_var(&b, var); -+ if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY || -+ target == PIPE_TEXTURE_2D_ARRAY || -+ target == PIPE_TEXTURE_3D || -+ target == PIPE_TEXTURE_CUBE || -+ target == PIPE_TEXTURE_CUBE_ARRAY)) { -+ if (need_layer) { -+ nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in, -+ glsl_int_type(), "gl_Layer"); -+ var->data.location = VARYING_SLOT_LAYER; -+ var->data.interpolation = INTERP_MODE_FLAT; -+ layer = nir_load_var(&b, var); -+ } -+ else { -+ layer = zero; -+ } - } - - /* offset_pos = param.xy + f2i(coord.xy) */ -diff --git a/src/util/format/u_format.csv b/src/util/format/u_format.csv -index 8acfb869bdb..237c4c95475 100644 ---- a/src/util/format/u_format.csv -+++ b/src/util/format/u_format.csv -@@ -500,7 +500,7 @@ PIPE_FORMAT_R4G4B4A4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , xy - PIPE_FORMAT_B4G4R4A4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , zyxw, rgb, up4 , up4 , up4 , up4 , yzwx - PIPE_FORMAT_A4R4G4B4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , yzwx, rgb, up4 , up4 , up4 , up4 , zyxw - PIPE_FORMAT_A4B4G4R4_UINT , plain, 1, 1, 1, up4 , up4 , up4 , up4 , wzyx, rgb, up4 , up4 , up4 , up4 , xyzw --PIPE_FORMAT_A1R5G5B5_UINT , plain, 1, 1, 1, up1 , up5 , up5 , up5 , wzyx, rgb, up5 , up5 , up5 , up1 , zyxw -+PIPE_FORMAT_A1R5G5B5_UINT , plain, 1, 1, 1, up1 , up5 , up5 , up5 , yzwx, rgb, up5 , up5 , up5 , up1 , zyxw - PIPE_FORMAT_A1B5G5R5_UINT , plain, 1, 1, 1, up1 , up5 , up5 , up5 , wzyx, rgb, up5 , up5 , up5 , up1 , xyzw - PIPE_FORMAT_R5G5B5A1_UINT , plain, 1, 1, 1, up5 , up5 , up5 , up1 , xyzw, rgb, up5 , up5 , up5 , up1 , wzyx - PIPE_FORMAT_B5G5R5A1_UINT , plain, 1, 1, 1, up5 , up5 , up5 , up1 , zyxw, rgb, up1 , up5 , up5 , up5 , yzwx -diff --git a/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json b/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json -index 1d5fffd0135..361ae9fe74e 100644 ---- a/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json -+++ b/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json -@@ -4,7 +4,7 @@ - "name": "VK_LAYER_MESA_device_select", - "type": "GLOBAL", - "library_path": "libVkLayer_MESA_device_select.so", -- "api_version": "1.1.73", -+ "api_version": "1.2.73", - "implementation_version": "1", - "description": "Linux device selection layer", - "functions": { diff --git a/SOURCES/mesa-vk-wsi-sw-fixes.patch b/SOURCES/mesa-vk-wsi-sw-fixes.patch new file mode 100644 index 0000000..a72e411 --- /dev/null +++ b/SOURCES/mesa-vk-wsi-sw-fixes.patch @@ -0,0 +1,403 @@ +diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c +index e2a7d337ecf..bc4d87611e0 100644 +--- a/src/vulkan/wsi/wsi_common_wayland.c ++++ b/src/vulkan/wsi/wsi_common_wayland.c +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + + #include "drm-uapi/drm_fourcc.h" + +@@ -44,9 +45,15 @@ + #include + #include + #include ++#include + + struct wsi_wayland; + ++struct wsi_wl_display_swrast { ++ struct wl_shm * wl_shm; ++ struct u_vector formats; ++}; ++ + struct wsi_wl_display_drm { + struct wl_drm * wl_drm; + struct u_vector formats; +@@ -69,6 +76,7 @@ struct wsi_wl_display { + struct wl_display * wl_display_wrapper; + struct wl_event_queue * queue; + ++ struct wsi_wl_display_swrast swrast; + struct wsi_wl_display_drm drm; + struct wsi_wl_display_dmabuf dmabuf; + +@@ -79,6 +87,8 @@ struct wsi_wl_display { + + /* Only used for displays created by wsi_wl_display_create */ + uint32_t refcount; ++ ++ bool sw; + }; + + struct wsi_wayland { +@@ -183,6 +193,40 @@ wsi_wl_display_add_wl_format(struct wsi_wl_display *display, + } + } + ++static void ++wsi_wl_display_add_wl_shm_format(struct wsi_wl_display *display, ++ struct u_vector *formats, ++ uint32_t wl_shm_format) ++{ ++ switch (wl_shm_format) { ++ case WL_SHM_FORMAT_XBGR8888: ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_R8G8B8_SRGB); ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_R8G8B8_UNORM); ++ FALLTHROUGH; ++ case WL_SHM_FORMAT_ABGR8888: ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_R8G8B8A8_SRGB); ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_R8G8B8A8_UNORM); ++ break; ++ case WL_SHM_FORMAT_XRGB8888: ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_B8G8R8_SRGB); ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_B8G8R8_UNORM); ++ FALLTHROUGH; ++ case WL_SHM_FORMAT_ARGB8888: ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_B8G8R8A8_SRGB); ++ wsi_wl_display_add_vk_format(display, formats, ++ VK_FORMAT_B8G8R8A8_UNORM); ++ break; ++ } ++} ++ ++ + static void + drm_handle_device(void *data, struct wl_drm *drm, const char *name) + { +@@ -232,6 +276,23 @@ wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) + } + } + ++static uint32_t ++wl_shm_format_for_vk_format(VkFormat vk_format, bool alpha) ++{ ++ switch (vk_format) { ++ case VK_FORMAT_R8G8B8A8_UNORM: ++ case VK_FORMAT_R8G8B8A8_SRGB: ++ return alpha ? WL_SHM_FORMAT_ABGR8888 : WL_SHM_FORMAT_XBGR8888; ++ case VK_FORMAT_B8G8R8A8_UNORM: ++ case VK_FORMAT_B8G8R8A8_SRGB: ++ return alpha ? WL_SHM_FORMAT_ARGB8888 : WL_SHM_FORMAT_XRGB8888; ++ ++ default: ++ assert(!"Unsupported Vulkan format"); ++ return 0; ++ } ++} ++ + static void + drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) + { +@@ -311,12 +372,34 @@ static const struct zwp_linux_dmabuf_v1_listener dmabuf_listener = { + dmabuf_handle_modifier, + }; + ++static void ++shm_handle_format(void *data, struct wl_shm *shm, uint32_t format) ++{ ++ struct wsi_wl_display *display = data; ++ if (display->swrast.formats.element_size == 0) ++ return; ++ ++ wsi_wl_display_add_wl_shm_format(display, &display->swrast.formats, format); ++} ++ ++static const struct wl_shm_listener shm_listener = { ++ .format = shm_handle_format ++}; ++ + static void + registry_handle_global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) + { + struct wsi_wl_display *display = data; + ++ if (display->sw) { ++ if (strcmp(interface, "wl_shm") == 0) { ++ display->swrast.wl_shm = wl_registry_bind(registry, name, &wl_shm_interface, 1); ++ wl_shm_add_listener(display->swrast.wl_shm, &shm_listener, display); ++ } ++ return; ++ } ++ + if (strcmp(interface, "wl_drm") == 0) { + assert(display->drm.wl_drm == NULL); + +@@ -348,10 +431,13 @@ wsi_wl_display_finish(struct wsi_wl_display *display) + { + assert(display->refcount == 0); + ++ u_vector_finish(&display->swrast.formats); + u_vector_finish(&display->drm.formats); + u_vector_finish(&display->dmabuf.formats); + u_vector_finish(&display->dmabuf.modifiers.argb8888); + u_vector_finish(&display->dmabuf.modifiers.xrgb8888); ++ if (display->swrast.wl_shm) ++ wl_shm_destroy(display->swrast.wl_shm); + if (display->drm.wl_drm) + wl_drm_destroy(display->drm.wl_drm); + if (display->dmabuf.wl_dmabuf) +@@ -366,16 +452,18 @@ static VkResult + wsi_wl_display_init(struct wsi_wayland *wsi_wl, + struct wsi_wl_display *display, + struct wl_display *wl_display, +- bool get_format_list) ++ bool get_format_list, bool sw) + { + VkResult result = VK_SUCCESS; + memset(display, 0, sizeof(*display)); + + display->wsi_wl = wsi_wl; + display->wl_display = wl_display; ++ display->sw = sw; + + if (get_format_list) { +- if (!u_vector_init(&display->drm.formats, sizeof(VkFormat), 8) || ++ if (!u_vector_init(&display->swrast.formats, sizeof(VkFormat), 8) || ++ !u_vector_init(&display->drm.formats, sizeof(VkFormat), 8) || + !u_vector_init(&display->dmabuf.formats, sizeof(VkFormat), 8) || + !u_vector_init(&display->dmabuf.modifiers.argb8888, + sizeof(uint64_t), 32) || +@@ -414,7 +502,7 @@ wsi_wl_display_init(struct wsi_wayland *wsi_wl, + wl_display_roundtrip_queue(display->wl_display, display->queue); + + /* Round-trip again to get formats, modifiers and capabilities */ +- if (display->drm.wl_drm || display->dmabuf.wl_dmabuf) ++ if (display->drm.wl_drm || display->dmabuf.wl_dmabuf || display->swrast.wl_shm) + wl_display_roundtrip_queue(display->wl_display, display->queue); + + if (wsi_wl->wsi->force_bgra8_unorm_first) { +@@ -432,8 +520,10 @@ wsi_wl_display_init(struct wsi_wayland *wsi_wl, + } + } + ++ if (display->sw) ++ display->formats = &display->swrast.formats; + /* We need prime support for wl_drm */ +- if (display->drm.wl_drm && ++ else if (display->drm.wl_drm && + (display->drm.capabilities & WL_DRM_CAPABILITY_PRIME)) { + display->formats = &display->drm.formats; + } else if (display->dmabuf.wl_dmabuf) { +@@ -463,6 +553,7 @@ fail: + + static VkResult + wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display, ++ bool sw, + struct wsi_wl_display **display_out) + { + struct wsi_wl_display *display = +@@ -471,7 +562,8 @@ wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display, + if (!display) + return VK_ERROR_OUT_OF_HOST_MEMORY; + +- VkResult result = wsi_wl_display_init(wsi, display, wl_display, true); ++ VkResult result = wsi_wl_display_init(wsi, display, wl_display, true, ++ sw); + if (result != VK_SUCCESS) { + vk_free(wsi->alloc, display); + return result; +@@ -509,7 +601,8 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device, + (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + struct wsi_wl_display display; +- VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false); ++ VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false, ++ wsi_device->sw); + if (ret == VK_SUCCESS) + wsi_wl_display_finish(&display); + +@@ -612,7 +705,8 @@ wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, + (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + struct wsi_wl_display display; +- if (wsi_wl_display_init(wsi, &display, surface->display, true)) ++ if (wsi_wl_display_init(wsi, &display, surface->display, true, ++ wsi_device->sw)) + return VK_ERROR_SURFACE_LOST_KHR; + + VK_OUTARRAY_MAKE(out, pSurfaceFormats, pSurfaceFormatCount); +@@ -642,7 +736,8 @@ wsi_wl_surface_get_formats2(VkIcdSurfaceBase *icd_surface, + (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + struct wsi_wl_display display; +- if (wsi_wl_display_init(wsi, &display, surface->display, true)) ++ if (wsi_wl_display_init(wsi, &display, surface->display, true, ++ wsi_device->sw)) + return VK_ERROR_SURFACE_LOST_KHR; + + VK_OUTARRAY_MAKE(out, pSurfaceFormats, pSurfaceFormatCount); +@@ -722,10 +817,12 @@ struct wsi_wl_image { + struct wsi_image base; + struct wl_buffer * buffer; + bool busy; ++ void * data_ptr; ++ uint32_t data_size; + }; + + struct wsi_wl_swapchain { +- struct wsi_swapchain base; ++ struct wsi_swapchain base; + + struct wsi_wl_display *display; + +@@ -742,6 +839,7 @@ struct wsi_wl_swapchain { + VkExtent2D extent; + VkFormat vk_format; + uint32_t drm_format; ++ uint32_t shm_format; + + uint32_t num_drm_modifiers; + const uint64_t * drm_modifiers; +@@ -859,6 +957,23 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, + { + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain; + ++ if (chain->display->sw) { ++ struct wsi_wl_image *image = &chain->images[image_index]; ++ void *dptr = image->data_ptr; ++ void *sptr; ++ chain->base.wsi->MapMemory(chain->base.device, ++ image->base.memory, ++ 0, 0, 0, &sptr); ++ ++ for (unsigned r = 0; r < chain->extent.height; r++) { ++ memcpy(dptr, sptr, image->base.row_pitches[0]); ++ dptr += image->base.row_pitches[0]; ++ sptr += image->base.row_pitches[0]; ++ } ++ chain->base.wsi->UnmapMemory(chain->base.device, ++ image->base.memory); ++ ++ } + if (chain->base.present_mode == VK_PRESENT_MODE_FIFO_KHR) { + while (!chain->fifo_ready) { + int ret = wl_display_dispatch_queue(chain->display->wl_display, +@@ -928,7 +1043,31 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain, + if (result != VK_SUCCESS) + return result; + +- if (!chain->drm_wrapper) { ++ if (display->sw) { ++ int fd, stride; ++ ++ stride = image->base.row_pitches[0]; ++ image->data_size = stride * chain->extent.height; ++ ++ /* Create a shareable buffer */ ++ fd = os_create_anonymous_file(image->data_size, NULL); ++ if (fd < 0) ++ goto fail_image; ++ ++ image->data_ptr = mmap(NULL, image->data_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ++ if (image->data_ptr == MAP_FAILED) { ++ close(fd); ++ goto fail_image; ++ } ++ /* Share it in a wl_buffer */ ++ struct wl_shm_pool *pool = wl_shm_create_pool(display->swrast.wl_shm, fd, image->data_size); ++ wl_proxy_set_queue((struct wl_proxy *)pool, display->queue); ++ image->buffer = wl_shm_pool_create_buffer(pool, 0, chain->extent.width, ++ chain->extent.height, stride, ++ chain->shm_format); ++ wl_shm_pool_destroy(pool); ++ close(fd); ++ } else if (!chain->drm_wrapper) { + /* Only request modifiers if we have dmabuf, else it must be implicit. */ + assert(display->dmabuf.wl_dmabuf); + assert(image->base.drm_modifier != DRM_FORMAT_MOD_INVALID); +@@ -995,6 +1134,8 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain, + if (chain->images[i].buffer) { + wl_buffer_destroy(chain->images[i].buffer); + wsi_destroy_image(&chain->base, &chain->images[i].base); ++ if (chain->images[i].data_ptr) ++ munmap(chain->images[i].data_ptr, chain->images[i].data_size); + } + } + +@@ -1049,8 +1190,10 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + /* Mark a bunch of stuff as NULL. This way we can just call + * destroy_swapchain for cleanup. + */ +- for (uint32_t i = 0; i < num_images; i++) ++ for (uint32_t i = 0; i < num_images; i++) { + chain->images[i].buffer = NULL; ++ chain->images[i].data_ptr = NULL; ++ } + chain->surface = NULL; + chain->drm_wrapper = NULL; + chain->frame = NULL; +@@ -1066,7 +1209,10 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + chain->base.image_count = num_images; + chain->extent = pCreateInfo->imageExtent; + chain->vk_format = pCreateInfo->imageFormat; +- chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, alpha); ++ if (wsi_device->sw) ++ chain->shm_format = wl_shm_format_for_vk_format(chain->vk_format, alpha); ++ else ++ chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, alpha); + + if (pCreateInfo->oldSwapchain) { + /* If we have an oldSwapchain parameter, copy the display struct over +@@ -1076,7 +1222,8 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + chain->display = wsi_wl_display_ref(old_chain->display); + } else { + chain->display = NULL; +- result = wsi_wl_display_create(wsi, surface->display, &chain->display); ++ result = wsi_wl_display_create(wsi, surface->display, ++ wsi_device->sw, &chain->display); + if (result != VK_SUCCESS) + goto fail; + } +diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c +index 54769b81ccc..fa0c3d997dc 100644 +--- a/src/vulkan/wsi/wsi_common_x11.c ++++ b/src/vulkan/wsi/wsi_common_x11.c +@@ -439,8 +439,10 @@ VkBool32 wsi_get_physical_device_xcb_presentation_support( + if (!wsi_conn) + return false; + +- if (!wsi_x11_check_for_dri3(wsi_conn)) +- return false; ++ if (!wsi_device->sw) { ++ if (!wsi_x11_check_for_dri3(wsi_conn)) ++ return false; ++ } + + unsigned visual_depth; + if (!connection_get_visualtype(connection, visual_id, &visual_depth)) +@@ -484,9 +486,11 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface, + if (!wsi_conn) + return VK_ERROR_OUT_OF_HOST_MEMORY; + +- if (!wsi_x11_check_for_dri3(wsi_conn)) { +- *pSupported = false; +- return VK_SUCCESS; ++ if (!wsi_device->sw) { ++ if (!wsi_x11_check_for_dri3(wsi_conn)) { ++ *pSupported = false; ++ return VK_SUCCESS; ++ } + } + + unsigned visual_depth; diff --git a/SPECS/mesa.spec b/SPECS/mesa.spec index 4c1ccd7..64bb68e 100644 --- a/SPECS/mesa.spec +++ b/SPECS/mesa.spec @@ -25,10 +25,6 @@ %define with_xa 1 %endif -%ifnarch %{x86} -%global with_asm 1 -%endif - %global dri_drivers %{?platform_drivers} %if 0%{?with_vulkan_hw} @@ -43,8 +39,8 @@ Name: mesa Summary: Mesa graphics libraries -Version: 20.3.3 -Release: 2%{?rctag:.%{rctag}}%{?dist} +Version: 21.1.5 +Release: 1%{?rctag:.%{rctag}}%{?dist} License: MIT URL: http://www.mesa3d.org @@ -59,10 +55,7 @@ Source3: Makefile Source4: Mesa-MLAA-License-Clarification-Email.txt Patch0: lavapipe-disable-env-var.patch -Patch1: mesa-20.3.3-stable-fixes.patch -Patch2: anv-remove-warning.patch - -Patch10: cpu-affinity-fixes-20.3.3.patch +Patch1: mesa-vk-wsi-sw-fixes.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -329,8 +322,9 @@ export ASFLAGS="--generate-missing-build-notes=yes" %meson -Dcpp_std=gnu++14 \ -Db_ndebug=true \ -Dplatforms=x11,wayland \ - -Ddri3=true \ + -Ddri3=enabled \ -Ddri-drivers=%{?dri_drivers} \ + -Dosmesa=true \ %if 0%{?with_hardware} -Dgallium-drivers=swrast%{?with_iris:,iris},virgl,nouveau%{?with_vmware:,svga},radeonsi,r600%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro} \ %else @@ -344,22 +338,21 @@ export ASFLAGS="--generate-missing-build-notes=yes" -Dgallium-nine=%{?with_nine:true}%{!?with_nine:false} \ -Dgallium-opencl=%{?with_opencl:icd}%{!?with_opencl:disabled} \ -Dvulkan-drivers=%{?vulkan_drivers} \ - -Dshared-glapi=true \ - -Dgles1=false \ - -Dgles2=true \ + -Dvulkan-layers=device-select \ + -Dshared-glapi=enabled \ + -Dgles1=disabled \ + -Dgles2=enabled \ -Dopengl=true \ - -Dgbm=true \ + -Dgbm=enabled \ -Dglx=dri \ -Degl=true \ -Dglvnd=true \ - -Dasm=%{?with_asm:true}%{!?with_asm:false} \ + -Dmicrosoft-clc=disabled \ -Dllvm=true \ -Dshared-llvm=true \ -Dvalgrind=%{?with_valgrind:true}%{!?with_valgrind:false} \ -Dbuild-tests=false \ -Dselinux=true \ - -Dosmesa=gallium \ - -Dvulkan-device-select-layer=true \ %{nil} %meson_build @@ -551,12 +544,20 @@ done %if 0%{?with_vulkan_hw} %files vulkan-devel -%{_includedir}/vulkan/ %endif %changelog -* Fri Mar 26 2021 Dave Airlie - 20.3.3-2 -- Fix CPU affinity memory corruption crash (#1938788) +* Thu Jul 22 2021 Dave Airlie - 21.1.5-1 +- Fix vulkan sw with wayland, pull in .4 + .5 fixes + +* Sat Jun 19 2021 Dave Airlie - 21.1.3-1 +- rebase to 21.1.3 + +* Tue Jun 01 2021 Dave Airlie - 21.1.1-2 +- rebuild against llvm 12 + +* Thu May 20 2021 Dave Airlie - 21.1.1-1 +- Update to 21.1.1 * Tue Feb 16 2021 Dave Airlie - 20.3.3-1 - Update to 20.3.3 + upstream fixes for qemu regression