diff --git a/0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch b/0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch
deleted file mode 100644
index 1f6816c..0000000
--- a/0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 70259f75a5546d331b0d687227341f653a4bf544 Mon Sep 17 00:00:00 2001
-From: Dave Airlie <airlied@redhat.com>
-Date: Thu, 25 Mar 2021 08:34:28 +1000
-Subject: [PATCH] drisw: move zink down the list below the sw drivers.
-
-We don't ever want drisw path picking zink as the driver,
-we can revisit this when the penny wrapper work gets further
-along.
-
-This selection causes systems with nvidia/intel dual-gpus
-to try and pick the intel gpu for rendering in the nvidia
-context if there is no nvidia GL driver or accel doesn't work.
-
-This is a partial revert of the original commit.
-
-Fixes: 4a3b42a717ce ("drisw: Prefer hardware-layered sw-winsys drivers over pure sw")
----
- src/gallium/auxiliary/target-helpers/inline_sw_helper.h | 6 +++---
- src/gallium/auxiliary/target-helpers/sw_helper.h        | 6 +++---
- 2 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
-index c494840c44e..76eda8467b8 100644
---- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
-+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
-@@ -81,9 +81,6 @@ sw_screen_create(struct sw_winsys *winsys)
-    UNUSED bool only_sw = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false);
-    const char *drivers[] = {
-       debug_get_option("GALLIUM_DRIVER", ""),
--#if defined(GALLIUM_ZINK)
--      only_sw ? "" : "zink",
--#endif
- #if defined(GALLIUM_D3D12)
-       only_sw ? "" : "d3d12",
- #endif
-@@ -95,6 +92,9 @@ sw_screen_create(struct sw_winsys *winsys)
- #endif
- #if defined(GALLIUM_SWR)
-       "swr",
-+#endif
-+#if defined(GALLIUM_ZINK)
-+      only_sw ? "" : "zink",
- #endif
-    };
- 
-diff --git a/src/gallium/auxiliary/target-helpers/sw_helper.h b/src/gallium/auxiliary/target-helpers/sw_helper.h
-index d9469d9f5e3..88a5086d261 100644
---- a/src/gallium/auxiliary/target-helpers/sw_helper.h
-+++ b/src/gallium/auxiliary/target-helpers/sw_helper.h
-@@ -86,9 +86,6 @@ sw_screen_create(struct sw_winsys *winsys)
-    UNUSED bool only_sw = env_var_as_boolean("LIBGL_ALWAYS_SOFTWARE", false);
-    const char *drivers[] = {
-       debug_get_option("GALLIUM_DRIVER", ""),
--#if defined(GALLIUM_ZINK)
--      only_sw ? "" : "zink",
--#endif
- #if defined(GALLIUM_D3D12)
-       only_sw ? "" : "d3d12",
- #endif
-@@ -100,6 +97,9 @@ sw_screen_create(struct sw_winsys *winsys)
- #endif
- #if defined(GALLIUM_SWR)
-       "swr",
-+#endif
-+#if defined(GALLIUM_ZINK)
-+      only_sw ? "" : "zink",
- #endif
-    };
- 
--- 
-2.29.2
-
diff --git a/cpu_caps_fixes.patch b/cpu_caps_fixes.patch
deleted file mode 100644
index 31d5283..0000000
--- a/cpu_caps_fixes.patch
+++ /dev/null
@@ -1,1506 +0,0 @@
-diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
-index d5c7891de62..b02ea8879e6 100644
---- a/src/amd/common/ac_gpu_info.c
-+++ b/src/amd/common/ac_gpu_info.c
-@@ -510,8 +510,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
-    util_cpu_detect();
-    info->smart_access_memory = info->all_vram_visible &&
-                                info->chip_class >= GFX10_3 &&
--                               util_cpu_caps.family >= CPU_AMD_ZEN3 &&
--                               util_cpu_caps.family < CPU_AMD_LAST;
-+                               util_get_cpu_caps()->family >= CPU_AMD_ZEN3 &&
-+                               util_get_cpu_caps()->family < CPU_AMD_LAST;
- 
-    /* Set chip identification. */
-    info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
-diff --git a/src/amd/compiler/tests/main.cpp b/src/amd/compiler/tests/main.cpp
-index 8f5e8ea914b..e0abf63b525 100644
---- a/src/amd/compiler/tests/main.cpp
-+++ b/src/amd/compiler/tests/main.cpp
-@@ -34,6 +34,8 @@
- #include "aco_ir.h"
- #include "framework.h"
- 
-+#include "util/u_cpu_detect.h"
-+
- static const char *help_message =
-    "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
-    "\n"
-@@ -241,6 +243,8 @@ int main(int argc, char **argv)
-       return 99;
-    }
- 
-+   util_cpu_detect();
-+
-    if (do_list) {
-       for (auto test : tests)
-          printf("%s\n", test.first.c_str());
-diff --git a/src/compiler/glsl/standalone.cpp b/src/compiler/glsl/standalone.cpp
-index b34583e54bd..ad1da65bcef 100644
---- a/src/compiler/glsl/standalone.cpp
-+++ b/src/compiler/glsl/standalone.cpp
-@@ -398,6 +398,8 @@ standalone_compile_shader(const struct standalone_options *_options,
-    int status = EXIT_SUCCESS;
-    bool glsl_es = false;
- 
-+   util_cpu_detect();
-+
-    options = _options;
- 
-    switch (options->glsl_version) {
-diff --git a/src/compiler/nir/tests/negative_equal_tests.cpp b/src/compiler/nir/tests/negative_equal_tests.cpp
-index ff9eeb27f40..c7cf53543bb 100644
---- a/src/compiler/nir/tests/negative_equal_tests.cpp
-+++ b/src/compiler/nir/tests/negative_equal_tests.cpp
-@@ -36,6 +36,7 @@ protected:
-    const_value_negative_equal_test()
-    {
-       glsl_type_singleton_init_or_ref();
-+      util_cpu_detect();
- 
-       memset(c1, 0, sizeof(c1));
-       memset(c2, 0, sizeof(c2));
-@@ -55,6 +56,7 @@ protected:
-    alu_srcs_negative_equal_test()
-    {
-       glsl_type_singleton_init_or_ref();
-+      util_cpu_detect();
- 
-       static const nir_shader_compiler_options options = { };
-       bld = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options,
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
-index 165d73d94fc..33269e528fe 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
-@@ -104,13 +104,13 @@ lp_build_min_simple(struct lp_build_context *bld,
- 
-    /* TODO: optimize the constant case */
- 
--   if (type.floating && util_cpu_caps.has_sse) {
-+   if (type.floating && util_get_cpu_caps()->has_sse) {
-       if (type.width == 32) {
-          if (type.length == 1) {
-             intrinsic = "llvm.x86.sse.min.ss";
-             intr_size = 128;
-          }
--         else if (type.length <= 4 || !util_cpu_caps.has_avx) {
-+         else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) {
-             intrinsic = "llvm.x86.sse.min.ps";
-             intr_size = 128;
-          }
-@@ -119,12 +119,12 @@ lp_build_min_simple(struct lp_build_context *bld,
-             intr_size = 256;
-          }
-       }
--      if (type.width == 64 && util_cpu_caps.has_sse2) {
-+      if (type.width == 64 && util_get_cpu_caps()->has_sse2) {
-          if (type.length == 1) {
-             intrinsic = "llvm.x86.sse2.min.sd";
-             intr_size = 128;
-          }
--         else if (type.length == 2 || !util_cpu_caps.has_avx) {
-+         else if (type.length == 2 || !util_get_cpu_caps()->has_avx) {
-             intrinsic = "llvm.x86.sse2.min.pd";
-             intr_size = 128;
-          }
-@@ -134,7 +134,7 @@ lp_build_min_simple(struct lp_build_context *bld,
-          }
-       }
-    }
--   else if (type.floating && util_cpu_caps.has_altivec) {
-+   else if (type.floating && util_get_cpu_caps()->has_altivec) {
-       if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
-           nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
-          debug_printf("%s: altivec doesn't support nan return nan behavior\n",
-@@ -144,7 +144,7 @@ lp_build_min_simple(struct lp_build_context *bld,
-          intrinsic = "llvm.ppc.altivec.vminfp";
-          intr_size = 128;
-       }
--   } else if (util_cpu_caps.has_altivec) {
-+   } else if (util_get_cpu_caps()->has_altivec) {
-       intr_size = 128;
-       if (type.width == 8) {
-          if (!type.sign) {
-@@ -174,7 +174,7 @@ lp_build_min_simple(struct lp_build_context *bld,
-        * The sse intrinsics return the second operator in case of nan by
-        * default so we need to special code to handle those.
-        */
--      if (util_cpu_caps.has_sse && type.floating &&
-+      if (util_get_cpu_caps()->has_sse && type.floating &&
-           nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-           nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
-           nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
-@@ -274,13 +274,13 @@ lp_build_max_simple(struct lp_build_context *bld,
- 
-    /* TODO: optimize the constant case */
- 
--   if (type.floating && util_cpu_caps.has_sse) {
-+   if (type.floating && util_get_cpu_caps()->has_sse) {
-       if (type.width == 32) {
-          if (type.length == 1) {
-             intrinsic = "llvm.x86.sse.max.ss";
-             intr_size = 128;
-          }
--         else if (type.length <= 4 || !util_cpu_caps.has_avx) {
-+         else if (type.length <= 4 || !util_get_cpu_caps()->has_avx) {
-             intrinsic = "llvm.x86.sse.max.ps";
-             intr_size = 128;
-          }
-@@ -289,12 +289,12 @@ lp_build_max_simple(struct lp_build_context *bld,
-             intr_size = 256;
-          }
-       }
--      if (type.width == 64 && util_cpu_caps.has_sse2) {
-+      if (type.width == 64 && util_get_cpu_caps()->has_sse2) {
-          if (type.length == 1) {
-             intrinsic = "llvm.x86.sse2.max.sd";
-             intr_size = 128;
-          }
--         else if (type.length == 2 || !util_cpu_caps.has_avx) {
-+         else if (type.length == 2 || !util_get_cpu_caps()->has_avx) {
-             intrinsic = "llvm.x86.sse2.max.pd";
-             intr_size = 128;
-          }
-@@ -304,7 +304,7 @@ lp_build_max_simple(struct lp_build_context *bld,
-          }
-       }
-    }
--   else if (type.floating && util_cpu_caps.has_altivec) {
-+   else if (type.floating && util_get_cpu_caps()->has_altivec) {
-       if (nan_behavior == GALLIVM_NAN_RETURN_NAN ||
-           nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
-          debug_printf("%s: altivec doesn't support nan return nan behavior\n",
-@@ -314,7 +314,7 @@ lp_build_max_simple(struct lp_build_context *bld,
-          intrinsic = "llvm.ppc.altivec.vmaxfp";
-          intr_size = 128;
-       }
--   } else if (util_cpu_caps.has_altivec) {
-+   } else if (util_get_cpu_caps()->has_altivec) {
-      intr_size = 128;
-      if (type.width == 8) {
-        if (!type.sign) {
-@@ -338,7 +338,7 @@ lp_build_max_simple(struct lp_build_context *bld,
-    }
- 
-    if (intrinsic) {
--      if (util_cpu_caps.has_sse && type.floating &&
-+      if (util_get_cpu_caps()->has_sse && type.floating &&
-           nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-           nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
-           nan_behavior != GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) {
-@@ -472,12 +472,12 @@ lp_build_add(struct lp_build_context *bld,
-             return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
-          }
-          if (type.width * type.length == 128) {
--            if (util_cpu_caps.has_sse2) {
-+            if (util_get_cpu_caps()->has_sse2) {
-                if (type.width == 8)
-                  intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
-                if (type.width == 16)
-                  intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
--            } else if (util_cpu_caps.has_altivec) {
-+            } else if (util_get_cpu_caps()->has_altivec) {
-                if (type.width == 8)
-                   intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
-                if (type.width == 16)
-@@ -485,7 +485,7 @@ lp_build_add(struct lp_build_context *bld,
-             }
-          }
-          if (type.width * type.length == 256) {
--            if (util_cpu_caps.has_avx2) {
-+            if (util_get_cpu_caps()->has_avx2) {
-                if (type.width == 8)
-                   intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b";
-                if (type.width == 16)
-@@ -713,11 +713,11 @@ lp_build_hadd_partial4(struct lp_build_context *bld,
-    tmp[2] = num_vecs > 2 ? vectors[2] : vectors[0];
-    tmp[3] = num_vecs > 3 ? vectors[3] : vectors[0];
- 
--   if (util_cpu_caps.has_sse3 && bld->type.width == 32 &&
-+   if (util_get_cpu_caps()->has_sse3 && bld->type.width == 32 &&
-        bld->type.length == 4) {
-       intrinsic = "llvm.x86.sse3.hadd.ps";
-    }
--   else if (util_cpu_caps.has_avx && bld->type.width == 32 &&
-+   else if (util_get_cpu_caps()->has_avx && bld->type.width == 32 &&
-             bld->type.length == 8) {
-       intrinsic = "llvm.x86.avx.hadd.ps.256";
-    }
-@@ -796,12 +796,12 @@ lp_build_sub(struct lp_build_context *bld,
-             return lp_build_intrinsic_binary(builder, intrin, bld->vec_type, a, b);
-          }
-          if (type.width * type.length == 128) {
--            if (util_cpu_caps.has_sse2) {
-+            if (util_get_cpu_caps()->has_sse2) {
-                if (type.width == 8)
-                   intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
-                if (type.width == 16)
-                   intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
--            } else if (util_cpu_caps.has_altivec) {
-+            } else if (util_get_cpu_caps()->has_altivec) {
-                if (type.width == 8)
-                   intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
-                if (type.width == 16)
-@@ -809,7 +809,7 @@ lp_build_sub(struct lp_build_context *bld,
-             }
-          }
-          if (type.width * type.length == 256) {
--            if (util_cpu_caps.has_avx2) {
-+            if (util_get_cpu_caps()->has_avx2) {
-                if (type.width == 8)
-                   intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b";
-                if (type.width == 16)
-@@ -1078,8 +1078,8 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
-     */
-    if (LLVM_VERSION_MAJOR < 7 &&
-        (bld->type.length == 4 || bld->type.length == 8) &&
--       ((util_cpu_caps.has_sse2 && (bld->type.sign == 0)) ||
--        util_cpu_caps.has_sse4_1)) {
-+       ((util_get_cpu_caps()->has_sse2 && (bld->type.sign == 0)) ||
-+        util_get_cpu_caps()->has_sse4_1)) {
-       const char *intrinsic = NULL;
-       LLVMValueRef aeven, aodd, beven, bodd, muleven, mulodd;
-       LLVMValueRef shuf[LP_MAX_VECTOR_WIDTH / 32], shuf_vec;
-@@ -1096,7 +1096,7 @@ lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
-       aodd = LLVMBuildShuffleVector(builder, aeven, bld->undef, shuf_vec, "");
-       bodd = LLVMBuildShuffleVector(builder, beven, bld->undef, shuf_vec, "");
- 
--      if (util_cpu_caps.has_avx2 && bld->type.length == 8) {
-+      if (util_get_cpu_caps()->has_avx2 && bld->type.length == 8) {
-          if (bld->type.sign) {
-             intrinsic = "llvm.x86.avx2.pmul.dq";
-          } else {
-@@ -1331,8 +1331,8 @@ lp_build_div(struct lp_build_context *bld,
- 
-    /* fast rcp is disabled (just uses div), so makes no sense to try that */
-    if(FALSE &&
--      ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
--       (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) &&
-+      ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) ||
-+       (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) &&
-       type.floating)
-       return lp_build_mul(bld, a, lp_build_rcp(bld, b));
- 
-@@ -1745,7 +1745,7 @@ lp_build_abs(struct lp_build_context *bld,
-       return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
-    }
- 
--   if(type.width*type.length == 128 && util_cpu_caps.has_ssse3 && LLVM_VERSION_MAJOR < 6) {
-+   if(type.width*type.length == 128 && util_get_cpu_caps()->has_ssse3 && LLVM_VERSION_MAJOR < 6) {
-       switch(type.width) {
-       case 8:
-          return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
-@@ -1755,7 +1755,7 @@ lp_build_abs(struct lp_build_context *bld,
-          return lp_build_intrinsic_unary(builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
-       }
-    }
--   else if (type.width*type.length == 256 && util_cpu_caps.has_avx2 && LLVM_VERSION_MAJOR < 6) {
-+   else if (type.width*type.length == 256 && util_get_cpu_caps()->has_avx2 && LLVM_VERSION_MAJOR < 6) {
-       switch(type.width) {
-       case 8:
-          return lp_build_intrinsic_unary(builder, "llvm.x86.avx2.pabs.b", vec_type, a);
-@@ -1897,15 +1897,15 @@ lp_build_int_to_float(struct lp_build_context *bld,
- static boolean
- arch_rounding_available(const struct lp_type type)
- {
--   if ((util_cpu_caps.has_sse4_1 &&
-+   if ((util_get_cpu_caps()->has_sse4_1 &&
-        (type.length == 1 || type.width*type.length == 128)) ||
--       (util_cpu_caps.has_avx && type.width*type.length == 256) ||
--       (util_cpu_caps.has_avx512f && type.width*type.length == 512))
-+       (util_get_cpu_caps()->has_avx && type.width*type.length == 256) ||
-+       (util_get_cpu_caps()->has_avx512f && type.width*type.length == 512))
-       return TRUE;
--   else if ((util_cpu_caps.has_altivec &&
-+   else if ((util_get_cpu_caps()->has_altivec &&
-             (type.width == 32 && type.length == 4)))
-       return TRUE;
--   else if (util_cpu_caps.has_neon)
-+   else if (util_get_cpu_caps()->has_neon)
-       return TRUE;
- 
-    return FALSE;
-@@ -1935,7 +1935,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
-    assert(type.width == 32);
- 
-    assert(lp_check_value(type, a));
--   assert(util_cpu_caps.has_sse2);
-+   assert(util_get_cpu_caps()->has_sse2);
- 
-    /* This is relying on MXCSR rounding mode, which should always be nearest. */
-    if (type.length == 1) {
-@@ -1961,7 +1961,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
-       }
-       else {
-          assert(type.width*type.length == 256);
--         assert(util_cpu_caps.has_avx);
-+         assert(util_get_cpu_caps()->has_avx);
- 
-          intrinsic = "llvm.x86.avx.cvt.ps2dq.256";
-       }
-@@ -1987,7 +1987,7 @@ lp_build_round_altivec(struct lp_build_context *bld,
-    assert(type.floating);
- 
-    assert(lp_check_value(type, a));
--   assert(util_cpu_caps.has_altivec);
-+   assert(util_get_cpu_caps()->has_altivec);
- 
-    (void)type;
- 
-@@ -2014,7 +2014,7 @@ lp_build_round_arch(struct lp_build_context *bld,
-                     LLVMValueRef a,
-                     enum lp_build_round_mode mode)
- {
--   if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) {
-+   if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon) {
-       LLVMBuilderRef builder = bld->gallivm->builder;
-       const struct lp_type type = bld->type;
-       const char *intrinsic_root;
-@@ -2042,7 +2042,7 @@ lp_build_round_arch(struct lp_build_context *bld,
-       lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type);
-       return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
-    }
--   else /* (util_cpu_caps.has_altivec) */
-+   else /* (util_get_cpu_caps()->has_altivec) */
-      return lp_build_round_altivec(bld, a, mode);
- }
- 
-@@ -2377,9 +2377,9 @@ lp_build_iround(struct lp_build_context *bld,
- 
-    assert(lp_check_value(type, a));
- 
--   if ((util_cpu_caps.has_sse2 &&
-+   if ((util_get_cpu_caps()->has_sse2 &&
-        ((type.width == 32) && (type.length == 1 || type.length == 4))) ||
--       (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
-+       (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) {
-       return lp_build_iround_nearest_sse2(bld, a);
-    }
-    if (arch_rounding_available(type)) {
-@@ -2664,8 +2664,8 @@ lp_build_rcp(struct lp_build_context *bld,
-     * particular uses that require less workarounds.
-     */
- 
--   if (FALSE && ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
--         (util_cpu_caps.has_avx && type.width == 32 && type.length == 8))){
-+   if (FALSE && ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) ||
-+         (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8))){
-       const unsigned num_iterations = 0;
-       LLVMValueRef res;
-       unsigned i;
-@@ -2784,8 +2784,8 @@ lp_build_fast_rsqrt_available(struct lp_type type)
- {
-    assert(type.floating);
- 
--   if ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
--       (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
-+   if ((util_get_cpu_caps()->has_sse && type.width == 32 && type.length == 4) ||
-+       (util_get_cpu_caps()->has_avx && type.width == 32 && type.length == 8)) {
-       return true;
-    }
-    return false;
-@@ -3694,7 +3694,7 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
- LLVMValueRef
- lp_build_fpstate_get(struct gallivm_state *gallivm)
- {
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       LLVMBuilderRef builder = gallivm->builder;
-       LLVMValueRef mxcsr_ptr = lp_build_alloca(
-          gallivm,
-@@ -3715,7 +3715,7 @@ void
- lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
-                                   boolean zero)
- {
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       /* turn on DAZ (64) | FTZ (32768) = 32832 if available */
-       int daz_ftz = _MM_FLUSH_ZERO_MASK;
- 
-@@ -3724,7 +3724,7 @@ lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
-       LLVMValueRef mxcsr =
-          LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
- 
--      if (util_cpu_caps.has_daz) {
-+      if (util_get_cpu_caps()->has_daz) {
-          /* Enable denormals are zero mode */
-          daz_ftz |= _MM_DENORMALS_ZERO_MASK;
-       }
-@@ -3745,7 +3745,7 @@ void
- lp_build_fpstate_set(struct gallivm_state *gallivm,
-                      LLVMValueRef mxcsr_ptr)
- {
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       LLVMBuilderRef builder = gallivm->builder;
-       mxcsr_ptr = LLVMBuildPointerCast(builder, mxcsr_ptr,
-                      LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
-index 31affad2233..1c050535301 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
-@@ -110,7 +110,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
-    LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
-    LLVMValueRef h;
- 
--   if (util_cpu_caps.has_f16c &&
-+   if (util_get_cpu_caps()->has_f16c &&
-        (src_length == 4 || src_length == 8)) {
-       if (LLVM_VERSION_MAJOR < 11) {
-          const char *intrinsic = NULL;
-@@ -176,7 +176,7 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
-     * useless.
-     */
- 
--   if (util_cpu_caps.has_f16c &&
-+   if (util_get_cpu_caps()->has_f16c &&
-        (length == 4 || length == 8)) {
-       struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
-       unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
-@@ -498,7 +498,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
- 
-       /* Special case 4x4x32 --> 1x16x8 */
-       if (src_type.length == 4 &&
--            (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
-+            (util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec))
-       {
-          num_dsts = (num_srcs + 3) / 4;
-          dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4;
-@@ -509,7 +509,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
- 
-       /* Special case 2x8x32 --> 1x16x8 */
-       if (src_type.length == 8 &&
--          util_cpu_caps.has_avx)
-+          util_get_cpu_caps()->has_avx)
-       {
-          num_dsts = (num_srcs + 1) / 2;
-          dst_type->length = num_srcs * 8 >= 16 ? 16 : num_srcs * 8;
-@@ -606,7 +606,7 @@ lp_build_conv(struct gallivm_state *gallivm,
-        ((dst_type.length == 16 && 4 * num_dsts == num_srcs) ||
-         (num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) &&
- 
--       (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
-+       (util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec))
-    {
-       struct lp_build_context bld;
-       struct lp_type int16_type, int32_type;
-@@ -719,7 +719,7 @@ lp_build_conv(struct gallivm_state *gallivm,
-       ((dst_type.length == 16 && 2 * num_dsts == num_srcs) ||
-        (num_dsts == 1 && dst_type.length * num_srcs == 8)) &&
- 
--      util_cpu_caps.has_avx) {
-+      util_get_cpu_caps()->has_avx) {
- 
-       struct lp_build_context bld;
-       struct lp_type int16_type, int32_type;
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
-index 174857e06d9..e17c7881e7d 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c
-@@ -642,8 +642,8 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm,
-        * XXX with sse2 and 16x8 vectors, should use pavgb even when n == 1.
-        * Much cheaper (but we don't care that much if n == 1).
-        */
--      if ((util_cpu_caps.has_sse2 && n == 4) ||
--          (util_cpu_caps.has_avx2 && n == 8)) {
-+      if ((util_get_cpu_caps()->has_sse2 && n == 4) ||
-+          (util_get_cpu_caps()->has_avx2 && n == 8)) {
-          color2_2 = lp_build_pavgb(&bld8, colors0, colors1);
-          color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, "");
-       }
-@@ -1350,7 +1350,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
-    if (is_dxt1_variant) {
-       LLVMValueRef color23_2, color2_2;
- 
--      if (util_cpu_caps.has_sse2) {
-+      if (util_get_cpu_caps()->has_sse2) {
-          LLVMValueRef intrargs[2];
-          intrargs[0] = LLVMBuildBitCast(builder, color01, bld8.vec_type, "");
-          /* same interleave as for lerp23 - correct result in 2nd element */
-@@ -1389,7 +1389,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
-       color23 = lp_build_select(&bld32, sel_mask, color23, color23_2);
-    }
- 
--   if (util_cpu_caps.has_ssse3) {
-+   if (util_get_cpu_caps()->has_ssse3) {
-       /*
-        * Use pshufb as mini-lut. (Only doable with intrinsics as the
-        * final shuffles are non-constant. pshufb is awesome!)
-@@ -1689,7 +1689,7 @@ s3tc_decode_block_dxt5(struct gallivm_state *gallivm,
-    type16.sign = FALSE;
-    sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, "");
- 
--   if (!util_cpu_caps.has_ssse3) {
-+   if (!util_get_cpu_caps()->has_ssse3) {
-       LLVMValueRef acodeg, mask1, acode0, acode1;
- 
-       /* extraction of the 3 bit values into something more useful is HARD */
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
-index 121452d7596..97deffe1de0 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
-@@ -90,7 +90,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm,
-     * per element. Didn't measure performance but cuts shader size
-     * by quite a bit (less difference if cpu has no sse4.1 support).
-     */
--   if (util_cpu_caps.has_sse2 && n > 1) {
-+   if (util_get_cpu_caps()->has_sse2 && n > 1) {
-       LLVMValueRef sel, tmp, tmp2;
-       struct lp_build_context bld32;
- 
-@@ -174,7 +174,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
-     * per element. Didn't measure performance but cuts shader size
-     * by quite a bit (less difference if cpu has no sse4.1 support).
-     */
--   if (util_cpu_caps.has_sse2 && n > 1) {
-+   if (util_get_cpu_caps()->has_sse2 && n > 1) {
-       LLVMValueRef sel, tmp;
-       struct lp_build_context bld32;
- 
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
-index e991b0dc375..42cc17371a0 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
-@@ -488,7 +488,7 @@ lp_build_gather(struct gallivm_state *gallivm,
-        * 32bit/64bit fetches you're doing it wrong (this is gather, not
-        * conversion) and it would be awkward for floats.
-        */
--   } else if (util_cpu_caps.has_avx2 && !need_expansion &&
-+   } else if (util_get_cpu_caps()->has_avx2 && !need_expansion &&
-               src_width == 32 && (length == 4 || length == 8)) {
-       return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
-                                   base_ptr, offsets);
-@@ -500,7 +500,7 @@ lp_build_gather(struct gallivm_state *gallivm,
-     * (In general, should be more of a win if the fetch is 256bit wide -
-     * this is true for the 32bit case above too.)
-     */
--   } else if (0 && util_cpu_caps.has_avx2 && !need_expansion &&
-+   } else if (0 && util_get_cpu_caps()->has_avx2 && !need_expansion &&
-               src_width == 64 && (length == 2 || length == 4)) {
-       return lp_build_gather_avx2(gallivm, length, src_width, dst_type,
-                                   base_ptr, offsets);
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
-index 685ed0e58aa..dd428242cb9 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
-@@ -433,6 +433,7 @@ lp_build_init(void)
-    /* For simulating less capable machines */
- #ifdef DEBUG
-    if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
-+      extern struct util_cpu_caps_t util_cpu_caps;
-       assert(util_cpu_caps.has_sse2);
-       util_cpu_caps.has_sse3 = 0;
-       util_cpu_caps.has_ssse3 = 0;
-@@ -445,7 +446,7 @@ lp_build_init(void)
-    }
- #endif
- 
--   if (util_cpu_caps.has_avx2 || util_cpu_caps.has_avx) {
-+   if (util_get_cpu_caps()->has_avx2 || util_get_cpu_caps()->has_avx) {
-       lp_native_vector_width = 256;
-    } else {
-       /* Leave it at 128, even when no SIMD extensions are available.
-@@ -460,16 +461,16 @@ lp_build_init(void)
- #if LLVM_VERSION_MAJOR < 4
-    if (lp_native_vector_width <= 128) {
-       /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by
--       * "util_cpu_caps.has_avx" predicate, and lack the
-+       * "util_get_cpu_caps()->has_avx" predicate, and lack the
-        * "lp_native_vector_width > 128" predicate. And also to ensure a more
-        * consistent behavior, allowing one to test SSE2 on AVX machines.
-        * XXX: should not play games with util_cpu_caps directly as it might
-        * get used for other things outside llvm too.
-        */
--      util_cpu_caps.has_avx = 0;
--      util_cpu_caps.has_avx2 = 0;
--      util_cpu_caps.has_f16c = 0;
--      util_cpu_caps.has_fma = 0;
-+      util_get_cpu_caps()->has_avx = 0;
-+      util_get_cpu_caps()->has_avx2 = 0;
-+      util_get_cpu_caps()->has_f16c = 0;
-+      util_get_cpu_caps()->has_fma = 0;
-    }
- #endif
- 
-@@ -482,7 +483,7 @@ lp_build_init(void)
-     * Right now denorms get explicitly disabled (but elsewhere) for x86,
-     * whereas ppc64 explicitly enables them...
-     */
--   if (util_cpu_caps.has_altivec) {
-+   if (util_get_cpu_caps()->has_altivec) {
-       unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
-                                 0xFFFF, 0xFFFF, 0xFFFE, 0xFFFF };
-       __asm (
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
-index 315977ae745..3ed3b5a74b1 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
-@@ -196,7 +196,7 @@ lp_build_compare(struct gallivm_state *gallivm,
- 
-    if (!type.floating && !type.sign &&
-        type.width * type.length == 128 &&
--       util_cpu_caps.has_sse2 &&
-+       util_get_cpu_caps()->has_sse2 &&
-        (func == PIPE_FUNC_LESS ||
-         func == PIPE_FUNC_LEQUAL ||
-         func == PIPE_FUNC_GREATER ||
-@@ -348,11 +348,11 @@ lp_build_select(struct lp_build_context *bld,
- 
-       res = LLVMBuildSelect(builder, mask, a, b, "");
-    }
--   else if (((util_cpu_caps.has_sse4_1 &&
-+   else if (((util_get_cpu_caps()->has_sse4_1 &&
-               type.width * type.length == 128) ||
--             (util_cpu_caps.has_avx &&
-+             (util_get_cpu_caps()->has_avx &&
-               type.width * type.length == 256 && type.width >= 32) ||
--             (util_cpu_caps.has_avx2 &&
-+             (util_get_cpu_caps()->has_avx2 &&
-               type.width * type.length == 256)) &&
-             !LLVMIsConstant(a) &&
-             !LLVMIsConstant(b) &&
-@@ -379,7 +379,7 @@ lp_build_select(struct lp_build_context *bld,
-             intrinsic = "llvm.x86.avx.blendv.ps.256";
-             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
-          } else {
--            assert(util_cpu_caps.has_avx2);
-+            assert(util_get_cpu_caps()->has_avx2);
-             intrinsic = "llvm.x86.avx2.pblendvb";
-             arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
-          }
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
-index 9b75676a4e2..4f3e696816c 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
-@@ -400,22 +400,22 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
-     * http://llvm.org/PR19429
-     * http://llvm.org/PR16721
-     */
--   MAttrs.push_back(util_cpu_caps.has_sse    ? "+sse"    : "-sse"   );
--   MAttrs.push_back(util_cpu_caps.has_sse2   ? "+sse2"   : "-sse2"  );
--   MAttrs.push_back(util_cpu_caps.has_sse3   ? "+sse3"   : "-sse3"  );
--   MAttrs.push_back(util_cpu_caps.has_ssse3  ? "+ssse3"  : "-ssse3" );
--   MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
--   MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
-+   MAttrs.push_back(util_get_cpu_caps()->has_sse    ? "+sse"    : "-sse"   );
-+   MAttrs.push_back(util_get_cpu_caps()->has_sse2   ? "+sse2"   : "-sse2"  );
-+   MAttrs.push_back(util_get_cpu_caps()->has_sse3   ? "+sse3"   : "-sse3"  );
-+   MAttrs.push_back(util_get_cpu_caps()->has_ssse3  ? "+ssse3"  : "-ssse3" );
-+   MAttrs.push_back(util_get_cpu_caps()->has_sse4_1 ? "+sse4.1" : "-sse4.1");
-+   MAttrs.push_back(util_get_cpu_caps()->has_sse4_2 ? "+sse4.2" : "-sse4.2");
-    /*
-     * AVX feature is not automatically detected from CPUID by the X86 target
-     * yet, because the old (yet default) JIT engine is not capable of
-     * emitting the opcodes. On newer llvm versions it is and at least some
-     * versions (tested with 3.3) will emit avx opcodes without this anyway.
-     */
--   MAttrs.push_back(util_cpu_caps.has_avx  ? "+avx"  : "-avx");
--   MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
--   MAttrs.push_back(util_cpu_caps.has_fma  ? "+fma"  : "-fma");
--   MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
-+   MAttrs.push_back(util_get_cpu_caps()->has_avx  ? "+avx"  : "-avx");
-+   MAttrs.push_back(util_get_cpu_caps()->has_f16c ? "+f16c" : "-f16c");
-+   MAttrs.push_back(util_get_cpu_caps()->has_fma  ? "+fma"  : "-fma");
-+   MAttrs.push_back(util_get_cpu_caps()->has_avx2 ? "+avx2" : "-avx2");
-    /* disable avx512 and all subvariants */
-    MAttrs.push_back("-avx512cd");
-    MAttrs.push_back("-avx512er");
-@@ -426,7 +426,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
-    MAttrs.push_back("-avx512vl");
- #endif
- #if defined(PIPE_ARCH_ARM)
--   if (!util_cpu_caps.has_neon) {
-+   if (!util_get_cpu_caps()->has_neon) {
-       MAttrs.push_back("-neon");
-       MAttrs.push_back("-crypto");
-       MAttrs.push_back("-vfp2");
-@@ -434,7 +434,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
- #endif
- 
- #if defined(PIPE_ARCH_PPC)
--   MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
-+   MAttrs.push_back(util_get_cpu_caps()->has_altivec ? "+altivec" : "-altivec");
- #if (LLVM_VERSION_MAJOR < 4)
-    /*
-     * Make sure VSX instructions are disabled
-@@ -444,7 +444,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
-     * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
-     * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0)
-     */
--   if (util_cpu_caps.has_altivec) {
-+   if (util_get_cpu_caps()->has_altivec) {
-       MAttrs.push_back("-vsx");
-    }
- #else
-@@ -458,8 +458,8 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
-     * Make sure VSX instructions are ENABLED (if supported), unless
-     * VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
-     */
--   if (util_cpu_caps.has_altivec) {
--      MAttrs.push_back(util_cpu_caps.has_vsx ? "+vsx" : "-vsx");
-+   if (util_get_cpu_caps()->has_altivec) {
-+      MAttrs.push_back(util_get_cpu_caps()->has_vsx ? "+vsx" : "-vsx");
-    }
- #endif
- #endif
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
-index e1f652a9342..76e57c52f80 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
-@@ -322,7 +322,7 @@ lp_build_interleave2(struct gallivm_state *gallivm,
- {
-    LLVMValueRef shuffle;
- 
--   if (type.length == 2 && type.width == 128 && util_cpu_caps.has_avx) {
-+   if (type.length == 2 && type.width == 128 && util_get_cpu_caps()->has_avx) {
-       /*
-        * XXX: This is a workaround for llvm code generation deficiency. Strangely
-        * enough, while this needs vinsertf128/vextractf128 instructions (hence
-@@ -484,7 +484,7 @@ lp_build_unpack2_native(struct gallivm_state *gallivm,
- 
-    /* Interleave bits */
- #if UTIL_ARCH_LITTLE_ENDIAN
--   if (src_type.length * src_type.width == 256 && util_cpu_caps.has_avx2) {
-+   if (src_type.length * src_type.width == 256 && util_get_cpu_caps()->has_avx2) {
-       *dst_lo = lp_build_interleave2_half(gallivm, src_type, src, msb, 0);
-       *dst_hi = lp_build_interleave2_half(gallivm, src_type, src, msb, 1);
-    } else {
-@@ -585,22 +585,22 @@ lp_build_pack2(struct gallivm_state *gallivm,
-    assert(src_type.length * 2 == dst_type.length);
- 
-    /* Check for special cases first */
--   if ((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
-+   if ((util_get_cpu_caps()->has_sse2 || util_get_cpu_caps()->has_altivec) &&
-         src_type.width * src_type.length >= 128) {
-       const char *intrinsic = NULL;
-       boolean swap_intrinsic_operands = FALSE;
- 
-       switch(src_type.width) {
-       case 32:
--         if (util_cpu_caps.has_sse2) {
-+         if (util_get_cpu_caps()->has_sse2) {
-            if (dst_type.sign) {
-               intrinsic = "llvm.x86.sse2.packssdw.128";
-            } else {
--              if (util_cpu_caps.has_sse4_1) {
-+              if (util_get_cpu_caps()->has_sse4_1) {
-                  intrinsic = "llvm.x86.sse41.packusdw";
-               }
-            }
--         } else if (util_cpu_caps.has_altivec) {
-+         } else if (util_get_cpu_caps()->has_altivec) {
-             if (dst_type.sign) {
-                intrinsic = "llvm.ppc.altivec.vpkswss";
-             } else {
-@@ -613,18 +613,18 @@ lp_build_pack2(struct gallivm_state *gallivm,
-          break;
-       case 16:
-          if (dst_type.sign) {
--            if (util_cpu_caps.has_sse2) {
-+            if (util_get_cpu_caps()->has_sse2) {
-                intrinsic = "llvm.x86.sse2.packsswb.128";
--            } else if (util_cpu_caps.has_altivec) {
-+            } else if (util_get_cpu_caps()->has_altivec) {
-                intrinsic = "llvm.ppc.altivec.vpkshss";
- #if UTIL_ARCH_LITTLE_ENDIAN
-                swap_intrinsic_operands = TRUE;
- #endif
-             }
-          } else {
--            if (util_cpu_caps.has_sse2) {
-+            if (util_get_cpu_caps()->has_sse2) {
-                intrinsic = "llvm.x86.sse2.packuswb.128";
--            } else if (util_cpu_caps.has_altivec) {
-+            } else if (util_get_cpu_caps()->has_altivec) {
-                intrinsic = "llvm.ppc.altivec.vpkshus";
- #if UTIL_ARCH_LITTLE_ENDIAN
-                swap_intrinsic_operands = TRUE;
-@@ -740,7 +740,7 @@ lp_build_pack2_native(struct gallivm_state *gallivm,
- 
-    /* At this point only have special case for avx2 */
-    if (src_type.length * src_type.width == 256 &&
--       util_cpu_caps.has_avx2) {
-+       util_get_cpu_caps()->has_avx2) {
-       switch(src_type.width) {
-       case 32:
-          if (dst_type.sign) {
-@@ -793,7 +793,7 @@ lp_build_packs2(struct gallivm_state *gallivm,
- 
-    /* All X86 SSE non-interleaved pack instructions take signed inputs and
-     * saturate them, so no need to clamp for those cases. */
--   if(util_cpu_caps.has_sse2 &&
-+   if(util_get_cpu_caps()->has_sse2 &&
-       src_type.width * src_type.length >= 128 &&
-       src_type.sign &&
-       (src_type.width == 32 || src_type.width == 16))
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
-index 686abc08620..98dcde912b5 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
-@@ -1152,7 +1152,7 @@ lp_build_minify(struct lp_build_context *bld,
-       LLVMValueRef size;
-       assert(bld->type.sign);
-       if (lod_scalar ||
--         (util_cpu_caps.has_avx2 || !util_cpu_caps.has_sse)) {
-+         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
-          size = LLVMBuildLShr(builder, base_size, level, "minify");
-          size = lp_build_max(bld, size, bld->one);
-       }
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
-index 6740907ebcb..f35a27562e7 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
-@@ -3235,7 +3235,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
-        * as it appears to be a loss with just AVX)
-        */
-       if (num_quads == 1 || !use_aos ||
--          (util_cpu_caps.has_avx2 &&
-+          (util_get_cpu_caps()->has_avx2 &&
-            (bld.num_lods == 1 ||
-             derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
-          if (use_aos) {
-diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
-index b1c8b990ef1..03b11f914b4 100644
---- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
-+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
-@@ -35,10 +35,10 @@
- 
- DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", false);
- 
--static struct util_cpu_caps *get_cpu_caps(void)
-+static const struct util_cpu_caps_t *get_cpu_caps(void)
- {
-    util_cpu_detect();
--   return &util_cpu_caps;
-+   return util_get_cpu_caps();
- }
- 
- int rtasm_cpu_has_sse(void)
-diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
-index ad687f32853..ddd65fb6a08 100644
---- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
-+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
-@@ -2152,17 +2152,17 @@ static void x86_init_func_common( struct x86_function *p )
- {
-    util_cpu_detect();
-    p->caps = 0;
--   if(util_cpu_caps.has_mmx)
-+   if(util_get_cpu_caps()->has_mmx)
-       p->caps |= X86_MMX;
--   if(util_cpu_caps.has_mmx2)
-+   if(util_get_cpu_caps()->has_mmx2)
-       p->caps |= X86_MMX2;
--   if(util_cpu_caps.has_sse)
-+   if(util_get_cpu_caps()->has_sse)
-       p->caps |= X86_SSE;
--   if(util_cpu_caps.has_sse2)
-+   if(util_get_cpu_caps()->has_sse2)
-       p->caps |= X86_SSE2;
--   if(util_cpu_caps.has_sse3)
-+   if(util_get_cpu_caps()->has_sse3)
-       p->caps |= X86_SSE3;
--   if(util_cpu_caps.has_sse4_1)
-+   if(util_get_cpu_caps()->has_sse4_1)
-       p->caps |= X86_SSE4_1;
-    p->csr = p->store;
- #if defined(PIPE_ARCH_X86)
-diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
-index 7d5150528bc..28d893a8d82 100644
---- a/src/gallium/auxiliary/util/u_threaded_context.c
-+++ b/src/gallium/auxiliary/util/u_threaded_context.c
-@@ -2172,8 +2172,8 @@ tc_set_context_param(struct pipe_context *_pipe,
-    if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
-       /* Pin the gallium thread as requested. */
-       util_set_thread_affinity(tc->queue.threads[0],
--                               util_cpu_caps.L3_affinity_mask[value],
--                               NULL, util_cpu_caps.num_cpu_mask_bits);
-+                               util_get_cpu_caps()->L3_affinity_mask[value],
-+                               NULL, util_get_cpu_caps()->num_cpu_mask_bits);
- 
-       /* Execute this immediately (without enqueuing).
-        * It's required to be thread-safe.
-@@ -2951,7 +2951,7 @@ threaded_context_create(struct pipe_context *pipe,
- 
-    util_cpu_detect();
- 
--   if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
-+   if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
-       return pipe;
- 
-    tc = os_malloc_aligned(sizeof(struct threaded_context), 16);
-diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
-index b95e2f0017f..dc559bc3ffb 100644
---- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
-+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
-@@ -436,7 +436,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
-    assert(type.length <= 16);
-    assert(type.floating);
- 
--   if(util_cpu_caps.has_sse && type.length == 4) {
-+   if(util_get_cpu_caps()->has_sse && type.length == 4) {
-       const char *movmskintr = "llvm.x86.sse.movmsk.ps";
-       const char *popcntintr = "llvm.ctpop.i32";
-       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
-@@ -447,7 +447,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
-                                        LLVMInt32TypeInContext(context), bits);
-       count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
-    }
--   else if(util_cpu_caps.has_avx && type.length == 8) {
-+   else if(util_get_cpu_caps()->has_avx && type.length == 8) {
-       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
-       const char *popcntintr = "llvm.ctpop.i32";
-       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
-diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
-index 0ba4b48c469..6447a67634b 100644
---- a/src/gallium/drivers/llvmpipe/lp_screen.c
-+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
-@@ -919,7 +919,7 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
- 
-    screen->allow_cl = !!getenv("LP_CL");
-    screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR);
--   screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
-+   screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0;
- #ifdef EMBEDDED_DEVICE
-    screen->num_threads = 0;
- #endif
-diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
-index c602d001f2f..db330eebb8e 100644
---- a/src/gallium/drivers/llvmpipe/lp_test_arit.c
-+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
-@@ -403,7 +403,7 @@ flush_denorm_to_zero(float val)
-    fi_val.f = val;
- 
- #if defined(PIPE_ARCH_SSE)
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       if ((fi_val.ui & 0x7f800000) == 0) {
-          fi_val.ui &= 0xff800000;
-       }
-@@ -479,7 +479,7 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned
-             continue;
-          }
- 
--         if (!util_cpu_caps.has_neon &&
-+         if (!util_get_cpu_caps()->has_neon &&
-              test->ref == &nearbyintf && length == 2 &&
-              ref != roundf(testval)) {
-             /* FIXME: The generic (non SSE) path in lp_build_iround, which is
-diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
-index 2bf223d66f9..815736166d5 100644
---- a/src/gallium/drivers/llvmpipe/lp_texture.c
-+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
-@@ -85,7 +85,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
-     * of a block for all formats) though this should not be strictly necessary
-     * neither. In any case it can only affect compressed or 1d textures.
-     */
--   unsigned mip_align = MAX2(64, util_cpu_caps.cacheline);
-+   unsigned mip_align = MAX2(64, util_get_cpu_caps()->cacheline);
- 
-    assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
-    assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
-@@ -123,7 +123,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
-       if (util_format_is_compressed(pt->format))
-          lpr->row_stride[level] = nblocksx * block_size;
-       else
--         lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline);
-+         lpr->row_stride[level] = align(nblocksx * block_size, util_get_cpu_caps()->cacheline);
- 
-       /* if row_stride * height > LP_MAX_TEXTURE_SIZE */
-       if ((uint64_t)lpr->row_stride[level] * nblocksy > LP_MAX_TEXTURE_SIZE) {
-diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp
-index 84d4b44ee57..1fb14e636d7 100644
---- a/src/gallium/drivers/swr/swr_loader.cpp
-+++ b/src/gallium/drivers/swr/swr_loader.cpp
-@@ -91,7 +91,7 @@ swr_create_screen(struct sw_winsys *winsys)
- 
-    util_cpu_detect();
- 
--   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
-+   if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512er) {
-       swr_print_info("SWR detected KNL instruction support ");
- #ifndef HAVE_SWR_KNL
-       swr_print_info("(skipping: not built).\n");
-@@ -103,7 +103,7 @@ swr_create_screen(struct sw_winsys *winsys)
- #endif
-    }
- 
--   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
-+   if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512bw) {
-       swr_print_info("SWR detected SKX instruction support ");
- #ifndef HAVE_SWR_SKX
-       swr_print_info("(skipping not built).\n");
-@@ -113,7 +113,7 @@ swr_create_screen(struct sw_winsys *winsys)
- #endif
-    }
- 
--   if (util_cpu_caps.has_avx2) {
-+   if (util_get_cpu_caps()->has_avx2) {
-       swr_print_info("SWR detected AVX2 instruction support ");
- #ifndef HAVE_SWR_AVX2
-       swr_print_info("(skipping not built).\n");
-@@ -123,7 +123,7 @@ swr_create_screen(struct sw_winsys *winsys)
- #endif
-    }
- 
--   if (util_cpu_caps.has_avx) {
-+   if (util_get_cpu_caps()->has_avx) {
-       swr_print_info("SWR detected AVX instruction support ");
- #ifndef HAVE_SWR_AVX
-       swr_print_info("(skipping not built).\n");
-diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h
-index 66767e7f1f8..5afe32939a8 100644
---- a/src/gallium/drivers/vc4/vc4_tiling.h
-+++ b/src/gallium/drivers/vc4/vc4_tiling.h
-@@ -90,7 +90,7 @@ vc4_load_lt_image(void *dst, uint32_t dst_stride,
-                   int cpp, const struct pipe_box *box)
- {
- #ifdef USE_ARM_ASM
--        if (util_cpu_caps.has_neon) {
-+        if (util_get_cpu_caps()->has_neon) {
-                 vc4_load_lt_image_neon(dst, dst_stride, src, src_stride,
-                                        cpp, box);
-                 return;
-@@ -106,7 +106,7 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride,
-                    int cpp, const struct pipe_box *box)
- {
- #ifdef USE_ARM_ASM
--        if (util_cpu_caps.has_neon) {
-+        if (util_get_cpu_caps()->has_neon) {
-                 vc4_store_lt_image_neon(dst, dst_stride, src, src_stride,
-                                         cpp, box);
-                 return;
-diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c
-index 4d9c4e27ebf..782f16e7f78 100644
---- a/src/gallium/tests/unit/translate_test.c
-+++ b/src/gallium/tests/unit/translate_test.c
-@@ -50,6 +50,7 @@ int main(int argc, char** argv)
- {
-    struct translate *(*create_fn)(const struct translate_key *key) = 0;
- 
-+   extern struct util_cpu_caps_t util_cpu_caps;
-    struct translate_key key;
-    unsigned output_format;
-    unsigned input_format;
-@@ -87,7 +88,7 @@ int main(int argc, char** argv)
-    }
-    else if (!strcmp(argv[1], "sse"))
-    {
--      if(!util_cpu_caps.has_sse || !rtasm_cpu_has_sse())
-+      if(!util_get_cpu_caps()->has_sse || !rtasm_cpu_has_sse())
-       {
-          printf("Error: CPU doesn't support SSE (test with qemu)\n");
-          return 2;
-@@ -99,7 +100,7 @@ int main(int argc, char** argv)
-    }
-    else if (!strcmp(argv[1], "sse2"))
-    {
--      if(!util_cpu_caps.has_sse2 || !rtasm_cpu_has_sse())
-+      if(!util_get_cpu_caps()->has_sse2 || !rtasm_cpu_has_sse())
-       {
-          printf("Error: CPU doesn't support SSE2 (test with qemu)\n");
-          return 2;
-@@ -110,7 +111,7 @@ int main(int argc, char** argv)
-    }
-    else if (!strcmp(argv[1], "sse3"))
-    {
--      if(!util_cpu_caps.has_sse3 || !rtasm_cpu_has_sse())
-+      if(!util_get_cpu_caps()->has_sse3 || !rtasm_cpu_has_sse())
-       {
-          printf("Error: CPU doesn't support SSE3 (test with qemu)\n");
-          return 2;
-@@ -120,7 +121,7 @@ int main(int argc, char** argv)
-    }
-    else if (!strcmp(argv[1], "sse4.1"))
-    {
--      if(!util_cpu_caps.has_sse4_1 || !rtasm_cpu_has_sse())
-+      if(!util_get_cpu_caps()->has_sse4_1 || !rtasm_cpu_has_sse())
-       {
-          printf("Error: CPU doesn't support SSE4.1 (test with qemu)\n");
-          return 2;
-diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c
-index 7f2eba9382b..4474cfb82b0 100644
---- a/src/gallium/tests/unit/u_half_test.c
-+++ b/src/gallium/tests/unit/u_half_test.c
-@@ -36,13 +36,14 @@ test(void)
- int
- main(int argc, char **argv)
- {
--   assert(!util_cpu_caps.has_f16c);
-+   util_cpu_detect();
-    test();
- 
--   /* Test f16c. */
--   util_cpu_detect();
--   if (util_cpu_caps.has_f16c)
-+   /* Test non-f16c. */
-+   if (util_get_cpu_caps()->has_f16c) {
-+      ((struct util_cpu_caps_t *)util_get_cpu_caps())->has_f16c = false;
-       test();
-+   }
- 
-    printf("Success!\n");
-    return 0;
-diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
-index 6b87601f1be..9b3279fc461 100644
---- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
-+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
-@@ -327,8 +327,8 @@ static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
-    struct amdgpu_winsys *ws = amdgpu_winsys(rws);
- 
-    util_set_thread_affinity(ws->cs_queue.threads[0],
--                            util_cpu_caps.L3_affinity_mask[cache],
--                            NULL, util_cpu_caps.num_cpu_mask_bits);
-+                            util_get_cpu_caps()->L3_affinity_mask[cache],
-+                            NULL, util_get_cpu_caps()->num_cpu_mask_bits);
- }
- 
- static uint32_t kms_handle_hash(const void *key)
-diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
-index 6b306a6ce7b..91b1dd6b68f 100644
---- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
-+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
-@@ -804,8 +804,8 @@ static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
- 
-    if (util_queue_is_initialized(&rws->cs_queue)) {
-       util_set_thread_affinity(rws->cs_queue.threads[0],
--                               util_cpu_caps.L3_affinity_mask[cache],
--                               NULL, util_cpu_caps.num_cpu_mask_bits);
-+                               util_get_cpu_caps()->L3_affinity_mask[cache],
-+                               NULL, util_get_cpu_caps()->num_cpu_mask_bits);
-    }
- }
- 
-diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
-index 6316cad4e32..ed019b55395 100644
---- a/src/mesa/main/glthread.c
-+++ b/src/mesa/main/glthread.c
-@@ -214,19 +214,20 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
-    /* Pin threads regularly to the same Zen CCX that the main thread is
-     * running on. The main thread can move between CCXs.
-     */
--   if (util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 &&
-+   if (util_get_cpu_caps()->nr_cpus != util_get_cpu_caps()->cores_per_L3 &&
-        /* driver support */
-        ctx->Driver.PinDriverToL3Cache &&
-        ++glthread->pin_thread_counter % 128 == 0) {
-       int cpu = util_get_current_cpu();
- 
-       if (cpu >= 0) {
--         unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu];
--
--         util_set_thread_affinity(glthread->queue.threads[0],
--                                  util_cpu_caps.L3_affinity_mask[L3_cache],
--                                  NULL, util_cpu_caps.num_cpu_mask_bits);
--         ctx->Driver.PinDriverToL3Cache(ctx, L3_cache);
-+         uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu];
-+         if (L3_cache != U_CPU_INVALID_L3) {
-+            util_set_thread_affinity(glthread->queue.threads[0],
-+                                     util_get_cpu_caps()->L3_affinity_mask[L3_cache],
-+                                     NULL, util_get_cpu_caps()->num_cpu_mask_bits);
-+            ctx->Driver.PinDriverToL3Cache(ctx, L3_cache);
-+         }
-       }
-    }
- 
-diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
-index d5b20e1955b..204c00a057e 100644
---- a/src/mesa/state_tracker/st_context.c
-+++ b/src/mesa/state_tracker/st_context.c
-@@ -821,7 +821,7 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
-          !st->lower_ucp;
-    st->shader_has_one_variant[MESA_SHADER_COMPUTE] = st->has_shareable_shaders;
- 
--   if (util_cpu_caps.cores_per_L3 == util_cpu_caps.nr_cpus ||
-+   if (util_get_cpu_caps()->cores_per_L3 == util_get_cpu_caps()->nr_cpus ||
-        !st->pipe->set_context_param)
-       st->pin_thread_counter = ST_L3_PINNING_DISABLED;
- 
-diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
-index c11934c0a39..482a7cdf745 100644
---- a/src/mesa/state_tracker/st_draw.c
-+++ b/src/mesa/state_tracker/st_draw.c
-@@ -116,11 +116,13 @@ prepare_draw(struct st_context *st, struct gl_context *ctx)
-       int cpu = util_get_current_cpu();
-       if (cpu >= 0) {
-          struct pipe_context *pipe = st->pipe;
--         unsigned L3_cache = util_cpu_caps.cpu_to_L3[cpu];
-+         uint16_t L3_cache = util_get_cpu_caps()->cpu_to_L3[cpu];
- 
--         pipe->set_context_param(pipe,
--                                 PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
--                                 L3_cache);
-+         if (L3_cache != U_CPU_INVALID_L3) {
-+            pipe->set_context_param(pipe,
-+                                    PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
-+                                    L3_cache);
-+         }
-       }
-    }
- }
-diff --git a/src/util/half_float.h b/src/util/half_float.h
-index 4e15b2bdb0b..91d4ebd41f0 100644
---- a/src/util/half_float.h
-+++ b/src/util/half_float.h
-@@ -59,7 +59,7 @@ static inline uint16_t
- _mesa_float_to_half(float val)
- {
- #if defined(USE_X86_64_ASM)
--   if (util_cpu_caps.has_f16c) {
-+   if (util_get_cpu_caps()->has_f16c) {
-       __m128 in = {val};
-       __m128i out;
- 
-@@ -75,7 +75,7 @@ static inline float
- _mesa_half_to_float(uint16_t val)
- {
- #if defined(USE_X86_64_ASM)
--   if (util_cpu_caps.has_f16c) {
-+   if (util_get_cpu_caps()->has_f16c) {
-       __m128i in = {val};
-       __m128 out;
- 
-@@ -90,7 +90,7 @@ static inline uint16_t
- _mesa_float_to_float16_rtz(float val)
- {
- #if defined(USE_X86_64_ASM)
--   if (util_cpu_caps.has_f16c) {
-+   if (util_get_cpu_caps()->has_f16c) {
-       __m128 in = {val};
-       __m128i out;
- 
-diff --git a/src/util/tests/format/u_format_test.c b/src/util/tests/format/u_format_test.c
-index f4a62a5c6a8..e6473c2bf6d 100644
---- a/src/util/tests/format/u_format_test.c
-+++ b/src/util/tests/format/u_format_test.c
-@@ -850,6 +850,8 @@ int main(int argc, char **argv)
- {
-    boolean success;
- 
-+   util_cpu_detect();
-+
-    success = test_all();
- 
-    return success ? 0 : 1;
-diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c
-index 8cfe3286b1f..4a4b06e1bc6 100644
---- a/src/util/u_cpu_detect.c
-+++ b/src/util/u_cpu_detect.c
-@@ -90,7 +90,7 @@
- DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false)
- 
- 
--struct util_cpu_caps util_cpu_caps;
-+struct util_cpu_caps_t util_cpu_caps;
- 
- #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- static int has_cpuid(void);
-@@ -438,26 +438,22 @@ get_cpu_topology(void)
-    util_cpu_caps.cores_per_L3 = util_cpu_caps.nr_cpus;
-    util_cpu_caps.num_L3_caches = 1;
- 
-+   memset(util_cpu_caps.cpu_to_L3, 0xff, sizeof(util_cpu_caps.cpu_to_L3));
-+
- #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-    /* AMD Zen */
-    if (util_cpu_caps.family >= CPU_AMD_ZEN1_ZEN2 &&
-        util_cpu_caps.family < CPU_AMD_LAST) {
-       uint32_t regs[4];
- 
--      /* Query the L3 cache count. */
--      cpuid_count(0x8000001D, 3, regs);
--      unsigned cache_level = (regs[0] >> 5) & 0x7;
--      unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1;
--
--      if (cache_level != 3 || cores_per_L3 == util_cpu_caps.nr_cpus)
--         return;
--
-       uint32_t saved_mask[UTIL_MAX_CPUS / 32] = {0};
-       uint32_t mask[UTIL_MAX_CPUS / 32] = {0};
--      uint32_t allowed_mask[UTIL_MAX_CPUS / 32] = {0};
--      uint32_t apic_id[UTIL_MAX_CPUS];
-       bool saved = false;
- 
-+      uint32_t L3_found[UTIL_MAX_CPUS] = {0};
-+      uint32_t num_L3_caches = 0;
-+      util_affinity_mask *L3_affinity_masks = NULL;
-+
-       /* Query APIC IDs from each CPU core.
-        *
-        * An APIC ID is a logical ID of the CPU with respect to the cache
-@@ -484,39 +480,58 @@ get_cpu_topology(void)
-                                               !saved ? saved_mask : NULL,
-                                               util_cpu_caps.num_cpu_mask_bits)) {
-             saved = true;
--            allowed_mask[i / 32] |= cpu_bit;
- 
-             /* Query the APIC ID of the current core. */
-             cpuid(0x00000001, regs);
--            apic_id[i] = regs[1] >> 24;
-+            unsigned apic_id = regs[1] >> 24;
-+
-+            /* Query the total core count for the CPU */
-+            uint32_t core_count = 1;
-+            if (regs[3] & (1 << 28))
-+               core_count = (regs[1] >> 16) & 0xff;
-+
-+            core_count = util_next_power_of_two(core_count);
-+
-+            /* Query the L3 cache count. */
-+            cpuid_count(0x8000001D, 3, regs);
-+            unsigned cache_level = (regs[0] >> 5) & 0x7;
-+            unsigned cores_per_L3 = ((regs[0] >> 14) & 0xfff) + 1;
-+
-+            if (cache_level != 3)
-+               continue;
-+
-+            unsigned local_core_id = apic_id & (core_count - 1);
-+            unsigned phys_id = (apic_id & ~(core_count - 1)) >> util_logbase2(core_count);
-+            unsigned local_l3_cache_index = local_core_id / util_next_power_of_two(cores_per_L3);
-+#define L3_ID(p, i) (p << 16 | i << 1 | 1);
-+
-+            unsigned l3_id = L3_ID(phys_id, local_l3_cache_index);
-+            int idx = -1;
-+            for (unsigned c = 0; c < num_L3_caches; c++) {
-+               if (L3_found[c] == l3_id) {
-+                  idx = c;
-+                  break;
-+               }
-+            }
-+            if (idx == -1) {
-+               idx = num_L3_caches;
-+               L3_found[num_L3_caches++] = l3_id;
-+               L3_affinity_masks = realloc(L3_affinity_masks, sizeof(util_affinity_mask) * num_L3_caches);
-+               if (!L3_affinity_masks)
-+                  return;
-+               memset(&L3_affinity_masks[num_L3_caches - 1], 0, sizeof(util_affinity_mask));
-+            }
-+            util_cpu_caps.cpu_to_L3[i] = idx;
-+            L3_affinity_masks[idx][i / 32] |= cpu_bit;
-+
-          }
-          mask[i / 32] = 0;
-       }
- 
--      if (saved) {
--
--         /* We succeeded in using at least one CPU. */
--         util_cpu_caps.num_L3_caches = util_cpu_caps.nr_cpus / cores_per_L3;
--         util_cpu_caps.cores_per_L3 = cores_per_L3;
--         util_cpu_caps.L3_affinity_mask = calloc(sizeof(util_affinity_mask),
--                                                 util_cpu_caps.num_L3_caches);
--
--         for (unsigned i = 0; i < util_cpu_caps.nr_cpus && i < UTIL_MAX_CPUS;
--              i++) {
--            uint32_t cpu_bit = 1u << (i % 32);
--
--            if (allowed_mask[i / 32] & cpu_bit) {
--               /* Each APIC ID bit represents a topology level, so we need
--                * to round up to the next power of two.
--                */
--               unsigned L3_index = apic_id[i] /
--                                   util_next_power_of_two(cores_per_L3);
--
--               util_cpu_caps.L3_affinity_mask[L3_index][i / 32] |= cpu_bit;
--               util_cpu_caps.cpu_to_L3[i] = L3_index;
--            }
--         }
-+      util_cpu_caps.num_L3_caches = num_L3_caches;
-+      util_cpu_caps.L3_affinity_mask = L3_affinity_masks;
- 
-+      if (saved) {
-          if (debug_get_option_dump_cpu()) {
-             fprintf(stderr, "CPU <-> L3 cache mapping:\n");
-             for (unsigned i = 0; i < util_cpu_caps.num_L3_caches; i++) {
-@@ -548,7 +563,7 @@ util_cpu_detect_once(void)
-    {
-       SYSTEM_INFO system_info;
-       GetSystemInfo(&system_info);
--      util_cpu_caps.nr_cpus = system_info.dwNumberOfProcessors;
-+      util_cpu_caps.nr_cpus = MAX2(1, system_info.dwNumberOfProcessors);
-    }
- #elif defined(PIPE_OS_UNIX) && defined(_SC_NPROCESSORS_ONLN)
-    util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h
-index 5a9a139c990..1c7239b2ec7 100644
---- a/src/util/u_cpu_detect.h
-+++ b/src/util/u_cpu_detect.h
-@@ -55,7 +55,7 @@ enum cpu_family {
- 
- typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
- 
--struct util_cpu_caps {
-+struct util_cpu_caps_t {
-    int nr_cpus;
-    enum cpu_family family;
- 
-@@ -105,8 +105,20 @@ struct util_cpu_caps {
-    util_affinity_mask *L3_affinity_mask;
- };
- 
--extern struct util_cpu_caps
--util_cpu_caps;
-+#define U_CPU_INVALID_L3 0xffff
-+
-+static inline const struct util_cpu_caps_t *
-+util_get_cpu_caps(void)
-+{
-+	extern struct util_cpu_caps_t util_cpu_caps;
-+
-+	/* If you hit this assert, it means that something is using the
-+	 * cpu-caps without having first called util_cpu_detect()
-+	 */
-+	assert(util_cpu_caps.nr_cpus >= 1);
-+
-+	return &util_cpu_caps;
-+}
- 
- void util_cpu_detect(void);
- 
-diff --git a/src/util/u_math.c b/src/util/u_math.c
-index 9a8a9ecbbde..41e7f599eb0 100644
---- a/src/util/u_math.c
-+++ b/src/util/u_math.c
-@@ -92,7 +92,7 @@ util_fpstate_get(void)
-    unsigned mxcsr = 0;
- 
- #if defined(PIPE_ARCH_SSE)
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       mxcsr = _mm_getcsr();
-    }
- #endif
-@@ -110,10 +110,10 @@ unsigned
- util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)
- {
- #if defined(PIPE_ARCH_SSE)
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       /* Enable flush to zero mode */
-       current_mxcsr |= _MM_FLUSH_ZERO_MASK;
--      if (util_cpu_caps.has_daz) {
-+      if (util_get_cpu_caps()->has_daz) {
-          /* Enable denormals are zero mode */
-          current_mxcsr |= _MM_DENORMALS_ZERO_MASK;
-       }
-@@ -132,7 +132,7 @@ void
- util_fpstate_set(unsigned mxcsr)
- {
- #if defined(PIPE_ARCH_SSE)
--   if (util_cpu_caps.has_sse) {
-+   if (util_get_cpu_caps()->has_sse) {
-       _mm_setcsr(mxcsr);
-    }
- #endif
-diff --git a/src/util/u_queue.c b/src/util/u_queue.c
-index b456871290d..489a8a14be4 100644
---- a/src/util/u_queue.c
-+++ b/src/util/u_queue.c
-@@ -267,7 +267,7 @@ util_queue_thread_func(void *input)
-       util_cpu_detect();
- 
-       util_set_current_thread_affinity(mask, NULL,
--                                       util_cpu_caps.num_cpu_mask_bits);
-+                                       util_get_cpu_caps()->num_cpu_mask_bits);
-    }
- 
- #if defined(__linux__)
diff --git a/mesa.spec b/mesa.spec
index d25c86c..5f4d000 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -56,9 +56,9 @@
 
 Name:           mesa
 Summary:        Mesa graphics libraries
-%global ver 21.0.1
+%global ver 21.0.2
 Version:        %{lua:ver = string.gsub(rpm.expand("%{ver}"), "-", "~"); print(ver)}
-Release:        4%{?dist}
+Release:        1%{?dist}
 License:        MIT
 URL:            http://www.mesa3d.org
 
@@ -71,10 +71,6 @@ Source1:        Mesa-MLAA-License-Clarification-Email.txt
 # https://gitlab.freedesktop.org/mesa/mesa/-/issues/4442
 Patch0:         mesa-llvm12.patch
 
-Patch1: 0001-drisw-move-zink-down-the-list-below-the-sw-drivers.patch
-# fix AMD EPYC 2-socket machines
-Patch2: cpu_caps_fixes.patch
-
 BuildRequires:  meson >= 0.45
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
@@ -616,6 +612,9 @@ popd
 %endif
 
 %changelog
+* Wed Apr 07 2021 Pete Walter <pwalter@fedoraproject.org> - 21.0.2-1
+- Update to 21.0.2
+
 * Thu Apr 01 2021 Dave Airlie <airlied@redhat.com> - 21.0.1-4
 - Backport CPU caps fixes
 
diff --git a/sources b/sources
index a80ab6f..1e0226f 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (mesa-21.0.1.tar.xz) = b31b78778b6092dfaf0712f90de3074217574389c4236f8379c127739874f6bd1b47883140a26445d25e58df87e6207278efd048453096ee710d334b1dcfe419
+SHA512 (mesa-21.0.2.tar.xz) = c3d7969b56e1c31ee642e3b7143d565c4233173dab7cc5576b686c873c27134dc8292a9f2caa0a0dd3c54d0c89d27d6030f36a2c84f85dcedee7ae80b19e5c3b