From 5b7b7014945956da50e87441e88a57cf58273420 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Fri, 11 Aug 2023 16:10:13 +0200 Subject: [PATCH] Fix AMD cache size computation for hypervisors, old CPUs (#2166710) Related: #2166710 --- glibc-rh2166710-2.patch | 111 ++++++++++++++++ glibc-rh2166710-3.patch | 281 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 392 insertions(+) create mode 100644 glibc-rh2166710-2.patch create mode 100644 glibc-rh2166710-3.patch diff --git a/glibc-rh2166710-2.patch b/glibc-rh2166710-2.patch new file mode 100644 index 0000000..d102af9 --- /dev/null +++ b/glibc-rh2166710-2.patch @@ -0,0 +1,111 @@ +commit 856bab7717ef6d1033fd7cbf7cfb2ddefbfffb07 +Author: Andreas Schwab +Date: Thu Feb 9 14:56:21 2023 +0100 + + x86/dl-cacheinfo: remove unsused parameter from handle_amd + + Also replace an unreachable assert with __builtin_unreachable. + +Conflicts: + sysdeps/x86/dl-cacheinfo.h + (different backport order downstream) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index 3408700fc0b06e5b..cc2f8862ce88f655 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -311,7 +311,7 @@ handle_intel (int name, const struct cpu_features *cpu_features) + + + static long int __attribute__ ((noinline)) +-handle_amd (int name, const struct cpu_features *cpu_features) ++handle_amd (int name) + { + unsigned int eax; + unsigned int ebx; +@@ -334,24 +334,23 @@ handle_amd (int name, const struct cpu_features *cpu_features) + + switch (name) + { +- case _SC_LEVEL1_ICACHE_ASSOC: +- case _SC_LEVEL1_DCACHE_ASSOC: +- case _SC_LEVEL2_CACHE_ASSOC: +- case _SC_LEVEL3_CACHE_ASSOC: +- return ecx?((ebx >> 22) & 0x3ff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_LINESIZE: +- case _SC_LEVEL1_DCACHE_LINESIZE: +- case _SC_LEVEL2_CACHE_LINESIZE: +- case _SC_LEVEL3_CACHE_LINESIZE: +- return ecx?(ebx & 0xfff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_SIZE: +- case _SC_LEVEL1_DCACHE_SIZE: +- case _SC_LEVEL2_CACHE_SIZE: +- case _SC_LEVEL3_CACHE_SIZE: +- return ecx?(((ebx >> 22) & 0x3ff) + 1)*((ebx & 0xfff) + 1)\ +- *(ecx + 1):0; +- default: +- assert (! "cannot happen"); ++ case _SC_LEVEL1_ICACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL3_CACHE_ASSOC: ++ return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; ++ case _SC_LEVEL1_ICACHE_LINESIZE: ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ case _SC_LEVEL3_CACHE_LINESIZE: ++ return ecx ? (ebx & 0xfff) + 1 : 0; ++ case _SC_LEVEL1_ICACHE_SIZE: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ case _SC_LEVEL2_CACHE_SIZE: ++ case _SC_LEVEL3_CACHE_SIZE: ++ return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; ++ default: ++ __builtin_unreachable (); + } + return -1; + } +@@ -701,31 +700,26 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + } + else if (cpu_features->basic.kind == arch_kind_amd) + { +- data = handle_amd (_SC_LEVEL1_DCACHE_SIZE, cpu_features); +- core = handle_amd (_SC_LEVEL2_CACHE_SIZE, cpu_features); +- shared = handle_amd (_SC_LEVEL3_CACHE_SIZE, cpu_features); ++ data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); ++ core = handle_amd (_SC_LEVEL2_CACHE_SIZE); ++ shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); + shared_per_thread = shared; + +- level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE, cpu_features); +- level1_icache_linesize +- = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE, cpu_features); ++ level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); ++ level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); + level1_dcache_size = data; +- level1_dcache_assoc +- = handle_amd (_SC_LEVEL1_DCACHE_ASSOC, cpu_features); +- level1_dcache_linesize +- = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features); ++ level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC); ++ level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE); + level2_cache_size = core; +- level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC, cpu_features); +- level2_cache_linesize +- = handle_amd (_SC_LEVEL2_CACHE_LINESIZE, cpu_features); ++ level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC); ++ level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE); + level3_cache_size = shared; +- level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC, cpu_features); +- level3_cache_linesize +- = handle_amd (_SC_LEVEL3_CACHE_LINESIZE, cpu_features); ++ level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC); ++ level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); + + if (shared <= 0) + /* No shared L3 cache. All we have is the L2 cache. */ +- shared = core; ++ shared = core; + + if (shared_per_thread <= 0) + shared_per_thread = shared; diff --git a/glibc-rh2166710-3.patch b/glibc-rh2166710-3.patch new file mode 100644 index 0000000..94b056e --- /dev/null +++ b/glibc-rh2166710-3.patch @@ -0,0 +1,281 @@ +commit dcad5c8578130dec7f35fd5b0885304b59f9f543 +Author: Sajan Karumanchi +Date: Tue Aug 1 15:20:55 2023 +0000 + + x86: Fix for cache computation on AMD legacy cpus. + + Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D' + set to Zero, thus resulting in zeroed-out computed cache values. + This patch reintroduces the old way of cache computation as a + fail-safe option to handle these exceptions. + Fixed 'level4_cache_size' value through handle_amd(). + + Reviewed-by: Premachandra Mallappa + Tested-by: Florian Weimer + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index cc2f8862ce88f655..aed1a7be56610e99 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -315,40 +315,206 @@ handle_amd (int name) + { + unsigned int eax; + unsigned int ebx; +- unsigned int ecx; ++ unsigned int ecx = 0; + unsigned int edx; +- unsigned int count = 0x1; ++ unsigned int max_cpuid = 0; ++ unsigned int fn = 0; + + /* No level 4 cache (yet). */ + if (name > _SC_LEVEL3_CACHE_LINESIZE) + return 0; + +- if (name >= _SC_LEVEL3_CACHE_SIZE) +- count = 0x3; +- else if (name >= _SC_LEVEL2_CACHE_SIZE) +- count = 0x2; +- else if (name >= _SC_LEVEL1_DCACHE_SIZE) +- count = 0x0; ++ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx); ++ ++ if (max_cpuid >= 0x8000001D) ++ /* Use __cpuid__ '0x8000_001D' to compute cache details. */ ++ { ++ unsigned int count = 0x1; ++ ++ if (name >= _SC_LEVEL3_CACHE_SIZE) ++ count = 0x3; ++ else if (name >= _SC_LEVEL2_CACHE_SIZE) ++ count = 0x2; ++ else if (name >= _SC_LEVEL1_DCACHE_SIZE) ++ count = 0x0; ++ ++ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); ++ ++ if (ecx != 0) ++ { ++ switch (name) ++ { ++ case _SC_LEVEL1_ICACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL3_CACHE_ASSOC: ++ return ((ebx >> 22) & 0x3ff) + 1; ++ case _SC_LEVEL1_ICACHE_LINESIZE: ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ case _SC_LEVEL3_CACHE_LINESIZE: ++ return (ebx & 0xfff) + 1; ++ case _SC_LEVEL1_ICACHE_SIZE: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ case _SC_LEVEL2_CACHE_SIZE: ++ case _SC_LEVEL3_CACHE_SIZE: ++ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); ++ default: ++ __builtin_unreachable (); ++ } ++ return -1; ++ } ++ } ++ ++ /* Legacy cache computation for CPUs prior to Bulldozer family. ++ This is also a fail-safe mechanism for some hypervisors that ++ accidentally configure __cpuid__ '0x8000_001D' to Zero. */ + +- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); ++ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); ++ ++ if (max_cpuid < fn) ++ return 0; ++ ++ __cpuid (fn, eax, ebx, ecx, edx); ++ ++ if (name < _SC_LEVEL1_DCACHE_SIZE) ++ { ++ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; ++ ecx = edx; ++ } + + switch (name) + { +- case _SC_LEVEL1_ICACHE_ASSOC: +- case _SC_LEVEL1_DCACHE_ASSOC: +- case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ return (ecx >> 14) & 0x3fc00; ++ ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ ecx >>= 16; ++ if ((ecx & 0xff) == 0xff) ++ { ++ /* Fully associative. */ ++ return (ecx << 2) & 0x3fc00; ++ } ++ return ecx & 0xff; ++ ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ return ecx & 0xff; ++ ++ case _SC_LEVEL2_CACHE_SIZE: ++ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; ++ ++ case _SC_LEVEL2_CACHE_ASSOC: ++ switch ((ecx >> 12) & 0xf) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 4: ++ return (ecx >> 12) & 0xf; ++ case 6: ++ return 8; ++ case 8: ++ return 16; ++ case 10: ++ return 32; ++ case 11: ++ return 48; ++ case 12: ++ return 64; ++ case 13: ++ return 96; ++ case 14: ++ return 128; ++ case 15: ++ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); ++ default: ++ return 0; ++ } ++ ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; ++ ++ case _SC_LEVEL3_CACHE_SIZE: ++ { ++ long int total_l3_cache = 0, l3_cache_per_thread = 0; ++ unsigned int threads = 0; ++ const struct cpu_features *cpu_features; ++ ++ if ((edx & 0xf000) == 0) ++ return 0; ++ ++ total_l3_cache = (edx & 0x3ffc0000) << 1; ++ cpu_features = __get_cpu_features (); ++ ++ /* Figure out the number of logical threads that share L3. */ ++ if (max_cpuid >= 0x80000008) ++ { ++ /* Get width of APIC ID. */ ++ __cpuid (0x80000008, eax, ebx, ecx, edx); ++ threads = (ecx & 0xff) + 1; ++ } ++ ++ if (threads == 0) ++ { ++ /* If APIC ID width is not available, use logical ++ processor count. */ ++ __cpuid (0x00000001, eax, ebx, ecx, edx); ++ if ((edx & (1 << 28)) != 0) ++ threads = (ebx >> 16) & 0xff; ++ } ++ ++ /* Cap usage of highest cache level to the number of ++ supported threads. */ ++ if (threads > 0) ++ l3_cache_per_thread = total_l3_cache/threads; ++ ++ /* Get shared cache per ccx for Zen architectures. */ ++ if (cpu_features->basic.family >= 0x17) ++ { ++ long int l3_cache_per_ccx = 0; ++ /* Get number of threads share the L3 cache in CCX. */ ++ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); ++ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; ++ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx; ++ return l3_cache_per_ccx; ++ } ++ else ++ { ++ return l3_cache_per_thread; ++ } ++ } ++ + case _SC_LEVEL3_CACHE_ASSOC: +- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_LINESIZE: +- case _SC_LEVEL1_DCACHE_LINESIZE: +- case _SC_LEVEL2_CACHE_LINESIZE: ++ switch ((edx >> 12) & 0xf) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 4: ++ return (edx >> 12) & 0xf; ++ case 6: ++ return 8; ++ case 8: ++ return 16; ++ case 10: ++ return 32; ++ case 11: ++ return 48; ++ case 12: ++ return 64; ++ case 13: ++ return 96; ++ case 14: ++ return 128; ++ case 15: ++ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); ++ default: ++ return 0; ++ } ++ + case _SC_LEVEL3_CACHE_LINESIZE: +- return ecx ? (ebx & 0xfff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_SIZE: +- case _SC_LEVEL1_DCACHE_SIZE: +- case _SC_LEVEL2_CACHE_SIZE: +- case _SC_LEVEL3_CACHE_SIZE: +- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; ++ return (edx & 0xf000) == 0 ? 0 : edx & 0xff; ++ + default: + __builtin_unreachable (); + } +@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); + core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); +- shared_per_thread = shared; + + level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); + level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); +@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level3_cache_size = shared; + level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC); + level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); ++ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE); + + if (shared <= 0) +- /* No shared L3 cache. All we have is the L2 cache. */ +- shared = core; ++ { ++ /* No shared L3 cache. All we have is the L2 cache. */ ++ shared = core; ++ } ++ else if (cpu_features->basic.family < 0x17) ++ { ++ /* Account for exclusive L2 and L3 caches. */ ++ shared += core; ++ } + +- if (shared_per_thread <= 0) +- shared_per_thread = shared; ++ shared_per_thread = shared; + } + + cpu_features->level1_icache_size = level1_icache_size;