From 3c9c16c545319958d7fa14ef5ab8934fc5449d83 Mon Sep 17 00:00:00 2001 From: "Aureau, Georges (Kernel Tools ERT)" Date: Wed, 8 Feb 2023 12:09:03 +0000 Subject: [PATCH 72/89] Fix "kmem -s|-S" not working properly on RHEL8.6 and later For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr() code is checking for an additional bswap using a simple release test eg. THIS_KERNEL_VERSION >= LINUX(5,7,0), basically checking for RHEL9 and beyond. However, for RHEL8.6 and later, we have CONFIG_SLAB_FREELIST_HARDENED=y, and we also have the additional bswap, but the current crash is not handling this case, hence "kmem -s|-S" will not work properly, and free objects will not be counted nor reported properly. An example from a RHEL8.6 x86_64 kdump, a kmem cache with a single slab having 42 objects, only the freelist head is seen as free as crash can't walk freelist next pointers, and crash is wrongly reporting 41 allocated objects: crash> sys | grep RELEASE RELEASE: 4.18.0-372.9.1.el8.x86_64 crash> kmem -s nfs_commit_data CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME ffff9ad40c7cb2c0 728 41 42 1 32k nfs_commit_data When properly accounting for the additional bswap, we can walk the freelist and find 38 free objects, and crash is now reporting only 4 allocated objects: crash> kmem -s nfs_commit_data CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME ffff9ad40c7cb2c0 728 4 42 1 32k nfs_commit_data Signed-off-by: Georges Aureau Signed-off-by: Lianbo Jiang --- defs.h | 1 + memory.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/defs.h b/defs.h index b2389cd82fae..ae5d1244e8b3 100644 --- a/defs.h +++ b/defs.h @@ -2638,6 +2638,7 @@ struct vm_table { /* kernel VM-related data */ #define SLAB_OVERLOAD_PAGE (0x8000000) #define SLAB_CPU_CACHE (0x10000000) #define SLAB_ROOT_CACHES (0x20000000) +#define FREELIST_PTR_BSWAP (0x40000000) #define IS_FLATMEM() (vt->flags & FLATMEM) #define IS_DISCONTIGMEM() (vt->flags 
& DISCONTIGMEM) diff --git a/memory.c b/memory.c index 5141fbea4b40..e0742c1bd3a4 100644 --- a/memory.c +++ b/memory.c @@ -320,6 +320,7 @@ static void dump_per_cpu_offsets(void); static void dump_page_flags(ulonglong); static ulong kmem_cache_nodelists(ulong); static void dump_hstates(void); +static void freelist_ptr_init(void); static ulong freelist_ptr(struct meminfo *, ulong, ulong); static ulong handle_each_vm_area(struct handle_each_vm_area_args *); @@ -789,6 +790,8 @@ vm_init(void) MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name"); MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags"); MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random"); + if (VALID_MEMBER(kmem_cache_random)) + freelist_ptr_init(); MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist"); MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page"); if (INVALID_MEMBER(kmem_cache_cpu_page)) @@ -13932,6 +13935,8 @@ dump_vm_table(int verbose) fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\ if (vt->flags & SLAB_ROOT_CACHES) fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\ + if (vt->flags & FREELIST_PTR_BSWAP) + fprintf(fp, "%sFREELIST_PTR_BSWAP", others++ ? "|" : "");\ if (vt->flags & USE_VMAP_AREA) fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\ if (vt->flags & CONFIG_NUMA) @@ -19519,13 +19524,55 @@ count_free_objects(struct meminfo *si, ulong freelist) return c; } +/* + * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are obfuscated with XORs, + * and on recent releases with an additional bswap. Some releases prior to 5.7.0 + * may also be using the additional bswap. The only easy and reliable way to tell + * is to inspect assembly code (e.g. "__slab_free") for a bswap instruction. 
+ */ +static int +freelist_ptr_bswap_x86(void) +{ /* Returns TRUE when the gdb disassembly of __slab_free contains a line whose third field is "bswap", FALSE otherwise. */ + char buf1[BUFSIZE]; + char buf2[BUFSIZE]; + char *arglist[MAXARGS]; + int found; + + sprintf(buf1, "disassemble __slab_free"); /* gdb command whose output is scanned below */ + open_tmpfile(); + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) { /* output is directed to pc->tmpfile; a zero return presumably means the command failed */ + close_tmpfile(); + return FALSE; /* no disassembly to scan: report no bswap */ + } + rewind(pc->tmpfile); /* read the captured output back from the start */ + found = FALSE; + while (fgets(buf2, BUFSIZE, pc->tmpfile)) { + if (parse_line(buf2, arglist) < 3) /* presumably the field count; skip lines with no third field -- TODO confirm */ + continue; + if (STREQ(arglist[2], "bswap")) { /* third field is presumably the instruction mnemonic -- confirm against gdb output */ + found = TRUE; + break; /* one match is enough */ + } + } + close_tmpfile(); + return found; +} + +static void +freelist_ptr_init(void) +{ /* Sets FREELIST_PTR_BSWAP in vt->flags when the extra bswap applies. */ + if (THIS_KERNEL_VERSION >= LINUX(5,7,0) || /* 5.7.0 and later: flag is set without inspecting code */ + ((machine_type("X86_64") || machine_type("X86")) && freelist_ptr_bswap_x86())) /* older kernels: only on X86_64/X86, and only if the disassembly check finds a bswap */ + vt->flags |= FREELIST_PTR_BSWAP; /* tested by freelist_ptr() before byte-swapping ptr_addr */ +} + static ulong freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr) { if (VALID_MEMBER(kmem_cache_random)) { /* CONFIG_SLAB_FREELIST_HARDENED */ - if (THIS_KERNEL_VERSION >= LINUX(5,7,0)) + if (vt->flags & FREELIST_PTR_BSWAP) ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr) : bswap_32(ptr_addr); return (ptr ^ si->random ^ ptr_addr); -- 2.37.1