crash/SOURCES/0072-Fix-kmem-s-S-not-worki...

145 lines
5.1 KiB
Diff

From 3c9c16c545319958d7fa14ef5ab8934fc5449d83 Mon Sep 17 00:00:00 2001
From: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau@hpe.com>
Date: Wed, 8 Feb 2023 12:09:03 +0000
Subject: [PATCH 72/89] Fix "kmem -s|-S" not working properly on RHEL8.6 and
later
For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr()
code is checking for an additional bswap using a simple release test eg.
THIS_KERNEL_VERSION >= LINUX(5,7,0), basically checking for RHEL9 and
beyond.
However, for RHEL8.6 and later, we have CONFIG_SLAB_FREELIST_HARDENED=y,
and we also have the additional bswap, but the current crash is not
handling this case, hence "kmem -s|-S" will not work properly, and free
objects will not be counted nor reported properly.
An example from a RHEL8.6 x86_64 kdump, a kmem cache with a single slab
having 42 objects, only the freelist head is seen as free as crash can't
walk freelist next pointers, and crash is wrongly reporting 41 allocated
objects:
crash> sys | grep RELEASE
RELEASE: 4.18.0-372.9.1.el8.x86_64
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 41 42 1 32k nfs_commit_data
When properly accounting for the additional bswap, we can walk the
freelist and find 38 free objects, and crash is now reporting only 4
allocated objects:
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 4 42 1 32k nfs_commit_data
Signed-off-by: Georges Aureau <georges.aureau@hpe.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index b2389cd82fae..ae5d1244e8b3 100644
--- a/defs.h
+++ b/defs.h
@@ -2638,6 +2638,7 @@ struct vm_table { /* kernel VM-related data */
#define SLAB_OVERLOAD_PAGE (0x8000000)
#define SLAB_CPU_CACHE (0x10000000)
#define SLAB_ROOT_CACHES (0x20000000)
+#define FREELIST_PTR_BSWAP (0x40000000)
#define IS_FLATMEM() (vt->flags & FLATMEM)
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
diff --git a/memory.c b/memory.c
index 5141fbea4b40..e0742c1bd3a4 100644
--- a/memory.c
+++ b/memory.c
@@ -320,6 +320,7 @@ static void dump_per_cpu_offsets(void);
static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
+static void freelist_ptr_init(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
@@ -789,6 +790,8 @@ vm_init(void)
MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name");
MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags");
MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random");
+ if (VALID_MEMBER(kmem_cache_random))
+ freelist_ptr_init();
MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist");
MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page");
if (INVALID_MEMBER(kmem_cache_cpu_page))
@@ -13932,6 +13935,8 @@ dump_vm_table(int verbose)
fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
if (vt->flags & SLAB_ROOT_CACHES)
fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
+ if (vt->flags & FREELIST_PTR_BSWAP)
+ fprintf(fp, "%sFREELIST_PTR_BSWAP", others++ ? "|" : "");\
if (vt->flags & USE_VMAP_AREA)
fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
if (vt->flags & CONFIG_NUMA)
@@ -19519,13 +19524,55 @@ count_free_objects(struct meminfo *si, ulong freelist)
return c;
}
+/*
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are crypted with xor's,
+ * and for recent release with an additionnal bswap. Some releases prio to 5.7.0
+ * may be using the additionnal bswap. The only easy and reliable way to tell is
+ * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
+ */
+static int
+freelist_ptr_bswap_x86(void)
+{
+ char buf1[BUFSIZE];
+ char buf2[BUFSIZE];
+ char *arglist[MAXARGS];
+ int found;
+
+ sprintf(buf1, "disassemble __slab_free");
+ open_tmpfile();
+ if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
+ close_tmpfile();
+ return FALSE;
+ }
+ rewind(pc->tmpfile);
+ found = FALSE;
+ while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
+ if (parse_line(buf2, arglist) < 3)
+ continue;
+ if (STREQ(arglist[2], "bswap")) {
+ found = TRUE;
+ break;
+ }
+ }
+ close_tmpfile();
+ return found;
+}
+
+static void
+freelist_ptr_init(void)
+{
+ if (THIS_KERNEL_VERSION >= LINUX(5,7,0) ||
+ ((machine_type("X86_64") || machine_type("X86")) && freelist_ptr_bswap_x86()))
+ vt->flags |= FREELIST_PTR_BSWAP;
+}
+
static ulong
freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
{
if (VALID_MEMBER(kmem_cache_random)) {
/* CONFIG_SLAB_FREELIST_HARDENED */
- if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+ if (vt->flags & FREELIST_PTR_BSWAP)
ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
: bswap_32(ptr_addr);
return (ptr ^ si->random ^ ptr_addr);
--
2.37.1