145 lines
5.1 KiB
Diff
145 lines
5.1 KiB
Diff
|
From 3c9c16c545319958d7fa14ef5ab8934fc5449d83 Mon Sep 17 00:00:00 2001
|
||
|
From: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau@hpe.com>
|
||
|
Date: Wed, 8 Feb 2023 12:09:03 +0000
|
||
|
Subject: [PATCH 72/89] Fix "kmem -s|-S" not working properly on RHEL8.6 and
|
||
|
later
|
||
|
|
||
|
For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr()
|
||
|
code is checking for an additional bswap using a simple release test, e.g.
|
||
|
THIS_KERNEL_VERSION >= LINUX(5,7,0), basically checking for RHEL9 and
|
||
|
beyond.
|
||
|
|
||
|
However, for RHEL8.6 and later, we have CONFIG_SLAB_FREELIST_HARDENED=y,
|
||
|
and we also have the additional bswap, but the current crash is not
|
||
|
handling this case, hence "kmem -s|-S" will not work properly, and free
|
||
|
objects will not be counted nor reported properly.
|
||
|
|
||
|
An example from a RHEL8.6 x86_64 kdump, a kmem cache with a single slab
|
||
|
having 42 objects, only the freelist head is seen as free as crash can't
|
||
|
walk freelist next pointers, and crash is wrongly reporting 41 allocated
|
||
|
objects:
|
||
|
|
||
|
crash> sys | grep RELEASE
|
||
|
RELEASE: 4.18.0-372.9.1.el8.x86_64
|
||
|
crash> kmem -s nfs_commit_data
|
||
|
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
|
||
|
ffff9ad40c7cb2c0 728 41 42 1 32k nfs_commit_data
|
||
|
|
||
|
When properly accounting for the additional bswap, we can walk the
|
||
|
freelist and find 38 free objects, and crash is now reporting only 4
|
||
|
allocated objects:
|
||
|
|
||
|
crash> kmem -s nfs_commit_data
|
||
|
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
|
||
|
ffff9ad40c7cb2c0 728 4 42 1 32k nfs_commit_data
|
||
|
|
||
|
Signed-off-by: Georges Aureau <georges.aureau@hpe.com>
|
||
|
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
|
||
|
---
|
||
|
defs.h | 1 +
|
||
|
memory.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
|
||
|
2 files changed, 49 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/defs.h b/defs.h
|
||
|
index b2389cd82fae..ae5d1244e8b3 100644
|
||
|
--- a/defs.h
|
||
|
+++ b/defs.h
|
||
|
@@ -2638,6 +2638,7 @@ struct vm_table { /* kernel VM-related data */
|
||
|
#define SLAB_OVERLOAD_PAGE (0x8000000)
|
||
|
#define SLAB_CPU_CACHE (0x10000000)
|
||
|
#define SLAB_ROOT_CACHES (0x20000000)
|
||
|
+#define FREELIST_PTR_BSWAP (0x40000000)
|
||
|
|
||
|
#define IS_FLATMEM() (vt->flags & FLATMEM)
|
||
|
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
|
||
|
diff --git a/memory.c b/memory.c
|
||
|
index 5141fbea4b40..e0742c1bd3a4 100644
|
||
|
--- a/memory.c
|
||
|
+++ b/memory.c
|
||
|
@@ -320,6 +320,7 @@ static void dump_per_cpu_offsets(void);
|
||
|
static void dump_page_flags(ulonglong);
|
||
|
static ulong kmem_cache_nodelists(ulong);
|
||
|
static void dump_hstates(void);
|
||
|
+static void freelist_ptr_init(void);
|
||
|
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
|
||
|
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
|
||
|
|
||
|
@@ -789,6 +790,8 @@ vm_init(void)
|
||
|
MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name");
|
||
|
MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags");
|
||
|
MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random");
|
||
|
+ if (VALID_MEMBER(kmem_cache_random))
|
||
|
+ freelist_ptr_init();
|
||
|
MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist");
|
||
|
MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page");
|
||
|
if (INVALID_MEMBER(kmem_cache_cpu_page))
|
||
|
@@ -13932,6 +13935,8 @@ dump_vm_table(int verbose)
|
||
|
fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
|
||
|
if (vt->flags & SLAB_ROOT_CACHES)
|
||
|
fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
|
||
|
+ if (vt->flags & FREELIST_PTR_BSWAP)
|
||
|
+ fprintf(fp, "%sFREELIST_PTR_BSWAP", others++ ? "|" : "");\
|
||
|
if (vt->flags & USE_VMAP_AREA)
|
||
|
fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
|
||
|
if (vt->flags & CONFIG_NUMA)
|
||
|
@@ -19519,13 +19524,55 @@ count_free_objects(struct meminfo *si, ulong freelist)
|
||
|
return c;
|
||
|
}
|
||
|
|
||
|
+/*
|
||
|
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are obfuscated with xor's,
|
||
|
+ * and for recent releases with an additional bswap. Some releases prior to 5.7.0
|
||
|
+ * may be using the additional bswap. The only easy and reliable way to tell is
|
||
|
+ * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
|
||
|
+ */
|
||
|
+static int
|
||
|
+freelist_ptr_bswap_x86(void)
|
||
|
+{
|
||
|
+ char buf1[BUFSIZE];
|
||
|
+ char buf2[BUFSIZE];
|
||
|
+ char *arglist[MAXARGS];
|
||
|
+ int found;
|
||
|
+
|
||
|
+ sprintf(buf1, "disassemble __slab_free");
|
||
|
+ open_tmpfile();
|
||
|
+ if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
|
||
|
+ close_tmpfile();
|
||
|
+ return FALSE;
|
||
|
+ }
|
||
|
+ rewind(pc->tmpfile);
|
||
|
+ found = FALSE;
|
||
|
+ while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
|
||
|
+ if (parse_line(buf2, arglist) < 3)
|
||
|
+ continue;
|
||
|
+ if (STREQ(arglist[2], "bswap")) {
|
||
|
+ found = TRUE;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ close_tmpfile();
|
||
|
+ return found;
|
||
|
+}
|
||
|
+
|
||
|
+static void
|
||
|
+freelist_ptr_init(void)
|
||
|
+{
|
||
|
+ if (THIS_KERNEL_VERSION >= LINUX(5,7,0) ||
|
||
|
+ ((machine_type("X86_64") || machine_type("X86")) && freelist_ptr_bswap_x86()))
|
||
|
+ vt->flags |= FREELIST_PTR_BSWAP;
|
||
|
+}
|
||
|
+
|
||
|
static ulong
|
||
|
freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
|
||
|
{
|
||
|
if (VALID_MEMBER(kmem_cache_random)) {
|
||
|
/* CONFIG_SLAB_FREELIST_HARDENED */
|
||
|
|
||
|
- if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
|
||
|
+ if (vt->flags & FREELIST_PTR_BSWAP)
|
||
|
ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
|
||
|
: bswap_32(ptr_addr);
|
||
|
return (ptr ^ si->random ^ ptr_addr);
|
||
|
--
|
||
|
2.37.1
|
||
|
|