Merge branch 'c8-beta' into a8-beta

This commit is contained in:
eabdullin 2024-03-27 19:32:54 +00:00 committed by eabdullin
commit 61ae9bd860
120 changed files with 1793 additions and 13855 deletions

View File

@ -1,2 +1,2 @@
aab889c6471bfc42cf2b1d065a881ea33d8ba0b7 SOURCES/crash-7.3.2.tar.gz
026f4c9e1c8152a2773354551c523acd32d7f00e SOURCES/gdb-7.6.tar.gz
335ab5dfe04f5265cf5f7bb5a44d6ee0afad1bdc SOURCES/crash-8.0.4.tar.gz
6bf5ee7877a4740835745ed97ce525a00bb2232c SOURCES/gdb-10.2.tar.gz

4
.gitignore vendored
View File

@ -1,2 +1,2 @@
SOURCES/crash-7.3.2.tar.gz
SOURCES/gdb-7.6.tar.gz
SOURCES/crash-8.0.4.tar.gz
SOURCES/gdb-10.2.tar.gz

View File

@ -1,52 +0,0 @@
From 040a56e9f9d0df15a2f8161ed3a0a907d70dda03 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 10 May 2023 16:09:03 +0900
Subject: [PATCH 1/6] Fix kernel version macros for revision numbers over 255
The current comparison macros for kernel version shift minor number only
8 bits. This can cause an unexpected result on kernels with revision
number over 255, e.g. Linux 4.14.314.
In fact, on Linux 4.14.314 for x86_64 without CONFIG_RANDOMIZE_BASE=y
(KASLR), the following condition became false in x86_64_init().
((THIS_KERNEL_VERSION >= LINUX(4,14,84)) &&
(THIS_KERNEL_VERSION < LINUX(4,15,0)))
As a result, crash used a wrong hard-coded value for PAGE_OFFSET and
failed to start a session with the following seek error.
crash: seek error: physical address: 200e000 type: "pud page"
Shift the major and minor number by 24 and 16 bits respectively to fix
this issue.
Reported-by: Luiz Capitulino <luizcap@amazon.com>
Tested-by: Luiz Capitulino <luizcap@amazon.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 12ad6aaa0998..211fc9d55d33 100644
--- a/defs.h
+++ b/defs.h
@@ -807,10 +807,10 @@ struct kernel_table { /* kernel data */
} \
}
-#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 16) + \
- (kt->kernel_version[1] << 8) + \
+#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 24) + \
+ (kt->kernel_version[1] << 16) + \
(kt->kernel_version[2]))
-#define LINUX(x,y,z) (((uint)(x) << 16) + ((uint)(y) << 8) + (uint)(z))
+#define LINUX(x,y,z) (((uint)(x) << 24) + ((uint)(y) << 16) + (uint)(z))
#define THIS_GCC_VERSION ((kt->gcc_version[0] << 16) + \
(kt->gcc_version[1] << 8) + \
--
2.37.1

View File

@ -0,0 +1,142 @@
From 38acd02c7fc09843ffb10fc2d695cccdd10cc7f6 Mon Sep 17 00:00:00 2001
From: Chengen Du <chengen.du@canonical.com>
Date: Fri, 17 Nov 2023 11:45:33 +0800
Subject: [PATCH 01/14] Fix "rd" command for zram data display in Linux 6.2 and
later
Kernel commit 7ac07a26dea7 ("zram: preparation for multi-zcomp support")
replaced "compressor" member with "comp_algs" in the zram struct.
Without the patch, the "rd" command can triggers the following error:
rd: WARNING: Some pages are swapped out to zram. Please run mod -s zram.
rd: invalid user virtual address: ffff7d23f010 type: "64-bit UVADDR"
Related kernel commit:
84b33bf78889 ("zram: introduce recompress sysfs knob")
Signed-off-by: Chengen Du <chengen.du@canonical.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
diskdump.c | 47 ++++++++++++++++++++++++++++++-----------------
2 files changed, 31 insertions(+), 17 deletions(-)
diff --git a/defs.h b/defs.h
index 788f63ada739..2cae5b61e589 100644
--- a/defs.h
+++ b/defs.h
@@ -2227,6 +2227,7 @@ struct offset_table { /* stash of commonly-used offsets */
long module_memory_size;
long irq_data_irq;
long zspage_huge;
+ long zram_comp_algs;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/diskdump.c b/diskdump.c
index 0fe46f4644d0..25054d96313e 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -2757,6 +2757,8 @@ diskdump_device_dump_info(FILE *ofp)
static ulong ZRAM_FLAG_SHIFT;
static ulong ZRAM_FLAG_SAME_BIT;
+static ulong ZRAM_COMP_PRIORITY_BIT1;
+static ulong ZRAM_COMP_PRIORITY_MASK;
static void
zram_init(void)
@@ -2765,6 +2767,8 @@ zram_init(void)
MEMBER_OFFSET_INIT(zram_mempoll, "zram", "mem_pool");
MEMBER_OFFSET_INIT(zram_compressor, "zram", "compressor");
+ if (INVALID_MEMBER(zram_compressor))
+ MEMBER_OFFSET_INIT(zram_comp_algs, "zram", "comp_algs");
MEMBER_OFFSET_INIT(zram_table_flag, "zram_table_entry", "flags");
if (INVALID_MEMBER(zram_table_flag))
MEMBER_OFFSET_INIT(zram_table_flag, "zram_table_entry", "value");
@@ -2782,6 +2786,8 @@ zram_init(void)
ZRAM_FLAG_SHIFT = 1 << zram_flag_shift;
ZRAM_FLAG_SAME_BIT = 1 << (zram_flag_shift+1);
+ ZRAM_COMP_PRIORITY_BIT1 = ZRAM_FLAG_SHIFT + 7;
+ ZRAM_COMP_PRIORITY_MASK = 0x3;
if (CRASHDEBUG(1))
fprintf(fp, "zram_flag_shift: %ld\n", zram_flag_shift);
@@ -2981,9 +2987,9 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
ulong zram, zram_table_entry, sector, index, entry, flags, size,
outsize, off;
- if (INVALID_MEMBER(zram_compressor)) {
+ if (INVALID_MEMBER(zram_mempoll)) {
zram_init();
- if (INVALID_MEMBER(zram_compressor)) {
+ if (INVALID_MEMBER(zram_mempoll)) {
error(WARNING,
"Some pages are swapped out to zram. "
"Please run mod -s zram.\n");
@@ -2997,8 +3003,28 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
if (!get_disk_name_private_data(pte_val, vaddr, NULL, &zram))
return 0;
- readmem(zram + OFFSET(zram_compressor), KVADDR, name,
- sizeof(name), "zram compressor", FAULT_ON_ERROR);
+ if (THIS_KERNEL_VERSION >= LINUX(2, 6, 0))
+ swp_offset = (ulonglong)__swp_offset(pte_val);
+ else
+ swp_offset = (ulonglong)SWP_OFFSET(pte_val);
+
+ sector = swp_offset << (PAGESHIFT() - 9);
+ index = sector >> SECTORS_PER_PAGE_SHIFT;
+ readmem(zram, KVADDR, &zram_table_entry,
+ sizeof(void *), "zram_table_entry", FAULT_ON_ERROR);
+ zram_table_entry += (index * SIZE(zram_table_entry));
+ readmem(zram_table_entry + OFFSET(zram_table_flag), KVADDR, &flags,
+ sizeof(void *), "zram_table_flag", FAULT_ON_ERROR);
+ if (VALID_MEMBER(zram_compressor))
+ readmem(zram + OFFSET(zram_compressor), KVADDR, name, sizeof(name),
+ "zram compressor", FAULT_ON_ERROR);
+ else {
+ ulong comp_alg_addr;
+ uint32_t prio = (flags >> ZRAM_COMP_PRIORITY_BIT1) & ZRAM_COMP_PRIORITY_MASK;
+ readmem(zram + OFFSET(zram_comp_algs) + sizeof(const char *) * prio, KVADDR,
+ &comp_alg_addr, sizeof(comp_alg_addr), "zram comp_algs", FAULT_ON_ERROR);
+ read_string(comp_alg_addr, name, sizeof(name));
+ }
if (STREQ(name, "lzo")) {
#ifdef LZO
if (!(dd->flags & LZO_SUPPORTED)) {
@@ -3019,12 +3045,6 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
return 0;
}
- if (THIS_KERNEL_VERSION >= LINUX(2, 6, 0)) {
- swp_offset = (ulonglong)__swp_offset(pte_val);
- } else {
- swp_offset = (ulonglong)SWP_OFFSET(pte_val);
- }
-
zram_buf = (unsigned char *)GETBUF(PAGESIZE());
/* lookup page from swap cache */
off = PAGEOFFSET(vaddr);
@@ -3034,15 +3054,8 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
goto out;
}
- sector = swp_offset << (PAGESHIFT() - 9);
- index = sector >> SECTORS_PER_PAGE_SHIFT;
- readmem(zram, KVADDR, &zram_table_entry,
- sizeof(void *), "zram_table_entry", FAULT_ON_ERROR);
- zram_table_entry += (index * SIZE(zram_table_entry));
readmem(zram_table_entry, KVADDR, &entry,
sizeof(void *), "entry of table", FAULT_ON_ERROR);
- readmem(zram_table_entry + OFFSET(zram_table_flag), KVADDR, &flags,
- sizeof(void *), "zram_table_flag", FAULT_ON_ERROR);
if (!entry || (flags & ZRAM_FLAG_SAME_BIT)) {
int count;
ulong *same_buf = (ulong *)GETBUF(PAGESIZE());
--
2.41.0

View File

@ -1,71 +0,0 @@
From 8527bbff71cbdfd90a67d5cec4a1d94156e6bf13 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Wed, 31 May 2023 14:01:36 +0800
Subject: [PATCH 1/5] Output prompt when stdin is not a TTY
When stdin is not a TTY, prompt ("crash> ") won't be displayed. If
another process interact with crash with piped stdin/stdout, it will not
get the prompt as a delimiter.
Compared to other debugger like gdb, crash seems intended to give a
prompt in this case in the beginning of process_command_line(). It
checks if pc->flags does NOT have any of
READLINE|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE, a
prompt should be printed. The check will never be true since READLINE is
set in setup_environment() unconditionally.
It makes more sense to change the READLINE flag in the check to TTY
instead. Besides this change, the prompt in process_command_line() should
only be print when it's not in the middle of processing the input file
recovering from a previous FATAL command, because the prompt will be
displayed by the exec_input_file().
Additionally, when stdin is not TTY, repeat the command line from user
after prompt, which can give more context.
The prompt and command line can be opt out by using the silent (-s) flag.
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
cmdline.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/cmdline.c b/cmdline.c
index ded6551c2597..b7f919ae2279 100644
--- a/cmdline.c
+++ b/cmdline.c
@@ -64,8 +64,8 @@ process_command_line(void)
fp = stdout;
BZERO(pc->command_line, BUFSIZE);
- if (!(pc->flags &
- (READLINE|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE)))
+ if (!pc->ifile_in_progress && !(pc->flags &
+ (TTY|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE)))
fprintf(fp, "%s", pc->prompt);
fflush(fp);
@@ -136,12 +136,16 @@ process_command_line(void)
add_history(pc->command_line);
check_special_handling(pc->command_line);
- } else {
- if (fgets(pc->command_line, BUFSIZE-1, stdin) == NULL)
+ } else {
+ if (fgets(pc->command_line, BUFSIZE-1, stdin) == NULL)
clean_exit(1);
+ if (!(pc->flags & SILENT)) {
+ fprintf(fp, "%s", pc->command_line);
+ fflush(fp);
+ }
clean_line(pc->command_line);
strcpy(pc->orig_line, pc->command_line);
- }
+ }
/*
* First clean out all linefeeds and leading/trailing spaces.
--
2.37.1

View File

@ -1,31 +0,0 @@
From a48cebaa9691efe4e2c47bbd56d40ebc8acfa89e Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain@linux.ibm.com>
Date: Mon, 9 May 2022 12:49:56 +0530
Subject: [PATCH 01/89] ppc64: update the NR_CPUS to 8192
Since the kernel commit 2d8ae638bb86 ("powerpc: Make the NR_CPUS max 8192")
the NR_CPUS on Linux kernel ranges from 1-8192. So let's match NR_CPUS with
the max NR_CPUS count on the Linux kernel.
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 89f57873f1a1..984348062bcb 100644
--- a/defs.h
+++ b/defs.h
@@ -136,7 +136,7 @@
#define NR_CPUS (4096)
#endif
#ifdef PPC64
-#define NR_CPUS (2048)
+#define NR_CPUS (8192)
#endif
#ifdef S390
#define NR_CPUS (512)
--
2.37.1

View File

@ -1,179 +0,0 @@
From 58c1816521c2e6bece3d69256b1866c9df8d93aa Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Tue, 16 May 2023 08:59:50 +0900
Subject: [PATCH 2/6] Fix failure of "dev -d|-D" options on Linux 6.4 and later
kernels
Kernel commit 2df418cf4b72 ("driver core: class: remove subsystem
private pointer from struct class"), which is contained in Linux 6.4 and
later kernels, removed the class.p member for struct subsys_private. As
a result, the "dev -d|-D" options fail with the following error.
dev: invalid structure member offset: class_p
FILE: dev.c LINE: 4689 FUNCTION: init_iter()
Search the class_kset list for the subsys_private of block class to fix
this.
As a preparation, introduce get_subsys_private() function, which is
abstracted from the same search procedure in init_memory_block().
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
dev.c | 20 +++++++++++++++++---
memory.c | 35 +++--------------------------------
tools.c | 43 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 64 insertions(+), 35 deletions(-)
diff --git a/defs.h b/defs.h
index 211fc9d55d33..21cc760444d1 100644
--- a/defs.h
+++ b/defs.h
@@ -5521,6 +5521,7 @@ struct rb_node *rb_left(struct rb_node *, struct rb_node *);
struct rb_node *rb_next(struct rb_node *);
struct rb_node *rb_last(struct rb_root *);
long percpu_counter_sum_positive(ulong fbc);
+ulong get_subsys_private(char *, char *);
/*
* symbols.c
diff --git a/dev.c b/dev.c
index 75d30bd022a1..9d38aef9b3db 100644
--- a/dev.c
+++ b/dev.c
@@ -4686,9 +4686,16 @@ init_iter(struct iter *i)
} else {
/* kernel version > 2.6.27, klist */
unsigned long class_private_addr;
- readmem(block_class_addr + OFFSET(class_p), KVADDR,
- &class_private_addr, sizeof(class_private_addr),
- "class.p", FAULT_ON_ERROR);
+
+ if (INVALID_MEMBER(class_p)) /* kernel version >= 6.4 */
+ class_private_addr = get_subsys_private("class_kset", "block");
+ else
+ readmem(block_class_addr + OFFSET(class_p), KVADDR,
+ &class_private_addr, sizeof(class_private_addr),
+ "class.p", FAULT_ON_ERROR);
+
+ if (!class_private_addr)
+ error(FATAL, "cannot determine subsys_private for block.\n");
if (VALID_STRUCT(class_private)) {
/* 2.6.27 < kernel version <= 2.6.37-rc2 */
@@ -4823,6 +4830,13 @@ void diskio_init(void)
if (INVALID_MEMBER(class_devices))
MEMBER_OFFSET_INIT(class_devices, "class", "devices");
MEMBER_OFFSET_INIT(class_p, "class", "p");
+ if (INVALID_MEMBER(class_p)) {
+ MEMBER_OFFSET_INIT(kset_list, "kset", "list");
+ MEMBER_OFFSET_INIT(kset_kobj, "kset", "kobj");
+ MEMBER_OFFSET_INIT(kobject_name, "kobject", "name");
+ MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry");
+ MEMBER_OFFSET_INIT(subsys_private_subsys, "subsys_private", "subsys");
+ }
MEMBER_OFFSET_INIT(class_private_devices, "class_private",
"class_devices");
MEMBER_OFFSET_INIT(device_knode_class, "device", "knode_class");
diff --git a/memory.c b/memory.c
index 0568f18eb9b7..953fc380c03c 100644
--- a/memory.c
+++ b/memory.c
@@ -17865,38 +17865,9 @@ init_memory_block(int *klistcnt, ulong **klistbuf)
* v6.3-rc1
* d2bf38c088e0 driver core: remove private pointer from struct bus_type
*/
- if (INVALID_MEMBER(bus_type_p)) {
- int i, cnt;
- char buf[32];
- ulong bus_kset, list, name;
-
- BZERO(ld, sizeof(struct list_data));
-
- get_symbol_data("bus_kset", sizeof(ulong), &bus_kset);
- readmem(bus_kset + OFFSET(kset_list), KVADDR, &list,
- sizeof(ulong), "bus_kset.list", FAULT_ON_ERROR);
-
- ld->flags |= LIST_ALLOCATE;
- ld->start = list;
- ld->end = bus_kset + OFFSET(kset_list);
- ld->list_head_offset = OFFSET(kobject_entry);
-
- cnt = do_list(ld);
- for (i = 0; i < cnt; i++) {
- readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name,
- sizeof(ulong), "kobject.name", FAULT_ON_ERROR);
- read_string(name, buf, sizeof(buf)-1);
- if (CRASHDEBUG(1))
- fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf);
- if (STREQ(buf, "memory")) {
- /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */
- private = ld->list_ptr[i] - OFFSET(kset_kobj)
- - OFFSET(subsys_private_subsys);
- break;
- }
- }
- FREEBUF(ld->list_ptr);
- } else {
+ if (INVALID_MEMBER(bus_type_p))
+ private = get_subsys_private("bus_kset", "memory");
+ else {
ulong memory_subsys = symbol_value("memory_subsys");
readmem(memory_subsys + OFFSET(bus_type_p), KVADDR, &private,
sizeof(void *), "memory_subsys.private", FAULT_ON_ERROR);
diff --git a/tools.c b/tools.c
index c2cfa7e280bc..392a79707e61 100644
--- a/tools.c
+++ b/tools.c
@@ -6963,3 +6963,46 @@ percpu_counter_sum_positive(ulong fbc)
return (ret < 0) ? 0 : ret;
}
+
+ulong
+get_subsys_private(char *kset_name, char *target_name)
+{
+ ulong kset_addr, kset_list, name_addr, private = 0;
+ struct list_data list_data, *ld;
+ char buf[32];
+ int i, cnt;
+
+ if (!symbol_exists(kset_name))
+ return 0;
+
+ ld = &list_data;
+ BZERO(ld, sizeof(struct list_data));
+
+ get_symbol_data(kset_name, sizeof(ulong), &kset_addr);
+ readmem(kset_addr + OFFSET(kset_list), KVADDR, &kset_list,
+ sizeof(ulong), "kset.list", FAULT_ON_ERROR);
+
+ ld->flags |= LIST_ALLOCATE;
+ ld->start = kset_list;
+ ld->end = kset_addr + OFFSET(kset_list);
+ ld->list_head_offset = OFFSET(kobject_entry);
+
+ cnt = do_list(ld);
+
+ for (i = 0; i < cnt; i++) {
+ readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name_addr,
+ sizeof(ulong), "kobject.name", FAULT_ON_ERROR);
+ read_string(name_addr, buf, sizeof(buf)-1);
+ if (CRASHDEBUG(1))
+ fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf);
+ if (STREQ(buf, target_name)) {
+ /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */
+ private = ld->list_ptr[i] - OFFSET(kset_kobj)
+ - OFFSET(subsys_private_subsys);
+ break;
+ }
+ }
+ FREEBUF(ld->list_ptr);
+
+ return private;
+}
--
2.37.1

View File

@ -0,0 +1,168 @@
From d65e5d3eae0dd06a5308a5cb00c05fee60594093 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Mon, 20 Nov 2023 13:22:56 +0900
Subject: [PATCH 02/14] Fix typos in offset_table and missing "help -o" items
A few of zram related members in the offset_table have typos and
irregular naming rule, also they are not present in the "help -o"
output. Let's fix these.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 8 ++++----
diskdump.c | 24 ++++++++++++------------
memory.c | 2 +-
symbols.c | 12 ++++++++++++
4 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/defs.h b/defs.h
index 2cae5b61e589..5218a94fe4a4 100644
--- a/defs.h
+++ b/defs.h
@@ -2112,13 +2112,13 @@ struct offset_table { /* stash of commonly-used offsets */
long bpf_prog_aux_name;
long page_private;
long swap_info_struct_bdev;
- long zram_mempoll;
+ long zram_mem_pool;
long zram_compressor;
- long zram_table_flag;
- long zspoll_size_class;
+ long zram_table_entry_flags;
+ long zs_pool_size_class;
long size_class_size;
long gendisk_private_data;
- long zram_table_entry;
+ long zram_table_entry; /* unused; but cannot remove */
long module_core_size_rw;
long module_core_size_rx;
long module_init_size_rw;
diff --git a/diskdump.c b/diskdump.c
index 25054d96313e..f20f3ac519a1 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -2765,15 +2765,15 @@ zram_init(void)
{
long zram_flag_shift;
- MEMBER_OFFSET_INIT(zram_mempoll, "zram", "mem_pool");
+ MEMBER_OFFSET_INIT(zram_mem_pool, "zram", "mem_pool");
MEMBER_OFFSET_INIT(zram_compressor, "zram", "compressor");
if (INVALID_MEMBER(zram_compressor))
MEMBER_OFFSET_INIT(zram_comp_algs, "zram", "comp_algs");
- MEMBER_OFFSET_INIT(zram_table_flag, "zram_table_entry", "flags");
- if (INVALID_MEMBER(zram_table_flag))
- MEMBER_OFFSET_INIT(zram_table_flag, "zram_table_entry", "value");
+ MEMBER_OFFSET_INIT(zram_table_entry_flags, "zram_table_entry", "flags");
+ if (INVALID_MEMBER(zram_table_entry_flags))
+ MEMBER_OFFSET_INIT(zram_table_entry_flags, "zram_table_entry", "value");
STRUCT_SIZE_INIT(zram_table_entry, "zram_table_entry");
- MEMBER_OFFSET_INIT(zspoll_size_class, "zs_pool", "size_class");
+ MEMBER_OFFSET_INIT(zs_pool_size_class, "zs_pool", "size_class");
MEMBER_OFFSET_INIT(size_class_size, "size_class", "size");
MEMBER_OFFSET_INIT(zspage_huge, "zspage", "huge");
@@ -2826,7 +2826,7 @@ zram_object_addr(ulong pool, ulong handle, unsigned char *zram_buf)
if (zs_magic != ZSPAGE_MAGIC)
error(FATAL, "zspage magic incorrect: %x\n", zs_magic);
- class = pool + OFFSET(zspoll_size_class);
+ class = pool + OFFSET(zs_pool_size_class);
class += (class_idx * sizeof(void *));
readmem(class, KVADDR, &class, sizeof(void *), "size_class", FAULT_ON_ERROR);
readmem(class + OFFSET(size_class_size), KVADDR,
@@ -2987,9 +2987,9 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
ulong zram, zram_table_entry, sector, index, entry, flags, size,
outsize, off;
- if (INVALID_MEMBER(zram_mempoll)) {
+ if (INVALID_MEMBER(zram_mem_pool)) {
zram_init();
- if (INVALID_MEMBER(zram_mempoll)) {
+ if (INVALID_MEMBER(zram_mem_pool)) {
error(WARNING,
"Some pages are swapped out to zram. "
"Please run mod -s zram.\n");
@@ -3013,8 +3013,8 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
readmem(zram, KVADDR, &zram_table_entry,
sizeof(void *), "zram_table_entry", FAULT_ON_ERROR);
zram_table_entry += (index * SIZE(zram_table_entry));
- readmem(zram_table_entry + OFFSET(zram_table_flag), KVADDR, &flags,
- sizeof(void *), "zram_table_flag", FAULT_ON_ERROR);
+ readmem(zram_table_entry + OFFSET(zram_table_entry_flags), KVADDR, &flags,
+ sizeof(void *), "zram_table_entry.flags", FAULT_ON_ERROR);
if (VALID_MEMBER(zram_compressor))
readmem(zram + OFFSET(zram_compressor), KVADDR, name, sizeof(name),
"zram compressor", FAULT_ON_ERROR);
@@ -3072,8 +3072,8 @@ try_zram_decompress(ulonglong pte_val, unsigned char *buf, ulong len, ulonglong
goto out;
}
- readmem(zram + OFFSET(zram_mempoll), KVADDR, &zram,
- sizeof(void *), "zram_mempoll", FAULT_ON_ERROR);
+ readmem(zram + OFFSET(zram_mem_pool), KVADDR, &zram,
+ sizeof(void *), "zram.mem_pool", FAULT_ON_ERROR);
obj_addr = zram_object_addr(zram, entry, zram_buf);
if (obj_addr == NULL) {
diff --git a/memory.c b/memory.c
index 86ccec5e2bac..791194a405d4 100644
--- a/memory.c
+++ b/memory.c
@@ -519,7 +519,7 @@ vm_init(void)
"swap_info_struct", "old_block_size");
MEMBER_OFFSET_INIT(swap_info_struct_bdev, "swap_info_struct", "bdev");
- MEMBER_OFFSET_INIT(zspoll_size_class, "zs_pool", "size_class");
+ MEMBER_OFFSET_INIT(zs_pool_size_class, "zs_pool", "size_class");
MEMBER_OFFSET_INIT(size_class_size, "size_class", "size");
MEMBER_OFFSET_INIT(block_device_bd_inode, "block_device", "bd_inode");
diff --git a/symbols.c b/symbols.c
index 8e8b4c31d915..176c95026f03 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10304,6 +10304,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(page_active));
fprintf(fp, " page_compound_head: %ld\n",
OFFSET(page_compound_head));
+ fprintf(fp, " page_private: %ld\n", OFFSET(page_private));
fprintf(fp, " trace_print_flags_mask: %ld\n",
OFFSET(trace_print_flags_mask));
@@ -10330,6 +10331,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(swap_info_struct_inuse_pages));
fprintf(fp, "swap_info_struct_old_block_size: %ld\n",
OFFSET(swap_info_struct_old_block_size));
+ fprintf(fp, " swap_info_struct_bdev: %ld\n", OFFSET(swap_info_struct_bdev));
fprintf(fp, " block_device_bd_inode: %ld\n",
OFFSET(block_device_bd_inode));
fprintf(fp, " block_device_bd_list: %ld\n",
@@ -11359,6 +11361,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(gendisk_part0));
fprintf(fp, " gendisk_queue: %ld\n",
OFFSET(gendisk_queue));
+ fprintf(fp, " gendisk_private_data: %ld\n", OFFSET(gendisk_private_data));
+
fprintf(fp, " hd_struct_dev: %ld\n",
OFFSET(hd_struct_dev));
fprintf(fp, " hd_struct_dkstats: %ld\n",
@@ -11765,6 +11769,14 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " maple_metadata_end: %ld\n", OFFSET(maple_metadata_end));
fprintf(fp, " maple_metadata_gap: %ld\n", OFFSET(maple_metadata_gap));
+ fprintf(fp, " zram_mem_pool: %ld\n", OFFSET(zram_mem_pool));
+ fprintf(fp, " zram_compressor: %ld\n", OFFSET(zram_compressor));
+ fprintf(fp, " zram_comp_algs: %ld\n", OFFSET(zram_comp_algs));
+ fprintf(fp, " zram_table_entry_flags: %ld\n", OFFSET(zram_table_entry_flags));
+ fprintf(fp, " zs_pool_size_class: %ld\n", OFFSET(zs_pool_size_class));
+ fprintf(fp, " size_class_size: %ld\n", OFFSET(size_class_size));
+ fprintf(fp, " zspage_huge: %ld\n", OFFSET(zspage_huge));
+
fprintf(fp, "\n size_table:\n");
fprintf(fp, " page: %ld\n", SIZE(page));
fprintf(fp, " page_flags: %ld\n", SIZE(page_flags));
--
2.41.0

View File

@ -1,62 +0,0 @@
From d3682a7e57036b226d395541b590ebc460123b04 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:13 +0800
Subject: [PATCH 02/89] sbitmapq: remove struct and member validation in
sbitmapq_init()
Let's remove the struct and member validation from sbitmapq_init(), which
will help the crash to display the actual error when the sbitmapq fails.
Without the patch:
crash> sbitmapq ffff8e99d0dc8010
sbitmapq: command not supported or applicable on this architecture or kernel
With the patch:
crash> sbitmapq ffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_queue_alloc_hint
FILE: sbitmap.c LINE: 365 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 24 ------------------------
1 file changed, 24 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index 96a61e6c2c71..7693eef6cebd 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -525,30 +525,6 @@ void sbitmapq_init(void)
MEMBER_OFFSET_INIT(sbq_wait_state_wait_cnt, "sbq_wait_state", "wait_cnt");
MEMBER_OFFSET_INIT(sbq_wait_state_wait, "sbq_wait_state", "wait");
- if (!VALID_SIZE(sbitmap_word) ||
- !VALID_SIZE(sbitmap) ||
- !VALID_SIZE(sbitmap_queue) ||
- !VALID_SIZE(sbq_wait_state) ||
- INVALID_MEMBER(sbitmap_word_depth) ||
- INVALID_MEMBER(sbitmap_word_word) ||
- INVALID_MEMBER(sbitmap_word_cleared) ||
- INVALID_MEMBER(sbitmap_depth) ||
- INVALID_MEMBER(sbitmap_shift) ||
- INVALID_MEMBER(sbitmap_map_nr) ||
- INVALID_MEMBER(sbitmap_map) ||
- INVALID_MEMBER(sbitmap_queue_sb) ||
- INVALID_MEMBER(sbitmap_queue_alloc_hint) ||
- INVALID_MEMBER(sbitmap_queue_wake_batch) ||
- INVALID_MEMBER(sbitmap_queue_wake_index) ||
- INVALID_MEMBER(sbitmap_queue_ws) ||
- INVALID_MEMBER(sbitmap_queue_ws_active) ||
- INVALID_MEMBER(sbitmap_queue_round_robin) ||
- INVALID_MEMBER(sbitmap_queue_min_shallow_depth) ||
- INVALID_MEMBER(sbq_wait_state_wait_cnt) ||
- INVALID_MEMBER(sbq_wait_state_wait)) {
- command_not_supported();
- }
-
sb_flags |= SB_FLAG_INIT;
}
--
2.37.1

View File

@ -1,345 +0,0 @@
From 77d8621876c1c6a3a25b91e464ba588a542485fb Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 18 May 2023 16:53:54 +0900
Subject: [PATCH 2/5] x86_64: Fix "bt" command printing stale entries on Linux
6.4 and later
Kernel commit fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in
two"), which is contained in Linux 6.4 and later kernels, changed
ORC_TYPE_CALL macro from 0 to 2. As a result, the "bt" command cannot
use ORC entries, and can display stale entries in a call trace.
crash> bt 1
PID: 1 TASK: ffff93cd06294180 CPU: 51 COMMAND: "systemd"
#0 [ffffb72bc00cbc98] __schedule at ffffffff86e52aae
#1 [ffffb72bc00cbd00] schedule at ffffffff86e52f6a
#2 [ffffb72bc00cbd18] schedule_hrtimeout_range_clock at ffffffff86e58ef5
#3 [ffffb72bc00cbd88] ep_poll at ffffffff8669624d
#4 [ffffb72bc00cbe28] do_epoll_wait at ffffffff86696371
#5 [ffffb72bc00cbe30] do_timerfd_settime at ffffffff8669902b <<
#6 [ffffb72bc00cbe60] __x64_sys_epoll_wait at ffffffff86696bf0
#7 [ffffb72bc00cbeb0] do_syscall_64 at ffffffff86e3feb9
#8 [ffffb72bc00cbee0] __task_pid_nr_ns at ffffffff863330d7 <<
#9 [ffffb72bc00cbf08] syscall_exit_to_user_mode at ffffffff86e466b2 << stale entries
#10 [ffffb72bc00cbf18] do_syscall_64 at ffffffff86e3fec9 <<
#11 [ffffb72bc00cbf50] entry_SYSCALL_64_after_hwframe at ffffffff870000aa
Also, kernel commit ffb1b4a41016 added a member to struct orc_entry.
Although this does not affect the crash's unwinder, its debugging
information can be displayed incorrectly.
To fix these,
(1) introduce "kernel_orc_entry_6_4" structure corresponding to 6.4 and
abstruction layer "orc_entry" structure in crash,
(2) switch ORC_TYPE_CALL to 2 or 0 with kernel's orc_entry structure.
Related orc_entry history:
v4.14 39358a033b2e introduced struct orc_entry
v4.19 d31a580266ee added orc_entry.end member
v6.3 ffb1b4a41016 added orc_entry.signal member
v6.4 fb799447ae29 removed end member and changed type member to 3 bits
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 28 ++++++++++++-
x86_64.c | 119 +++++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 119 insertions(+), 28 deletions(-)
diff --git a/defs.h b/defs.h
index 11fdc17e60d0..bfda0c48d37b 100644
--- a/defs.h
+++ b/defs.h
@@ -6363,9 +6363,29 @@ typedef struct __attribute__((__packed__)) {
unsigned int sp_reg:4;
unsigned int bp_reg:4;
unsigned int type:2;
+ unsigned int signal:1;
unsigned int end:1;
} kernel_orc_entry;
+typedef struct __attribute__((__packed__)) {
+ signed short sp_offset;
+ signed short bp_offset;
+ unsigned int sp_reg:4;
+ unsigned int bp_reg:4;
+ unsigned int type:3;
+ unsigned int signal:1;
+} kernel_orc_entry_6_4;
+
+typedef struct orc_entry {
+ signed short sp_offset;
+ signed short bp_offset;
+ unsigned int sp_reg;
+ unsigned int bp_reg;
+ unsigned int type;
+ unsigned int signal;
+ unsigned int end;
+} orc_entry;
+
struct ORC_data {
int module_ORC;
uint lookup_num_blocks;
@@ -6376,10 +6396,13 @@ struct ORC_data {
ulong orc_lookup;
ulong ip_entry;
ulong orc_entry;
- kernel_orc_entry kernel_orc_entry;
+ orc_entry orc_entry_data;
+ int has_signal;
+ int has_end;
};
-#define ORC_TYPE_CALL 0
+#define ORC_TYPE_CALL ((machdep->flags & ORC_6_4) ? 2 : 0)
+/* The below entries are not used and must be updated if we use them. */
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
#define UNWIND_HINT_TYPE_SAVE 3
@@ -6456,6 +6479,7 @@ struct machine_specific {
#define ORC (0x4000)
#define KPTI (0x8000)
#define L1TF (0x10000)
+#define ORC_6_4 (0x20000)
#define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4|VM_5LEVEL)
diff --git a/x86_64.c b/x86_64.c
index 693a08bea758..87e87ae6e1e8 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -132,9 +132,9 @@ static void GART_init(void);
static void x86_64_exception_stacks_init(void);
static int in_START_KERNEL_map(ulong);
static ulong orc_ip(ulong);
-static kernel_orc_entry *__orc_find(ulong, ulong, uint, ulong);
-static kernel_orc_entry *orc_find(ulong);
-static kernel_orc_entry *orc_module_find(ulong);
+static orc_entry *__orc_find(ulong, ulong, uint, ulong);
+static orc_entry *orc_find(ulong);
+static orc_entry *orc_module_find(ulong);
static ulong ip_table_to_vaddr(ulong);
static void orc_dump(ulong);
@@ -806,6 +806,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, "%sFRAMESIZE_DEBUG", others++ ? "|" : "");
if (machdep->flags & ORC)
fprintf(fp, "%sORC", others++ ? "|" : "");
+ if (machdep->flags & ORC_6_4)
+ fprintf(fp, "%sORC_6_4", others++ ? "|" : "");
if (machdep->flags & FRAMEPOINTER)
fprintf(fp, "%sFRAMEPOINTER", others++ ? "|" : "");
if (machdep->flags & GART_REGION)
@@ -980,6 +982,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " ORC_data: %s", machdep->flags & ORC ? "\n" : "(unused)\n");
if (machdep->flags & ORC) {
fprintf(fp, " module_ORC: %s\n", ms->orc.module_ORC ? "TRUE" : "FALSE");
+ fprintf(fp, " has_signal: %s\n", ms->orc.has_signal ? "TRUE" : "FALSE");
+ fprintf(fp, " has_end: %s\n", ms->orc.has_end ? "TRUE" : "FALSE");
fprintf(fp, " lookup_num_blocks: %d\n", ms->orc.lookup_num_blocks);
fprintf(fp, " __start_orc_unwind_ip: %lx\n", ms->orc.__start_orc_unwind_ip);
fprintf(fp, " __stop_orc_unwind_ip: %lx\n", ms->orc.__stop_orc_unwind_ip);
@@ -988,14 +992,18 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " orc_lookup: %lx\n", ms->orc.orc_lookup);
fprintf(fp, " ip_entry: %lx\n", ms->orc.ip_entry);
fprintf(fp, " orc_entry: %lx\n", ms->orc.orc_entry);
- fprintf(fp, " kernel_orc_entry:\n");
- fprintf(fp, " sp_offset: %d\n", ms->orc.kernel_orc_entry.sp_offset);
- fprintf(fp, " bp_offset: %d\n", ms->orc.kernel_orc_entry.bp_offset);
- fprintf(fp, " sp_reg: %d\n", ms->orc.kernel_orc_entry.sp_reg);
- fprintf(fp, " bp_reg: %d\n", ms->orc.kernel_orc_entry.bp_reg);
- fprintf(fp, " type: %d\n", ms->orc.kernel_orc_entry.type);
- if (MEMBER_EXISTS("orc_entry", "end"))
- fprintf(fp, " end: %d\n", ms->orc.kernel_orc_entry.end);
+ fprintf(fp, " orc_entry_data:\n");
+ fprintf(fp, " sp_offset: %d\n", ms->orc.orc_entry_data.sp_offset);
+ fprintf(fp, " bp_offset: %d\n", ms->orc.orc_entry_data.bp_offset);
+ fprintf(fp, " sp_reg: %d\n", ms->orc.orc_entry_data.sp_reg);
+ fprintf(fp, " bp_reg: %d\n", ms->orc.orc_entry_data.bp_reg);
+ fprintf(fp, " type: %d\n", ms->orc.orc_entry_data.type);
+ if (ms->orc.has_signal)
+ fprintf(fp, " signal: %d\n", ms->orc.orc_entry_data.signal);
+ else
+ fprintf(fp, " signal: (n/a)\n");
+ if (ms->orc.has_end)
+ fprintf(fp, " end: %d\n", ms->orc.orc_entry_data.end);
else
fprintf(fp, " end: (n/a)\n");
}
@@ -6440,6 +6448,12 @@ x86_64_ORC_init(void)
MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");
+ orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added at 6.3 */
+ orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed at 6.4 */
+
+ if (orc->has_signal && !orc->has_end)
+ machdep->flags |= ORC_6_4;
+
machdep->flags |= ORC;
}
@@ -8522,7 +8536,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
int reterror;
int arg_exists;
int exception;
- kernel_orc_entry *korc;
+ orc_entry *korc;
if (!(bt->flags & BT_FRAMESIZE_DEBUG)) {
if ((bt->flags & BT_FRAMESIZE_IGNORE_MASK) ||
@@ -8608,11 +8622,14 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
if (CRASHDEBUG(1)) {
+ struct ORC_data *orc = &machdep->machspec->orc;
fprintf(fp,
"rsp: %lx textaddr: %lx -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
rsp, textaddr, korc->sp_offset, korc->bp_offset,
korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
@@ -9118,7 +9135,53 @@ orc_ip(ulong ip)
return (ip + ip_entry);
}
-static kernel_orc_entry *
+static orc_entry *
+orc_get_entry(struct ORC_data *orc)
+{
+ struct orc_entry *entry = &orc->orc_entry_data;
+
+ if (machdep->flags & ORC_6_4) {
+ kernel_orc_entry_6_4 korc;
+
+ if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry_6_4),
+ "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ entry->sp_offset = korc.sp_offset;
+ entry->bp_offset = korc.bp_offset;
+ entry->sp_reg = korc.sp_reg;
+ entry->bp_reg = korc.bp_reg;
+ entry->type = korc.type;
+ entry->signal = korc.signal;
+ } else {
+ kernel_orc_entry korc;
+
+ if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry),
+ "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ entry->sp_offset = korc.sp_offset;
+ entry->bp_offset = korc.bp_offset;
+ entry->sp_reg = korc.sp_reg;
+ entry->bp_reg = korc.bp_reg;
+ entry->type = korc.type;
+ if (orc->has_end) {
+ /*
+ * orc_entry.signal was inserted before orc_entry.end.
+ * see ffb1b4a41016.
+ */
+ if (orc->has_signal) {
+ entry->signal = korc.signal;
+ entry->end = korc.end;
+ } else
+ entry->end = korc.signal; /* on purpose */
+ }
+ }
+
+ return entry;
+}
+
+static orc_entry *
__orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
{
int index;
@@ -9128,7 +9191,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
int *ip_table = (int *)ip_table_ptr;
struct ORC_data *orc = &machdep->machspec->orc;
ulong vaddr;
- kernel_orc_entry *korc;
+ orc_entry *korc;
if (CRASHDEBUG(2)) {
int i, ip_entry;
@@ -9172,18 +9235,20 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
orc->ip_entry = (ulong)found;
orc->orc_entry = u_table_ptr + (index * SIZE(orc_entry));
- if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry,
- sizeof(kernel_orc_entry), "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+
+ if (!orc_get_entry(orc))
return NULL;
- korc = &orc->kernel_orc_entry;
+ korc = &orc->orc_entry_data;
if (CRASHDEBUG(2)) {
fprintf(fp, " found: %lx index: %d\n", (ulong)found, index);
fprintf(fp,
" orc_entry: %lx sp_offset: %d bp_offset: %d sp_reg: %d bp_reg: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
@@ -9196,7 +9261,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
#define LOOKUP_START_IP (unsigned long)kt->stext
#define LOOKUP_STOP_IP (unsigned long)kt->etext
-static kernel_orc_entry *
+static orc_entry *
orc_find(ulong ip)
{
unsigned int idx, start, stop;
@@ -9266,7 +9331,7 @@ orc_find(ulong ip)
orc->__start_orc_unwind + (start * SIZE(orc_entry)), stop - start, ip);
}
-static kernel_orc_entry *
+static orc_entry *
orc_module_find(ulong ip)
{
struct load_module *lm;
@@ -9313,7 +9378,7 @@ static void
orc_dump(ulong ip)
{
struct ORC_data *orc = &machdep->machspec->orc;
- kernel_orc_entry *korc;
+ orc_entry *korc;
ulong vaddr, offset;
struct syment *sp, *orig;
@@ -9336,13 +9401,15 @@ next_in_func:
fprintf(fp, "%s+%ld -> ", sp->name, offset);
else
fprintf(fp, "(unresolved) -> ");
- if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry, sizeof(kernel_orc_entry),
- "kernel orc_entry", RETURN_ON_ERROR))
+
+ if (!orc_get_entry(orc))
error(FATAL, "cannot read orc_entry\n");
- korc = &orc->kernel_orc_entry;
+ korc = &orc->orc_entry_data;
fprintf(fp, "orc: %lx spo: %d bpo: %d spr: %d bpr: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
--
2.37.1

View File

@ -1,48 +0,0 @@
From ec1e61b33a705b8be8d116a541c7b076b0429deb Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 12 Jun 2023 18:50:05 +0800
Subject: [PATCH 3/5] Fix invalid structure size error during crash startup on
ppc64
The crash utility will fail to start session on ppc64 with the following
error:
# crash vmlinux vmcore -s
crash: invalid structure size: note_buf
FILE: diskdump.c LINE: 121 FUNCTION: have_crash_notes()
[./crash] error trace: 101859ac => 10291798 => 10291450 => 10266038
10266038: SIZE_verify+156
10291450: have_crash_notes+308
10291798: map_cpus_to_prstatus_kdump_cmprs+448
101859ac: task_init+11980
The reason is that the size of note_buf is not initialized before using
SIZE(note_buf) in the have_crash_notes() on some architectures including
ppc64. Let's initialize it in task_init() to fix this issue.
Fixes: db8c030857b4 ("diskdump/netdump: fix segmentation fault caused by failure of stopping CPUs")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
task.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/task.c b/task.c
index 88941c7b0e4d..2b7467b4193d 100644
--- a/task.c
+++ b/task.c
@@ -675,6 +675,9 @@ task_init(void)
tt->this_task = pid_to_task(active_pid);
}
else {
+ if (INVALID_SIZE(note_buf))
+ STRUCT_SIZE_INIT(note_buf, "note_buf_t");
+
if (KDUMP_DUMPFILE())
map_cpus_to_prstatus();
else if (ELF_NOTES_VALID() && DISKDUMP_DUMPFILE())
--
2.37.1

View File

@ -1,86 +0,0 @@
From 342cf340ed0386880fe2a3115d6bef32eabb511b Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 18 May 2023 11:48:28 +0900
Subject: [PATCH 3/6] Fix "kmem -v" option displaying no regions on Linux 6.3
and later
Kernel commit 869176a09606 ("mm/vmalloc.c: add flags to mark vm_map_ram
area"), which is contained in Linux 6.3 and later, added "flags" member
to struct vmap_area. This was the revival of the "flags" member as
kernel commit 688fcbfc06e4 had eliminated it before.
As a result, crash started to use the old procedure using the member and
displays no vmalloc'd regions, because it does not have the same flag
value as the old one.
crash> kmem -v
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
crash>
To fix this, also check if vmap_area.purge_list exists, which was
introduced with the flags and removed later, to determine that the flags
member is the old one.
Related vmap_area history:
v2.6.28 db64fe02258f introduced vmap_area with flags and purge_list
v5.4 688fcbfc06e4 removed flags
v5.11 96e2db456135 removed purge_list
v6.3 869176a09606 added flags again
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 4 +++-
symbols.c | 1 +
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 21cc760444d1..bfa07c3f5150 100644
--- a/defs.h
+++ b/defs.h
@@ -2216,6 +2216,7 @@ struct offset_table { /* stash of commonly-used offsets */
long in6_addr_in6_u;
long kset_kobj;
long subsys_private_subsys;
+ long vmap_area_purge_list;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index 953fc380c03c..15fa8b2f08f1 100644
--- a/memory.c
+++ b/memory.c
@@ -429,6 +429,7 @@ vm_init(void)
MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "vm");
if (INVALID_MEMBER(vmap_area_vm))
MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "private");
+ MEMBER_OFFSET_INIT(vmap_area_purge_list, "vmap_area", "purge_list");
STRUCT_SIZE_INIT(vmap_area, "vmap_area");
if (VALID_MEMBER(vmap_area_va_start) &&
VALID_MEMBER(vmap_area_va_end) &&
@@ -9063,7 +9064,8 @@ dump_vmap_area(struct meminfo *vi)
readmem(ld->list_ptr[i], KVADDR, vmap_area_buf,
SIZE(vmap_area), "vmap_area struct", FAULT_ON_ERROR);
- if (VALID_MEMBER(vmap_area_flags)) {
+ if (VALID_MEMBER(vmap_area_flags) &&
+ VALID_MEMBER(vmap_area_purge_list)) {
flags = ULONG(vmap_area_buf + OFFSET(vmap_area_flags));
if (flags != VM_VM_AREA)
continue;
diff --git a/symbols.c b/symbols.c
index f0721023816d..7b1d59203b90 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9169,6 +9169,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(vmap_area_vm));
fprintf(fp, " vmap_area_flags: %ld\n",
OFFSET(vmap_area_flags));
+ fprintf(fp, " vmap_area_purge_list: %ld\n", OFFSET(vmap_area_purge_list));
fprintf(fp, " module_size_of_struct: %ld\n",
OFFSET(module_size_of_struct));
--
2.37.1

View File

@ -1,118 +0,0 @@
From 0242075b6b4321419790f196c405c088394b9462 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:14 +0800
Subject: [PATCH 03/89] sbitmapq: fix invalid offset for
"sbitmap_queue_alloc_hint" on Linux v5.13-rc1
Kernel commit c548e62bcf6a ("scsi: sbitmap: Move allocation hint
into sbitmap") moved the alloc_hint member from struct sbitmap_queue
to struct sbitmap. Without the patch, the sbitmapq will fail:
crash> sbitmapq 0xffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_queue_alloc_hint
FILE: sbitmap.c LINE: 365 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 ++
sbitmap.c | 14 ++++++++++++--
symbols.c | 2 ++
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 984348062bcb..8b4cc38f73bf 100644
--- a/defs.h
+++ b/defs.h
@@ -2168,6 +2168,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sbitmap_queue_min_shallow_depth;
long sbq_wait_state_wait_cnt;
long sbq_wait_state_wait;
+ long sbitmap_alloc_hint;
};
struct size_table { /* stash of commonly-used sizes */
@@ -5915,6 +5916,7 @@ struct sbitmap_context {
unsigned shift;
unsigned map_nr;
ulong map_addr;
+ ulong alloc_hint;
};
typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p);
diff --git a/sbitmap.c b/sbitmap.c
index 7693eef6cebd..2921d5447c65 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -285,6 +285,7 @@ void sbitmap_for_each_set(const struct sbitmap_context *sc,
static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
const struct sbitmap_context *sc)
{
+ ulong alloc_hint_addr = 0;
int cpus = get_cpus_possible();
int sbq_wait_state_size, wait_cnt_off, wait_off, list_head_off;
char *sbq_wait_state_buf;
@@ -297,6 +298,11 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
fprintf(fp, "bits_per_word = %u\n", 1U << sc->shift);
fprintf(fp, "map_nr = %u\n", sc->map_nr);
+ if (VALID_MEMBER(sbitmap_queue_alloc_hint))
+ alloc_hint_addr = sqc->alloc_hint;
+ else if (VALID_MEMBER(sbitmap_alloc_hint)) /* 5.13 and later */
+ alloc_hint_addr = sc->alloc_hint;
+
fputs("alloc_hint = {", fp);
first = true;
for (i = 0; i < cpus; i++) {
@@ -307,7 +313,7 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
fprintf(fp, ", ");
first = false;
- ptr = kt->__per_cpu_offset[i] + sqc->alloc_hint;
+ ptr = kt->__per_cpu_offset[i] + alloc_hint_addr;
readmem(ptr, KVADDR, &val, sizeof(val), "alloc_hint", FAULT_ON_ERROR);
fprintf(fp, "%u", val);
@@ -362,7 +368,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context
error(FATAL, "cannot read sbitmap_queue\n");
}
- sqc->alloc_hint = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_alloc_hint));
+ if (VALID_MEMBER(sbitmap_queue_alloc_hint))
+ sqc->alloc_hint = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_alloc_hint));
sqc->wake_batch = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_batch));
sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index));
sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws));
@@ -387,6 +394,8 @@ void sbitmap_context_load(ulong addr, struct sbitmap_context *sc)
sc->shift = UINT(sbitmap_buf + OFFSET(sbitmap_shift));
sc->map_nr = UINT(sbitmap_buf + OFFSET(sbitmap_map_nr));
sc->map_addr = ULONG(sbitmap_buf + OFFSET(sbitmap_map));
+ if (VALID_MEMBER(sbitmap_alloc_hint))
+ sc->alloc_hint = ULONG(sbitmap_buf + OFFSET(sbitmap_alloc_hint));
FREEBUF(sbitmap_buf);
}
@@ -512,6 +521,7 @@ void sbitmapq_init(void)
MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
+ MEMBER_OFFSET_INIT(sbitmap_alloc_hint, "sbitmap", "alloc_hint");
MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
MEMBER_OFFSET_INIT(sbitmap_queue_alloc_hint, "sbitmap_queue", "alloc_hint");
diff --git a/symbols.c b/symbols.c
index 4afbc227da97..d5ce3e0873a1 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10720,6 +10720,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sbitmap_map_nr));
fprintf(fp, " sbitmap_map: %ld\n",
OFFSET(sbitmap_map));
+ fprintf(fp, " sbitmap_alloc_hint: %ld\n",
+ OFFSET(sbitmap_alloc_hint));
fprintf(fp, " sbitmap_queue_sb: %ld\n",
OFFSET(sbitmap_queue_sb));
fprintf(fp, " sbitmap_queue_alloc_hint: %ld\n",
--
2.37.1

View File

@ -0,0 +1,78 @@
From 582febffa8b3567339148c2bb916fc70f2fc546e Mon Sep 17 00:00:00 2001
From: Johan Erlandsson <johan.erlandsson@sony.com>
Date: Fri, 20 Oct 2023 19:10:52 +0200
Subject: [PATCH 03/14] zram: Fixes for lookup_swap_cache()
Fix the following three issues:
(1) swap cache missing page tree offset
The radix or xarray start at an offset inside struct address_space.
(2) swap cache entries are pointer to struct page
The entries in radix, xarray (swap cache) are address to struct page.
(3) exclude shadow entries from swap cache lookup
radix or xarray can contain shadow entries from previous page
entries. These should be ignored when looking for a page pointer.
Without the patch,
- lookup_swap_cache() returns NULL since do_xarray() call returns FALSE,
- in try_zram_decompress(), since 'entry' is NULL, page is filled with 0,
if (!entry || (flags & ZRAM_FLAG_SAME_BIT)) {
and pages in swap cache will be seen to be a 'zero' page.
Signed-off-by: Johan Erlandsson <johan.erlandsson@sony.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
diskdump.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/diskdump.c b/diskdump.c
index f20f3ac519a1..660c25729dad 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -27,6 +27,7 @@
#include "diskdump.h"
#include "xen_dom0.h"
#include "vmcore.h"
+#include "maple_tree.h"
#define BITMAP_SECT_LEN 4096
@@ -2877,11 +2878,16 @@ out:
return zram_buf;
}
+static inline bool radix_tree_exceptional_entry(ulong entry)
+{
+ return entry & RADIX_TREE_EXCEPTIONAL_ENTRY;
+}
+
static unsigned char *
lookup_swap_cache(ulonglong pte_val, unsigned char *zram_buf)
{
ulonglong swp_offset;
- ulong swp_type, swp_space, page;
+ ulong swp_type, swp_space;
struct list_pair lp;
physaddr_t paddr;
static int is_xarray = -1;
@@ -2907,10 +2913,13 @@ lookup_swap_cache(ulonglong pte_val, unsigned char *zram_buf)
swp_space += (swp_offset >> SWAP_ADDRESS_SPACE_SHIFT) * SIZE(address_space);
lp.index = swp_offset;
- if ((is_xarray ? do_xarray : do_radix_tree)(swp_space, RADIX_TREE_SEARCH, &lp)) {
- readmem((ulong)lp.value, KVADDR, &page, sizeof(void *),
- "swap_cache page", FAULT_ON_ERROR);
- if (!is_page_ptr(page, &paddr)) {
+ if ((is_xarray ? do_xarray : do_radix_tree)
+ (swp_space+OFFSET(address_space_page_tree), RADIX_TREE_SEARCH, &lp)) {
+ if ((is_xarray ? xa_is_value : radix_tree_exceptional_entry)((ulong)lp.value)) {
+ /* ignore shadow values */
+ return NULL;
+ }
+ if (!is_page_ptr((ulong)lp.value, &paddr)) {
error(WARNING, "radix page: %lx: not a page pointer\n", lp.value);
return NULL;
}
--
2.41.0

View File

@ -1,68 +0,0 @@
From 91a76958e4a8a9fb67ac61166ff36e8dc961b3b9 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Wed, 7 Jun 2023 18:37:33 +0900
Subject: [PATCH 4/5] Revert "Fix segfault in arm64_is_kernel_exception_frame()
when corrupt stack pointer address is given"
This reverts commit 9868ebc8e648e5791764a51567a23efae7170d9b.
The commit 9868ebc8e648e5791764a51567a23efae7170d9b causes the issue
that bt command fails to show backtraces for the tasks that is running
in the user mode at the moment of the kernel panic as follows:
crash> bt 1734
PID: 1734 TASK: ffff000000392200 CPU: 4 COMMAND: "insmod"
bt: invalid stack pointer is given
The root cause is that while the commit added a sanity check into
STACK_OFFSET_TYPE() to validate if a given candidate address of any
interrupt or exception frame is contained within the range of the
corresponding kernel stack, the premise that the STACK_OFFSET_TYPE()
should not return out-of-the-buffer address, is wrong.
Reexamining the relevant surrounding part of the backtracing code, it
looks to me now that the STACK_OFFSET_TYPE() is originally expected to
return an out-of-the-buffer address, like the address of the top of
the corresponding kernel stack, e.g. at here:
static int
arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
{
...
if (bt->flags & BT_USER_SPACE)
start = (ulong *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(bt->stacktop))];
else {
Note that the above bt 1734 aborts here.
Hence, the current implementation policy around STACK_OFFSET_TYPE()
looks that the caller side is responsible for understanding the fact
in advance and for avoiding making buffer overrun carefully.
To fix this issue, revert the commit.
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index bfda0c48d37b..3e7d6cfbc6a8 100644
--- a/defs.h
+++ b/defs.h
@@ -976,10 +976,7 @@ struct bt_info {
#define STACK_OFFSET_TYPE(OFF) \
(((ulong)(OFF) > STACKSIZE()) ? \
- (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? \
- error(FATAL, "invalid stack pointer is given\n") : \
- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \
- (ulong)(OFF))
+ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF))
#define GET_STACK_ULONG(OFF) \
*((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))])))
--
2.37.1

View File

@ -1,225 +0,0 @@
From a0eceb041dfa248d66f9f9a455106184b7823bec Mon Sep 17 00:00:00 2001
From: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Date: Mon, 29 May 2023 19:55:51 +0800
Subject: [PATCH 4/6] arm64/x86_64: Enhance "vtop" command to show zero_pfn
information
Enhance the "vtop" command to show "ZERO PAGE" information when PTE or
PMD has attached to {huge_}zero_pfn. For example:
crash> vtop -c 13674 ffff8917e000
VIRTUAL PHYSICAL
ffff8917e000 836e71000
PAGE DIRECTORY: ffff000802f8d000
PGD: ffff000802f8dff8 => 884e29003
PUD: ffff000844e29ff0 => 884e93003
PMD: ffff000844e93240 => 840413003
PTE: ffff000800413bf0 => 160000836e71fc3
PAGE: 836e71000 (ZERO PAGE)
...
Hugepage case:
crash> vtop -c 14538 ffff95800000
VIRTUAL PHYSICAL
ffff95800000 910c00000
PAGE DIRECTORY: ffff000801fa0000
PGD: ffff000801fa0ff8 => 884f53003
PUD: ffff000844f53ff0 => 8426cb003
PMD: ffff0008026cb560 => 60000910c00fc1
PAGE: 910c00000 (2MB, ZERO PAGE)
...
Note that
1. support displaying zero page only for THP (except for 1G THP)
2. do not support hugetlb cases.
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 24 ++++++++++++++++--------
defs.h | 5 +++++
memory.c | 23 +++++++++++++++++++++++
x86_64.c | 9 +++++----
4 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/arm64.c b/arm64.c
index 56fb841f43f8..efbdccbec9d3 100644
--- a/arm64.c
+++ b/arm64.c
@@ -1787,7 +1787,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pgd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pgd_val & SECTION_PAGE_MASK_512MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pgd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
@@ -1806,7 +1807,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -1859,7 +1861,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = PTE_TO_PHYS(pmd_val) & SECTION_PAGE_MASK_512MB;
if (verbose) {
- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
@@ -1878,7 +1881,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = PTE_TO_PHYS(pte_val) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -1940,7 +1944,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
@@ -1959,7 +1964,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -2029,7 +2035,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
@@ -2048,7 +2055,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
diff --git a/defs.h b/defs.h
index bfa07c3f5150..7d8bb8ab3de1 100644
--- a/defs.h
+++ b/defs.h
@@ -2619,6 +2619,8 @@ struct vm_table { /* kernel VM-related data */
char *name;
} *pageflags_data;
ulong max_mem_section_nr;
+ ulong zero_paddr;
+ ulong huge_zero_paddr;
};
#define NODES (0x1)
@@ -3000,6 +3002,9 @@ struct load_module {
#define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask)
#define PHYSPAGEBASE(X) (((physaddr_t)(X)) & (physaddr_t)machdep->pagemask)
+#define IS_ZEROPAGE(paddr) ((paddr) == vt->zero_paddr || \
+ (paddr) == vt->huge_zero_paddr)
+
/*
* Sparse memory stuff
* These must follow the definitions in the kernel mmzone.h
diff --git a/memory.c b/memory.c
index 15fa8b2f08f1..ea3005a5c01f 100644
--- a/memory.c
+++ b/memory.c
@@ -1209,6 +1209,27 @@ vm_init(void)
machdep->memory_size()));
vt->paddr_prlen = strlen(buf);
+ vt->zero_paddr = ~0UL;
+ if (kernel_symbol_exists("zero_pfn")) {
+ ulong zero_pfn;
+
+ if (readmem(symbol_value("zero_pfn"), KVADDR,
+ &zero_pfn, sizeof(zero_pfn),
+ "read zero_pfn", QUIET|RETURN_ON_ERROR))
+ vt->zero_paddr = zero_pfn << PAGESHIFT();
+ }
+
+ vt->huge_zero_paddr = ~0UL;
+ if (kernel_symbol_exists("huge_zero_pfn")) {
+ ulong huge_zero_pfn;
+
+ if (readmem(symbol_value("huge_zero_pfn"), KVADDR,
+ &huge_zero_pfn, sizeof(huge_zero_pfn),
+ "read huge_zero_pfn", QUIET|RETURN_ON_ERROR) &&
+ huge_zero_pfn != ~0UL)
+ vt->huge_zero_paddr = huge_zero_pfn << PAGESHIFT();
+ }
+
if (vt->flags & PERCPU_KMALLOC_V1)
vt->dump_kmem_cache = dump_kmem_cache_percpu_v1;
else if (vt->flags & PERCPU_KMALLOC_V2)
@@ -14065,6 +14086,8 @@ dump_vm_table(int verbose)
} else {
fprintf(fp, " node_online_map: (unused)\n");
}
+ fprintf(fp, " zero_paddr: %lx\n", vt->zero_paddr);
+ fprintf(fp, " huge_zero_paddr: %lx\n", vt->huge_zero_paddr);
fprintf(fp, " nr_vm_stat_items: %d\n", vt->nr_vm_stat_items);
fprintf(fp, " vm_stat_items: %s", (vt->flags & VM_STAT) ?
"\n" : "(not used)\n");
diff --git a/x86_64.c b/x86_64.c
index 5019c69e452e..693a08bea758 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -2114,8 +2114,9 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in
goto no_upage;
if (pmd_pte & _PAGE_PSE) {
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n",
- PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n",
+ PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK,
+ IS_ZEROPAGE(PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK) ? ", ZERO PAGE" : "");
x86_64_translate_pte(pmd_pte, 0, 0);
}
@@ -2143,8 +2144,8 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in
*paddr = (PAGEBASE(pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n",
- PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK);
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK,
+ IS_ZEROPAGE(PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK) ? "(ZERO PAGE)" : "");
x86_64_translate_pte(pte, 0, 0);
}
--
2.37.1

View File

@ -1,103 +0,0 @@
From ccdf0e45c66ca0bbe3eb468c661b405971801c2e Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:15 +0800
Subject: [PATCH 04/89] sbitmapq: fix invalid offset for
"sbitmap_queue_round_robin" on Linux v5.13-rc1
Kernel commit efe1f3a1d583 ("scsi: sbitmap: Maintain allocation
round_robin in sbitmap") moved the round_robin member from struct
sbitmap_queue to struct sbitmap. Without the patch, the sbitmapq
will fail:
crash> sbitmapq 0xffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_queue_round_robin
FILE: sbitmap.c LINE: 378 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 ++
sbitmap.c | 12 ++++++++++--
symbols.c | 2 ++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 8b4cc38f73bf..66f74f640d84 100644
--- a/defs.h
+++ b/defs.h
@@ -2169,6 +2169,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sbq_wait_state_wait_cnt;
long sbq_wait_state_wait;
long sbitmap_alloc_hint;
+ long sbitmap_round_robin;
};
struct size_table { /* stash of commonly-used sizes */
@@ -5917,6 +5918,7 @@ struct sbitmap_context {
unsigned map_nr;
ulong map_addr;
ulong alloc_hint;
+ bool round_robin;
};
typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p);
diff --git a/sbitmap.c b/sbitmap.c
index 2921d5447c65..7b318b533702 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -352,7 +352,11 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
FREEBUF(sbq_wait_state_buf);
- fprintf(fp, "round_robin = %d\n", sqc->round_robin);
+ if (VALID_MEMBER(sbitmap_queue_round_robin))
+ fprintf(fp, "round_robin = %d\n", sqc->round_robin);
+ else if (VALID_MEMBER(sbitmap_round_robin)) /* 5.13 and later */
+ fprintf(fp, "round_robin = %d\n", sc->round_robin);
+
fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth);
}
@@ -374,7 +378,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context
sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index));
sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws));
sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active));
- sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin));
+ if (VALID_MEMBER(sbitmap_queue_round_robin))
+ sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin));
sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth));
FREEBUF(sbitmap_queue_buf);
@@ -396,6 +401,8 @@ void sbitmap_context_load(ulong addr, struct sbitmap_context *sc)
sc->map_addr = ULONG(sbitmap_buf + OFFSET(sbitmap_map));
if (VALID_MEMBER(sbitmap_alloc_hint))
sc->alloc_hint = ULONG(sbitmap_buf + OFFSET(sbitmap_alloc_hint));
+ if (VALID_MEMBER(sbitmap_round_robin))
+ sc->round_robin = BOOL(sbitmap_buf + OFFSET(sbitmap_round_robin));
FREEBUF(sbitmap_buf);
}
@@ -522,6 +529,7 @@ void sbitmapq_init(void)
MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
MEMBER_OFFSET_INIT(sbitmap_alloc_hint, "sbitmap", "alloc_hint");
+ MEMBER_OFFSET_INIT(sbitmap_round_robin, "sbitmap", "round_robin");
MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
MEMBER_OFFSET_INIT(sbitmap_queue_alloc_hint, "sbitmap_queue", "alloc_hint");
diff --git a/symbols.c b/symbols.c
index d5ce3e0873a1..7431aaecec9d 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10722,6 +10722,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sbitmap_map));
fprintf(fp, " sbitmap_alloc_hint: %ld\n",
OFFSET(sbitmap_alloc_hint));
+ fprintf(fp, " sbitmap_round_robin: %ld\n",
+ OFFSET(sbitmap_round_robin));
fprintf(fp, " sbitmap_queue_sb: %ld\n",
OFFSET(sbitmap_queue_sb));
fprintf(fp, " sbitmap_queue_alloc_hint: %ld\n",
--
2.37.1

View File

@ -0,0 +1,171 @@
From f2ee6fa6c841ddc37ba665909dafbc7294c34d64 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Fri, 17 Nov 2023 15:52:19 +0800
Subject: [PATCH 04/14] symbols: expand all kernel module symtable if not all
expanded previously
There is an issue that, for kernel modules, "dis -rl" fails to display
modules code line number data after execute "bt" command in crash.
Without the patch:
crsah> mod -S
crash> bt
PID: 1500 TASK: ff2bd8b093524000 CPU: 16 COMMAND: "lpfc_worker_0"
#0 [ff2c9f725c39f9e0] machine_kexec at ffffffff8e0686d3
...snip...
#8 [ff2c9f725c39fcc0] __lpfc_sli_release_iocbq_s4 at ffffffffc0f2f425 [lpfc]
...snip...
crash> dis -rl ffffffffc0f60f82
0xffffffffc0f60eb0 <lpfc_nlp_get>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc0f60eb5 <lpfc_nlp_get+5>: push %rbp
0xffffffffc0f60eb6 <lpfc_nlp_get+6>: push %rbx
0xffffffffc0f60eb7 <lpfc_nlp_get+7>: test %rdi,%rdi
With the patch:
crash> mod -S
crash> bt
PID: 1500 TASK: ff2bd8b093524000 CPU: 16 COMMAND: "lpfc_worker_0"
#0 [ff2c9f725c39f9e0] machine_kexec at ffffffff8e0686d3
...snip...
#8 [ff2c9f725c39fcc0] __lpfc_sli_release_iocbq_s4 at ffffffffc0f2f425 [lpfc]
...snip...
crash> dis -rl ffffffffc0f60f82
/usr/src/debug/kernel-4.18.0-425.13.1.el8_7/linux-4.18.0-425.13.1.el8_7.x86_64/drivers/scsi/lpfc/lpfc_hbadisc.c: 6756
0xffffffffc0f60eb0 <lpfc_nlp_get>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
/usr/src/debug/kernel-4.18.0-425.13.1.el8_7/linux-4.18.0-425.13.1.el8_7.x86_64/drivers/scsi/lpfc/lpfc_hbadisc.c: 6759
0xffffffffc0f60eb5 <lpfc_nlp_get+5>: push %rbp
The root cause is, after kernel module been loaded by mod command, the symtable
is not expanded in gdb side. crash bt or dis command will trigger such an
expansion. However the symtable expansion is different for the 2 commands:
The stack trace of "dis -rl" for symtable expanding:
#0 0x00000000008d8d9f in add_compunit_symtab_to_objfile ...
#1 0x00000000006d3293 in buildsym_compunit::end_symtab_with_blockvector ...
#2 0x00000000006d336a in buildsym_compunit::end_symtab_from_static_block ...
#3 0x000000000077e8e9 in process_full_comp_unit ...
#4 process_queue ...
#5 dw2_do_instantiate_symtab ...
#6 0x000000000077ed67 in dw2_instantiate_symtab ...
#7 0x000000000077f75e in dw2_expand_all_symtabs ...
#8 0x00000000008f254d in gdb_get_line_number ...
#9 0x00000000008f22af in gdb_command_funnel_1 ...
#10 0x00000000008f2003 in gdb_command_funnel ...
#11 0x00000000005b7f02 in gdb_interface ...
#12 0x00000000005f5bd8 in get_line_number ...
#13 0x000000000059e574 in cmd_dis ...
The stack trace of "bt" for symtable expanding:
#0 0x00000000008d8d9f in add_compunit_symtab_to_objfile ...
#1 0x00000000006d3293 in buildsym_compunit::end_symtab_with_blockvector ...
#2 0x00000000006d336a in buildsym_compunit::end_symtab_from_static_block ...
#3 0x000000000077e8e9 in process_full_comp_unit ...
#4 process_queue ...
#5 dw2_do_instantiate_symtab ...
#6 0x000000000077ed67 in dw2_instantiate_symtab ...
#7 0x000000000077f8ed in dw2_lookup_symbol ...
#8 0x00000000008e6d03 in lookup_symbol_via_quick_fns ...
#9 0x00000000008e7153 in lookup_symbol_in_objfile ...
#10 0x00000000008e73c6 in lookup_symbol_global_or_static_iterator_cb ...
#11 0x00000000008b99c4 in svr4_iterate_over_objfiles_in_search_order ...
#12 0x00000000008e754e in lookup_global_or_static_symbol ...
#13 0x00000000008e75da in lookup_static_symbol ...
#14 0x00000000008e632c in lookup_symbol_aux ...
#15 0x00000000008e5a7a in lookup_symbol_in_language ...
#16 0x00000000008e5b30 in lookup_symbol ...
#17 0x00000000008f2a4a in gdb_get_datatype ...
#18 0x00000000008f22c0 in gdb_command_funnel_1 ...
#19 0x00000000008f2003 in gdb_command_funnel ...
#20 0x00000000005b7f02 in gdb_interface ...
#21 0x00000000005f8a9f in datatype_info ...
#22 0x0000000000599947 in cpu_map_size ...
#23 0x00000000005a975d in get_cpus_online ...
#24 0x0000000000637a8b in diskdump_get_prstatus_percpu ...
#25 0x000000000062f0e4 in get_netdump_regs_x86_64 ...
#26 0x000000000059fe68 in back_trace ...
#27 0x00000000005ab1cb in cmd_bt ...
For the stacktrace of "dis -rl", it calls dw2_expand_all_symtabs() to expand
all symtable of the objfile, or "*.ko.debug" in our case. However for
the stacktrace of "bt", it doesn't expand all, but only a subset of symtable
which is enough to find a symbol by dw2_lookup_symbol(). As a result, the
objfile->compunit_symtabs, which is the head of a single linked list of
struct compunit_symtab, is not NULL but didn't contain all symtables. It
will not be reinitialized in gdb_get_line_number() by "dis -rl" because
!objfile_has_full_symbols(objfile) check will fail, so it cannot display
the proper code line number data.
Since objfile_has_full_symbols(objfile) check cannot ensure all symbols
been expanded, this patch add a new member as a flag for struct objfile
to record if all symbols have been expanded. The flag will be set only ofter
expand_all_symtabs been called.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
gdb-10.2.patch | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/gdb-10.2.patch b/gdb-10.2.patch
index d81030d946e8..2f7d585105aa 100644
--- a/gdb-10.2.patch
+++ b/gdb-10.2.patch
@@ -3187,3 +3187,53 @@ exit 0
result = stringtab + symbol_entry->_n._n_n._n_offset;
}
else
+--- gdb-10.2/gdb/objfiles.h.orig
++++ gdb-10.2/gdb/objfiles.h
+@@ -712,6 +712,8 @@ struct objfile
+ next time. If an objfile does not have the symbols, it will
+ never have them. */
+ bool skip_jit_symbol_lookup = false;
++
++ bool all_symtabs_expanded = false;
+ };
+
+ /* A deleter for objfile. */
+--- gdb-10.2/gdb/symfile.c.orig
++++ gdb-10.2/gdb/symfile.c
+@@ -1133,8 +1133,10 @@ symbol_file_add_with_addrs (bfd *abfd, const char *name,
+ printf_filtered (_("Expanding full symbols from %ps...\n"),
+ styled_string (file_name_style.style (), name));
+
+- if (objfile->sf)
++ if (objfile->sf) {
+ objfile->sf->qf->expand_all_symtabs (objfile);
++ objfile->all_symtabs_expanded = true;
++ }
+ }
+
+ /* Note that we only print a message if we have no symbols and have
+--- gdb-10.2/gdb/symtab.c.orig
++++ gdb-10.2/gdb/symtab.c
+@@ -7097,8 +7097,9 @@ gdb_get_line_number(struct gnu_request *req)
+ */
+ if (req->lm) {
+ objfile = req->lm->loaded_objfile;
+- if (!objfile_has_full_symbols(objfile) && objfile->sf) {
++ if (!objfile->all_symtabs_expanded && objfile->sf) {
+ objfile->sf->qf->expand_all_symtabs(objfile);
++ objfile->all_symtabs_expanded = true;
+ sal = find_pc_line(pc, 0);
+ }
+ }
+@@ -7761,8 +7765,10 @@ iterate_datatypes (struct gnu_request *req)
+ {
+ for (objfile *objfile : current_program_space->objfiles ())
+ {
+- if (objfile->sf)
++ if (objfile->sf) {
+ objfile->sf->qf->expand_all_symtabs(objfile);
++ objfile->all_symtabs_expanded = true;
++ }
+
+ for (compunit_symtab *cust : objfile->compunits ())
+ {
--
2.41.0

View File

@ -1,45 +0,0 @@
From 6c8cd9b5dcf48221e5f75fc5850bb4719d77acce Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Wed, 7 Jun 2023 18:37:34 +0900
Subject: [PATCH 5/5] arm64: Fix again segfault in
arm64_is_kernel_exception_frame() when corrupt stack pointer address is given
This is the second trial from the commit
9868ebc8e648e5791764a51567a23efae7170d9b that was reverted at the
previous commit.
As described in the previous commit, result of STACK_OFFSET_TYPE() can
be an address out of bt->stackbuf and hence the address needs to be
checked prior to being referred to as an pt_regs object.
So, to fix the issue, let's check if stkptr points to within the range
of the kernel stack first.
[ kh: added a warning at Lianbo's suggestion ]
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arm64.c b/arm64.c
index efbdccbec9d3..67b1a2244810 100644
--- a/arm64.c
+++ b/arm64.c
@@ -2381,6 +2381,12 @@ arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
struct arm64_pt_regs *regs;
struct machine_specific *ms = machdep->machspec;
+ if (stkptr > STACKSIZE() && !INSTACK(stkptr, bt)) {
+ if (CRASHDEBUG(1))
+ error(WARNING, "stkptr: %lx is outside the kernel stack range\n", stkptr);
+ return FALSE;
+ }
+
regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) &&
--
2.37.1

View File

@ -1,165 +0,0 @@
From db8c030857b4e318728c51c20da687906c109d0d Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Tue, 30 May 2023 19:38:34 +0900
Subject: [PATCH 5/6] diskdump/netdump: fix segmentation fault caused by
failure of stopping CPUs
There's no NMI on ARM. Hence, stopping the non-panicking CPUs from the
panicking CPU via IPI can fail easily if interrupts are being masked
in those moment. Moreover, crash_notes are not initialized for such
unstopped CPUs and the corresponding NT_PRSTATUS notes are not
attached to vmcore. However, crash utility never takes it
consideration such uninitialized crash_notes and then ends with
mapping different NT_PRSTATUS to actually unstopped CPUs. This corrupt
mapping can result crash utility into segmentation fault in the
operations where register values in NT_PRSTATUS notes are used.
For example:
crash> bt 1408
PID: 1408 TASK: ffff000003e22200 CPU: 2 COMMAND: "repro"
Segmentation fault (core dumped)
crash> help -D
diskdump_data:
filename: 127.0.0.1-2023-05-26-02:21:27/vmcore-ld1
flags: 46 (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED|LZO_SUPPORTED)
...snip...
notes_buf: 1815df0
num_vmcoredd_notes: 0
num_prstatus_notes: 5
notes[0]: 1815df0 (NT_PRSTATUS)
si.signo: 0 si.code: 0 si.errno: 0
...snip...
PSTATE: 80400005 FPVALID: 00000000
notes[4]: 1808f10 (NT_PRSTATUS)
Segmentation fault (core dumped)
To fix this issue, let's map NT_PRSTATUS to some CPU only if the
corresponding crash_notes is checked to be initialized.
[ kh: moved existence check for crash_notes out of the loop ]
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
diskdump.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
netdump.c | 7 ++++++-
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 7d8bb8ab3de1..6520d2f13f48 100644
--- a/defs.h
+++ b/defs.h
@@ -7118,6 +7118,7 @@ int dumpfile_is_split(void);
void show_split_dumpfiles(void);
void x86_process_elf_notes(void *, unsigned long);
void *diskdump_get_prstatus_percpu(int);
+int have_crash_notes(int cpu);
void map_cpus_to_prstatus_kdump_cmprs(void);
void diskdump_display_regs(int, FILE *);
void process_elf32_notes(void *, ulong);
diff --git a/diskdump.c b/diskdump.c
index 94bca4ded572..2c284ff3f97f 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -101,12 +101,54 @@ int dumpfile_is_split(void)
return KDUMP_SPLIT();
}
+int have_crash_notes(int cpu)
+{
+ ulong crash_notes, notes_ptr;
+ char *buf, *p;
+ Elf64_Nhdr *note = NULL;
+
+ if (!readmem(symbol_value("crash_notes"), KVADDR, &crash_notes,
+ sizeof(crash_notes), "crash_notes", RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read \"crash_notes\"\n");
+ return FALSE;
+ }
+
+ if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF))
+ notes_ptr = crash_notes + kt->__per_cpu_offset[cpu];
+ else
+ notes_ptr = crash_notes;
+
+ buf = GETBUF(SIZE(note_buf));
+
+ if (!readmem(notes_ptr, KVADDR, buf,
+ SIZE(note_buf), "note_buf_t", RETURN_ON_ERROR)) {
+ error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", cpu);
+ return FALSE;
+ }
+
+ note = (Elf64_Nhdr *)buf;
+ p = buf + sizeof(Elf64_Nhdr);
+
+ if (note->n_type != NT_PRSTATUS) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n", cpu);
+ return FALSE;
+ }
+
+ if (!STRNEQ(p, "CORE")) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name != \"CORE\")\n", cpu);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
void
map_cpus_to_prstatus_kdump_cmprs(void)
{
void **nt_ptr;
int online, i, j, nrcpus;
size_t size;
+ int crash_notes_exists;
if (pc->flags2 & QEMU_MEM_DUMP_COMPRESSED) /* notes exist for all cpus */
goto resize_note_pointers;
@@ -129,9 +171,10 @@ map_cpus_to_prstatus_kdump_cmprs(void)
* Re-populate the array with the notes mapping to online cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
+ crash_notes_exists = kernel_symbol_exists("crash_notes");
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i)) {
+ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
dd->nt_prstatus_percpu[i] = nt_ptr[j++];
dd->num_prstatus_notes =
MAX(dd->num_prstatus_notes, i+1);
diff --git a/netdump.c b/netdump.c
index 4eba66cecb55..61ddeaa08831 100644
--- a/netdump.c
+++ b/netdump.c
@@ -75,6 +75,7 @@ map_cpus_to_prstatus(void)
void **nt_ptr;
int online, i, j, nrcpus;
size_t size;
+ int crash_notes_exists;
if (pc->flags2 & QEMU_MEM_DUMP_ELF) /* notes exist for all cpus */
return;
@@ -97,10 +98,14 @@ map_cpus_to_prstatus(void)
* Re-populate the array with the notes mapping to online cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
+ crash_notes_exists = kernel_symbol_exists("crash_notes");
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i))
+ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
nd->nt_prstatus_percpu[i] = nt_ptr[j++];
+ nd->num_prstatus_notes =
+ MAX(nd->num_prstatus_notes, i+1);
+ }
}
FREEBUF(nt_ptr);
--
2.37.1

View File

@ -1,101 +0,0 @@
From 0637e7bee77f127ff6a36fc7a9e52408a3106173 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:16 +0800
Subject: [PATCH 05/89] sbitmapq: fix invalid offset for "sbitmap_word_depth"
on Linux v5.18-rc1
Kernel commit 3301bc53358a ("lib/sbitmap: kill 'depth' from sbitmap_word")
removed the depth member from struct sbitmap_word. Without the patch, the
sbitmapq will fail:
crash> sbitmapq 0xffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_word_depth
FILE: sbitmap.c LINE: 84 FUNCTION: __sbitmap_weight()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index 7b318b533702..e8ebd62fe01c 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -78,10 +78,16 @@ static unsigned long bitmap_weight(unsigned long bitmap, unsigned int bits)
return w;
}
+static inline unsigned int __map_depth(const struct sbitmap_context *sc, int index)
+{
+ if (index == sc->map_nr - 1)
+ return sc->depth - (index << sc->shift);
+ return 1U << sc->shift;
+}
+
static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
- const ulong w_depth_off = OFFSET(sbitmap_word_depth);
const ulong w_word_off = OFFSET(sbitmap_word_word);
const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
@@ -99,7 +105,7 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
error(FATAL, "cannot read sbitmap_word\n");
}
- depth = ULONG(sbitmap_word_buf + w_depth_off);
+ depth = __map_depth(sc, i);
if (set) {
word = ULONG(sbitmap_word_buf + w_word_off);
@@ -142,7 +148,6 @@ static void sbitmap_emit_byte(unsigned int offset, uint8_t byte)
static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
- const ulong w_depth_off = OFFSET(sbitmap_word_depth);
const ulong w_word_off = OFFSET(sbitmap_word_word);
const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
@@ -165,7 +170,7 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
word = ULONG(sbitmap_word_buf + w_word_off);
cleared = ULONG(sbitmap_word_buf + w_cleared_off);
- word_bits = ULONG(sbitmap_word_buf + w_depth_off);
+ word_bits = __map_depth(sc, i);
word &= ~cleared;
@@ -213,7 +218,6 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
unsigned int start, sbitmap_for_each_fn fn, void *data)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
- const ulong w_depth_off = OFFSET(sbitmap_word_depth);
const ulong w_word_off = OFFSET(sbitmap_word_word);
const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
@@ -232,7 +236,7 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
while (scanned < sc->depth) {
unsigned long w_addr = sc->map_addr + (sbitmap_word_size * index);
- unsigned long w_depth, w_word, w_cleared;
+ unsigned long w_word, w_cleared;
unsigned long word, depth;
if (!readmem(w_addr, KVADDR, sbitmap_word_buf, sbitmap_word_size, "sbitmap_word", RETURN_ON_ERROR)) {
@@ -240,11 +244,10 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
error(FATAL, "cannot read sbitmap_word\n");
}
- w_depth = ULONG(sbitmap_word_buf + w_depth_off);
w_word = ULONG(sbitmap_word_buf + w_word_off);
w_cleared = ULONG(sbitmap_word_buf + w_cleared_off);
- depth = min(w_depth - nr, sc->depth - scanned);
+ depth = min(__map_depth(sc, index) - nr, sc->depth - scanned);
scanned += depth;
word = w_word & ~w_cleared;
--
2.37.1

View File

@ -0,0 +1,67 @@
From 0c5ef6a4a3a2759915ffe72b1366dce2f32f65c5 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 14 Nov 2023 16:32:07 +0800
Subject: [PATCH 05/14] symbols: skip load .init.* sections if module was
successfully initialized
There might be address overlap of one modules .init.text symbols and
another modules .text symbols. As a result, gdb fails to translate the
address to symbol name correctly:
crash> sym -m virtio_blk | grep MODULE
ffffffffc00a4000 MODULE START: virtio_blk
ffffffffc00a86ec MODULE END: virtio_blk
crash> gdb info address floppy_module_init
Symbol "floppy_module_init" is a function at address 0xffffffffc00a4131.
Since the .init.* sections of a module had been freed by kernel if the
module was initialized successfully, there is no need to load the .init.*
sections data from "*.ko.debug" in gdb to create such an overlap.
lm->mod_init_module_ptr is used as a flag of whether module is freed.
Without the patch:
crash> mod -S
crash> struct blk_mq_ops 0xffffffffc00a7160
struct blk_mq_ops {
queue_rq = 0xffffffffc00a45b0 <floppy_module_init+1151>, <-- translated from module floppy
map_queue = 0xffffffff813015c0 <blk_mq_map_queue>,
...snip...
complete = 0xffffffffc00a4370 <floppy_module_init+575>,
init_request = 0xffffffffc00a4260 <floppy_module_init+303>,
...snip...
}
With the patch:
crash> mod -S
crash> struct blk_mq_ops 0xffffffffc00a7160
struct blk_mq_ops {
queue_rq = 0xffffffffc00a45b0 <virtio_queue_rq>, <-- translated from module virtio_blk
map_queue = 0xffffffff813015c0 <blk_mq_map_queue>,
...snip...
complete = 0xffffffffc00a4370 <virtblk_request_done>,
init_request = 0xffffffffc00a4260 <virtblk_init_request>,
...snip...
}
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
symbols.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/symbols.c b/symbols.c
index 176c95026f03..5d919910164e 100644
--- a/symbols.c
+++ b/symbols.c
@@ -13295,7 +13295,7 @@ add_symbol_file_kallsyms(struct load_module *lm, struct gnu_request *req)
shift_string_right(req->buf, strlen(buf));
BCOPY(buf, req->buf, strlen(buf));
retval = TRUE;
- } else {
+ } else if (lm->mod_init_module_ptr || !STRNEQ(section_name, ".init.")) {
sprintf(buf, " -s %s 0x%lx", section_name, section_vaddr);
while ((len + strlen(buf)) >= buflen) {
RESIZEBUF(req->buf, buflen, buflen * 2);
--
2.41.0

View File

@ -1,62 +0,0 @@
From 9868ebc8e648e5791764a51567a23efae7170d9b Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Tue, 30 May 2023 19:38:35 +0900
Subject: [PATCH 6/6] Fix segfault in arm64_is_kernel_exception_frame() when
corrupt stack pointer address is given
Due to the corrupted mapping fixed by the previous commit,
arm64_is_kernel_exception_frame() can receive invalid stack pointer
address via the 2nd argument; different NT_PRSTATUS contains different
task's stack pointer address. However, macro STACK_OFFSET_TYPE() never
checks if a given address is within the range of the kernel stack of
the corresponding task and hence can result in referring to outside of
bt->stackbuf.
static int
arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
{
struct arm64_pt_regs *regs;
struct machine_specific *ms = machdep->machspec;
regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
=> if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) &&
!(regs->pstate & (0xffffffff00000000ULL | PSR_MODE32_BIT)) &&
is_kernel_text(regs->pc) &&
is_kernel_text(regs->regs[30] | ms->CONFIG_ARM64_KERNELPACMASK)) {
To fix this issue, check if the given stack pointer address points to
the range of the kernel stack of the corresponding task, and abort if
it turns out to be invalid.
Although the corrupted mapping has already been fixed, this fix is
still needed because corrupt stack pointer address can still be passed
here from different reasons. Consider, for example, that data on the
kernel stack can be modified abnormally due to any kernel bugs or
hardware issues.
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 6520d2f13f48..11fdc17e60d0 100644
--- a/defs.h
+++ b/defs.h
@@ -976,7 +976,10 @@ struct bt_info {
#define STACK_OFFSET_TYPE(OFF) \
(((ulong)(OFF) > STACKSIZE()) ? \
- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF))
+ (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? \
+ error(FATAL, "invalid stack pointer is given\n") : \
+ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \
+ (ulong)(OFF))
#define GET_STACK_ULONG(OFF) \
*((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))])))
--
2.37.1

View File

@ -1,205 +0,0 @@
From 3ce590be037dc11c31a013e435484926276eee9f Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Tue, 24 May 2022 20:25:53 +0800
Subject: [PATCH 06/89] bt: x86_64: filter out idle task stack
When we use crash to troubleshoot softlockup and other problems,
we often use the 'bt -a' command to print the stacks of running
processes on all CPUs. But now some servers have hundreds of CPUs
(such as AMD machines), which causes the 'bt -a' command to output
a lot of process stacks. And many of these stacks are the stacks
of the idle process, which are not needed by us.
Therefore, in order to reduce this part of the interference information,
this patch adds the -n option to the bt command. When we specify
'-n idle' (meaning no idle), the stack of the idle process will be
filtered out, thus speeding up our troubleshooting.
And the option works only for crash dumps captured by kdump.
The command output is as follows:
crash> bt -a -n idle
[...]
PID: 0 TASK: ffff889ff8c34380 CPU: 8 COMMAND: "swapper/8"
PID: 0 TASK: ffff889ff8c32d00 CPU: 9 COMMAND: "swapper/9"
PID: 0 TASK: ffff889ff8c31680 CPU: 10 COMMAND: "swapper/10"
PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: "swapper/11"
PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: "swapper/12"
PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: "bash"
#0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407
#1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d
#2 [ffffc9000d35bdf0] panic at ffffffff81081930
#3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1
#4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175
#5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b
#6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86
#7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5
#8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579
#9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259
RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274
RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001
RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80
R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760
R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002
ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
[...]
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Acked-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
help.c | 33 ++++++++++++++++++++++++++++++++-
kernel.c | 13 ++++++++++++-
x86_64.c | 8 ++++++++
4 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 66f74f640d84..52e4b14aa27c 100644
--- a/defs.h
+++ b/defs.h
@@ -5840,6 +5840,7 @@ ulong cpu_map_addr(const char *type);
#define BT_SHOW_ALL_REGS (0x2000000000000ULL)
#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
#define BT_OVERFLOW_STACK (0x8000000000000ULL)
+#define BT_SKIP_IDLE (0x10000000000000ULL)
#define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
#define BT_REF_HEXVAL (0x1)
diff --git a/help.c b/help.c
index 00712a690593..00c833da13cc 100644
--- a/help.c
+++ b/help.c
@@ -1915,12 +1915,14 @@ char *help_bt[] = {
"bt",
"backtrace",
"[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s [-x|d]]"
-"\n [-I ip] [-S sp] [pid | task]",
+"\n [-I ip] [-S sp] [-n idle] [pid | task]",
" Display a kernel stack backtrace. If no arguments are given, the stack",
" trace of the current context will be displayed.\n",
" -a displays the stack traces of the active task on each CPU.",
" (only applicable to crash dumps)",
" -A same as -a, but also displays vector registers (S390X only).",
+" -n idle filter the stack of idle tasks (x86_64).",
+" (only applicable to crash dumps)",
" -p display the stack trace of the panic task only.",
" (only applicable to crash dumps)",
" -c cpu display the stack trace of the active task on one or more CPUs,",
@@ -2010,6 +2012,35 @@ char *help_bt[] = {
" DS: 002b ESI: bfffc8a0 ES: 002b EDI: 00000000 ",
" SS: 002b ESP: bfffc82c EBP: bfffd224 ",
" CS: 0023 EIP: 400d032e ERR: 0000008e EFLAGS: 00000246 ",
+" ",
+" Display the stack trace of the active task(s) when the kernel panicked,",
+" and filter out the stack of the idle tasks:",
+" ",
+" %s> bt -a -n idle",
+" ...",
+" PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: \"swapper/11\"",
+" ",
+" PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: \"swapper/12\"",
+" ",
+" PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: \"bash\"",
+" #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407",
+" #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d",
+" #2 [ffffc9000d35bdf0] panic at ffffffff81081930",
+" #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1",
+" #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175",
+" #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b",
+" #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86",
+" #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5",
+" #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579",
+" #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259",
+" RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246",
+" RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274",
+" RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001",
+" RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80",
+" R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760",
+" R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002",
+" ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b",
+" ...",
"\n Display the stack trace of the active task on CPU 0 and 1:\n",
" %s> bt -c 0,1",
" PID: 0 TASK: ffffffff81a8d020 CPU: 0 COMMAND: \"swapper\"",
diff --git a/kernel.c b/kernel.c
index 9ae7ee1ebe64..185b09335733 100644
--- a/kernel.c
+++ b/kernel.c
@@ -2503,7 +2503,7 @@ cmd_bt(void)
if (kt->flags & USE_OPT_BT)
bt->flags |= BT_OPT_BACK_TRACE;
- while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF) {
+ while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) != EOF) {
switch (c)
{
case 'f':
@@ -2672,6 +2672,13 @@ cmd_bt(void)
active++;
break;
+ case 'n':
+ if (machine_type("X86_64") && STREQ(optarg, "idle"))
+ bt->flags |= BT_SKIP_IDLE;
+ else
+ option_not_supported(c);
+ break;
+
case 'r':
bt->flags |= BT_RAW;
break;
@@ -3092,6 +3099,10 @@ back_trace(struct bt_info *bt)
} else
machdep->get_stack_frame(bt, &eip, &esp);
+ /* skip idle task stack */
+ if (bt->flags & BT_SKIP_IDLE)
+ return;
+
if (bt->flags & BT_KSTACKP) {
bt->stkptr = esp;
return;
diff --git a/x86_64.c b/x86_64.c
index 1305afd791a1..f4e5d9e77cef 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -4915,6 +4915,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
if (bt->flags & BT_DUMPFILE_SEARCH)
return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
+ if (bt->flags & BT_SKIP_IDLE)
+ bt->flags &= ~BT_SKIP_IDLE;
+
if (pcp)
*pcp = x86_64_get_pc(bt);
if (spp)
@@ -4957,6 +4960,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp)
estack = -1;
panic = FALSE;
+ if (bt_in->flags & BT_SKIP_IDLE)
+ bt_in->flags &= ~BT_SKIP_IDLE;
+
panic_task = tt->panic_task == bt->task ? TRUE : FALSE;
if (panic_task && bt->machdep) {
@@ -5095,6 +5101,8 @@ next_sysrq:
if (!panic_task && STREQ(sym, "crash_nmi_callback")) {
*rip = *up;
*rsp = bt->stackbase + ((char *)(up) - bt->stackbuf);
+ if ((bt->flags & BT_SKIP_IDLE) && is_idle_thread(bt->task))
+ bt_in->flags |= BT_SKIP_IDLE;
return;
}
--
2.37.1

View File

@ -0,0 +1,47 @@
From c15da07526291a5c357010cb4aaf4bde6151e642 Mon Sep 17 00:00:00 2001
From: Johan Erlandsson <johan.erlandsson@sony.com>
Date: Wed, 19 Apr 2023 11:26:04 +0200
Subject: [PATCH 06/14] use NR_SWAPCACHE when nr_swapper_spaces isn't available
In 5.12 the following change was introduced:
b6038942480e ("mm: memcg: add swapcache stat for memcg v2")
Then the variable 'nr_swapper_spaces' is not read (unless
CONFIG_DEBUG_VM=y). In GKI builds this variable is then optimized
out. But the same change provided a new way to obtain the same
information, using NR_SWAPCACHE.
Reported-by: xueguolun <xueguolun@xiaomi.com>
Signed-off-by: Johan Erlandsson <johan.erlandsson@sony.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/memory.c b/memory.c
index 791194a405d4..b84e974a3325 100644
--- a/memory.c
+++ b/memory.c
@@ -8486,7 +8486,7 @@ dump_kmeminfo(void)
ulong hugetlb_total_pages, hugetlb_total_free_pages = 0;
int done_hugetlb_calc = 0;
long nr_file_pages, nr_slab;
- ulong swapper_space_nrpages;
+ long swapper_space_nrpages;
ulong pct;
uint tmp;
struct meminfo meminfo;
@@ -8609,7 +8609,9 @@ dump_kmeminfo(void)
char *swapper_space = GETBUF(SIZE(address_space));
swapper_space_nrpages = 0;
- if (symbol_exists("nr_swapper_spaces") &&
+ if (dump_vm_stat("NR_SWAPCACHE", &swapper_space_nrpages, 0)) {
+ ;
+ } else if (symbol_exists("nr_swapper_spaces") &&
(len = get_array_length("nr_swapper_spaces",
NULL, 0))) {
char *nr_swapper_space =
--
2.41.0

View File

@ -0,0 +1,43 @@
From 2e513114e7d77fadc88011f186ef943ccf397d35 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Wed, 29 Nov 2023 13:47:34 +0100
Subject: [PATCH 07/14] Fix identity_map_base value dump on S390
Kernel virtual base instead of identity base is printed
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
s390.c | 2 +-
s390x.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/s390.c b/s390.c
index 42f5cc63ae52..a8b2bcca86c7 100644
--- a/s390.c
+++ b/s390.c
@@ -183,7 +183,7 @@ s390_dump_machdep_table(ulong arg)
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
- fprintf(fp, " identity_map_base: %lx\n", machdep->kvbase);
+ fprintf(fp, " identity_map_base: %lx\n", machdep->identity_map_base);
fprintf(fp, " pagesize: %d\n", machdep->pagesize);
fprintf(fp, " pageshift: %d\n", machdep->pageshift);
fprintf(fp, " pagemask: %llx\n", machdep->pagemask);
diff --git a/s390x.c b/s390x.c
index d7ee3755fc0b..096c072186f5 100644
--- a/s390x.c
+++ b/s390x.c
@@ -650,7 +650,7 @@ s390x_dump_machdep_table(ulong arg)
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
- fprintf(fp, " identity_map_base: %lx\n", machdep->kvbase);
+ fprintf(fp, " identity_map_base: %lx\n", machdep->identity_map_base);
fprintf(fp, " pagesize: %d\n", machdep->pagesize);
fprintf(fp, " pageshift: %d\n", machdep->pageshift);
fprintf(fp, " pagemask: %llx\n", machdep->pagemask);
--
2.41.0

View File

@ -1,96 +0,0 @@
From f436ae7f01db8ec13080f41a2754b2d282166ed7 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Tue, 24 May 2022 20:25:54 +0800
Subject: [PATCH 07/89] bt: arm64: add support for 'bt -n idle'
The '-n idle' option of bt command can help us filter the
stack of the idle process when debugging the dumpfiles
captured by kdump.
This patch supports this feature on ARM64.
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 19 ++++++++++++++++---
help.c | 2 +-
kernel.c | 3 ++-
3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/arm64.c b/arm64.c
index 65f6cdf69fa6..0f615cf52bef 100644
--- a/arm64.c
+++ b/arm64.c
@@ -3681,6 +3681,12 @@ arm64_get_dumpfile_stackframe(struct bt_info *bt, struct arm64_stackframe *frame
{
struct machine_specific *ms = machdep->machspec;
struct arm64_pt_regs *ptregs;
+ bool skip = false;
+
+ if (bt->flags & BT_SKIP_IDLE) {
+ skip = true;
+ bt->flags &= ~BT_SKIP_IDLE;
+ }
if (!ms->panic_task_regs ||
(!ms->panic_task_regs[bt->tc->processor].sp &&
@@ -3713,8 +3719,11 @@ try_kernel:
}
if (arm64_in_kdump_text(bt, frame) ||
- arm64_in_kdump_text_on_irq_stack(bt))
+ arm64_in_kdump_text_on_irq_stack(bt)) {
bt->flags |= BT_KDUMP_ADJUST;
+ if (skip && is_idle_thread(bt->task))
+ bt->flags |= BT_SKIP_IDLE;
+ }
return TRUE;
}
@@ -3738,10 +3747,14 @@ arm64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
int ret;
struct arm64_stackframe stackframe = { 0 };
- if (DUMPFILE() && is_task_active(bt->task))
+ if (DUMPFILE() && is_task_active(bt->task)) {
ret = arm64_get_dumpfile_stackframe(bt, &stackframe);
- else
+ } else {
+ if (bt->flags & BT_SKIP_IDLE)
+ bt->flags &= ~BT_SKIP_IDLE;
+
ret = arm64_get_stackframe(bt, &stackframe);
+ }
if (!ret)
error(WARNING,
diff --git a/help.c b/help.c
index 00c833da13cc..e1ac6f93fde2 100644
--- a/help.c
+++ b/help.c
@@ -1921,7 +1921,7 @@ char *help_bt[] = {
" -a displays the stack traces of the active task on each CPU.",
" (only applicable to crash dumps)",
" -A same as -a, but also displays vector registers (S390X only).",
-" -n idle filter the stack of idle tasks (x86_64).",
+" -n idle filter the stack of idle tasks (x86_64, arm64).",
" (only applicable to crash dumps)",
" -p display the stack trace of the panic task only.",
" (only applicable to crash dumps)",
diff --git a/kernel.c b/kernel.c
index 185b09335733..bd0bf8c6cf03 100644
--- a/kernel.c
+++ b/kernel.c
@@ -2673,7 +2673,8 @@ cmd_bt(void)
break;
case 'n':
- if (machine_type("X86_64") && STREQ(optarg, "idle"))
+ if ((machine_type("X86_64") || machine_type("ARM64")) &&
+ STREQ(optarg, "idle"))
bt->flags |= BT_SKIP_IDLE;
else
option_not_supported(c);
--
2.37.1

View File

@ -1,392 +0,0 @@
From 0a988a8b46741aad6f2503163735ae72485cd04f Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Thu, 2 Jun 2022 20:12:55 +0800
Subject: [PATCH 08/89] Enhance "dev -d|-D" options to support blk-mq sbitmap
Since Linux 5.16-rc1, which kernel commit 9a14d6ce4135 ("block: remove
debugfs blk_mq_ctx dispatched/merged/completed attributes") removed the
members from struct blk_mq_ctx, crash has not displayed disk I/O statistics
for multiqueue (blk-mq) devices.
Let's parse the sbitmap in blk-mq layer to support it.
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
defs.h | 11 +++
dev.c | 244 +++++++++++++++++++++++++++++++++++++++++++++---------
symbols.c | 22 +++++
3 files changed, 238 insertions(+), 39 deletions(-)
diff --git a/defs.h b/defs.h
index 52e4b14aa27c..f2b6ab14c6aa 100644
--- a/defs.h
+++ b/defs.h
@@ -2170,6 +2170,16 @@ struct offset_table { /* stash of commonly-used offsets */
long sbq_wait_state_wait;
long sbitmap_alloc_hint;
long sbitmap_round_robin;
+ long request_cmd_flags;
+ long request_q;
+ long request_state;
+ long request_queue_queue_hw_ctx;
+ long request_queue_nr_hw_queues;
+ long blk_mq_hw_ctx_tags;
+ long blk_mq_tags_bitmap_tags;
+ long blk_mq_tags_breserved_tags;
+ long blk_mq_tags_nr_reserved_tags;
+ long blk_mq_tags_rqs;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2339,6 +2349,7 @@ struct size_table { /* stash of commonly-used sizes */
long sbitmap;
long sbitmap_queue;
long sbq_wait_state;
+ long blk_mq_tags;
};
struct array_table {
diff --git a/dev.c b/dev.c
index a493e51ac95c..4be4c96df8b0 100644
--- a/dev.c
+++ b/dev.c
@@ -4238,19 +4238,176 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io)
io->write = (dispatch[1] - comp[1]);
}
+typedef bool (busy_tag_iter_fn)(ulong rq, void *data);
+
+struct mq_inflight {
+ ulong q;
+ struct diskio *dio;
+};
+
+struct bt_iter_data {
+ ulong tags;
+ uint reserved;
+ uint nr_reserved_tags;
+ busy_tag_iter_fn *fn;
+ void *data;
+};
+
+/*
+ * See the include/linux/blk_types.h and include/linux/blk-mq.h
+ */
+#define MQ_RQ_IN_FLIGHT 1
+#define REQ_OP_BITS 8
+#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
+
+static uint op_is_write(uint op)
+{
+ return (op & REQ_OP_MASK) & 1;
+}
+
+static bool mq_check_inflight(ulong rq, void *data)
+{
+ uint cmd_flags = 0, state = 0;
+ ulong addr = 0, queue = 0;
+ struct mq_inflight *mi = data;
+
+ if (!IS_KVADDR(rq))
+ return TRUE;
+
+ addr = rq + OFFSET(request_q);
+ if (!readmem(addr, KVADDR, &queue, sizeof(ulong), "request.q", RETURN_ON_ERROR))
+ return FALSE;
+
+ addr = rq + OFFSET(request_cmd_flags);
+ if (!readmem(addr, KVADDR, &cmd_flags, sizeof(uint), "request.cmd_flags", RETURN_ON_ERROR))
+ return FALSE;
+
+ addr = rq + OFFSET(request_state);
+ if (!readmem(addr, KVADDR, &state, sizeof(uint), "request.state", RETURN_ON_ERROR))
+ return FALSE;
+
+ if (queue == mi->q && state == MQ_RQ_IN_FLIGHT) {
+ if (op_is_write(cmd_flags))
+ mi->dio->write++;
+ else
+ mi->dio->read++;
+ }
+
+ return TRUE;
+}
+
+static bool bt_iter(uint bitnr, void *data)
+{
+ ulong addr = 0, rqs_addr = 0, rq = 0;
+ struct bt_iter_data *iter_data = data;
+ ulong tag = iter_data->tags;
+
+ if (!iter_data->reserved)
+ bitnr += iter_data->nr_reserved_tags;
+
+ /* rqs */
+ addr = tag + OFFSET(blk_mq_tags_rqs);
+ if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.rqs", RETURN_ON_ERROR))
+ return FALSE;
+
+ addr = rqs_addr + bitnr * sizeof(ulong); /* rqs[bitnr] */
+ if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.rqs[]", RETURN_ON_ERROR))
+ return FALSE;
+
+ return iter_data->fn(rq, iter_data->data);
+}
+
+static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_resvd_tags, struct diskio *dio)
+{
+ struct sbitmap_context sc = {0};
+ struct mq_inflight mi = {
+ .q = q,
+ .dio = dio,
+ };
+ struct bt_iter_data iter_data = {
+ .tags = tags,
+ .reserved = reserved,
+ .nr_reserved_tags = nr_resvd_tags,
+ .fn = mq_check_inflight,
+ .data = &mi,
+ };
+
+ sbitmap_context_load(sbq + OFFSET(sbitmap_queue_sb), &sc);
+ sbitmap_for_each_set(&sc, bt_iter, &iter_data);
+}
+
+static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
+{
+ uint i;
+
+ for (i = 0; i < cnt; i++) {
+ ulong addr = 0, tags = 0;
+ uint nr_reserved_tags = 0;
+
+ /* Tags owned by the block driver */
+ addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
+ if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
+ "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
+ break;
+
+ addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
+ if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
+ "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
+ break;
+
+ if (nr_reserved_tags) {
+ addr = tags + OFFSET(blk_mq_tags_breserved_tags);
+ bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
+ }
+ addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
+ bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
+ }
+}
+
+static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio)
+{
+ uint cnt = 0;
+ ulong addr = 0, hctx_addr = 0;
+ ulong *hctx_array = NULL;
+
+ addr = q + OFFSET(request_queue_nr_hw_queues);
+ readmem(addr, KVADDR, &cnt, sizeof(uint),
+ "request_queue.nr_hw_queues", FAULT_ON_ERROR);
+
+ addr = q + OFFSET(request_queue_queue_hw_ctx);
+ readmem(addr, KVADDR, &hctx_addr, sizeof(void *),
+ "request_queue.queue_hw_ctx", FAULT_ON_ERROR);
+
+ hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt);
+ if (!hctx_array)
+ error(FATAL, "fail to get memory for the hctx_array\n");
+
+ if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt,
+ "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) {
+ FREEBUF(hctx_array);
+ return;
+ }
+
+ queue_for_each_hw_ctx(q, hctx_array, cnt, dio);
+
+ FREEBUF(hctx_array);
+}
+
static void
get_mq_diskio(unsigned long q, unsigned long *mq_count)
{
int cpu;
unsigned long queue_ctx;
unsigned long mctx_addr;
- struct diskio tmp;
+ struct diskio tmp = {0};
if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
- INVALID_MEMBER(blk_mq_ctx_rq_completed))
+ INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
+ get_mq_diskio_from_hw_queues(q, &tmp);
+ mq_count[0] = tmp.read;
+ mq_count[1] = tmp.write;
return;
-
- memset(&tmp, 0x00, sizeof(struct diskio));
+ }
readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx,
sizeof(ulong), "request_queue.queue_ctx",
@@ -4479,41 +4636,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk, ulong flags)
&& (io.read + io.write == 0))
return;
- if (use_mq_interface(queue_addr) &&
- (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
- INVALID_MEMBER(blk_mq_ctx_rq_completed)))
- fprintf(fp, "%s%s%s %s%s%s%s %s%s%s",
- mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
- space(MINSPACE),
- mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
- space(MINSPACE),
- mkstring(buf2, 10, LJUST, disk_name),
- space(MINSPACE),
- mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
- LJUST|LONG_HEX, (char *)queue_addr),
- space(MINSPACE),
- mkstring(buf4, 17, RJUST, "(not supported)"),
- space(MINSPACE));
-
- else
- fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s",
- mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
- space(MINSPACE),
- mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
- space(MINSPACE),
- mkstring(buf2, 10, LJUST, disk_name),
- space(MINSPACE),
- mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
- LJUST|LONG_HEX, (char *)queue_addr),
- space(MINSPACE),
- io.read + io.write,
- space(MINSPACE),
- mkstring(buf4, 5, RJUST|INT_DEC,
- (char *)(unsigned long)io.read),
- space(MINSPACE),
- mkstring(buf5, 5, RJUST|INT_DEC,
- (char *)(unsigned long)io.write),
- space(MINSPACE));
+ fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s",
+ mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
+ space(MINSPACE),
+ mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
+ space(MINSPACE),
+ mkstring(buf2, 10, LJUST, disk_name),
+ space(MINSPACE),
+ mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
+ LJUST|LONG_HEX, (char *)queue_addr),
+ space(MINSPACE),
+ io.read + io.write,
+ space(MINSPACE),
+ mkstring(buf4, 5, RJUST|INT_DEC,
+ (char *)(unsigned long)io.read),
+ space(MINSPACE),
+ mkstring(buf5, 5, RJUST|INT_DEC,
+ (char *)(unsigned long)io.write),
+ space(MINSPACE));
if (VALID_MEMBER(request_queue_in_flight)) {
if (!use_mq_interface(queue_addr)) {
@@ -4597,6 +4737,9 @@ void diskio_init(void)
MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry");
MEMBER_OFFSET_INIT(kset_list, "kset", "list");
MEMBER_OFFSET_INIT(request_list_count, "request_list", "count");
+ MEMBER_OFFSET_INIT(request_cmd_flags, "request", "cmd_flags");
+ MEMBER_OFFSET_INIT(request_q, "request", "q");
+ MEMBER_OFFSET_INIT(request_state, "request", "state");
MEMBER_OFFSET_INIT(request_queue_in_flight, "request_queue",
"in_flight");
if (MEMBER_EXISTS("request_queue", "rq"))
@@ -4608,10 +4751,33 @@ void diskio_init(void)
"mq_ops");
ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx,
"request_queue", "queue_ctx");
+ MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx,
+ "request_queue", "queue_hw_ctx");
+ MEMBER_OFFSET_INIT(request_queue_nr_hw_queues,
+ "request_queue", "nr_hw_queues");
MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx",
"rq_dispatched");
MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx",
"rq_completed");
+ MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx", "tags");
+ MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags",
+ "bitmap_tags");
+ MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags",
+ "breserved_tags");
+ MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags",
+ "nr_reserved_tags");
+ MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags", "rqs");
+ STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags");
+ STRUCT_SIZE_INIT(sbitmap, "sbitmap");
+ STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word");
+ MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word");
+ MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared");
+ MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth");
+ MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
+ MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
+ MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
+ MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
+
}
MEMBER_OFFSET_INIT(subsys_private_klist_devices, "subsys_private",
"klist_devices");
diff --git a/symbols.c b/symbols.c
index 7431aaecec9d..520debdb311e 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10397,6 +10397,12 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(kset_list));
fprintf(fp, " request_list_count: %ld\n",
OFFSET(request_list_count));
+ fprintf(fp, " request_cmd_flags: %ld\n",
+ OFFSET(request_cmd_flags));
+ fprintf(fp, " request_q: %ld\n",
+ OFFSET(request_q));
+ fprintf(fp, " request_state: %ld\n",
+ OFFSET(request_state));
fprintf(fp, " request_queue_in_flight: %ld\n",
OFFSET(request_queue_in_flight));
fprintf(fp, " request_queue_rq: %ld\n",
@@ -10405,10 +10411,25 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(request_queue_mq_ops));
fprintf(fp, " request_queue_queue_ctx: %ld\n",
OFFSET(request_queue_queue_ctx));
+ fprintf(fp, " request_queue_queue_hw_ctx: %ld\n",
+ OFFSET(request_queue_queue_hw_ctx));
+ fprintf(fp, " request_queue_nr_hw_queues: %ld\n",
+ OFFSET(request_queue_nr_hw_queues));
fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n",
OFFSET(blk_mq_ctx_rq_dispatched));
fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n",
OFFSET(blk_mq_ctx_rq_completed));
+ fprintf(fp, " blk_mq_hw_ctx_tags: %ld\n",
+ OFFSET(blk_mq_hw_ctx_tags));
+ fprintf(fp, " blk_mq_tags_bitmap_tags: %ld\n",
+ OFFSET(blk_mq_tags_bitmap_tags));
+ fprintf(fp, " blk_mq_tags_breserved_tags: %ld\n",
+ OFFSET(blk_mq_tags_breserved_tags));
+ fprintf(fp, " blk_mq_tags_nr_reserved_tags: %ld\n",
+ OFFSET(blk_mq_tags_nr_reserved_tags));
+ fprintf(fp, " blk_mq_tags_rqs: %ld\n",
+ OFFSET(blk_mq_tags_rqs));
+
fprintf(fp, " subsys_private_klist_devices: %ld\n",
OFFSET(subsys_private_klist_devices));
fprintf(fp, " subsystem_kset: %ld\n",
@@ -11015,6 +11036,7 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " sbitmap: %ld\n", SIZE(sbitmap));
fprintf(fp, " sbitmap_queue: %ld\n", SIZE(sbitmap_queue));
fprintf(fp, " sbq_wait_state: %ld\n", SIZE(sbq_wait_state));
+ fprintf(fp, " blk_mq_tags: %ld\n", SIZE(blk_mq_tags));
fprintf(fp, "\n array_table:\n");
/*
--
2.37.1

View File

@ -0,0 +1,70 @@
From 4c78eb4a9199631fe94845cb3fbd6376aae1251d Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Wed, 29 Nov 2023 13:47:35 +0100
Subject: [PATCH 08/14] s390x: fix virtual vs physical address confusion
Physical and virtual addresses are the same on S390X.
That led to missing to use PTOV and VTOP macros where
they actually expected.
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
s390x.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/s390x.c b/s390x.c
index 096c072186f5..957b839a5fa9 100644
--- a/s390x.c
+++ b/s390x.c
@@ -311,7 +311,7 @@ static struct s390x_cpu *s390x_cpu_get(struct bt_info *bt)
readmem(lowcore_ptr + cpu * sizeof(long), KVADDR,
&prefix, sizeof(long), "lowcore_ptr", FAULT_ON_ERROR);
for (i = 0; i < s390x_cpu_cnt; i++) {
- if (s390x_cpu_vec[i].prefix == prefix)
+ if (s390x_cpu_vec[i].prefix == VTOP(prefix))
return &s390x_cpu_vec[i];
}
error(FATAL, "cannot determine CPU for task: %lx\n", bt->task);
@@ -985,12 +985,12 @@ int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
verbose);
if (!entry)
return FALSE;
- table = entry & ~0xfffULL;
+ table = PTOV(entry & ~0xfffULL);
/* Check if this a 2GB page */
if ((entry & 0x400ULL) && (level == 1)) {
/* Add the 2GB frame offset & return the final value. */
table &= ~0x7fffffffULL;
- *phys_addr = table + (vaddr & 0x7fffffffULL);
+ *phys_addr = VTOP(table + (vaddr & 0x7fffffffULL));
return TRUE;
}
len = entry & 0x3ULL;
@@ -1001,12 +1001,12 @@ int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
if (entry & 0x400ULL) {
/* Add the 1MB page offset and return the final value. */
table &= ~0xfffffULL;
- *phys_addr = table + (vaddr & 0xfffffULL);
+ *phys_addr = VTOP(table + (vaddr & 0xfffffULL));
return TRUE;
}
/* Get the page table entry */
- entry = _kl_pg_table_deref_s390x(vaddr, entry & ~0x7ffULL, verbose);
+ entry = _kl_pg_table_deref_s390x(vaddr, PTOV(entry & ~0x7ffULL), verbose);
if (!entry)
return FALSE;
@@ -1033,7 +1033,7 @@ s390x_vmalloc_start(void)
{
unsigned long highmem_addr,high_memory;
highmem_addr=symbol_value("high_memory");
- readmem(highmem_addr, PHYSADDR, &high_memory,sizeof(long),
+ readmem(highmem_addr, KVADDR, &high_memory,sizeof(long),
"highmem",FAULT_ON_ERROR);
return high_memory;
}
--
2.41.0

View File

@ -1,121 +0,0 @@
From 7b91ac6d20b741c1c8487a04d2b9d8aee5772471 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Thu, 2 Jun 2022 20:12:56 +0800
Subject: [PATCH 09/89] Fix for "dev -d|-D" options to support blk-mq change on
Linux v5.18-rc1
Kernel commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray") removed
the "queue_hw_ctx" member from struct request_queue at Linux v5.18-rc1,
and replaced it with a struct xarray "hctx_table". Without the patch, the
"dev -d|-D" options will print an error:
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE
dev: invalid structure member offset: request_queue_queue_hw_ctx
With the patch:
crash> dev -d
MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE
8 ffff8e99d0a1ae00 sda ffff8e9c14c59980 10 6 4
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
dev.c | 42 +++++++++++++++++++++++++++++++++---------
symbols.c | 2 ++
3 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/defs.h b/defs.h
index f2b6ab14c6aa..c524a05d8105 100644
--- a/defs.h
+++ b/defs.h
@@ -2180,6 +2180,7 @@ struct offset_table { /* stash of commonly-used offsets */
long blk_mq_tags_breserved_tags;
long blk_mq_tags_nr_reserved_tags;
long blk_mq_tags_rqs;
+ long request_queue_hctx_table;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/dev.c b/dev.c
index 4be4c96df8b0..0172c83ffaea 100644
--- a/dev.c
+++ b/dev.c
@@ -4369,20 +4369,42 @@ static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio)
uint cnt = 0;
ulong addr = 0, hctx_addr = 0;
ulong *hctx_array = NULL;
+ struct list_pair *lp = NULL;
+
+ if (VALID_MEMBER(request_queue_hctx_table)) {
+ addr = q + OFFSET(request_queue_hctx_table);
+ cnt = do_xarray(addr, XARRAY_COUNT, NULL);
+ lp = (struct list_pair *)GETBUF(sizeof(struct list_pair) * (cnt + 1));
+ if (!lp)
+ error(FATAL, "fail to get memory for list_pair.\n");
+ lp[0].index = cnt;
+ cnt = do_xarray(addr, XARRAY_GATHER, lp);
+ } else {
+ addr = q + OFFSET(request_queue_nr_hw_queues);
+ readmem(addr, KVADDR, &cnt, sizeof(uint),
+ "request_queue.nr_hw_queues", FAULT_ON_ERROR);
- addr = q + OFFSET(request_queue_nr_hw_queues);
- readmem(addr, KVADDR, &cnt, sizeof(uint),
- "request_queue.nr_hw_queues", FAULT_ON_ERROR);
-
- addr = q + OFFSET(request_queue_queue_hw_ctx);
- readmem(addr, KVADDR, &hctx_addr, sizeof(void *),
- "request_queue.queue_hw_ctx", FAULT_ON_ERROR);
+ addr = q + OFFSET(request_queue_queue_hw_ctx);
+ readmem(addr, KVADDR, &hctx_addr, sizeof(void *),
+ "request_queue.queue_hw_ctx", FAULT_ON_ERROR);
+ }
hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt);
- if (!hctx_array)
+ if (!hctx_array) {
+ if (lp)
+ FREEBUF(lp);
error(FATAL, "fail to get memory for the hctx_array\n");
+ }
+
+ if (lp && hctx_array) {
+ uint i;
+
+ /* copy it from list_pair to hctx_array */
+ for (i = 0; i < cnt; i++)
+ hctx_array[i] = (ulong)lp[i].value;
- if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt,
+ FREEBUF(lp);
+ } else if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt,
"request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) {
FREEBUF(hctx_array);
return;
@@ -4755,6 +4777,8 @@ void diskio_init(void)
"request_queue", "queue_hw_ctx");
MEMBER_OFFSET_INIT(request_queue_nr_hw_queues,
"request_queue", "nr_hw_queues");
+ MEMBER_OFFSET_INIT(request_queue_hctx_table,
+ "request_queue", "hctx_table");
MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx",
"rq_dispatched");
MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx",
diff --git a/symbols.c b/symbols.c
index 520debdb311e..69004a2e66e3 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10415,6 +10415,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(request_queue_queue_hw_ctx));
fprintf(fp, " request_queue_nr_hw_queues: %ld\n",
OFFSET(request_queue_nr_hw_queues));
+ fprintf(fp, " request_queue_hctx_table: %ld\n",
+ OFFSET(request_queue_hctx_table));
fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n",
OFFSET(blk_mq_ctx_rq_dispatched));
fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n",
--
2.37.1

View File

@ -0,0 +1,323 @@
From d0164e7e480ad2ffd3fe73fe53c46087e5e137a6 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Thu, 7 Dec 2023 16:54:06 +0100
Subject: [PATCH 09/14] s390x: uncouple physical and virtual memory spaces
Rework VTOP and PTOV macros to reflect the future
uncoupling of physical and virtual address spaces
in kernel. Existing versions are not affected.
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 20 +++++-
s390x.c | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 228 insertions(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index 5218a94fe4a4..20237b72a10b 100644
--- a/defs.h
+++ b/defs.h
@@ -4564,9 +4564,9 @@ struct efi_memory_desc_t {
#define _64BIT_
#define MACHINE_TYPE "S390X"
-#define PTOV(X) ((unsigned long)(X)+(machdep->kvbase))
-#define VTOP(X) ((unsigned long)(X)-(machdep->kvbase))
-#define IS_VMALLOC_ADDR(X) (vt->vmalloc_start && (ulong)(X) >= vt->vmalloc_start)
+#define PTOV(X) s390x_PTOV((ulong)(X))
+#define VTOP(X) s390x_VTOP((ulong)(X))
+#define IS_VMALLOC_ADDR(X) s390x_IS_VMALLOC_ADDR(X)
#define PTRS_PER_PTE 512
#define PTRS_PER_PMD 1024
#define PTRS_PER_PGD 2048
@@ -6827,7 +6827,21 @@ void get_s390_panicmsg(char *);
* s390x.c
*/
#ifdef S390X
+
+struct machine_specific
+{
+ ulong (*virt_to_phys)(ulong vaddr);
+ ulong (*phys_to_virt)(ulong paddr);
+ int (*is_vmalloc_addr)(ulong vaddr);
+ ulong __kaslr_offset_phys;
+ ulong amode31_start;
+ ulong amode31_end;
+};
+
void s390x_init(int);
+ulong s390x_PTOV(ulong);
+ulong s390x_VTOP(ulong);
+int s390x_IS_VMALLOC_ADDR(ulong);
void s390x_dump_machdep_table(ulong);
#define display_idt_table() \
error(FATAL, "-d option is not applicable to S390X architecture\n")
diff --git a/s390x.c b/s390x.c
index 957b839a5fa9..794ae825906d 100644
--- a/s390x.c
+++ b/s390x.c
@@ -47,6 +47,7 @@
#define S390X_PSW_MASK_PSTATE 0x0001000000000000UL
#define S390X_LC_VMCORE_INFO 0xe0c
+#define S390X_LC_OS_INFO 0xe18
/*
* Flags for Region and Segment table entries.
@@ -168,6 +169,19 @@ static struct line_number_hook s390x_line_number_hooks[];
static int s390x_is_uvaddr(ulong, struct task_context *);
static int s390x_get_kvaddr_ranges(struct vaddr_range *);
static int set_s390x_max_physmem_bits(void);
+static ulong s390x_generic_VTOP(ulong vaddr);
+static ulong s390x_generic_PTOV(ulong paddr);
+static int s390x_generic_IS_VMALLOC_ADDR(ulong vaddr);
+static ulong s390x_vr_VTOP(ulong vaddr);
+static ulong s390x_vr_PTOV(ulong paddr);
+static int s390x_vr_IS_VMALLOC_ADDR(ulong vaddr);
+static int s390x_vr_is_kvaddr(ulong);
+
+struct machine_specific s390x_machine_specific = {
+ .virt_to_phys = s390x_generic_VTOP,
+ .phys_to_virt = s390x_generic_PTOV,
+ .is_vmalloc_addr = s390x_generic_IS_VMALLOC_ADDR,
+};
/*
* struct lowcore name (old: "_lowcore", new: "lowcore")
@@ -546,6 +560,191 @@ static void s390x_check_kaslr(void)
free(vmcoreinfo);
}
+#define OS_INFO_VERSION_MAJOR 1
+#define OS_INFO_VERSION_MINOR 1
+
+#define OS_INFO_VMCOREINFO 0
+#define OS_INFO_REIPL_BLOCK 1
+#define OS_INFO_FLAGS_ENTRY 2
+#define OS_INFO_RESERVED 3
+#define OS_INFO_IDENTITY_BASE 4
+#define OS_INFO_KASLR_OFFSET 5
+#define OS_INFO_KASLR_OFF_PHYS 6
+#define OS_INFO_VMEMMAP 7
+#define OS_INFO_AMODE31_START 8
+#define OS_INFO_AMODE31_END 9
+
+struct os_info_entry {
+ union {
+ __u64 addr;
+ __u64 val;
+ };
+ __u64 size;
+ __u32 csum;
+} __attribute__((packed));
+
+struct os_info {
+ __u64 magic;
+ __u32 csum;
+ __u16 version_major;
+ __u16 version_minor;
+ __u64 crashkernel_addr;
+ __u64 crashkernel_size;
+ struct os_info_entry entry[10];
+ __u8 reserved[3864];
+} __attribute__((packed));
+
+struct vm_info {
+ __u64 __identity_base;
+ __u64 __kaslr_offset;
+ __u64 __kaslr_offset_phys;
+ __u64 amode31_start;
+ __u64 amode31_end;
+};
+
+static bool
+vmcoreinfo_read_u64(const char *key, __u64 *val)
+{
+ char *string;
+
+ string = pc->read_vmcoreinfo(key);
+ if (string) {
+ *val = strtoul(string, NULL, 16);
+ free(string);
+ return true;
+ }
+
+ return false;
+}
+
+static bool vmcoreinfo_read_vm_info(struct vm_info *_vm_info)
+{
+ struct vm_info vm_info;
+
+ if (!vmcoreinfo_read_u64("IDENTITYBASE", &vm_info.__identity_base) ||
+ !vmcoreinfo_read_u64("KERNELOFFSET", &vm_info.__kaslr_offset) ||
+ !vmcoreinfo_read_u64("KERNELOFFPHYS", &vm_info.__kaslr_offset_phys) ||
+ !vmcoreinfo_read_u64("SAMODE31", &vm_info.amode31_start) ||
+ !vmcoreinfo_read_u64("EAMODE31", &vm_info.amode31_end))
+ return false;
+
+ *_vm_info = vm_info;
+
+ return true;
+}
+
+static bool os_info_read_vm_info(struct vm_info *vm_info)
+{
+ struct os_info os_info;
+ ulong addr;
+
+ if (!readmem(S390X_LC_OS_INFO, PHYSADDR, &addr,
+ sizeof(addr), "s390x os_info ptr",
+ QUIET|RETURN_ON_ERROR))
+ return false;
+
+ if (addr == 0)
+ return true;
+
+ if (!readmem(addr, PHYSADDR, &os_info,
+ offsetof(struct os_info, reserved), "s390x os_info header",
+ QUIET|RETURN_ON_ERROR))
+ return false;
+
+ vm_info->__identity_base = os_info.entry[OS_INFO_IDENTITY_BASE].val;
+ vm_info->__kaslr_offset = os_info.entry[OS_INFO_KASLR_OFFSET].val;
+ vm_info->__kaslr_offset_phys = os_info.entry[OS_INFO_KASLR_OFF_PHYS].val;
+ vm_info->amode31_start = os_info.entry[OS_INFO_AMODE31_START].val;
+ vm_info->amode31_end = os_info.entry[OS_INFO_AMODE31_END].val;
+
+ return true;
+}
+
+static bool vm_info_empty(struct vm_info *vm_info)
+{
+ return !vm_info->__kaslr_offset;
+}
+
+static bool s390x_init_vm(void)
+{
+ struct vm_info vm_info;
+
+ if (pc->flags & PROC_KCORE) {
+ if (!vmcoreinfo_read_vm_info(&vm_info))
+ return true;
+ } else {
+ if (!os_info_read_vm_info(&vm_info))
+ return false;
+ }
+ if (vm_info_empty(&vm_info))
+ return true;
+
+ machdep->identity_map_base = vm_info.__identity_base;
+ machdep->kvbase = vm_info.__kaslr_offset;
+ machdep->machspec->__kaslr_offset_phys = vm_info.__kaslr_offset_phys;
+ machdep->machspec->amode31_start = vm_info.amode31_start;
+ machdep->machspec->amode31_end = vm_info.amode31_end;
+
+ machdep->is_kvaddr = s390x_vr_is_kvaddr;
+ machdep->machspec->virt_to_phys = s390x_vr_VTOP;
+ machdep->machspec->phys_to_virt = s390x_vr_PTOV;
+ machdep->machspec->is_vmalloc_addr = s390x_vr_IS_VMALLOC_ADDR;
+
+ return true;
+}
+
+static ulong s390x_generic_VTOP(ulong vaddr)
+{
+ return vaddr - machdep->kvbase;
+}
+
+static ulong s390x_generic_PTOV(ulong paddr)
+{
+ return paddr + machdep->kvbase;
+}
+
+static int s390x_generic_IS_VMALLOC_ADDR(ulong vaddr)
+{
+ return vt->vmalloc_start && vaddr >= vt->vmalloc_start;
+}
+
+static ulong s390x_vr_VTOP(ulong vaddr)
+{
+ if (vaddr < LOWCORE_SIZE)
+ return vaddr;
+ if ((vaddr < machdep->machspec->amode31_end) &&
+ (vaddr >= machdep->machspec->amode31_start))
+ return vaddr;
+ if (vaddr < machdep->kvbase)
+ return vaddr - machdep->identity_map_base;
+ return vaddr - machdep->kvbase + machdep->machspec->__kaslr_offset_phys;
+}
+
+static ulong s390x_vr_PTOV(ulong paddr)
+{
+ return paddr + machdep->identity_map_base;
+}
+
+static int s390x_vr_IS_VMALLOC_ADDR(ulong vaddr)
+{
+ return (vaddr >= vt->vmalloc_start && vaddr < machdep->kvbase);
+}
+
+ulong s390x_VTOP(ulong vaddr)
+{
+ return machdep->machspec->virt_to_phys(vaddr);
+}
+
+ulong s390x_PTOV(ulong paddr)
+{
+ return machdep->machspec->phys_to_virt(paddr);
+}
+
+int s390x_IS_VMALLOC_ADDR(ulong vaddr)
+{
+ return machdep->machspec->is_vmalloc_addr(vaddr);
+}
+
/*
* Do all necessary machine-specific setup here. This is called several
* times during initialization.
@@ -560,6 +759,7 @@ s390x_init(int when)
machdep->process_elf_notes = s390x_process_elf_notes;
break;
case PRE_SYMTAB:
+ machdep->machspec = &s390x_machine_specific;
machdep->verify_symbol = s390x_verify_symbol;
if (pc->flags & KERNEL_DEBUG_QUERY)
return;
@@ -587,6 +787,8 @@ s390x_init(int when)
machdep->kvbase = 0;
machdep->identity_map_base = 0;
machdep->is_kvaddr = generic_is_kvaddr;
+ if (!s390x_init_vm())
+ error(FATAL, "cannot initialize VM parameters.");
machdep->is_uvaddr = s390x_is_uvaddr;
machdep->eframe_search = s390x_eframe_search;
machdep->back_trace = s390x_back_trace_cmd;
@@ -681,7 +883,9 @@ s390x_dump_machdep_table(ulong arg)
fprintf(fp, " dis_filter: s390x_dis_filter()\n");
fprintf(fp, " cmd_mach: s390x_cmd_mach()\n");
fprintf(fp, " get_smp_cpus: s390x_get_smp_cpus()\n");
- fprintf(fp, " is_kvaddr: generic_is_kvaddr()\n");
+ fprintf(fp, " is_kvaddr: %s()\n", machdep->is_kvaddr == s390x_vr_is_kvaddr ?
+ "s390x_vr_is_kvaddr" :
+ "generic_is_kvaddr");
fprintf(fp, " is_uvaddr: s390x_is_uvaddr()\n");
fprintf(fp, " verify_paddr: generic_verify_paddr()\n");
fprintf(fp, " get_kvaddr_ranges: s390x_get_kvaddr_ranges()\n");
@@ -702,6 +906,12 @@ s390x_dump_machdep_table(ulong arg)
fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec);
}
+static int
+s390x_vr_is_kvaddr(ulong vaddr)
+{
+ return (vaddr < LOWCORE_SIZE) || (vaddr >= machdep->identity_map_base);
+}
+
/*
* Check if address is in context's address space
*/
--
2.41.0

View File

@ -1,43 +0,0 @@
From 2d9e180858b3212507fcba7a7cb9d13f6a935a2f Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 6 Jun 2022 19:09:16 +0800
Subject: [PATCH 10/89] Doc: update man page for the "bpf" and "sbitmapq"
commands
The information of the "bpf" and "sbitmapq" commands is missing in the man
page of the crash utility. Let's add it to the man page.
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
crash.8 | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/crash.8 b/crash.8
index 1f3657b11e4c..e553a0b4adb3 100644
--- a/crash.8
+++ b/crash.8
@@ -584,6 +584,9 @@ creates a single-word alias for a command.
.I ascii
displays an ascii chart or translates a numeric value into its ascii components.
.TP
+.I bpf
+provides information on currently-loaded eBPF programs and maps.
+.TP
.I bt
displays a task's kernel-stack backtrace. If it is given the
.I \-a
@@ -706,6 +709,11 @@ number of seconds between each command execution.
.I runq
displays the tasks on the run queue.
.TP
+.I sbitmapq
+dumps the contents of the sbitmap_queue structure and the used
+bits in the bitmap. Also, it shows the dump of a structure array
+associated with the sbitmap_queue.
+.TP
.I search
searches a range of user or kernel memory space for given value.
.TP
--
2.37.1

View File

@ -0,0 +1,160 @@
From 5187a0320cc54a9cb8b326cf012e69795950a716 Mon Sep 17 00:00:00 2001
From: Song Shuai <songshuaishuai@tinylab.org>
Date: Tue, 12 Dec 2023 18:20:50 +0800
Subject: [PATCH 10/14] RISCV64: Dump NT_PRSTATUS in 'help -n'
With the patch we can get full dump of "struct elf_prstatus" in 'help -n':
```
crash> help -n
<snip>
Elf64_Nhdr:
n_namesz: 5 ("CORE")
n_descsz: 376
n_type: 1 (NT_PRSTATUS)
si.signo: 0 si.code: 0 si.errno: 0
cursig: 0 sigpend: 0 sighold: 0
pid: 1 ppid: 0 pgrp: 0 sid:0
utime: 0.000000 stime: 0.000000
cutime: 0.000000 cstime: 0.000000
epc: ffffffff8000a1dc ra: ffffffff800af958 sp: ff6000001fc501c0
gp: ffffffff81515d38 tp: ff600000000d8000 t0: 6666666666663c5b
t1: ff600000000d88c8 t2: 666666666666663c s0: ff6000001fc50320
s1: ffffffff815170d8 a0: ff6000001fc501c8 a1: c0000000ffffefff
a2: 0000000000000000 a3: 0000000000000001 a4: 0000000000000000
a5: ff60000001782c00 a6: 000000000130e0f0 a7: 0000000000000000
s2: ffffffff81517820 s3: ff6000001fc501c8 s4: 000000000000000f
s5: 0000000000000000 s6: ff20000000013e60 s7: 0000000000000000
s8: ff60000000861000 s9: 00007fffc3641694 s10: 00007fffc3641690
s11: 00005555796ed240 t3: 0000000000010297 t4: ffffffff80c17810
t5: ffffffff8195e7b8 t6: ff6000001fc50048
0000000000000000 0000000000000000
0000000000000000 0000000000000000
0000000000000001 0000000000000000
0000000000000000 0000000000000000
0000000000000000 0000000000000000
0000000000000000 0000000000000000
0000000000000000 0000000000000000
ffffffff8000a1dc ffffffff800af958
ff6000001fc501c0 ffffffff81515d38
ff600000000d8000 6666666666663c5b
<snip>
```
Signed-off-by: Song Shuai <songshuaishuai@tinylab.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
netdump.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 84 insertions(+)
diff --git a/netdump.c b/netdump.c
index 390786364959..32586b6809d3 100644
--- a/netdump.c
+++ b/netdump.c
@@ -2578,6 +2578,8 @@ dump_Elf64_Nhdr(Elf64_Off offset, int store)
display_ELF_note(EM_PPC64, PRSTATUS_NOTE, note, nd->ofp);
if (machine_type("ARM64") && (note->n_type == NT_PRSTATUS))
display_ELF_note(EM_AARCH64, PRSTATUS_NOTE, note, nd->ofp);
+ if (machine_type("RISCV64") && (note->n_type == NT_PRSTATUS))
+ display_ELF_note(EM_RISCV, PRSTATUS_NOTE, note, nd->ofp);
}
for (i = lf = 0; i < note->n_descsz/sizeof(ulonglong); i++) {
if (((i%2)==0)) {
@@ -3399,6 +3401,80 @@ display_prstatus_arm64(void *note_ptr, FILE *ofp)
space(sp), pr->pr_reg[33], pr->pr_fpvalid);
}
+struct riscv64_elf_siginfo {
+ int si_signo;
+ int si_code;
+ int si_errno;
+};
+
+struct riscv64_elf_prstatus {
+ struct riscv64_elf_siginfo pr_info;
+ short pr_cursig;
+ unsigned long pr_sigpend;
+ unsigned long pr_sighold;
+ pid_t pr_pid;
+ pid_t pr_ppid;
+ pid_t pr_pgrp;
+ pid_t pr_sid;
+ struct timeval pr_utime;
+ struct timeval pr_stime;
+ struct timeval pr_cutime;
+ struct timeval pr_cstime;
+/* elf_gregset_t pr_reg; => typedef struct user_regs_struct elf_gregset_t; */
+ unsigned long pr_reg[32];
+ int pr_fpvalid;
+};
+
+static void
+display_prstatus_riscv64(void *note_ptr, FILE *ofp)
+{
+ struct riscv64_elf_prstatus *pr;
+ Elf64_Nhdr *note;
+ int sp;
+
+ note = (Elf64_Nhdr *)note_ptr;
+ pr = (struct riscv64_elf_prstatus *)(
+ (char *)note + sizeof(Elf64_Nhdr) + note->n_namesz);
+ pr = (struct riscv64_elf_prstatus *)roundup((ulong)pr, 4);
+ sp = nd->num_prstatus_notes ? 25 : 22;
+
+ fprintf(ofp,
+ "%ssi.signo: %d si.code: %d si.errno: %d\n"
+ "%scursig: %d sigpend: %lx sighold: %lx\n"
+ "%spid: %d ppid: %d pgrp: %d sid:%d\n"
+ "%sutime: %01lld.%06d stime: %01lld.%06d\n"
+ "%scutime: %01lld.%06d cstime: %01lld.%06d\n",
+ space(sp), pr->pr_info.si_signo, pr->pr_info.si_code, pr->pr_info.si_errno,
+ space(sp), pr->pr_cursig, pr->pr_sigpend, pr->pr_sighold,
+ space(sp), pr->pr_pid, pr->pr_ppid, pr->pr_pgrp, pr->pr_sid,
+ space(sp), (long long)pr->pr_utime.tv_sec, (int)pr->pr_utime.tv_usec,
+ (long long)pr->pr_stime.tv_sec, (int)pr->pr_stime.tv_usec,
+ space(sp), (long long)pr->pr_cutime.tv_sec, (int)pr->pr_cutime.tv_usec,
+ (long long)pr->pr_cstime.tv_sec, (int)pr->pr_cstime.tv_usec);
+ fprintf(ofp,
+ "%sepc: %016lx ra: %016lx sp: %016lx\n"
+ "%s gp: %016lx tp: %016lx t0: %016lx\n"
+ "%s t1: %016lx t2: %016lx s0: %016lx\n"
+ "%s s1: %016lx a0: %016lx a1: %016lx\n"
+ "%s a2: %016lx a3: %016lx a4: %016lx\n"
+ "%s a5: %016lx a6: %016lx a7: %016lx\n"
+ "%s s2: %016lx s3: %016lx s4: %016lx\n"
+ "%s s5: %016lx s6: %016lx s7: %016lx\n"
+ "%s s8: %016lx s9: %016lx s10: %016lx\n"
+ "%ss11: %016lx t3: %016lx t4: %016lx\n"
+ "%s t5: %016lx t6: %016lx\n",
+ space(sp), pr->pr_reg[0], pr->pr_reg[1], pr->pr_reg[2],
+ space(sp), pr->pr_reg[3], pr->pr_reg[4], pr->pr_reg[5],
+ space(sp), pr->pr_reg[6], pr->pr_reg[7], pr->pr_reg[8],
+ space(sp), pr->pr_reg[9], pr->pr_reg[10], pr->pr_reg[11],
+ space(sp), pr->pr_reg[12], pr->pr_reg[13], pr->pr_reg[14],
+ space(sp), pr->pr_reg[15], pr->pr_reg[16], pr->pr_reg[17],
+ space(sp), pr->pr_reg[18], pr->pr_reg[19], pr->pr_reg[20],
+ space(sp), pr->pr_reg[21], pr->pr_reg[22], pr->pr_reg[23],
+ space(sp), pr->pr_reg[24], pr->pr_reg[25], pr->pr_reg[26],
+ space(sp), pr->pr_reg[27], pr->pr_reg[28], pr->pr_reg[29],
+ space(sp), pr->pr_reg[30], pr->pr_reg[31]);
+}
void
display_ELF_note(int machine, int type, void *note, FILE *ofp)
@@ -3449,6 +3525,14 @@ display_ELF_note(int machine, int type, void *note, FILE *ofp)
break;
}
break;
+ case EM_RISCV:
+ switch (type)
+ {
+ case PRSTATUS_NOTE:
+ display_prstatus_riscv64(note, ofp);
+ break;
+ }
+ break;
default:
return;
--
2.41.0

View File

@ -0,0 +1,87 @@
From 9b69093e623f1d54c373b1e091900d40576c059b Mon Sep 17 00:00:00 2001
From: Song Shuai <songshuaishuai@tinylab.org>
Date: Tue, 12 Dec 2023 18:20:51 +0800
Subject: [PATCH 11/14] RISCV64: Fix 'bt' output when no ra on the stack top
Same as the Linux commit f766f77a74f5 ("riscv/stacktrace: Fix
stack output without ra on the stack top").
When a function doesn't have a callee, then it will not
push ra into the stack, such as lkdtm functions, so
correct the FP of the second frame and use pt_regs to get
the right PC of the second frame.
Before this patch, the `bt -f` outputs only the first frame with
the wrong PC and FP of next frame:
```
crash> bt -f
PID: 1 TASK: ff600000000e0000 CPU: 1 COMMAND: "sh"
#0 [ff20000000013cf0] lkdtm_EXCEPTION at ffffffff805303c0
[PC: ffffffff805303c0 RA: ff20000000013d10 SP: ff20000000013cf0 SIZE: 16] <- wrong next PC
ff20000000013cf0: 0000000000000001 ff20000000013d10 <- next FP
ff20000000013d00: ff20000000013d40
crash>
```
After this patch, the `bt` outputs the full frames:
```
crash> bt
PID: 1 TASK: ff600000000e0000 CPU: 1 COMMAND: "sh"
#0 [ff20000000013cf0] lkdtm_EXCEPTION at ffffffff805303c0
#1 [ff20000000013d00] lkdtm_do_action at ffffffff8052fe36
#2 [ff20000000013d10] direct_entry at ffffffff80530018
#3 [ff20000000013d40] full_proxy_write at ffffffff80305044
#4 [ff20000000013d80] vfs_write at ffffffff801b68b4
#5 [ff20000000013e30] ksys_write at ffffffff801b6c4a
#6 [ff20000000013e80] __riscv_sys_write at ffffffff801b6cc4
#7 [ff20000000013e90] do_trap_ecall_u at ffffffff80836798
crash>
```
Acked-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Song Shuai <songshuaishuai@tinylab.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/riscv64.c b/riscv64.c
index 0aaa14b2671e..872be594d72b 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -747,11 +747,14 @@ riscv64_back_trace_cmd(struct bt_info *bt)
{
struct riscv64_unwind_frame current, previous;
struct stackframe curr_frame;
+ struct riscv64_register * regs;
int level = 0;
if (bt->flags & BT_REGS_NOT_FOUND)
return;
+ regs = (struct riscv64_register *) bt->machdep;
+
current.pc = bt->instptr;
current.sp = bt->stkptr;
current.fp = bt->frameptr;
@@ -788,8 +791,16 @@ riscv64_back_trace_cmd(struct bt_info *bt)
sizeof(curr_frame), "get stack frame", RETURN_ON_ERROR))
return;
- previous.pc = curr_frame.ra;
- previous.fp = curr_frame.fp;
+ /* correct PC and FP of the second frame when the first frame has no callee */
+
+ if (regs && (regs->regs[RISCV64_REGS_EPC] == current.pc) && curr_frame.fp & 0x7){
+ previous.pc = regs->regs[RISCV64_REGS_RA];
+ previous.fp = curr_frame.ra;
+ } else {
+ previous.pc = curr_frame.ra;
+ previous.fp = curr_frame.fp;
+ }
+
previous.sp = current.fp;
riscv64_dump_backtrace_entry(bt, symbol, &current, &previous, level++);
--
2.41.0

View File

@ -1,48 +0,0 @@
From aff3d6e19c9a9ffe4e5d55850aa42a4dc9cf0485 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 10 Jun 2022 11:49:47 +0900
Subject: [PATCH 11/89] sbitmapq: Fix for sbitmap_queue without ws_active
member
The sbitmap_queue.ws_active member was added by kernel commit 5d2ee7122c73
("sbitmap: optimize wakeup check") at Linux 5.0. Without the patch, on
earlier kernels the "sbitmapq" command fails with the following error:
crash> sbitmapq ffff8f1a3611cf10
sbitmapq: invalid structure member offset: sbitmap_queue_ws_active
FILE: sbitmap.c LINE: 393 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index e8ebd62fe01c..152c28e6875f 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -325,7 +325,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
fprintf(fp, "wake_batch = %u\n", sqc->wake_batch);
fprintf(fp, "wake_index = %d\n", sqc->wake_index);
- fprintf(fp, "ws_active = %d\n", sqc->ws_active);
+ if (VALID_MEMBER(sbitmap_queue_ws_active)) /* 5.0 and later */
+ fprintf(fp, "ws_active = %d\n", sqc->ws_active);
sbq_wait_state_size = SIZE(sbq_wait_state);
wait_cnt_off = OFFSET(sbq_wait_state_wait_cnt);
@@ -380,7 +381,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context
sqc->wake_batch = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_batch));
sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index));
sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws));
- sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active));
+ if (VALID_MEMBER(sbitmap_queue_ws_active))
+ sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active));
if (VALID_MEMBER(sbitmap_queue_round_robin))
sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin));
sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth));
--
2.37.1

View File

@ -0,0 +1,265 @@
From 19d3c56c9fca9dea49dced0414becc6d1b12e9fc Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie@os.amperecomputing.com>
Date: Thu, 14 Dec 2023 15:15:20 +0800
Subject: [PATCH 12/14] arm64: rewrite the arm64_get_vmcoreinfo_ul to
arm64_get_vmcoreinfo
Rewrite the arm64_get_vmcoreinfo_ul to arm64_get_vmcoreinfo,
add a new parameter "base" for it.
Also use it to simplify the arm64 code.
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 99 +++++++++++++++++++++++----------------------------------
1 file changed, 39 insertions(+), 60 deletions(-)
diff --git a/arm64.c b/arm64.c
index 2b6b0e588d4e..57965c6cb3c8 100644
--- a/arm64.c
+++ b/arm64.c
@@ -92,6 +92,7 @@ static void arm64_get_crash_notes(void);
static void arm64_calc_VA_BITS(void);
static int arm64_is_uvaddr(ulong, struct task_context *);
static void arm64_calc_KERNELPACMASK(void);
+static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base);
struct kernel_range {
unsigned long modules_vaddr, modules_end;
@@ -124,7 +125,6 @@ void
arm64_init(int when)
{
ulong value;
- char *string;
struct machine_specific *ms;
#if defined(__x86_64__)
@@ -160,11 +160,8 @@ arm64_init(int when)
if (!ms->kimage_voffset && STREQ(pc->live_memsrc, "/dev/crash"))
ioctl(pc->mfd, DEV_CRASH_ARCH_DATA, &ms->kimage_voffset);
- if (!ms->kimage_voffset &&
- (string = pc->read_vmcoreinfo("NUMBER(kimage_voffset)"))) {
- ms->kimage_voffset = htol(string, QUIET, NULL);
- free(string);
- }
+ if (!ms->kimage_voffset)
+ arm64_get_vmcoreinfo(&ms->kimage_voffset, "NUMBER(kimage_voffset)", NUM_HEX);
if (ms->kimage_voffset ||
(ACTIVE() && (symbol_value_from_proc_kallsyms("kimage_voffset") != BADVAL))) {
@@ -185,11 +182,8 @@ arm64_init(int when)
if (kernel_symbol_exists("kimage_voffset"))
machdep->flags |= NEW_VMEMMAP;
- if (!machdep->pagesize &&
- (string = pc->read_vmcoreinfo("PAGESIZE"))) {
- machdep->pagesize = atoi(string);
- free(string);
- }
+ if (!machdep->pagesize && arm64_get_vmcoreinfo(&value, "PAGESIZE", NUM_DEC))
+ machdep->pagesize = (unsigned int)value;
if (!machdep->pagesize) {
/*
@@ -443,9 +437,8 @@ arm64_init(int when)
arm64_get_section_size_bits();
if (!machdep->max_physmem_bits) {
- if ((string = pc->read_vmcoreinfo("NUMBER(MAX_PHYSMEM_BITS)"))) {
- machdep->max_physmem_bits = atol(string);
- free(string);
+ if (arm64_get_vmcoreinfo(&machdep->max_physmem_bits, "NUMBER(MAX_PHYSMEM_BITS)", NUM_DEC)) {
+ /* nothing */
} else if (machdep->machspec->VA_BITS == 52) /* guess */
machdep->max_physmem_bits = _MAX_PHYSMEM_BITS_52;
else if (THIS_KERNEL_VERSION >= LINUX(3,17,0))
@@ -573,16 +566,28 @@ static int arm64_get_struct_page_max_shift(struct machine_specific *ms)
}
/* Return TRUE if we succeed, return FALSE on failure. */
-static int arm64_get_vmcoreinfo_ul(unsigned long *vaddr, const char* label)
+static int
+arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base)
{
+ int err = 0;
char *string = pc->read_vmcoreinfo(label);
if (!string)
return FALSE;
- *vaddr = strtoul(string, NULL, 0);
+ switch (base) {
+ case NUM_HEX:
+ *vaddr = strtoul(string, NULL, 16);
+ break;
+ case NUM_DEC:
+ *vaddr = strtoul(string, NULL, 10);
+ break;
+ default:
+ err++;
+ error(INFO, "Unknown type:%#x, (NUM_HEX|NUM_DEC)\n", base);
+ }
free(string);
- return TRUE;
+ return err ? FALSE: TRUE;
}
/*
@@ -594,21 +599,21 @@ static struct kernel_range *arm64_get_range_v5_18(struct machine_specific *ms)
struct kernel_range *r = &tmp_range;
/* Get the MODULES_VADDR ~ MODULES_END */
- if (!arm64_get_vmcoreinfo_ul(&r->modules_vaddr, "NUMBER(MODULES_VADDR)"))
+ if (!arm64_get_vmcoreinfo(&r->modules_vaddr, "NUMBER(MODULES_VADDR)", NUM_HEX))
return NULL;
- if (!arm64_get_vmcoreinfo_ul(&r->modules_end, "NUMBER(MODULES_END)"))
+ if (!arm64_get_vmcoreinfo(&r->modules_end, "NUMBER(MODULES_END)", NUM_HEX))
return NULL;
/* Get the VMEMMAP_START ~ VMEMMAP_END */
- if (!arm64_get_vmcoreinfo_ul(&r->vmemmap_vaddr, "NUMBER(VMEMMAP_START)"))
+ if (!arm64_get_vmcoreinfo(&r->vmemmap_vaddr, "NUMBER(VMEMMAP_START)", NUM_HEX))
return NULL;
- if (!arm64_get_vmcoreinfo_ul(&r->vmemmap_end, "NUMBER(VMEMMAP_END)"))
+ if (!arm64_get_vmcoreinfo(&r->vmemmap_end, "NUMBER(VMEMMAP_END)", NUM_HEX))
return NULL;
/* Get the VMALLOC_START ~ VMALLOC_END */
- if (!arm64_get_vmcoreinfo_ul(&r->vmalloc_start_addr, "NUMBER(VMALLOC_START)"))
+ if (!arm64_get_vmcoreinfo(&r->vmalloc_start_addr, "NUMBER(VMALLOC_START)", NUM_HEX))
return NULL;
- if (!arm64_get_vmcoreinfo_ul(&r->vmalloc_end, "NUMBER(VMALLOC_END)"))
+ if (!arm64_get_vmcoreinfo(&r->vmalloc_end, "NUMBER(VMALLOC_END)", NUM_HEX))
return NULL;
return r;
@@ -888,12 +893,7 @@ range_failed:
/* Get the size of struct page {} */
static void arm64_get_struct_page_size(struct machine_specific *ms)
{
- char *string;
-
- string = pc->read_vmcoreinfo("SIZE(page)");
- if (string)
- ms->struct_page_size = atol(string);
- free(string);
+ arm64_get_vmcoreinfo(&ms->struct_page_size, "SIZE(page)", NUM_DEC);
}
/*
@@ -1469,16 +1469,12 @@ arm64_calc_phys_offset(void)
physaddr_t paddr;
ulong vaddr;
struct syment *sp;
- char *string;
if ((machdep->flags & NEW_VMEMMAP) &&
ms->kimage_voffset && (sp = kernel_symbol_search("memstart_addr"))) {
if (pc->flags & PROC_KCORE) {
- if ((string = pc->read_vmcoreinfo("NUMBER(PHYS_OFFSET)"))) {
- ms->phys_offset = htol(string, QUIET, NULL);
- free(string);
+ if (arm64_get_vmcoreinfo(&ms->phys_offset, "NUMBER(PHYS_OFFSET)", NUM_HEX))
return;
- }
vaddr = symbol_value_from_proc_kallsyms("memstart_addr");
if (vaddr == BADVAL)
vaddr = sp->value;
@@ -1560,9 +1556,8 @@ arm64_get_section_size_bits(void)
} else
machdep->section_size_bits = _SECTION_SIZE_BITS;
- if ((string = pc->read_vmcoreinfo("NUMBER(SECTION_SIZE_BITS)"))) {
- machdep->section_size_bits = atol(string);
- free(string);
+ if (arm64_get_vmcoreinfo(&machdep->section_size_bits, "NUMBER(SECTION_SIZE_BITS)", NUM_DEC)) {
+ /* nothing */
} else if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
if ((ret = get_kernel_config("CONFIG_MEMORY_HOTPLUG", NULL)) == IKCONFIG_Y) {
if ((ret = get_kernel_config("CONFIG_HOTPLUG_SIZE_BITS", &string)) == IKCONFIG_STR)
@@ -1581,15 +1576,11 @@ arm64_get_section_size_bits(void)
static int
arm64_kdump_phys_base(ulong *phys_offset)
{
- char *string;
struct syment *sp;
physaddr_t paddr;
- if ((string = pc->read_vmcoreinfo("NUMBER(PHYS_OFFSET)"))) {
- *phys_offset = htol(string, QUIET, NULL);
- free(string);
+ if (arm64_get_vmcoreinfo(phys_offset, "NUMBER(PHYS_OFFSET)", NUM_HEX))
return TRUE;
- }
if ((machdep->flags & NEW_VMEMMAP) &&
machdep->machspec->kimage_voffset &&
@@ -4592,10 +4583,9 @@ static int
arm64_set_va_bits_by_tcr(void)
{
ulong value;
- char *string;
- if ((string = pc->read_vmcoreinfo("NUMBER(TCR_EL1_T1SZ)")) ||
- (string = pc->read_vmcoreinfo("NUMBER(tcr_el1_t1sz)"))) {
+ if (arm64_get_vmcoreinfo(&value, "NUMBER(TCR_EL1_T1SZ)", NUM_HEX) ||
+ arm64_get_vmcoreinfo(&value, "NUMBER(tcr_el1_t1sz)", NUM_HEX)) {
/* See ARMv8 ARM for the description of
* TCR_EL1.T1SZ and how it can be used
* to calculate the vabits_actual
@@ -4604,10 +4594,9 @@ arm64_set_va_bits_by_tcr(void)
* Basically:
* vabits_actual = 64 - T1SZ;
*/
- value = 64 - strtoll(string, NULL, 0);
+ value = 64 - value;
if (CRASHDEBUG(1))
fprintf(fp, "vmcoreinfo : vabits_actual: %ld\n", value);
- free(string);
machdep->machspec->VA_BITS_ACTUAL = value;
machdep->machspec->VA_BITS = value;
machdep->machspec->VA_START = _VA_START(machdep->machspec->VA_BITS_ACTUAL);
@@ -4623,13 +4612,8 @@ arm64_calc_VA_BITS(void)
int bitval;
struct syment *sp;
ulong vabits_actual, value;
- char *string;
- if ((string = pc->read_vmcoreinfo("NUMBER(VA_BITS)"))) {
- value = atol(string);
- free(string);
- machdep->machspec->CONFIG_ARM64_VA_BITS = value;
- }
+ arm64_get_vmcoreinfo(&machdep->machspec->CONFIG_ARM64_VA_BITS, "NUMBER(VA_BITS)", NUM_DEC);
if (kernel_symbol_exists("vabits_actual")) {
if (pc->flags & PROC_KCORE) {
@@ -4754,9 +4738,7 @@ arm64_calc_virtual_memory_ranges(void)
ulong PUD_SIZE = UNINITIALIZED;
if (!machdep->machspec->CONFIG_ARM64_VA_BITS) {
- if ((string = pc->read_vmcoreinfo("NUMBER(VA_BITS)"))) {
- value = atol(string);
- free(string);
+ if (arm64_get_vmcoreinfo(&value, "NUMBER(VA_BITS)", NUM_DEC)) {
machdep->machspec->CONFIG_ARM64_VA_BITS = value;
} else if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
if ((ret = get_kernel_config("CONFIG_ARM64_VA_BITS",
@@ -4852,11 +4834,8 @@ arm64_swp_offset(ulong pte)
static void arm64_calc_KERNELPACMASK(void)
{
ulong value;
- char *string;
- if ((string = pc->read_vmcoreinfo("NUMBER(KERNELPACMASK)"))) {
- value = htol(string, QUIET, NULL);
- free(string);
+ if (arm64_get_vmcoreinfo(&value, "NUMBER(KERNELPACMASK)", NUM_HEX)) {
machdep->machspec->CONFIG_ARM64_KERNELPACMASK = value;
if (CRASHDEBUG(1))
fprintf(fp, "CONFIG_ARM64_KERNELPACMASK: %lx\n", value);
--
2.41.0

View File

@ -1,110 +0,0 @@
From 040e8be3b13746d1f64a36040b4ca613cf18eff0 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 10 Jun 2022 11:49:47 +0900
Subject: [PATCH 12/89] sbitmapq: Fix for sbitmap_word without cleared member
The sbitmap_word.cleared member was added by kernel commit ea86ea2cdced
("sbitmap: ammortize cost of clearing bits") at Linux 5.0. Without the
patch, on earlier kernels the "sbitmapq" command fails with the
following error:
crash> sbitmapq ffff8f1a3611cf10
sbitmapq: invalid structure member offset: sbitmap_word_cleared
FILE: sbitmap.c LINE: 92 FUNCTION: __sbitmap_weight()
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index 152c28e6875f..c9f7209f9e3e 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -89,7 +89,6 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
const ulong w_word_off = OFFSET(sbitmap_word_word);
- const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
unsigned int weight = 0;
ulong addr = sc->map_addr;
@@ -111,7 +110,10 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
word = ULONG(sbitmap_word_buf + w_word_off);
weight += bitmap_weight(word, depth);
} else {
- cleared = ULONG(sbitmap_word_buf + w_cleared_off);
+ if (VALID_MEMBER(sbitmap_word_cleared))
+ cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+ else
+ cleared = 0;
weight += bitmap_weight(cleared, depth);
}
@@ -130,7 +132,10 @@ static unsigned int sbitmap_weight(const struct sbitmap_context *sc)
static unsigned int sbitmap_cleared(const struct sbitmap_context *sc)
{
- return __sbitmap_weight(sc, false);
+ if (VALID_MEMBER(sbitmap_word_cleared)) /* 5.0 and later */
+ return __sbitmap_weight(sc, false);
+
+ return 0;
}
static void sbitmap_emit_byte(unsigned int offset, uint8_t byte)
@@ -149,7 +154,6 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
const ulong w_word_off = OFFSET(sbitmap_word_word);
- const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
uint8_t byte = 0;
unsigned int byte_bits = 0;
@@ -169,7 +173,10 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
}
word = ULONG(sbitmap_word_buf + w_word_off);
- cleared = ULONG(sbitmap_word_buf + w_cleared_off);
+ if (VALID_MEMBER(sbitmap_word_cleared))
+ cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+ else
+ cleared = 0;
word_bits = __map_depth(sc, i);
word &= ~cleared;
@@ -219,7 +226,6 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
const ulong w_word_off = OFFSET(sbitmap_word_word);
- const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
unsigned int index;
unsigned int nr;
@@ -245,7 +251,10 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
}
w_word = ULONG(sbitmap_word_buf + w_word_off);
- w_cleared = ULONG(sbitmap_word_buf + w_cleared_off);
+ if (VALID_MEMBER(sbitmap_word_cleared))
+ w_cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+ else
+ w_cleared = 0;
depth = min(__map_depth(sc, index) - nr, sc->depth - scanned);
@@ -297,7 +306,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
fprintf(fp, "depth = %u\n", sc->depth);
fprintf(fp, "busy = %u\n", sbitmap_weight(sc) - sbitmap_cleared(sc));
- fprintf(fp, "cleared = %u\n", sbitmap_cleared(sc));
+ if (VALID_MEMBER(sbitmap_word_cleared)) /* 5.0 and later */
+ fprintf(fp, "cleared = %u\n", sbitmap_cleared(sc));
fprintf(fp, "bits_per_word = %u\n", 1U << sc->shift);
fprintf(fp, "map_nr = %u\n", sc->map_nr);
--
2.37.1

View File

@ -0,0 +1,36 @@
From 38435c3acec075b076353ca28f557a0dfe1341c3 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Fri, 15 Dec 2023 10:44:21 +0800
Subject: [PATCH 13/14] help.c: Remove "kmem -l" help messages
"kmem -l" option has existed when crash git project initialization, but
its help message was not accurate (extra arguments a|i|ic|id was missing).
In addition, those symbols required by the -l option were for very old
kernels, at least 2.6 kernels don't contain them. Also, this option has
not been fixed for a long time.
Instead of document this option, hide it from help messages.
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
help.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/help.c b/help.c
index cc7ab20e343e..d80e843703c1 100644
--- a/help.c
+++ b/help.c
@@ -6888,8 +6888,6 @@ char *help_kmem[] = {
" members of the associated page struct are displayed.",
" address when used with -c, the address must be a page pointer address;",
" the page_hash_table entry containing the page is displayed.",
-" address when used with -l, the address must be a page pointer address;",
-" the page address is displayed if it is contained with the list.",
" address when used with -v, the address can be a mapped kernel virtual",
" address or physical address; the mapped region containing the",
" address is displayed.\n",
--
2.41.0

View File

@ -1,49 +0,0 @@
From 8ea476439a57ee5552c752c582faf1d057ea4f6d Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 10 Jun 2022 11:49:47 +0900
Subject: [PATCH 13/89] sbitmapq: Fix for sbitmap_queue without
min_shallow_depth member
The sbitmap_queue.min_shallow_depth member was added by kernel commit
a327553965de ("sbitmap: fix missed wakeups caused by sbitmap_queue_get_shallow()")
at Linux 4.18. Without the patch, on earlier kernels the "sbitmapq"
command fails with the following error:
crash> sbitmapq ffff89bb7638ee50
sbitmapq: invalid structure member offset: sbitmap_queue_min_shallow_depth
FILE: sbitmap.c LINE: 398 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index c9f7209f9e3e..bb2f19e6207b 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -371,7 +371,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
else if (VALID_MEMBER(sbitmap_round_robin)) /* 5.13 and later */
fprintf(fp, "round_robin = %d\n", sc->round_robin);
- fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth);
+ if (VALID_MEMBER(sbitmap_queue_min_shallow_depth)) /* 4.18 and later */
+ fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth);
}
static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context *sqc)
@@ -395,7 +396,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context
sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active));
if (VALID_MEMBER(sbitmap_queue_round_robin))
sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin));
- sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth));
+ if (VALID_MEMBER(sbitmap_queue_min_shallow_depth))
+ sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth));
FREEBUF(sbitmap_queue_buf);
}
--
2.37.1

View File

@ -1,84 +0,0 @@
From 04b6edada1180b0391ddf980d09b4bac8a0c3aba Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 10 Jun 2022 15:21:53 +0900
Subject: [PATCH 14/89] Make "dev -d|-D" options parse sbitmap on Linux 4.18
and later
There have been a few reports that the "dev -d|-D" options displayed
incorrect I/O stats due to racy blk_mq_ctx.rq_* counters. To fix it,
make the options parse sbitmap to count I/O stats on Linux 4.18 and
later kernels, which include RHEL8 ones.
To do this, adjust to the blk_mq_tags structure of Linux 5.10 through
5.15 kernels, which contain kernel commit 222a5ae03cdd ("blk-mq: Use
pointers for blk_mq_tags bitmap tags") and do not contain ae0f1a732f4a
("blk-mq: Stop using pointers for blk_mq_tags bitmap tags").
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
dev.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/dev.c b/dev.c
index 0172c83ffaea..db97f8aebdc2 100644
--- a/dev.c
+++ b/dev.c
@@ -4339,6 +4339,10 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_r
static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
{
uint i;
+ int bitmap_tags_is_ptr = 0;
+
+ if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") == TYPE_CODE_PTR)
+ bitmap_tags_is_ptr = 1;
for (i = 0; i < cnt; i++) {
ulong addr = 0, tags = 0;
@@ -4357,9 +4361,17 @@ static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio
if (nr_reserved_tags) {
addr = tags + OFFSET(blk_mq_tags_breserved_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ break;
bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
}
addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
+ if (bitmap_tags_is_ptr &&
+ !readmem(addr, KVADDR, &addr, sizeof(ulong),
+ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
+ break;
bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
}
}
@@ -4423,14 +4435,23 @@ get_mq_diskio(unsigned long q, unsigned long *mq_count)
unsigned long mctx_addr;
struct diskio tmp = {0};
- if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
- INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
+ /*
+ * Currently this function does not support old blk-mq implementation
+ * before 12f5b9314545 ("blk-mq: Remove generation seqeunce"), so
+ * filter them out.
+ */
+ if (VALID_MEMBER(request_state)) {
+ if (CRASHDEBUG(1))
+ fprintf(fp, "mq: using sbitmap\n");
get_mq_diskio_from_hw_queues(q, &tmp);
mq_count[0] = tmp.read;
mq_count[1] = tmp.write;
return;
}
+ if (CRASHDEBUG(1))
+ fprintf(fp, "mq: using blk_mq_ctx.rq_{completed,dispatched} counters\n");
+
readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx,
sizeof(ulong), "request_queue.queue_ctx",
FAULT_ON_ERROR);
--
2.37.1

View File

@ -0,0 +1,65 @@
From 53d2577cef98b76b122aade94349637a11e06138 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 26 Dec 2023 09:19:28 +0800
Subject: [PATCH 14/14] x86_64: check bt->bptr before calculate framesize
Previously the value of bt->bptr is not checked, which may led to a
wrong prev_sp and framesize. As a result, bt->stackbuf[] will be
accessed out of range, and segfault.
Before:
crash> set debug 1
crash> bt
...snip...
--- <NMI exception stack> ---
#8 [ffffffff9a603e10] __switch_to_asm at ffffffff99800214
rsp: ffffffff9a603e10 textaddr: ffffffff99800214 -> spo: 0 bpo: 0 spr: 0 bpr: 0 type: 0 end: 0
#9 [ffffffff9a603e40] __schedule at ffffffff9960dfb1
rsp: ffffffff9a603e40 textaddr: ffffffff9960dfb1 -> spo: 16 bpo: -16 spr: 4 bpr: 1 type: 0 end: 0
rsp: ffffffff9a603e40 rbp: ffffb9ca076e7ca8 prev_sp: ffffb9ca076e7cb8 framesize: 1829650024
Segmentation fault (core dumped)
(gdb) p/x bt->stackbase
$1 = 0xffffffff9a600000
(gdb) p/x bt->stacktop
$2 = 0xffffffff9a604000
After:
crash> set debug 1
crash> bt
...snip...
--- <NMI exception stack> ---
#8 [ffffffff9a603e10] __switch_to_asm at ffffffff99800214
rsp: ffffffff9a603e10 textaddr: ffffffff99800214 -> spo: 0 bpo: 0 spr: 0 bpr: 0 type: 0 end: 0
#9 [ffffffff9a603e40] __schedule at ffffffff9960dfb1
rsp: ffffffff9a603e40 textaddr: ffffffff9960dfb1 -> spo: 16 bpo: -16 spr: 4 bpr: 1 type: 0 end: 0
#10 [ffffffff9a603e98] schedule_idle at ffffffff9960e87c
rsp: ffffffff9a603e98 textaddr: ffffffff9960e87c -> spo: 8 bpo: 0 spr: 5 bpr: 0 type: 0 end: 0
rsp: ffffffff9a603e98 prev_sp: ffffffff9a603ea8 framesize: 0
...snip...
Check bt->bptr value before calculate framesize. Only bt->bptr within
the range of bt->stackbase and bt->stacktop will be regarded as valid.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
x86_64.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/x86_64.c b/x86_64.c
index 42ade4817ad9..f59991f8c4c5 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -8649,7 +8649,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx prev_sp: %lx framesize: %d\n",
rsp, prev_sp, framesize);
- } else if ((korc->sp_reg == ORC_REG_BP) && bt->bptr) {
+ } else if ((korc->sp_reg == ORC_REG_BP) && bt->bptr && INSTACK(bt->bptr, bt)) {
prev_sp = bt->bptr + korc->sp_offset;
framesize = (prev_sp - (rsp + 8) - 8);
if (CRASHDEBUG(1))
--
2.41.0

View File

@ -1,44 +0,0 @@
From 60af53c7b69df11dec05d2a396e5272856e4243d Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 10 Jun 2022 15:52:34 +0900
Subject: [PATCH 15/89] sbitmapq: Fix for kernels without struct
wait_queue_head
The current struct wait_queue_head was renamed by kernel commit
9d9d676f595b ("sched/wait: Standardize internal naming of wait-queue heads")
at Linux 4.13. Without the patch, on earlier kernels the "sbitmapq"
command fails with the following error:
crash> sbitmapq ffff8801790b3b50
depth = 128
busy = 0
bits_per_word = 32
...
sbitmapq: invalid structure member offset: wait_queue_head_head
FILE: sbitmap.c LINE: 344 FUNCTION: sbitmap_queue_show()
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/sbitmap.c b/sbitmap.c
index bb2f19e6207b..be5d30a8ea88 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -341,7 +341,10 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
sbq_wait_state_size = SIZE(sbq_wait_state);
wait_cnt_off = OFFSET(sbq_wait_state_wait_cnt);
wait_off = OFFSET(sbq_wait_state_wait);
- list_head_off = OFFSET(wait_queue_head_head);
+ if (VALID_MEMBER(wait_queue_head_head)) /* 4.13 and later */
+ list_head_off = OFFSET(wait_queue_head_head);
+ else
+ list_head_off = OFFSET(__wait_queue_head_task_list);
sbq_wait_state_buf = GETBUF(sbq_wait_state_size);
--
2.37.1

View File

@ -1,43 +0,0 @@
From 46db4c6a66067d96248be664a09fdd93575cb0f2 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 10 Jun 2022 16:00:14 +0900
Subject: [PATCH 16/89] sbitmapq: Limit kernels without sbitmap again
commit 364b2e413c69 ("sbitmapq: remove struct and member validation
in sbitmapq_init()") allowed the use of the "sbitmapq" command
unconditionally. Without the patch, the command fails with the
following error on kernels without sbitmap:
crash> sbitmapq ffff88015796e550
sbitmapq: invalid structure member offset: sbitmap_queue_sb
FILE: sbitmap.c LINE: 385 FUNCTION: sbitmap_queue_context_load()
Now the command supports Linux 4.9 and later kernels since it was
abstracted out, so it can be limited by the non-existence of the
sbitmap structure.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/sbitmap.c b/sbitmap.c
index be5d30a8ea88..12d6512a1e4d 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -540,6 +540,10 @@ void sbitmapq_init(void)
STRUCT_SIZE_INIT(sbitmap_queue, "sbitmap_queue");
STRUCT_SIZE_INIT(sbq_wait_state, "sbq_wait_state");
+ /* sbitmap was abstracted out by commit 88459642cba4 on Linux 4.9. */
+ if (INVALID_SIZE(sbitmap))
+ command_not_supported();
+
MEMBER_OFFSET_INIT(sbitmap_word_depth, "sbitmap_word", "depth");
MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word");
MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared");
--
2.37.1

View File

@ -1,146 +0,0 @@
From b6b1f17766583873cde5335e6e631fe3887cb564 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 15 Jun 2022 10:50:13 +0900
Subject: [PATCH 17/89] Fix for "dev" command on Linux 5.11 and later
The following kernel commits eventually removed the bdev_map array in
Linux v5.11 kernel:
e418de3abcda ("block: switch gendisk lookup to a simple xarray")
22ae8ce8b892 ("block: simplify bdev/disk lookup in blkdev_get")
Without the patch, the "dev" command fails to dump block device data
with the following error:
crash> dev
...
dev: blkdevs or all_bdevs: symbols do not exist
To get block device's gendisk, search blockdev_superblock.s_inodes
instead of bdev_map.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
dev.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 72 insertions(+), 5 deletions(-)
diff --git a/dev.c b/dev.c
index db97f8aebdc2..75d30bd022a1 100644
--- a/dev.c
+++ b/dev.c
@@ -24,6 +24,7 @@ static void dump_blkdevs_v2(ulong);
static void dump_blkdevs_v3(ulong);
static ulong search_cdev_map_probes(char *, int, int, ulong *);
static ulong search_bdev_map_probes(char *, int, int, ulong *);
+static ulong search_blockdev_inodes(int, ulong *);
static void do_pci(void);
static void do_pci2(void);
static void do_io(void);
@@ -493,9 +494,10 @@ dump_blkdevs(ulong flags)
ulong ops;
} blkdevs[MAX_DEV], *bp;
- if (kernel_symbol_exists("major_names") &&
- kernel_symbol_exists("bdev_map")) {
- dump_blkdevs_v3(flags);
+ if (kernel_symbol_exists("major_names") &&
+ (kernel_symbol_exists("bdev_map") ||
+ kernel_symbol_exists("blockdev_superblock"))) {
+ dump_blkdevs_v3(flags);
return;
}
@@ -717,6 +719,7 @@ dump_blkdevs_v3(ulong flags)
char buf[BUFSIZE];
uint major;
ulong gendisk, addr, fops;
+ int use_bdev_map = kernel_symbol_exists("bdev_map");
if (!(len = get_array_length("major_names", NULL, 0)))
len = MAX_DEV;
@@ -745,8 +748,11 @@ dump_blkdevs_v3(ulong flags)
strncpy(buf, blk_major_name_buf +
OFFSET(blk_major_name_name), 16);
- fops = search_bdev_map_probes(buf, major == i ? major : i,
- UNUSED, &gendisk);
+ if (use_bdev_map)
+ fops = search_bdev_map_probes(buf, major == i ? major : i,
+ UNUSED, &gendisk);
+ else /* v5.11 and later */
+ fops = search_blockdev_inodes(major, &gendisk);
if (CRASHDEBUG(1))
fprintf(fp, "blk_major_name: %lx block major: %d name: %s gendisk: %lx fops: %lx\n",
@@ -829,6 +835,67 @@ search_bdev_map_probes(char *name, int major, int minor, ulong *gendisk)
return fops;
}
+/* For bdev_inode. See block/bdev.c */
+#define I_BDEV(inode) (inode - SIZE(block_device))
+
+static ulong
+search_blockdev_inodes(int major, ulong *gendisk)
+{
+ struct list_data list_data, *ld;
+ ulong addr, bd_sb, disk, fops = 0;
+ int i, inode_count, gendisk_major;
+ char *gendisk_buf;
+
+ ld = &list_data;
+ BZERO(ld, sizeof(struct list_data));
+
+ get_symbol_data("blockdev_superblock", sizeof(void *), &bd_sb);
+
+ addr = bd_sb + OFFSET(super_block_s_inodes);
+ if (!readmem(addr, KVADDR, &ld->start, sizeof(ulong),
+ "blockdev_superblock.s_inodes", QUIET|RETURN_ON_ERROR))
+ return 0;
+
+ if (empty_list(ld->start))
+ return 0;
+
+ ld->flags |= LIST_ALLOCATE;
+ ld->end = bd_sb + OFFSET(super_block_s_inodes);
+ ld->list_head_offset = OFFSET(inode_i_sb_list);
+
+ inode_count = do_list(ld);
+
+ gendisk_buf = GETBUF(SIZE(gendisk));
+
+ for (i = 0; i < inode_count; i++) {
+ addr = I_BDEV(ld->list_ptr[i]) + OFFSET(block_device_bd_disk);
+ if (!readmem(addr, KVADDR, &disk, sizeof(ulong),
+ "block_device.bd_disk", QUIET|RETURN_ON_ERROR))
+ continue;
+
+ if (!disk)
+ continue;
+
+ if (!readmem(disk, KVADDR, gendisk_buf, SIZE(gendisk),
+ "gendisk buffer", QUIET|RETURN_ON_ERROR))
+ continue;
+
+ gendisk_major = INT(gendisk_buf + OFFSET(gendisk_major));
+ if (gendisk_major != major)
+ continue;
+
+ fops = ULONG(gendisk_buf + OFFSET(gendisk_fops));
+ if (fops) {
+ *gendisk = disk;
+ break;
+ }
+ }
+
+ FREEBUF(ld->list_ptr);
+ FREEBUF(gendisk_buf);
+ return fops;
+}
+
void
dump_dev_table(void)
{
--
2.37.1

View File

@ -1,84 +0,0 @@
From 21c2bf4844893fe9499fda156d043b0ff6c5f3af Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 22 Jun 2022 08:32:59 +0900
Subject: [PATCH 18/89] Extend field length of task attributes
Nowadays, some machines have many CPU cores and memory, and some
distributions have a larger kernel.pid_max parameter, e.g. 7 digits.
This impairs the readability of a few commands, especially "ps" and
"ps -l|-m" options.
Let's extend the field length of the task attributes, PID, CPU, VSZ,
and RSS to improve the readability.
Without the patch:
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
...
2802197 2699997 2 ffff916f63c40000 IN 0.0 307212 10688 timer
2802277 1 0 ffff9161a25bb080 IN 0.0 169040 2744 gpg-agent
2806711 3167854 10 ffff9167fc498000 IN 0.0 127208 6508 su
2806719 2806711 1 ffff91633c3a48c0 IN 0.0 29452 6416 bash
2988346 1 5 ffff916f7c629840 IN 2.8 9342476 1917384 qemu-kvm
With the patch:
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
...
2802197 2699997 2 ffff916f63c40000 IN 0.0 307212 10688 timer
2802277 1 0 ffff9161a25bb080 IN 0.0 169040 2744 gpg-agent
2806711 3167854 10 ffff9167fc498000 IN 0.0 127208 6508 su
2806719 2806711 1 ffff91633c3a48c0 IN 0.0 29452 6416 bash
2988346 1 5 ffff916f7c629840 IN 2.8 9342476 1917384 qemu-kvm
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
task.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/task.c b/task.c
index 864c838637ee..071c787fbfa5 100644
--- a/task.c
+++ b/task.c
@@ -3828,7 +3828,7 @@ show_ps_data(ulong flag, struct task_context *tc, struct psinfo *psi)
} else
fprintf(fp, " ");
- fprintf(fp, "%5ld %5ld %2s %s %3s",
+ fprintf(fp, "%7ld %7ld %3s %s %3s",
tc->pid, task_to_pid(tc->ptask),
task_cpu(tc->processor, buf2, !VERBOSE),
task_pointer_string(tc, flag & PS_KSTACKP, buf3),
@@ -3838,8 +3838,8 @@ show_ps_data(ulong flag, struct task_context *tc, struct psinfo *psi)
if (strlen(buf1) == 3)
mkstring(buf1, 4, CENTER|RJUST, NULL);
fprintf(fp, "%s ", buf1);
- fprintf(fp, "%7ld ", (tm->total_vm * PAGESIZE())/1024);
- fprintf(fp, "%6ld ", (tm->rss * PAGESIZE())/1024);
+ fprintf(fp, "%8ld ", (tm->total_vm * PAGESIZE())/1024);
+ fprintf(fp, "%8ld ", (tm->rss * PAGESIZE())/1024);
if (is_kernel_thread(tc->task))
fprintf(fp, "[%s]\n", tc->comm);
else
@@ -3856,7 +3856,7 @@ show_ps(ulong flag, struct psinfo *psi)
if (!(flag & ((PS_EXCLUSIVE & ~PS_ACTIVE)|PS_NO_HEADER)))
fprintf(fp,
- " PID PPID CPU %s ST %%MEM VSZ RSS COMM\n",
+ " PID PPID CPU %s ST %%MEM VSZ RSS COMM\n",
flag & PS_KSTACKP ?
mkstring(buf, VADDR_PRLEN, CENTER|RJUST, "KSTACKP") :
mkstring(buf, VADDR_PRLEN, CENTER, "TASK"));
@@ -7713,7 +7713,7 @@ print_task_header(FILE *out, struct task_context *tc, int newline)
char buf[BUFSIZE];
char buf1[BUFSIZE];
- fprintf(out, "%sPID: %-5ld TASK: %s CPU: %-2s COMMAND: \"%s\"\n",
+ fprintf(out, "%sPID: %-7ld TASK: %s CPU: %-3s COMMAND: \"%s\"\n",
newline ? "\n" : "", tc->pid,
mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, MKSTR(tc->task)),
task_cpu(tc->processor, buf, !VERBOSE), tc->comm);
--
2.37.1

View File

@ -1,45 +0,0 @@
From 00f8699daacba46fd706758024c375825264db32 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:41 +0530
Subject: [PATCH 19/89] ppc64: fix bt for '-S' case
Passing '-S' option to 'bt' command was intended to specify the stack
pointer manually. But get_stack_frame() handling on ppc64 is ignoring
this option altogether. Fix it.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/ppc64.c b/ppc64.c
index 975caa53b812..0e1d8678eef5 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -2330,6 +2330,22 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
pt_regs = (struct ppc64_pt_regs *)bt_in->machdep;
if (!pt_regs || !pt_regs->gpr[1]) {
+ if (bt_in->hp) {
+ if (bt_in->hp->esp) {
+ *ksp = bt_in->hp->esp;
+ if (!bt_in->hp->eip) {
+ if (IS_KVADDR(*ksp)) {
+ readmem(*ksp+16, KVADDR, &unip, sizeof(ulong),
+ "Regs NIP value", FAULT_ON_ERROR);
+ *nip = unip;
+ }
+ } else
+ *nip = bt_in->hp->eip;
+
+ }
+ return TRUE;
+ }
+
/*
* Not collected regs. May be the corresponding CPU not
* responded to an IPI in case of KDump OR f/w has not
--
2.37.1

View File

@ -1,147 +0,0 @@
From 32282bab0edf1aebd96bb8fe5ee66b3855474831 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:42 +0530
Subject: [PATCH 20/89] ppc64: dynamically allocate h/w interrupt stack
Only older kernel (v2.4) used h/w interrupt stack to store frames when
CPU received IPI. Memory used for this in 'struct machine_specific' is
useless for later kernels. For the sake of backward compatibility keep
h/w interrupt stack but dynamically allocate memory for it and save
some bytes from being wasted.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
ppc64.c | 51 +++++++++++++++++++++------------------------------
2 files changed, 22 insertions(+), 31 deletions(-)
diff --git a/defs.h b/defs.h
index c524a05d8105..d8fbeb89e335 100644
--- a/defs.h
+++ b/defs.h
@@ -6311,7 +6311,7 @@ struct ppc64_vmemmap {
* Used to store the HW interrupt stack. It is only for 2.4.
*/
struct machine_specific {
- ulong hwintrstack[NR_CPUS];
+ ulong *hwintrstack;
char *hwstackbuf;
uint hwstacksize;
diff --git a/ppc64.c b/ppc64.c
index 0e1d8678eef5..272eb207074a 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -256,7 +256,7 @@ static int set_ppc64_max_physmem_bits(void)
}
struct machine_specific ppc64_machine_specific = {
- .hwintrstack = { 0 },
+ .hwintrstack = NULL,
.hwstackbuf = 0,
.hwstacksize = 0,
.pte_rpn_shift = PTE_RPN_SHIFT_DEFAULT,
@@ -275,7 +275,7 @@ struct machine_specific ppc64_machine_specific = {
};
struct machine_specific book3e_machine_specific = {
- .hwintrstack = { 0 },
+ .hwintrstack = NULL,
.hwstackbuf = 0,
.hwstacksize = 0,
.pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_64K,
@@ -676,6 +676,9 @@ ppc64_init(int when)
*/
offset = MEMBER_OFFSET("paca_struct", "xHrdIntStack");
paca_sym = symbol_value("paca");
+ if (!(machdep->machspec->hwintrstack =
+ (ulong *)calloc(NR_CPUS, sizeof(ulong))))
+ error(FATAL, "cannot malloc hwintrstack space.");
for (cpu = 0; cpu < kt->cpus; cpu++) {
readmem(paca_sym + (paca_size * cpu) + offset,
KVADDR,
@@ -686,14 +689,9 @@ ppc64_init(int when)
machdep->machspec->hwstacksize = 8 * machdep->pagesize;
if ((machdep->machspec->hwstackbuf = (char *)
malloc(machdep->machspec->hwstacksize)) == NULL)
- error(FATAL, "cannot malloc hwirqstack space.");
- } else
- /*
- * 'xHrdIntStack' member in "paca_struct" is not
- * available for 2.6 kernel.
- */
- BZERO(&machdep->machspec->hwintrstack,
- NR_CPUS*sizeof(ulong));
+ error(FATAL, "cannot malloc hwirqstack buffer space.");
+ }
+
if (!machdep->hz) {
machdep->hz = HZ;
if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
@@ -846,23 +844,15 @@ ppc64_dump_machdep_table(ulong arg)
fprintf(fp, " is_vmaddr: %s\n",
machdep->machspec->is_vmaddr == book3e_is_vmaddr ?
"book3e_is_vmaddr()" : "ppc64_is_vmaddr()");
- fprintf(fp, " hwintrstack[%d]: ", NR_CPUS);
- for (c = 0; c < NR_CPUS; c++) {
- for (others = 0, i = c; i < NR_CPUS; i++) {
- if (machdep->machspec->hwintrstack[i])
- others++;
+ if (machdep->machspec->hwintrstack) {
+ fprintf(fp, " hwintrstack[%d]: ", NR_CPUS);
+ for (c = 0; c < NR_CPUS; c++) {
+ fprintf(fp, "%s%016lx ",
+ ((c % 4) == 0) ? "\n " : "",
+ machdep->machspec->hwintrstack[c]);
}
- if (!others) {
- fprintf(fp, "%s%s",
- c && ((c % 4) == 0) ? "\n " : "",
- c ? "(remainder unused)" : "(unused)");
- break;
- }
-
- fprintf(fp, "%s%016lx ",
- ((c % 4) == 0) ? "\n " : "",
- machdep->machspec->hwintrstack[c]);
- }
+ } else
+ fprintf(fp, " hwintrstack: (unused)");
fprintf(fp, "\n");
fprintf(fp, " hwstackbuf: %lx\n", (ulong)machdep->machspec->hwstackbuf);
fprintf(fp, " hwstacksize: %d\n", machdep->machspec->hwstacksize);
@@ -1683,9 +1673,10 @@ ppc64_check_sp_in_HWintrstack(ulong sp, struct bt_info *bt)
*
* Note: HW Interrupt stack is used only in 2.4 kernel.
*/
- if (is_task_active(bt->task) && (tt->panic_task != bt->task) &&
- machdep->machspec->hwintrstack[bt->tc->processor]) {
+ if (machdep->machspec->hwintrstack && is_task_active(bt->task) &&
+ (bt->task != tt->panic_task)) {
ulong newsp;
+
readmem(machdep->machspec->hwintrstack[bt->tc->processor],
KVADDR, &newsp, sizeof(ulong),
"stack pointer", FAULT_ON_ERROR);
@@ -1958,7 +1949,7 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
bt->stackbase = irqstack;
bt->stacktop = bt->stackbase + STACKSIZE();
alter_stackbuf(bt);
- } else if (ms->hwintrstack[bt->tc->processor]) {
+ } else if (ms->hwintrstack) {
bt->stacktop = ms->hwintrstack[bt->tc->processor] +
sizeof(ulong);
bt->stackbase = ms->hwintrstack[bt->tc->processor] -
@@ -2555,7 +2546,7 @@ retry:
goto retry;
}
- if (check_intrstack && ms->hwintrstack[bt->tc->processor]) {
+ if (check_intrstack && ms->hwintrstack) {
bt->stacktop = ms->hwintrstack[bt->tc->processor] +
sizeof(ulong);
bt->stackbase = ms->hwintrstack[bt->tc->processor] -
--
2.37.1

View File

@ -1,56 +0,0 @@
From 275b62afe8fd7446fc31025998b6d0fd47ec8be0 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:43 +0530
Subject: [PATCH 21/89] ppc64: rename ppc64_paca_init to
ppc64_paca_percpu_offset_init
ppc64_paca_init() function is specifically used to initialize percpu
data_offset for kernels older than v2.6.36. So, the name is slightly
misleading. Rename it to ppc64_paca_percpu_offset_init to reflect its
purpose.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/ppc64.c b/ppc64.c
index 272eb207074a..0a3aa5f7af91 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -52,7 +52,7 @@ static char * ppc64_check_eframe(struct ppc64_pt_regs *);
static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
struct bt_info *);
static void parse_cmdline_args(void);
-static int ppc64_paca_init(int);
+static int ppc64_paca_percpu_offset_init(int);
static void ppc64_init_cpu_info(void);
static int ppc64_get_cpu_map(void);
static void ppc64_clear_machdep_cache(void);
@@ -3285,7 +3285,7 @@ parse_cmdline_args(void)
* Initialize the per cpu data_offset values from paca structure.
*/
static int
-ppc64_paca_init(int map)
+ppc64_paca_percpu_offset_init(int map)
{
int i, cpus, nr_paca;
char *cpu_paca_buf;
@@ -3387,10 +3387,11 @@ ppc64_init_cpu_info(void)
* which was removed post v2.6.15 ppc64 and now we get the per cpu
* data_offset from __per_cpu_offset symbol during kernel_init()
* call. Hence for backward (pre-2.6.36) compatibility, call
- * ppc64_paca_init() only if symbol __per_cpu_offset does not exist.
+ * ppc64_paca_percpu_offset_init() only if symbol __per_cpu_offset
+ * does not exist.
*/
if (!symbol_exists("__per_cpu_offset"))
- cpus = ppc64_paca_init(map);
+ cpus = ppc64_paca_percpu_offset_init(map);
else {
if (!(nr_cpus = get_array_length("__per_cpu_offset", NULL, 0)))
nr_cpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS :
--
2.37.1

View File

@ -1,352 +0,0 @@
From 26b0949228786562fdf73fef145a829c0adb9c70 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:44 +0530
Subject: [PATCH 22/89] ppc64: handle backtrace when CPU is in an emergency
stack
A CPU could be in an emergency stack when it is running in real mode
or any special scenario like TM bad thing. Also, there are dedicated
emergency stacks for machine check and system reset interrupt. Right
now, no backtrace is provided if a CPU is in any of these stacks.
This change ensures backtrace is processed appropriately even when
a CPU is in any one of these emergency stacks. Also, if stack info
cannot be found, print that message always instead of only when
verbose logs are enabled.
Related kernel commits:
729b0f715371 ("powerpc/book3s: Introduce exclusive emergency stack for machine check exception.")
b1ee8a3de579 ("powerpc/64s: Dedicated system reset interrupt stack")
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 12 ++++
ppc64.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 203 insertions(+), 12 deletions(-)
diff --git a/defs.h b/defs.h
index d8fbeb89e335..6a1b6f8a16a8 100644
--- a/defs.h
+++ b/defs.h
@@ -6296,6 +6296,13 @@ struct ppc64_elf_prstatus {
#ifdef PPC64
+enum emergency_stack_type {
+ NONE_STACK = 0,
+ EMERGENCY_STACK,
+ NMI_EMERGENCY_STACK,
+ MC_EMERGENCY_STACK
+};
+
struct ppc64_opal {
uint64_t base;
uint64_t entry;
@@ -6315,6 +6322,11 @@ struct machine_specific {
char *hwstackbuf;
uint hwstacksize;
+ /* Emergency stacks */
+ ulong *emergency_sp;
+ ulong *nmi_emergency_sp;
+ ulong *mc_emergency_sp;
+
uint l4_index_size;
uint l3_index_size;
uint l2_index_size;
diff --git a/ppc64.c b/ppc64.c
index 0a3aa5f7af91..03047a85955d 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -48,6 +48,10 @@ static ulong ppc64_get_stackbase(ulong);
static ulong ppc64_get_stacktop(ulong);
void ppc64_compiler_warning_stub(void);
static ulong ppc64_in_irqstack(ulong);
+static enum emergency_stack_type ppc64_in_emergency_stack(int cpu, ulong addr,
+ bool verbose);
+static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
+ struct bt_info *bt);
static char * ppc64_check_eframe(struct ppc64_pt_regs *);
static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
struct bt_info *);
@@ -56,6 +60,7 @@ static int ppc64_paca_percpu_offset_init(int);
static void ppc64_init_cpu_info(void);
static int ppc64_get_cpu_map(void);
static void ppc64_clear_machdep_cache(void);
+static void ppc64_init_paca_info(void);
static void ppc64_vmemmap_init(void);
static int ppc64_get_kvaddr_ranges(struct vaddr_range *);
static uint get_ptetype(ulong pte);
@@ -692,6 +697,8 @@ ppc64_init(int when)
error(FATAL, "cannot malloc hwirqstack buffer space.");
}
+ ppc64_init_paca_info();
+
if (!machdep->hz) {
machdep->hz = HZ;
if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
@@ -1204,6 +1211,70 @@ ppc64_kvtop(struct task_context *tc, ulong kvaddr,
return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose);
}
+static void
+ppc64_init_paca_info(void)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong *paca_ptr;
+ int i;
+
+ if (!(paca_ptr = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc paca pointers space.\n");
+
+ /* Get paca pointers for all CPUs. */
+ if (symbol_exists("paca_ptrs")) {
+ ulong paca_loc;
+
+ readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc, sizeof(void *),
+ "paca double pointer", FAULT_ON_ERROR);
+ readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
+ "paca pointers", FAULT_ON_ERROR);
+ } else if (symbol_exists("paca") &&
+ (get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR)) {
+ readmem(symbol_value("paca"), KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
+ "paca pointers", FAULT_ON_ERROR);
+ } else {
+ free(paca_ptr);
+ return;
+ }
+
+ /* Initialize emergency stacks info. */
+ if (MEMBER_EXISTS("paca_struct", "emergency_sp")) {
+ ulong offset = MEMBER_OFFSET("paca_struct", "emergency_sp");
+
+ if (!(ms->emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc emergency stack space.\n");
+ for (i = 0; i < kt->cpus; i++)
+ readmem(paca_ptr[i] + offset, KVADDR, &ms->emergency_sp[i],
+ sizeof(void *), "paca->emergency_sp",
+ FAULT_ON_ERROR);
+ }
+
+ if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) {
+ ulong offset = MEMBER_OFFSET("paca_struct", "nmi_emergency_sp");
+
+ if (!(ms->nmi_emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc NMI emergency stack space.\n");
+ for (i = 0; i < kt->cpus; i++)
+ readmem(paca_ptr[i] + offset, KVADDR, &ms->nmi_emergency_sp[i],
+ sizeof(void *), "paca->nmi_emergency_sp",
+ FAULT_ON_ERROR);
+ }
+
+ if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) {
+ ulong offset = MEMBER_OFFSET("paca_struct", "mc_emergency_sp");
+
+ if (!(ms->mc_emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc machine check emergency stack space.\n");
+ for (i = 0; i < kt->cpus; i++)
+ readmem(paca_ptr[i] + offset, KVADDR, &ms->mc_emergency_sp[i],
+ sizeof(void *), "paca->mc_emergency_sp",
+ FAULT_ON_ERROR);
+ }
+
+ free(paca_ptr);
+}
+
/*
* Verify that the kernel has made the vmemmap list available,
* and if so, stash the relevant data required to make vtop
@@ -1755,6 +1826,11 @@ ppc64_eframe_search(struct bt_info *bt_in)
addr = bt->stackbase +
roundup(SIZE(thread_info), sizeof(ulong));
} else if (!INSTACK(addr, bt)) {
+ enum emergency_stack_type estype;
+
+ if ((estype = ppc64_in_emergency_stack(bt->tc->processor, addr, false)))
+ ppc64_set_bt_emergency_stack(estype, bt);
+
/*
* If the user specified SP is in HW interrupt stack
* (only for tasks running on other CPUs and in 2.4
@@ -1856,6 +1932,84 @@ ppc64_in_irqstack(ulong addr)
return 0;
}
+/*
+ * Check if the CPU is running in any of its emergency stacks.
+ * Returns
+ * NONE_STACK : if input is invalid or addr is not within any emergency stack.
+ * EMERGENCY_STACK : if the addr is within emergency stack.
+ * NMI_EMERGENCY_STACK : if the addr is within NMI emergency stack.
+ * MC_EMERGENCY_STACK : if the addr is within machine check emergency stack.
+ */
+static enum emergency_stack_type
+ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong base, top;
+
+ if (cpu < 0 || cpu >= kt->cpus)
+ return NONE_STACK;
+
+ if (ms->emergency_sp) {
+ top = ms->emergency_sp[cpu];
+ base = top - STACKSIZE();
+ if (addr >= base && addr < top) {
+ if (verbose)
+ fprintf(fp, "---<Emergency Stack>---\n");
+ return EMERGENCY_STACK;
+ }
+ }
+
+ if (ms->nmi_emergency_sp) {
+ top = ms->nmi_emergency_sp[cpu];
+ base = top - STACKSIZE();
+ if (addr >= base && addr < top) {
+ if (verbose)
+ fprintf(fp, "---<NMI Emergency Stack>---\n");
+ return NMI_EMERGENCY_STACK;
+ }
+ }
+
+ if (ms->mc_emergency_sp) {
+ top = ms->mc_emergency_sp[cpu];
+ base = top - STACKSIZE();
+ if (addr >= base && addr < top) {
+ if (verbose)
+ fprintf(fp, "---<Machine Check Emergency Stack>---\n");
+ return MC_EMERGENCY_STACK;
+ }
+ }
+
+ return NONE_STACK;
+}
+
+static void
+ppc64_set_bt_emergency_stack(enum emergency_stack_type type, struct bt_info *bt)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong top;
+
+ switch (type) {
+ case EMERGENCY_STACK:
+ top = ms->emergency_sp[bt->tc->processor];
+ break;
+ case NMI_EMERGENCY_STACK:
+ top = ms->nmi_emergency_sp[bt->tc->processor];
+ break;
+ case MC_EMERGENCY_STACK:
+ top = ms->mc_emergency_sp[bt->tc->processor];
+ break;
+ default:
+ top = 0;
+ break;
+ }
+
+ if (top) {
+ bt->stackbase = top - STACKSIZE();
+ bt->stacktop = top;
+ alter_stackbuf(bt);
+ }
+}
+
/*
* Unroll a kernel stack.
*/
@@ -1936,10 +2090,13 @@ ppc64_back_trace_cmd(struct bt_info *bt)
static void
ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
{
- int frame = 0;
- ulong lr = 0; /* hack...need to pass in initial lr reg */
+ enum emergency_stack_type estype;
ulong newpc = 0, newsp, marker;
+ int c = bt->tc->processor;
+ ulong nmi_sp = 0;
int eframe_found;
+ int frame = 0;
+ ulong lr = 0; /* hack...need to pass in initial lr reg */
if (!INSTACK(req->sp, bt)) {
ulong irqstack;
@@ -1949,6 +2106,10 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
bt->stackbase = irqstack;
bt->stacktop = bt->stackbase + STACKSIZE();
alter_stackbuf(bt);
+ } else if ((estype = ppc64_in_emergency_stack(c, req->sp, true))) {
+ if (estype == NMI_EMERGENCY_STACK)
+ nmi_sp = req->sp;
+ ppc64_set_bt_emergency_stack(estype, bt);
} else if (ms->hwintrstack) {
bt->stacktop = ms->hwintrstack[bt->tc->processor] +
sizeof(ulong);
@@ -1957,9 +2118,7 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
bt->stackbuf = ms->hwstackbuf;
alter_stackbuf(bt);
} else {
- if (CRASHDEBUG(1)) {
- fprintf(fp, "cannot find the stack info.\n");
- }
+ fprintf(fp, "cannot find the stack info.\n");
return;
}
}
@@ -1989,13 +2148,20 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
newsp =
*(ulong *)&bt->stackbuf[newsp - bt->stackbase];
if (!INSTACK(newsp, bt)) {
- /*
- * Switch HW interrupt stack to process's stack.
- */
- bt->stackbase = GET_STACKBASE(bt->task);
- bt->stacktop = GET_STACKTOP(bt->task);
- alter_stackbuf(bt);
- }
+ if ((estype = ppc64_in_emergency_stack(c, newsp, true))) {
+ if (!nmi_sp && estype == NMI_EMERGENCY_STACK)
+ nmi_sp = newsp;
+ ppc64_set_bt_emergency_stack(estype, bt);
+ } else {
+ /*
+ * Switch HW interrupt stack or emergency stack
+ * to process's stack.
+ */
+ bt->stackbase = GET_STACKBASE(bt->task);
+ bt->stacktop = GET_STACKTOP(bt->task);
+ alter_stackbuf(bt);
+ }
+ }
if (IS_KVADDR(newsp) && INSTACK(newsp, bt))
newpc = *(ulong *)&bt->stackbuf[newsp + 16 -
bt->stackbase];
@@ -2039,6 +2205,16 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
}
}
+ /*
+ * NMI stack may not be re-entrant. In so, an SP in the NMI stack
+ * is likely to point back to an SP within the NMI stack, in case
+ * of a nested NMI.
+ */
+ if (nmi_sp && nmi_sp == newsp) {
+ fprintf(fp, "---<Nested NMI>---\n");
+ break;
+ }
+
/*
* Some Linux 3.7 kernel threads have been seen to have
* their end-of-trace stack linkage pointer pointing
@@ -2416,6 +2592,9 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
pt_regs = (struct ppc64_pt_regs *)bt->machdep;
ur_nip = pt_regs->nip;
ur_ksp = pt_regs->gpr[1];
+ /* Print the collected regs for panic task. */
+ ppc64_print_regs(pt_regs);
+ ppc64_print_nip_lr(pt_regs, 1);
} else if ((pc->flags & KDUMP) ||
((pc->flags & DISKDUMP) &&
(*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
--
2.37.1

View File

@ -1,74 +0,0 @@
From 5ef39b9dda5ab29ddbbc72d54829ddbc66238518 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:45 +0530
Subject: [PATCH 23/89] ppc64: print emergency stacks info with 'mach' command
Print top address of emergency stacks with 'mach' command.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/ppc64.c b/ppc64.c
index 03047a85955d..ad1d6e121e81 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -3161,6 +3161,44 @@ opalmsg(void)
fprintf(fp, "\n");
}
+static void ppc64_print_emergency_stack_info(void)
+{
+ struct machine_specific *ms = machdep->machspec;
+ char buf[32];
+ int i;
+
+ fprintf(fp, " EMERGENCY STACK: ");
+ if (ms->emergency_sp) {
+ fprintf(fp, "\n");
+ for (i = 0; i < kt->cpus; i++) {
+ sprintf(buf, "CPU %d", i);
+ fprintf(fp, "%19s: %lx\n", buf, ms->emergency_sp[i]);
+ }
+ } else
+ fprintf(fp, "(unused)\n");
+
+ fprintf(fp, "NMI EMERGENCY STACK: ");
+ if (ms->nmi_emergency_sp) {
+ fprintf(fp, "\n");
+ for (i = 0; i < kt->cpus; i++) {
+ sprintf(buf, "CPU %d", i);
+ fprintf(fp, "%19s: %lx\n", buf, ms->nmi_emergency_sp[i]);
+ }
+ } else
+ fprintf(fp, "(unused)\n");
+
+ fprintf(fp, " MC EMERGENCY STACK: ");
+ if (ms->mc_emergency_sp) {
+ fprintf(fp, "\n");
+ for (i = 0; i < kt->cpus; i++) {
+ sprintf(buf, "CPU %d", i);
+ fprintf(fp, "%19s: %lx\n", buf, ms->mc_emergency_sp[i]);
+ }
+ } else
+ fprintf(fp, "(unused)\n");
+ fprintf(fp, "\n");
+}
+
/*
* Machine dependent command.
*/
@@ -3241,6 +3279,8 @@ ppc64_display_machine_stats(void)
fprintf(fp, "%19s: %lx\n", buf, tt->softirq_ctx[c]);
}
}
+
+ ppc64_print_emergency_stack_info();
}
static const char *hook_files[] = {
--
2.37.1

View File

@ -1,389 +0,0 @@
From 1365bde4820805457a35743be2dbb1035f4eeede Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:46 +0530
Subject: [PATCH 24/89] ppc64: use a variable for machdep->machspec
machdpep->machspec is referred to multiple times. The compiler would
likely optimize this but nonetheless, use a variable to optimize in
coding and also improve readability. No functional change.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 224 ++++++++++++++++++++++++++++----------------------------
1 file changed, 111 insertions(+), 113 deletions(-)
diff --git a/ppc64.c b/ppc64.c
index ad1d6e121e81..4ea1f7c0c6f8 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -307,6 +307,8 @@ struct machine_specific book3e_machine_specific = {
void
ppc64_init(int when)
{
+ struct machine_specific *ms;
+
#if defined(__x86_64__)
if (ACTIVE())
error(FATAL, "compiled for the PPC64 architecture\n");
@@ -416,16 +418,16 @@ ppc64_init(int when)
break;
case POST_GDB:
- if (!(machdep->flags & BOOK3E)) {
- struct machine_specific *m = machdep->machspec;
+ ms = machdep->machspec;
+ if (!(machdep->flags & BOOK3E)) {
/*
* To determine if the kernel was running on OPAL based platform,
* use struct opal, which is populated with relevant values.
*/
if (symbol_exists("opal")) {
- get_symbol_data("opal", sizeof(struct ppc64_opal), &(m->opal));
- if (m->opal.base == SKIBOOT_BASE)
+ get_symbol_data("opal", sizeof(struct ppc64_opal), &(ms->opal));
+ if (ms->opal.base == SKIBOOT_BASE)
machdep->flags |= OPAL_FW;
}
@@ -453,18 +455,18 @@ ppc64_init(int when)
* _PAGE_WRITETHRU can be used to infer it.
*/
if (THIS_KERNEL_VERSION >= LINUX(3,14,0))
- m->_page_coherent = 0x0UL;
+ ms->_page_coherent = 0x0UL;
/*
* In kernel v4.5, _PAGE_PTE bit is introduced to
* distinguish PTEs from pointers.
*/
if (THIS_KERNEL_VERSION >= LINUX(4,5,0)) {
- m->_page_pte = 0x1UL;
- m->_page_present = 0x2UL;
- m->_page_user = 0x4UL;
- m->_page_rw = 0x8UL;
- m->_page_guarded = 0x10UL;
+ ms->_page_pte = 0x1UL;
+ ms->_page_present = 0x2UL;
+ ms->_page_user = 0x4UL;
+ ms->_page_rw = 0x8UL;
+ ms->_page_guarded = 0x10UL;
}
/*
@@ -474,8 +476,8 @@ ppc64_init(int when)
* Also, page table entries store physical addresses.
*/
if (THIS_KERNEL_VERSION >= LINUX(4,6,0)) {
- m->_page_pte = 0x1UL << 62;
- m->_page_present = 0x1UL << 63;
+ ms->_page_pte = 0x1UL << 62;
+ ms->_page_present = 0x1UL << 63;
machdep->flags |= PHYS_ENTRY_L4;
}
@@ -504,118 +506,117 @@ ppc64_init(int when)
machdep->ptrs_per_pgd = PTRS_PER_PGD;
} else {
/* 2.6.14 layout */
- struct machine_specific *m = machdep->machspec;
if (machdep->pagesize == 65536) {
/* 64K pagesize */
if (machdep->flags & RADIX_MMU) {
- m->l1_index_size = PTE_INDEX_SIZE_RADIX_64K;
- m->l2_index_size = PMD_INDEX_SIZE_RADIX_64K;
- m->l3_index_size = PUD_INDEX_SIZE_RADIX_64K;
- m->l4_index_size = PGD_INDEX_SIZE_RADIX_64K;
+ ms->l1_index_size = PTE_INDEX_SIZE_RADIX_64K;
+ ms->l2_index_size = PMD_INDEX_SIZE_RADIX_64K;
+ ms->l3_index_size = PUD_INDEX_SIZE_RADIX_64K;
+ ms->l4_index_size = PGD_INDEX_SIZE_RADIX_64K;
} else if (!(machdep->flags & BOOK3E) &&
(THIS_KERNEL_VERSION >= LINUX(4,6,0))) {
- m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
+ ms->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
if (THIS_KERNEL_VERSION >= LINUX(4,12,0)) {
- m->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_12;
+ ms->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_12;
if (THIS_KERNEL_VERSION >= LINUX(4,17,0))
- m->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_17;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_17;
else
- m->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_12;
- m->l4_index_size = PGD_INDEX_SIZE_L4_64K_4_12;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_12;
+ ms->l4_index_size = PGD_INDEX_SIZE_L4_64K_4_12;
} else {
- m->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6;
- m->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_6;
- m->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10;
+ ms->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_6;
+ ms->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10;
}
} else if (THIS_KERNEL_VERSION >= LINUX(3,10,0)) {
- m->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
- m->l2_index_size = PMD_INDEX_SIZE_L4_64K_3_10;
- m->l3_index_size = PUD_INDEX_SIZE_L4_64K;
- m->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10;
+ ms->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10;
+ ms->l2_index_size = PMD_INDEX_SIZE_L4_64K_3_10;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_64K;
+ ms->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10;
} else {
- m->l1_index_size = PTE_INDEX_SIZE_L4_64K;
- m->l2_index_size = PMD_INDEX_SIZE_L4_64K;
- m->l3_index_size = PUD_INDEX_SIZE_L4_64K;
- m->l4_index_size = PGD_INDEX_SIZE_L4_64K;
+ ms->l1_index_size = PTE_INDEX_SIZE_L4_64K;
+ ms->l2_index_size = PMD_INDEX_SIZE_L4_64K;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_64K;
+ ms->l4_index_size = PGD_INDEX_SIZE_L4_64K;
}
if (!(machdep->flags & BOOK3E))
- m->pte_rpn_shift = symbol_exists("demote_segment_4k") ?
+ ms->pte_rpn_shift = symbol_exists("demote_segment_4k") ?
PTE_RPN_SHIFT_L4_64K_V2 : PTE_RPN_SHIFT_L4_64K_V1;
if (!(machdep->flags & BOOK3E) &&
(THIS_KERNEL_VERSION >= LINUX(4,6,0))) {
- m->pgd_masked_bits = PGD_MASKED_BITS_64K_4_6;
- m->pud_masked_bits = PUD_MASKED_BITS_64K_4_6;
- m->pmd_masked_bits = PMD_MASKED_BITS_64K_4_6;
+ ms->pgd_masked_bits = PGD_MASKED_BITS_64K_4_6;
+ ms->pud_masked_bits = PUD_MASKED_BITS_64K_4_6;
+ ms->pmd_masked_bits = PMD_MASKED_BITS_64K_4_6;
} else {
- m->pgd_masked_bits = PGD_MASKED_BITS_64K;
- m->pud_masked_bits = PUD_MASKED_BITS_64K;
+ ms->pgd_masked_bits = PGD_MASKED_BITS_64K;
+ ms->pud_masked_bits = PUD_MASKED_BITS_64K;
if ((machdep->flags & BOOK3E) &&
(THIS_KERNEL_VERSION >= LINUX(4,5,0)))
- m->pmd_masked_bits = PMD_MASKED_BITS_BOOK3E_64K_4_5;
+ ms->pmd_masked_bits = PMD_MASKED_BITS_BOOK3E_64K_4_5;
else if (THIS_KERNEL_VERSION >= LINUX(3,11,0))
- m->pmd_masked_bits = PMD_MASKED_BITS_64K_3_11;
+ ms->pmd_masked_bits = PMD_MASKED_BITS_64K_3_11;
else
- m->pmd_masked_bits = PMD_MASKED_BITS_64K;
+ ms->pmd_masked_bits = PMD_MASKED_BITS_64K;
}
} else {
/* 4K pagesize */
if (machdep->flags & RADIX_MMU) {
- m->l1_index_size = PTE_INDEX_SIZE_RADIX_4K;
- m->l2_index_size = PMD_INDEX_SIZE_RADIX_4K;
- m->l3_index_size = PUD_INDEX_SIZE_RADIX_4K;
- m->l4_index_size = PGD_INDEX_SIZE_RADIX_4K;
+ ms->l1_index_size = PTE_INDEX_SIZE_RADIX_4K;
+ ms->l2_index_size = PMD_INDEX_SIZE_RADIX_4K;
+ ms->l3_index_size = PUD_INDEX_SIZE_RADIX_4K;
+ ms->l4_index_size = PGD_INDEX_SIZE_RADIX_4K;
} else {
- m->l1_index_size = PTE_INDEX_SIZE_L4_4K;
- m->l2_index_size = PMD_INDEX_SIZE_L4_4K;
+ ms->l1_index_size = PTE_INDEX_SIZE_L4_4K;
+ ms->l2_index_size = PMD_INDEX_SIZE_L4_4K;
if (THIS_KERNEL_VERSION >= LINUX(3,7,0))
- m->l3_index_size = PUD_INDEX_SIZE_L4_4K_3_7;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_4K_3_7;
else
- m->l3_index_size = PUD_INDEX_SIZE_L4_4K;
- m->l4_index_size = PGD_INDEX_SIZE_L4_4K;
+ ms->l3_index_size = PUD_INDEX_SIZE_L4_4K;
+ ms->l4_index_size = PGD_INDEX_SIZE_L4_4K;
if (machdep->flags & BOOK3E)
- m->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K;
+ ms->pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_4K;
else
- m->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ?
+ ms->pte_rpn_shift = THIS_KERNEL_VERSION >= LINUX(4,5,0) ?
PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K;
}
- m->pgd_masked_bits = PGD_MASKED_BITS_4K;
- m->pud_masked_bits = PUD_MASKED_BITS_4K;
- m->pmd_masked_bits = PMD_MASKED_BITS_4K;
+ ms->pgd_masked_bits = PGD_MASKED_BITS_4K;
+ ms->pud_masked_bits = PUD_MASKED_BITS_4K;
+ ms->pmd_masked_bits = PMD_MASKED_BITS_4K;
}
- m->pte_rpn_mask = PTE_RPN_MASK_DEFAULT;
+ ms->pte_rpn_mask = PTE_RPN_MASK_DEFAULT;
if (!(machdep->flags & BOOK3E)) {
if (THIS_KERNEL_VERSION >= LINUX(4,6,0)) {
- m->pte_rpn_mask = PTE_RPN_MASK_L4_4_6;
- m->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_6;
+ ms->pte_rpn_mask = PTE_RPN_MASK_L4_4_6;
+ ms->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_6;
}
if (THIS_KERNEL_VERSION >= LINUX(4,7,0)) {
- m->pgd_masked_bits = PGD_MASKED_BITS_4_7;
- m->pud_masked_bits = PUD_MASKED_BITS_4_7;
- m->pmd_masked_bits = PMD_MASKED_BITS_4_7;
+ ms->pgd_masked_bits = PGD_MASKED_BITS_4_7;
+ ms->pud_masked_bits = PUD_MASKED_BITS_4_7;
+ ms->pmd_masked_bits = PMD_MASKED_BITS_4_7;
}
}
/* Compute ptrs per each level */
- m->l1_shift = machdep->pageshift;
- m->ptrs_per_l1 = (1 << m->l1_index_size);
- m->ptrs_per_l2 = (1 << m->l2_index_size);
- m->ptrs_per_l3 = (1 << m->l3_index_size);
- m->ptrs_per_l4 = (1 << m->l4_index_size);
- machdep->ptrs_per_pgd = m->ptrs_per_l4;
+ ms->l1_shift = machdep->pageshift;
+ ms->ptrs_per_l1 = (1 << ms->l1_index_size);
+ ms->ptrs_per_l2 = (1 << ms->l2_index_size);
+ ms->ptrs_per_l3 = (1 << ms->l3_index_size);
+ ms->ptrs_per_l4 = (1 << ms->l4_index_size);
+ machdep->ptrs_per_pgd = ms->ptrs_per_l4;
/* Compute shifts */
- m->l2_shift = m->l1_shift + m->l1_index_size;
- m->l3_shift = m->l2_shift + m->l2_index_size;
- m->l4_shift = m->l3_shift + m->l3_index_size;
+ ms->l2_shift = ms->l1_shift + ms->l1_index_size;
+ ms->l3_shift = ms->l2_shift + ms->l2_index_size;
+ ms->l4_shift = ms->l3_shift + ms->l3_index_size;
}
if (machdep->flags & VMEMMAP)
@@ -681,19 +682,15 @@ ppc64_init(int when)
*/
offset = MEMBER_OFFSET("paca_struct", "xHrdIntStack");
paca_sym = symbol_value("paca");
- if (!(machdep->machspec->hwintrstack =
- (ulong *)calloc(NR_CPUS, sizeof(ulong))))
+ if (!(ms->hwintrstack = (ulong *)calloc(NR_CPUS, sizeof(ulong))))
error(FATAL, "cannot malloc hwintrstack space.");
for (cpu = 0; cpu < kt->cpus; cpu++) {
- readmem(paca_sym + (paca_size * cpu) + offset,
- KVADDR,
- &machdep->machspec->hwintrstack[cpu],
- sizeof(ulong), "PPC64 HW_intr_stack",
- FAULT_ON_ERROR);
+ readmem(paca_sym + (paca_size * cpu) + offset, KVADDR,
+ &ms->hwintrstack[cpu], sizeof(ulong),
+ "PPC64 HW_intr_stack", FAULT_ON_ERROR);
}
- machdep->machspec->hwstacksize = 8 * machdep->pagesize;
- if ((machdep->machspec->hwstackbuf = (char *)
- malloc(machdep->machspec->hwstacksize)) == NULL)
+ ms->hwstacksize = 8 * machdep->pagesize;
+ if ((ms->hwstackbuf = (char *)malloc(ms->hwstacksize)) == NULL)
error(FATAL, "cannot malloc hwirqstack buffer space.");
}
@@ -756,6 +753,7 @@ ppc64_get_stacktop(ulong task)
void
ppc64_dump_machdep_table(ulong arg)
{
+ struct machine_specific *ms = machdep->machspec;
int i, c, others;
others = 0;
@@ -844,57 +842,57 @@ ppc64_dump_machdep_table(ulong arg)
i, machdep->cmdline_args[i] ?
machdep->cmdline_args[i] : "(unused)");
}
- fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec);
+ fprintf(fp, " machspec: %lx\n", (ulong)ms);
fprintf(fp, " is_kvaddr: %s\n",
- machdep->machspec->is_kvaddr == book3e_is_kvaddr ?
+ ms->is_kvaddr == book3e_is_kvaddr ?
"book3e_is_kvaddr()" : "generic_is_kvaddr()");
fprintf(fp, " is_vmaddr: %s\n",
- machdep->machspec->is_vmaddr == book3e_is_vmaddr ?
+ ms->is_vmaddr == book3e_is_vmaddr ?
"book3e_is_vmaddr()" : "ppc64_is_vmaddr()");
- if (machdep->machspec->hwintrstack) {
+ if (ms->hwintrstack) {
fprintf(fp, " hwintrstack[%d]: ", NR_CPUS);
for (c = 0; c < NR_CPUS; c++) {
fprintf(fp, "%s%016lx ",
((c % 4) == 0) ? "\n " : "",
- machdep->machspec->hwintrstack[c]);
+ ms->hwintrstack[c]);
}
} else
fprintf(fp, " hwintrstack: (unused)");
fprintf(fp, "\n");
- fprintf(fp, " hwstackbuf: %lx\n", (ulong)machdep->machspec->hwstackbuf);
- fprintf(fp, " hwstacksize: %d\n", machdep->machspec->hwstacksize);
- fprintf(fp, " l4_index_size: %d\n", machdep->machspec->l4_index_size);
- fprintf(fp, " l3_index_size: %d\n", machdep->machspec->l3_index_size);
- fprintf(fp, " l2_index_size: %d\n", machdep->machspec->l2_index_size);
- fprintf(fp, " l1_index_size: %d\n", machdep->machspec->l1_index_size);
- fprintf(fp, " ptrs_per_l4: %d\n", machdep->machspec->ptrs_per_l4);
- fprintf(fp, " ptrs_per_l3: %d\n", machdep->machspec->ptrs_per_l3);
- fprintf(fp, " ptrs_per_l2: %d\n", machdep->machspec->ptrs_per_l2);
- fprintf(fp, " ptrs_per_l1: %d\n", machdep->machspec->ptrs_per_l1);
- fprintf(fp, " l4_shift: %d\n", machdep->machspec->l4_shift);
- fprintf(fp, " l3_shift: %d\n", machdep->machspec->l3_shift);
- fprintf(fp, " l2_shift: %d\n", machdep->machspec->l2_shift);
- fprintf(fp, " l1_shift: %d\n", machdep->machspec->l1_shift);
- fprintf(fp, " pte_rpn_mask: %lx\n", machdep->machspec->pte_rpn_mask);
- fprintf(fp, " pte_rpn_shift: %d\n", machdep->machspec->pte_rpn_shift);
- fprintf(fp, " pgd_masked_bits: %lx\n", machdep->machspec->pgd_masked_bits);
- fprintf(fp, " pud_masked_bits: %lx\n", machdep->machspec->pud_masked_bits);
- fprintf(fp, " pmd_masked_bits: %lx\n", machdep->machspec->pmd_masked_bits);
+ fprintf(fp, " hwstackbuf: %lx\n", (ulong)ms->hwstackbuf);
+ fprintf(fp, " hwstacksize: %d\n", ms->hwstacksize);
+ fprintf(fp, " l4_index_size: %d\n", ms->l4_index_size);
+ fprintf(fp, " l3_index_size: %d\n", ms->l3_index_size);
+ fprintf(fp, " l2_index_size: %d\n", ms->l2_index_size);
+ fprintf(fp, " l1_index_size: %d\n", ms->l1_index_size);
+ fprintf(fp, " ptrs_per_l4: %d\n", ms->ptrs_per_l4);
+ fprintf(fp, " ptrs_per_l3: %d\n", ms->ptrs_per_l3);
+ fprintf(fp, " ptrs_per_l2: %d\n", ms->ptrs_per_l2);
+ fprintf(fp, " ptrs_per_l1: %d\n", ms->ptrs_per_l1);
+ fprintf(fp, " l4_shift: %d\n", ms->l4_shift);
+ fprintf(fp, " l3_shift: %d\n", ms->l3_shift);
+ fprintf(fp, " l2_shift: %d\n", ms->l2_shift);
+ fprintf(fp, " l1_shift: %d\n", ms->l1_shift);
+ fprintf(fp, " pte_rpn_mask: %lx\n", ms->pte_rpn_mask);
+ fprintf(fp, " pte_rpn_shift: %d\n", ms->pte_rpn_shift);
+ fprintf(fp, " pgd_masked_bits: %lx\n", ms->pgd_masked_bits);
+ fprintf(fp, " pud_masked_bits: %lx\n", ms->pud_masked_bits);
+ fprintf(fp, " pmd_masked_bits: %lx\n", ms->pmd_masked_bits);
fprintf(fp, " vmemmap_base: ");
- if (machdep->machspec->vmemmap_base)
- fprintf(fp, "%lx\n", machdep->machspec->vmemmap_base);
+ if (ms->vmemmap_base)
+ fprintf(fp, "%lx\n", ms->vmemmap_base);
else
fprintf(fp, "(unused)\n");
- if (machdep->machspec->vmemmap_cnt) {
+ if (ms->vmemmap_cnt) {
fprintf(fp, " vmemmap_cnt: %d\n",
- machdep->machspec->vmemmap_cnt);
+ ms->vmemmap_cnt);
fprintf(fp, " vmemmap_psize: %d\n",
- machdep->machspec->vmemmap_psize);
- for (i = 0; i < machdep->machspec->vmemmap_cnt; i++) {
+ ms->vmemmap_psize);
+ for (i = 0; i < ms->vmemmap_cnt; i++) {
fprintf(fp,
" vmemmap_list[%d]: virt: %lx phys: %lx\n", i,
- machdep->machspec->vmemmap_list[i].virt,
- machdep->machspec->vmemmap_list[i].phys);
+ ms->vmemmap_list[i].virt,
+ ms->vmemmap_list[i].phys);
}
} else {
fprintf(fp, " vmemmap_cnt: (unused)\n");
--
2.37.1

View File

@ -1,39 +0,0 @@
From ee5ab26db61025b2f5ed3a4b529394187df54d36 Mon Sep 17 00:00:00 2001
From: Qianli Zhao <qianli.zhao@horizon.ai>
Date: Mon, 4 Jul 2022 16:40:01 +0800
Subject: [PATCH 25/89] arm64: Fix for st->_stext_vmlinux not initialized when
set VA_BITS_ACTUAL
Setting st->_stext_vmlinux to UNINITIALIZED to search for "_stext"
from the vmlinux. In the scenario where kaslr is disabled and
without vmcoreinfo, crash will get the wrong MODULES/VMALLOC ranges
and cause a failure in parsing a raw RAM dumpfile.
Signed-off-by: Qianli Zhao <qianli.zhao@horizon.ai>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arm64.c b/arm64.c
index 0f615cf52bef..b6b7aa11f4fe 100644
--- a/arm64.c
+++ b/arm64.c
@@ -149,6 +149,14 @@ arm64_init(int when)
ms = machdep->machspec;
+ /*
+ * The st->_stext_vmlinux is needed in arm64_init(PRE_GDB) when a
+ * dumpfile does not have vmcoreinfo and we use -m vabits_actual
+ * option, e.g. a raw RAM dumpfile.
+ */
+ if (ms->VA_BITS_ACTUAL)
+ st->_stext_vmlinux = UNINITIALIZED;
+
if (!ms->kimage_voffset && STREQ(pc->live_memsrc, "/dev/crash"))
ioctl(pc->mfd, DEV_CRASH_ARCH_DATA, &ms->kimage_voffset);
--
2.37.1

View File

@ -1,53 +0,0 @@
From f141628a62b795f52a5d57f5d3e844b6a49114da Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 22 Jul 2022 13:44:50 +0900
Subject: [PATCH 26/89] Fix gcc-11 compiler warnings on filesys.c
Without the patch, the following gcc-11 compiler warnings are emitted
for filesys.c:
filesys.c: In function 'mount_point':
filesys.c:718:17: warning: 'pclose' called on pointer returned from a mismatched allocation function [-Wmismatched-dealloc]
718 | pclose(mp);
| ^~~~~~~~~~
filesys.c:709:27: note: returned from 'fopen'
709 | if ((mp = fopen(mntfile, "r")) == NULL)
| ^~~~~~~~~~~~~~~~~~~
filesys.c:738:17: warning: 'pclose' called on pointer returned from a mismatched allocation function [-Wmismatched-dealloc]
738 | pclose(mp);
| ^~~~~~~~~~
filesys.c:723:27: note: returned from 'fopen'
723 | if ((mp = fopen(mntfile, "r")) == NULL)
| ^~~~~~~~~~~~~~~~~~~
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
filesys.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/filesys.c b/filesys.c
index 43cbe826fc79..a863f04eb250 100644
--- a/filesys.c
+++ b/filesys.c
@@ -715,7 +715,7 @@ mount_point(char *name)
continue;
found++;
}
- pclose(mp);
+ fclose(mp);
if (!(mount_points = (char **)malloc(sizeof(char *) * found)))
return FALSE;
@@ -735,7 +735,7 @@ mount_point(char *name)
mount_points_gathered++, i++;
}
}
- pclose(mp);
+ fclose(mp);
if (CRASHDEBUG(2))
for (i = 0; i < mount_points_gathered; i++)
--
2.37.1

View File

@ -1,53 +0,0 @@
From 12898df249a5db826d2390467aaadd49de29074e Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 22 Jul 2022 13:44:50 +0900
Subject: [PATCH 27/89] Fix gcc-11 compiler warning on symbols.c
Without the patch, the following gcc-11 compiler warning is emitted for
symbols.c:
symbols.c: In function 'cmd_p':
symbols.c:7412:38: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
7412 | *(cpuspec-1) = ':';
| ~~~~~~~~~~~~~^~~~~
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
symbols.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/symbols.c b/symbols.c
index 69004a2e66e3..a94660538492 100644
--- a/symbols.c
+++ b/symbols.c
@@ -7363,7 +7363,7 @@ cmd_p(void)
unsigned radix;
int do_load_module_filter;
char buf1[BUFSIZE];
- char *cpuspec;
+ char *cpuspec, *p;
do_load_module_filter = radix = 0;
@@ -7398,7 +7398,7 @@ cmd_p(void)
if (argerrs || !args[optind])
cmd_usage(pc->curcmd, SYNOPSIS);
- cpuspec = strrchr(args[optind], ':');
+ p = cpuspec = strrchr(args[optind], ':');
if (cpuspec)
*cpuspec++ = NULLCHAR;
@@ -7421,7 +7421,7 @@ cmd_p(void)
sp->name);
else
/* maybe a valid C expression (e.g. ':') */
- *(cpuspec-1) = ':';
+ *p = ':';
}
process_gdb_output(concat_args(buf1, 0, TRUE), radix,
--
2.37.1

View File

@ -1,38 +0,0 @@
From 8d9e6c5b02c9c417264b76279b8f77b0995b6f74 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 22 Jul 2022 13:44:50 +0900
Subject: [PATCH 28/89] Fix gcc-11 compiler warning on makedumpfile.c
Without the patch, the following gcc-11 compiler warning is emitted for
makedumpfile.c:
In function 'flattened_format_get_osrelease',
inlined from 'check_flattened_format' at makedumpfile.c:236:3:
makedumpfile.c:392:9: warning: 'fclose' called on pointer returned from a mismatched allocation function [-Wmismatched-dealloc]
392 | fclose(pipe);
| ^~~~~~~~~~~~
makedumpfile.c: In function 'check_flattened_format':
makedumpfile.c:380:21: note: returned from 'popen'
380 | if ((pipe = popen(buf, "r")) == NULL)
| ^~~~~~~~~~~~~~~
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
makedumpfile.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index ebf24f56da2c..26d12b638ecd 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -389,5 +389,5 @@ flattened_format_get_osrelease(char *file)
}
}
- fclose(pipe);
+ pclose(pipe);
}
--
2.37.1

View File

@ -1,61 +0,0 @@
From 6618a57d40ff48726c26d54813f3f8090037760b Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 22 Jul 2022 13:44:50 +0900
Subject: [PATCH 29/89] Fix gcc-11 compiler warning on kvmdump.c
Without the patch, the following gcc-11 compiler warning is emitted for
kvmdump.c:
In function 'write_mapfile_registers',
inlined from 'write_mapfile_trailer' at kvmdump.c:947:3,
inlined from 'kvmdump_init' at kvmdump.c:145:4:
kvmdump.c:972:13: warning: 'write' reading 8 bytes from a region of size 4 [-Wstringop-overread]
972 | if (write(kvm->mapfd, &kvm->cpu_devices, sizeof(uint64_t)) != sizeof(uint64_t))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from kvmdump.c:19:
kvmdump.c: In function 'kvmdump_init':
kvmdump.h:67:18: note: source object 'cpu_devices' of size 4
67 | uint32_t cpu_devices;
| ^~~~~~~~~~~
In file included from defs.h:26,
from kvmdump.c:18:
/usr/include/unistd.h:378:16: note: in a call to function 'write' declared with attribute 'access (read_only, 2, 3)'
378 | extern ssize_t write (int __fd, const void *__buf, size_t __n) __wur
| ^~~~~
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
kvmdump.c | 2 +-
kvmdump.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/kvmdump.c b/kvmdump.c
index 4db96bd844e9..e515bf0ce9a1 100644
--- a/kvmdump.c
+++ b/kvmdump.c
@@ -297,7 +297,7 @@ kvmdump_memory_dump(FILE *ofp)
(ulonglong)kvm->page_cache[i].paddr);
}
- fprintf(ofp, " cpu_devices: %d\n", kvm->cpu_devices);
+ fprintf(ofp, " cpu_devices: %ld\n", kvm->cpu_devices);
fprintf(ofp, " iohole: %llx (%llx - %llx)\n",
(ulonglong)kvm->iohole, 0x100000000ULL - kvm->iohole,
0x100000000ULL);
diff --git a/kvmdump.h b/kvmdump.h
index 07e047bb171c..2e408aebef0b 100644
--- a/kvmdump.h
+++ b/kvmdump.h
@@ -64,7 +64,7 @@ struct kvmdump_data {
ulong compresses;
uint64_t kvbase;
ulong *debug;
- uint32_t cpu_devices;
+ uint64_t cpu_devices;
struct register_set *registers;
uint64_t iohole;
};
--
2.37.1

View File

@ -1,156 +0,0 @@
From 59e9621ac2a7cb3f001fdab53c1e7e3590f8762d Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Thu, 28 Jul 2022 15:11:20 +0800
Subject: [PATCH 30/89] x86_64: Fix for AMD SME issue
Kernel commit changes(see [1]/[2]) may cause the failure of crash-utility
with the following error:
#./crash /home/vmlinux /home/vmcore
...
For help, type "help".
Type "apropos word" to search for commands related to "word"...
crash: seek error: physical address: 8000760a14000 type: "p4d page"
Let's get the "NUMBER(sme_mask)" from vmcoreinfo, and try to remove
the C-bit from the page table entries, the intention is to get the
true physical address.
Related kernel commits:
[1] aad983913d77 ("x86/mm/encrypt: Simplify sme_populate_pgd() and sme_populate_pgd_large()")
[2] e7d445ab26db ("x86/sme: Use #define USE_EARLY_PGTABLE_L5 in mem_encrypt_identity.c")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
x86_64.c | 21 ++++++++++++++++++---
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 6a1b6f8a16a8..f8fbfdfd1152 100644
--- a/defs.h
+++ b/defs.h
@@ -6206,6 +6206,7 @@ struct machine_specific {
ulong cpu_entry_area_end;
ulong page_offset_force;
char **exception_functions;
+ ulong sme_mask;
};
#define KSYMS_START (0x1)
diff --git a/x86_64.c b/x86_64.c
index f4e5d9e77cef..b2a536e4b19c 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -206,6 +206,10 @@ x86_64_init(int when)
machdep->machspec->kernel_image_size = dtol(string, QUIET, NULL);
free(string);
}
+ if ((string = pc->read_vmcoreinfo("NUMBER(sme_mask)"))) {
+ machdep->machspec->sme_mask = dtol(string, QUIET, NULL);
+ free(string);
+ }
if (SADUMP_DUMPFILE() || QEMU_MEM_DUMP_NO_VMCOREINFO() ||
VMSS_DUMPFILE())
/* Need for calculation of kaslr_offset and phys_base */
@@ -937,6 +941,7 @@ x86_64_dump_machdep_table(ulong arg)
ms->kernel_image_size/MEGABYTES(1));
else
fprintf(fp, "(uninitialized)\n");
+ fprintf(fp, " sme_mask: %lx\n", ms->sme_mask);
fprintf(fp, " physical_mask_shift: %ld\n", ms->physical_mask_shift);
fprintf(fp, " pgdir_shift: %ld\n", ms->pgdir_shift);
fprintf(fp, " GART_start: %lx\n", ms->GART_start);
@@ -1814,7 +1819,7 @@ x86_64_kpgd_offset(ulong kvaddr, int verbose, int IS_XEN)
if (IS_XEN)
fprintf(fp, "PAGE DIRECTORY: %lx [machine]\n", *pgd);
else
- fprintf(fp, "PAGE DIRECTORY: %lx\n", *pgd);
+ fprintf(fp, "PAGE DIRECTORY: %lx\n", *pgd & ~machdep->machspec->sme_mask);
}
return pgd;
@@ -1851,7 +1856,8 @@ x86_64_upgd_offset_legacy(struct task_context *tc, ulong uvaddr, int verbose, in
if (IS_XEN)
fprintf(fp, " PGD: %lx => %lx [machine]\n", (ulong)pud, pud_pte);
else
- fprintf(fp, " PGD: %lx => %lx\n", (ulong)pud, pud_pte);
+ fprintf(fp, " PGD: %lx => %lx\n",
+ (ulong)pud, pud_pte & ~machdep->machspec->sme_mask);
}
return pud_pte;
@@ -1882,7 +1888,8 @@ x86_64_upgd_offset(struct task_context *tc, ulong uvaddr, int verbose, int IS_XE
if (IS_XEN)
fprintf(fp, " PGD: %lx => %lx [machine]\n", (ulong)pgd, pgd_pte);
else
- fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd, pgd_pte);
+ fprintf(fp, " PGD: %lx => %lx\n",
+ (ulong)pgd, pgd_pte & ~machdep->machspec->sme_mask);
}
return pgd_pte;
@@ -1900,9 +1907,11 @@ x86_64_p4d_offset(ulong pgd_pte, ulong vaddr, int verbose, int IS_XEN)
ulong p4d_pte;
p4d_paddr = pgd_pte & PHYSICAL_PAGE_MASK;
+ p4d_paddr &= ~machdep->machspec->sme_mask;
FILL_P4D(p4d_paddr, PHYSADDR, PAGESIZE());
p4d = ((ulong *)p4d_paddr) + p4d_index(vaddr);
p4d_pte = ULONG(machdep->machspec->p4d + PAGEOFFSET(p4d));
+ p4d_pte &= ~machdep->machspec->sme_mask;
if (verbose) {
if (IS_XEN)
fprintf(fp, " P4D: %lx => %lx [machine]\n", (ulong)p4d, p4d_pte);
@@ -1925,6 +1934,7 @@ x86_64_pud_offset(ulong pgd_pte, ulong vaddr, int verbose, int IS_XEN)
ulong pud_pte;
pud_paddr = pgd_pte & PHYSICAL_PAGE_MASK;
+ pud_paddr &= ~machdep->machspec->sme_mask;
if (IS_XEN) {
pud_paddr = xen_m2p(pud_paddr);
@@ -1935,6 +1945,7 @@ x86_64_pud_offset(ulong pgd_pte, ulong vaddr, int verbose, int IS_XEN)
FILL_PUD(pud_paddr, PHYSADDR, PAGESIZE());
pud = ((ulong *)pud_paddr) + pud_index(vaddr);
pud_pte = ULONG(machdep->pud + PAGEOFFSET(pud));
+ pud_pte &= ~machdep->machspec->sme_mask;
if (verbose) {
if (IS_XEN)
fprintf(fp, " PUD: %lx => %lx [machine]\n", (ulong)pud, pud_pte);
@@ -1957,6 +1968,7 @@ x86_64_pmd_offset(ulong pud_pte, ulong vaddr, int verbose, int IS_XEN)
ulong pmd_pte;
pmd_paddr = pud_pte & PHYSICAL_PAGE_MASK;
+ pmd_paddr &= ~machdep->machspec->sme_mask;
if (IS_XEN) {
pmd_paddr = xen_m2p(pmd_paddr);
@@ -1967,6 +1979,7 @@ x86_64_pmd_offset(ulong pud_pte, ulong vaddr, int verbose, int IS_XEN)
FILL_PMD(pmd_paddr, PHYSADDR, PAGESIZE());
pmd = ((ulong *)pmd_paddr) + pmd_index(vaddr);
pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(pmd));
+ pmd_pte &= ~machdep->machspec->sme_mask;
if (verbose) {
if (IS_XEN)
fprintf(fp, " PMD: %lx => %lx [machine]\n", (ulong)pmd, pmd_pte);
@@ -1988,6 +2001,7 @@ x86_64_pte_offset(ulong pmd_pte, ulong vaddr, int verbose, int IS_XEN)
ulong pte;
pte_paddr = pmd_pte & PHYSICAL_PAGE_MASK;
+ pte_paddr &= ~machdep->machspec->sme_mask;
if (IS_XEN) {
pte_paddr = xen_m2p(pte_paddr);
@@ -1998,6 +2012,7 @@ x86_64_pte_offset(ulong pmd_pte, ulong vaddr, int verbose, int IS_XEN)
FILL_PTBL(pte_paddr, PHYSADDR, PAGESIZE());
ptep = ((ulong *)pte_paddr) + pte_index(vaddr);
pte = ULONG(machdep->ptbl + PAGEOFFSET(ptep));
+ pte &= ~machdep->machspec->sme_mask;
if (verbose) {
if (IS_XEN)
fprintf(fp, " PTE: %lx => %lx [machine]\n", (ulong)ptep, pte);
--
2.37.1

View File

@ -1,34 +0,0 @@
From 6be4f19f32bfa6d5a52580b733afca8a5b849c23 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Mon, 22 Aug 2022 11:59:46 +0900
Subject: [PATCH 31/89] Makefile: Fix unnecessary re-patching with
coreutils-9.0
"sum" command in coreutils-9.0 (e.g. Fedora 36) started to output a file
name. As a result, "make" always detects a change of gdb-10.2.patch
wrongly and re-applies it unnecessarily.
Use standard input to fix it and "md5sum" to improve detection.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index c031db4ca23f..2549d1d39273 100644
--- a/Makefile
+++ b/Makefile
@@ -269,7 +269,7 @@ rebuild:
@if [ ! -f ${GDB}/${GDB}.patch ]; then \
touch ${GDB}/${GDB}.patch; fi
@if [ -f ${GDB}.patch ] && [ -s ${GDB}.patch ] && \
- [ "`sum ${GDB}.patch`" != "`sum ${GDB}/${GDB}.patch`" ]; then \
+ [ "`md5sum < ${GDB}.patch`" != "`md5sum < ${GDB}/${GDB}.patch`" ]; then \
(sh -x ${GDB}.patch ${TARGET}; patch -N -p0 -r- --fuzz=0 < ${GDB}.patch; cp ${GDB}.patch ${GDB}; cd ${GDB}; \
$(MAKE) CRASH_TARGET=${TARGET}) \
else (cd ${GDB}/gdb; $(MAKE) CRASH_TARGET=${TARGET}); fi
--
2.37.1

View File

@ -1,111 +0,0 @@
From 1bd4bd41e67dae5a970d34f33c566812f9f2c6af Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie@os.amperecomputing.com>
Date: Mon, 22 Aug 2022 09:29:32 +0000
Subject: [PATCH 32/89] arm64: use TCR_EL1_T1SZ to get the correct info if
vabits_actual is missing
After kernel commit 0d9b1ffefabe ("arm64: mm: make vabits_actual a build
time constant if possible"), the vabits_actual is not compiled to kernel
symbols when "VA_BITS > 48" is false.
So the crash will not find the vabits_actual symbol, and it will fail
in the end like this:
# ./crash
...
WARNING: VA_BITS: calculated: 46 vmcoreinfo: 48
crash: invalid kernel virtual address: ffff88177ffff000 type: "pud page"
This patch introduces the arm64_set_va_bits_by_tcr(), and if crash cannot
find vabits_actual symbol, it will use the TCR_EL1_T1SZ register to get
the correct VA_BITS_ACTUAL/VA_BITS/VA_START.
Tested this patch with:
1.) the live mode with /proc/kcore
2.) the kdump file with /proc/vmcore.
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 51 ++++++++++++++++++++++++++++++++++-----------------
1 file changed, 34 insertions(+), 17 deletions(-)
diff --git a/arm64.c b/arm64.c
index b6b7aa11f4fe..c3e26a371a61 100644
--- a/arm64.c
+++ b/arm64.c
@@ -4586,6 +4586,36 @@ arm64_IS_VMALLOC_ADDR(ulong vaddr)
(vaddr >= ms->modules_vaddr && vaddr <= ms->modules_end));
}
+/* Return TRUE if we succeed, return FALSE on failure. */
+static int
+arm64_set_va_bits_by_tcr(void)
+{
+ ulong value;
+ char *string;
+
+ if ((string = pc->read_vmcoreinfo("NUMBER(TCR_EL1_T1SZ)")) ||
+ (string = pc->read_vmcoreinfo("NUMBER(tcr_el1_t1sz)"))) {
+ /* See ARMv8 ARM for the description of
+ * TCR_EL1.T1SZ and how it can be used
+ * to calculate the vabits_actual
+ * supported by underlying kernel.
+ *
+ * Basically:
+ * vabits_actual = 64 - T1SZ;
+ */
+ value = 64 - strtoll(string, NULL, 0);
+ if (CRASHDEBUG(1))
+ fprintf(fp, "vmcoreinfo : vabits_actual: %ld\n", value);
+ free(string);
+ machdep->machspec->VA_BITS_ACTUAL = value;
+ machdep->machspec->VA_BITS = value;
+ machdep->machspec->VA_START = _VA_START(machdep->machspec->VA_BITS_ACTUAL);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
static void
arm64_calc_VA_BITS(void)
{
@@ -4616,23 +4646,8 @@ arm64_calc_VA_BITS(void)
} else if (ACTIVE())
error(FATAL, "cannot determine VA_BITS_ACTUAL: please use /proc/kcore\n");
else {
- if ((string = pc->read_vmcoreinfo("NUMBER(TCR_EL1_T1SZ)")) ||
- (string = pc->read_vmcoreinfo("NUMBER(tcr_el1_t1sz)"))) {
- /* See ARMv8 ARM for the description of
- * TCR_EL1.T1SZ and how it can be used
- * to calculate the vabits_actual
- * supported by underlying kernel.
- *
- * Basically:
- * vabits_actual = 64 - T1SZ;
- */
- value = 64 - strtoll(string, NULL, 0);
- if (CRASHDEBUG(1))
- fprintf(fp, "vmcoreinfo : vabits_actual: %ld\n", value);
- free(string);
- machdep->machspec->VA_BITS_ACTUAL = value;
- machdep->machspec->VA_BITS = value;
- machdep->machspec->VA_START = _VA_START(machdep->machspec->VA_BITS_ACTUAL);
+ if (arm64_set_va_bits_by_tcr()) {
+ /* nothing */
} else if (machdep->machspec->VA_BITS_ACTUAL) {
machdep->machspec->VA_BITS = machdep->machspec->VA_BITS_ACTUAL;
machdep->machspec->VA_START = _VA_START(machdep->machspec->VA_BITS_ACTUAL);
@@ -4654,6 +4669,8 @@ arm64_calc_VA_BITS(void)
*/
machdep->flags |= FLIPPED_VM;
return;
+ } else if (arm64_set_va_bits_by_tcr()) {
+ return;
}
if (!(sp = symbol_search("swapper_pg_dir")) &&
--
2.37.1

View File

@ -1,66 +0,0 @@
From 5df8fdf1f591176682e6fac18e6631c3a2aab1dc Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Thu, 25 Aug 2022 14:39:44 +0800
Subject: [PATCH 33/89] Fix "task -R" by adding end identifier for union in
task_struct
Previously, the start and end identifiers for union are " {\n" and
" }, \n". However the end identifier is not always as expected.
" },\n" can also be the end identifier with gdb-10.2. As a result,
variable "randomized" is in incorrect state after union, and fails to
identify the later struct members. For example, we can reproduce the
issue as follows:
crash> task
PID: 847 TASK: ffff94f8038f4000 CPU: 72 COMMAND: "khungtaskd"
struct task_struct {
thread_info = {
flags = 2148024320,
status = 0,
preempt_lazy_count = 0
},
{
<the union>
},
...
wake_entry = {
next = 0x0
},
...
Before patch:
crash> task -R wake_entry
PID: 847 TASK: ffff94f8038f4000 CPU: 72 COMMAND: "khungtaskd"
After patch:
crash> task -R wake_entry
PID: 847 TASK: ffff94f8038f4000 CPU: 72 COMMAND: "khungtaskd"
wake_entry = {
next = 0x0
},
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
task.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/task.c b/task.c
index 071c787fbfa5..db2abc8106a2 100644
--- a/task.c
+++ b/task.c
@@ -3436,7 +3436,8 @@ parse_task_thread(int argcnt, char *arglist[], struct task_context *tc) {
while (fgets(buf, BUFSIZE, pc->tmpfile)) {
if (STREQ(buf, " {\n"))
randomized = TRUE;
- else if (randomized && STREQ(buf, " }, \n"))
+ else if (randomized &&
+ (STREQ(buf, " }, \n") || STREQ(buf, " },\n")))
randomized = FALSE;
if (strlen(lookfor2)) {
--
2.37.1

View File

@ -1,133 +0,0 @@
From 38064b8d5866f8a4f7e7867f187ad95e67f16209 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Wed, 16 Nov 2022 17:36:03 +0800
Subject: [PATCH 34/89] Let gdb get kernel module symbols info from crash
Gdb will try to resolve an address to its corresponding symbol name such as
when printing a structure. It works fine for kernel symbols, because gdb can
find them through vmlinux. However as for kernel modules symbols, crash
resolves them by dig into "struct module", which gdb don't know. As a result,
gdb fails to translate a kernel module address to its symbol name without
"mod -s|-S" options. For example we can reproduce the issue as follows.
crash> timer
....
4331308176 336 ffff94ea24240860 ffffffffc03762c0 <estimation_timer>
....
crash> sym 0xffffffffc03762c0
ffffffffc03762c0 (t) estimation_timer [ip_vs]
Before patch:
crash> timer_list ffff94ea24240860
struct timer_list {
....
function = 0xffffffffc03762c0,
....
}
After patch:
crash> timer_list ffff94ea24240860
struct timer_list {
....
function = 0xffffffffc03762c0 <estimation_timer>,
....
}
In this patch, we add an interface for gdb, when gdb trying to build kernel
module's address symbolic, the info can be get from crash.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 ++
gdb-7.6.patch | 33 +++++++++++++++++++++++++++++++++
gdb_interface.c | 12 ++++++++++++
3 files changed, 47 insertions(+)
diff --git a/defs.h b/defs.h
index f8fbfdfd1152..b7d76330141a 100644
--- a/defs.h
+++ b/defs.h
@@ -4877,6 +4877,7 @@ int patch_kernel_symbol(struct gnu_request *);
struct syment *symbol_search(char *);
int gdb_line_number_callback(ulong, ulong, ulong);
int gdb_print_callback(ulong);
+char *gdb_lookup_module_symbol(ulong, ulong *);
#endif
#ifndef GDB_COMMON
@@ -7291,6 +7292,7 @@ int gdb_pass_through(char *, FILE *, ulong);
int gdb_readmem_callback(ulong, void *, int, int);
int gdb_line_number_callback(ulong, ulong, ulong);
int gdb_print_callback(ulong);
+char *gdb_lookup_module_symbol(ulong, ulong *);
void gdb_error_hook(void);
void restore_gdb_sanity(void);
int is_gdb_command(int, ulong);
diff --git a/gdb-7.6.patch b/gdb-7.6.patch
index c63ad7d81cb0..d0becd055666 100644
--- a/gdb-7.6.patch
+++ b/gdb-7.6.patch
@@ -2568,3 +2568,36 @@ diff -up gdb-7.6/opcodes/configure.orig gdb-7.6/opcodes/configure
$(CC_LD) $(INTERNAL_LDFLAGS) $(WIN32LDAPP) \
-o $(shell /bin/cat mergeobj) $(LIBGDB_OBS) \
$(TDEPLIBS) $(TUI_LIBRARY) $(CLIBS) $(LOADLIBES) $(shell /bin/cat mergelibs)
+--- gdb-7.6/gdb/printcmd.c.orig
++++ gdb-7.6/gdb/printcmd.c
+@@ -622,6 +622,10 @@ print_address_symbolic (struct gdbarch *gdbarch, CORE_ADDR addr,
+ return 1;
+ }
+
++#ifdef CRASH_MERGE
++extern char *gdb_lookup_module_symbol(unsigned long, unsigned long *);
++#endif
++
+ /* Given an address ADDR return all the elements needed to print the
+ address in a symbolic form. NAME can be mangled or not depending
+ on DO_DEMANGLE (and also on the asm_demangle global variable,
+@@ -710,7 +714,19 @@ build_address_symbolic (struct gdbarch *gdbarch,
+ }
+ }
+ if (symbol == NULL && msymbol == NULL)
++#ifdef CRASH_MERGE
++ {
++ char *name_ptr = gdb_lookup_module_symbol(addr, (unsigned long *)offset);
++ if (name_ptr) {
++ *name = xstrdup (name_ptr);
++ return 0;
++ } else {
++ return 1;
++ }
++ }
++#else
+ return 1;
++#endif
+
+ /* If the nearest symbol is too far away, don't print anything symbolic. */
+
diff --git a/gdb_interface.c b/gdb_interface.c
index 1f10006a2d63..95298a94c702 100644
--- a/gdb_interface.c
+++ b/gdb_interface.c
@@ -945,6 +945,18 @@ gdb_print_callback(ulong addr)
return IS_KVADDR(addr);
}
+char *
+gdb_lookup_module_symbol(ulong addr, ulong *offset)
+{
+ struct syment *sp;
+
+ if ((sp = value_search(addr, offset))) {
+ return sp->name;
+ } else {
+ return NULL;
+ }
+}
+
/*
* Used by gdb_interface() to catch gdb-related errors, if desired.
*/
--
2.37.1

View File

@ -1,65 +0,0 @@
From 6a766b5159ebb971a399bb32a8a58599df977cdb Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Wed, 16 Nov 2022 20:09:22 +0800
Subject: [PATCH 35/89] x86_64: Correct the identifier when locating the call
instruction
The previous implementation to locate the call instruction is
to strstr "call", then check whether the previous char is ' '
or '\t'. The implementation is problematic. For example it
cannot resolve the following disassembly string:
"0xffffffffc0995378 <nfs41_callback_svc+344>:\tcall 0xffffffff8ecfa4c0 <schedule>\n"
strstr will locate the "_call" and char check fails,
as a result, extract_hex fails to get the calling address.
NOTE: the issue is more likely to be reproduced when patch[1] applied.
Because without patch[1], the disassembly string will be as follows,
so the issue is no longer reproducible.
"0xffffffffc0995378:\tcall 0xffffffff8ecfa4c0 <schedule>\n"
Before the patch:
crash> bt 1472
PID: 1472 TASK: ffff8c121fa72f70 CPU: 18 COMMAND: "nfsv4.1-svc"
#0 [ffff8c16231a3db8] __schedule at ffffffff8ecf9ef3
#1 [ffff8c16231a3e40] schedule at ffffffff8ecfa4e9
After the patch:
crash> bt 1472
PID: 1472 TASK: ffff8c121fa72f70 CPU: 18 COMMAND: "nfsv4.1-svc"
#0 [ffff8c16231a3db8] __schedule at ffffffff8ecf9ef3
#1 [ffff8c16231a3e40] schedule at ffffffff8ecfa4e9
#2 [ffff8c16231a3e50] nfs41_callback_svc at ffffffffc099537d [nfsv4]
#3 [ffff8c16231a3ec8] kthread at ffffffff8e6b966f
#4 [ffff8c16231a3f50] ret_from_fork at ffffffff8ed07898
This patch fix the issue by strstr "\tcall" and " call", to
locate the correct call instruction.
[1]: https://listman.redhat.com/archives/crash-utility/2022-August/010085.html
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
x86_64.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/x86_64.c b/x86_64.c
index b2a536e4b19c..292c240e887e 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -4429,8 +4429,7 @@ x86_64_function_called_by(ulong rip)
if (gdb_pass_through(buf, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
rewind(pc->tmpfile2);
while (fgets(buf, BUFSIZE, pc->tmpfile2)) {
- if ((p1 = strstr(buf, "callq")) &&
- whitespace(*(p1-1))) {
+ if ((p1 = strstr(buf, " callq")) || (p1 = strstr(buf, "\tcallq"))) {
if (extract_hex(p1, &value, NULLCHAR, TRUE))
break;
}
--
2.37.1

View File

@ -1,40 +0,0 @@
From 792730b32cb15b59af2833cfd6819b8408b20e89 Mon Sep 17 00:00:00 2001
From: "Chunguang.Xu" <chunguang.xu@shopee.com>
Date: Thu, 25 Aug 2022 12:07:20 +0800
Subject: [PATCH 36/89] Add debian/ubuntu vmlinux location to default search
dirs
Now crash cannot find debian/ubuntu kernel vmlinux, we need to
explicitly specify the path to vmlinux. Try to add the debian
vmlinux location to default search directories.
Signed-off-by: Chunguang Xu <chunguang.xu@shopee.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
filesys.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/filesys.c b/filesys.c
index a863f04eb250..c2ea78de821d 100644
--- a/filesys.c
+++ b/filesys.c
@@ -319,7 +319,7 @@ match_proc_version(void)
#define CREATE 1
#define DESTROY 0
-#define DEFAULT_SEARCHDIRS 5
+#define DEFAULT_SEARCHDIRS 6
#define EXTRA_SEARCHDIRS 5
static char **
@@ -336,6 +336,7 @@ build_searchdirs(int create, int *preferred)
"/boot/",
"/boot/efi/redhat",
"/boot/efi/EFI/redhat",
+ "/usr/lib/debug/boot/",
"/",
NULL
};
--
2.37.1

View File

@ -1,102 +0,0 @@
From 1312fdbc514dc47e591b2d94994c6f467581c6f1 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 1 Sep 2022 14:03:09 +0900
Subject: [PATCH 37/89] Fix gcc-12 compiler warnings on lkcd_*.c
Without the patch, the following gcc-12 compiler warnings are emitted
for lkcd_*.c:
lkcd_v1.c: In function 'dump_lkcd_environment_v1':
lkcd_v1.c:252:20: warning: the comparison will always evaluate as 'true' for the address of 'dh_panic_string' will never be NULL [-Waddress]
252 | dh && dh->dh_panic_string &&
| ^~
In file included from lkcd_v1.c:21:
lkcd_vmdump_v1.h:108:30: note: 'dh_panic_string' declared here
108 | char dh_panic_string[DUMP_PANIC_LEN];
| ^~~~~~~~~~~~~~~
...
Reported-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
lkcd_v1.c | 3 +--
lkcd_v2_v3.c | 3 +--
lkcd_v5.c | 3 +--
lkcd_v7.c | 3 +--
lkcd_v8.c | 3 +--
5 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/lkcd_v1.c b/lkcd_v1.c
index 5f891aeb1ce3..31bdc04c5fdd 100644
--- a/lkcd_v1.c
+++ b/lkcd_v1.c
@@ -249,8 +249,7 @@ dump_header_only:
lkcd_print(" dh_eip: %lx\n", dh->dh_eip);
lkcd_print(" dh_num_pages: %d\n", dh->dh_num_pages);
lkcd_print(" dh_panic_string: %s%s", dh->dh_panic_string,
- dh && dh->dh_panic_string &&
- strstr(dh->dh_panic_string, "\n") ? "" : "\n");
+ dh && strstr(dh->dh_panic_string, "\n") ? "" : "\n");
lkcd_print(" dh_time: %s\n",
strip_linefeeds(ctime(&(dh->dh_time.tv_sec))));
diff --git a/lkcd_v2_v3.c b/lkcd_v2_v3.c
index 8635a7b07c77..edcb6637a55e 100644
--- a/lkcd_v2_v3.c
+++ b/lkcd_v2_v3.c
@@ -307,8 +307,7 @@ dump_header_only:
lkcd_print(" dh_num_pages: ");
lkcd_print(BITS32() ? "%ld\n" : "%d\n", dh->dh_num_pages);
lkcd_print(" dh_panic_string: %s%s", dh->dh_panic_string,
- dh && dh->dh_panic_string &&
- strstr(dh->dh_panic_string, "\n") ? "" : "\n");
+ dh && strstr(dh->dh_panic_string, "\n") ? "" : "\n");
lkcd_print(" dh_time: %s\n",
strip_linefeeds(ctime(&(dh->dh_time.tv_sec))));
diff --git a/lkcd_v5.c b/lkcd_v5.c
index cb7634d094ec..e3bfa6f4b272 100644
--- a/lkcd_v5.c
+++ b/lkcd_v5.c
@@ -270,8 +270,7 @@ dump_header_only:
lkcd_print(" dh_num_pages: ");
lkcd_print(BITS32() ? "%ld\n" : "%d\n", dh->dh_num_pages);
lkcd_print(" dh_panic_string: %s%s", dh->dh_panic_string,
- dh && dh->dh_panic_string &&
- strstr(dh->dh_panic_string, "\n") ? "" : "\n");
+ dh && strstr(dh->dh_panic_string, "\n") ? "" : "\n");
lkcd_print(" dh_time: %s\n",
strip_linefeeds(ctime(&(dh->dh_time.tv_sec))));
diff --git a/lkcd_v7.c b/lkcd_v7.c
index 608e1481f01d..97d99008635a 100644
--- a/lkcd_v7.c
+++ b/lkcd_v7.c
@@ -347,8 +347,7 @@ dump_header_only:
lkcd_print(" dh_num_pages: ");
lkcd_print(BITS32() ? "%ld\n" : "%d\n", dh->dh_num_pages);
lkcd_print(" dh_panic_string: %s%s", dh->dh_panic_string,
- dh && dh->dh_panic_string &&
- strstr(dh->dh_panic_string, "\n") ? "" : "\n");
+ dh && strstr(dh->dh_panic_string, "\n") ? "" : "\n");
lkcd_print(" dh_time: %s\n",
strip_linefeeds(ctime(&(dh->dh_time.tv_sec))));
diff --git a/lkcd_v8.c b/lkcd_v8.c
index 3b355e056123..4167fa5e4292 100644
--- a/lkcd_v8.c
+++ b/lkcd_v8.c
@@ -543,8 +543,7 @@ dump_header_only:
lkcd_print(" dh_num_pages: ");
lkcd_print(BITS32() ? "%ld\n" : "%d\n", dh->dh_num_pages);
lkcd_print(" dh_panic_string: %s%s", dh->dh_panic_string,
- dh && dh->dh_panic_string &&
- strstr(dh->dh_panic_string, "\n") ? "" : "\n");
+ dh && strstr(dh->dh_panic_string, "\n") ? "" : "\n");
tv.tv_sec = dh->dh_time.tv_sec;
lkcd_print(" dh_time: %s\n",
strip_linefeeds(ctime(&(tv.tv_sec))));
--
2.37.1

View File

@ -1,41 +0,0 @@
From 61e5c455516622cabf4a01c1643ca2175ef9510c Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 16 Nov 2022 20:46:48 +0800
Subject: [PATCH 38/89] Fix for the invalid linux_banner pointer issue
Currently, crash may fail with the following error:
# ./crash -s vmlinux vmcore
WARNING: invalid linux_banner pointer: 65762078756e694c
crash: vmlinux and vmcore do not match!
The reason is that the type of the symbol in the data segment may be
defined as 'D' or 'd'. The crash only handled the type 'D', but it
didn't deal with the type 'd'. For example:
# nm vmlinux | grep linux_banner
ffffffff827cfa80 d linux_banner
It has been observed that a vmlinux compiled by clang has this type.
Let's add the type 'd' recognition to solve such issue.
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
kernel.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel.c b/kernel.c
index bd0bf8c6cf03..2a1c1c391414 100644
--- a/kernel.c
+++ b/kernel.c
@@ -1060,6 +1060,7 @@ verify_version(void)
if (!(sp = symbol_search("linux_banner")))
error(FATAL, "linux_banner symbol does not exist?\n");
else if ((sp->type == 'R') || (sp->type == 'r') ||
+ (THIS_KERNEL_VERSION >= LINUX(2,6,11) && (sp->type == 'D' || sp->type == 'd')) ||
(machine_type("ARM") && sp->type == 'T') ||
(machine_type("ARM64")))
linux_banner = symbol_value("linux_banner");
--
2.37.1

View File

@ -1,57 +0,0 @@
From a72184ebeec704158a2b225fa7fe0869b9b93a93 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Mon, 19 Sep 2022 17:49:21 +0800
Subject: [PATCH 39/89] Fix "kmem" failing to print task context when address
is vmalloced stack
When kernel enabled CONFIG_VMAP_STACK, stack can be allocated to
vmalloced area. Currently crash didn't handle the case, as a result,
"kmem" will not print the task context as expected. This patch fix the
bug by checking if the address is a vmalloced stack first.
Before:
crash> kmem ffffb7efce9bbe28
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
ffff94eb9102c640 ffff94eb9102b140 ffffb7efce9b8000 - ffffb7efce9bd000 20480
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffdd28220dc000 1883700000 0 0 1 50000000000000
After:
crash> kmem ffffb7efce9bbe28
PID: 847
COMMAND: "khungtaskd"
TASK: ffff94f8038f4000 [THREAD_INFO: ffff94f8038f4000]
CPU: 72
STATE: TASK_RUNNING (PANIC)
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
ffff94eb9102c640 ffff94eb9102b140 ffffb7efce9b8000 - ffffb7efce9bd000 20480
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffdd28220dc000 1883700000 0 0 1 50000000000000
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/memory.c b/memory.c
index 7339f0cd0224..9ab578134fa1 100644
--- a/memory.c
+++ b/memory.c
@@ -13477,6 +13477,10 @@ kmem_search(struct meminfo *mi)
* Check for a valid mapped address.
*/
if ((mi->memtype == KVADDR) && IS_VMALLOC_ADDR(mi->spec_addr)) {
+ if ((task = stkptr_to_task(vaddr)) && (tc = task_to_context(task))) {
+ show_context(tc);
+ fprintf(fp, "\n");
+ }
if (kvtop(NULL, mi->spec_addr, &paddr, 0)) {
mi->flags = orig_flags | VMLIST_VERIFY;
dump_vmlist(mi);
--
2.37.1

View File

@ -1,49 +0,0 @@
From f18c391b650ace3cea6e48278b969425a328e4cf Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Mon, 19 Sep 2022 17:49:22 +0800
Subject: [PATCH 40/89] Fix page offset issue when converting physical to
virtual address
When trying to convert a physical address to its virtual
address in dump_vmap_area() and dump_vmlist(), the vi->retval
is added by 2 values: the page aligned address "pcheck"
and page offset address "PAGEOFFSET(paddr)".
However "paddr" is given by "pcheck", is also page aligned,
so "PAGEOFFSET(paddr)" is always 0.
In this patch, we will use PAGEOFFSET(vi->spec_addr) to give the
page offset, vi->spec_addr is the physical address we'd like
to convert, which contains the correct page offset.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/memory.c b/memory.c
index 9ab578134fa1..1b6f9ba17e57 100644
--- a/memory.c
+++ b/memory.c
@@ -8861,7 +8861,7 @@ dump_vmlist(struct meminfo *vi)
(vi->spec_addr < (paddr+PAGESIZE()))) {
if (vi->flags & GET_PHYS_TO_VMALLOC) {
vi->retval = pcheck +
- PAGEOFFSET(paddr);
+ PAGEOFFSET(vi->spec_addr);
return;
} else
fprintf(fp,
@@ -9010,7 +9010,7 @@ dump_vmap_area(struct meminfo *vi)
(vi->spec_addr < (paddr+PAGESIZE()))) {
if (vi->flags & GET_PHYS_TO_VMALLOC) {
vi->retval = pcheck +
- PAGEOFFSET(paddr);
+ PAGEOFFSET(vi->spec_addr);
FREEBUF(ld->list_ptr);
return;
} else
--
2.37.1

View File

@ -1,76 +0,0 @@
From 2c24770e3ceab8eebd9788256967b79145f53123 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Mon, 19 Sep 2022 17:49:23 +0800
Subject: [PATCH 41/89] Let "kmem" print task context with physical address
Patch [1] enables "kmem" to print task context if the given virtual
address is a vmalloced stack.
This patch lets "kmem" print task context also when the given address
is a physical address.
Before:
crash> kmem 1883700e28
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
ffff94eb9102c640 ffff94eb9102b140 ffffb7efce9b8000 - ffffb7efce9bd000 20480
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffdd28220dc000 1883700000 0 0 1 50000000000000
After:
crash> kmem 1883700e28
PID: 847
COMMAND: "khungtaskd"
TASK: ffff94f8038f4000 [THREAD_INFO: ffff94f8038f4000]
CPU: 72
STATE: TASK_RUNNING (PANIC)
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
ffff94eb9102c640 ffff94eb9102b140 ffffb7efce9b8000 - ffffb7efce9bd000 20480
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffdd28220dc000 1883700000 0 0 1 50000000000000
[1]: https://listman.redhat.com/archives/crash-utility/2022-September/010115.html
[ kh: squashed the 4/4 patch into 3/4 ]
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/memory.c b/memory.c
index 1b6f9ba17e57..c80ef61bdcf7 100644
--- a/memory.c
+++ b/memory.c
@@ -13506,6 +13506,10 @@ kmem_search(struct meminfo *mi)
mi->flags &= ~GET_PHYS_TO_VMALLOC;
if (mi->retval) {
+ if ((task = stkptr_to_task(mi->retval)) && (tc = task_to_context(task))) {
+ show_context(tc);
+ fprintf(fp, "\n");
+ }
if ((sp = value_search(mi->retval, &offset))) {
show_symbol(sp, offset,
SHOW_LINENUM | SHOW_RADIX());
@@ -13562,11 +13566,11 @@ kmem_search(struct meminfo *mi)
/*
* Check whether it's a current task or stack address.
*/
- if ((mi->memtype == KVADDR) && (task = vaddr_in_task_struct(vaddr)) &&
+ if ((mi->memtype & (KVADDR|PHYSADDR)) && (task = vaddr_in_task_struct(vaddr)) &&
(tc = task_to_context(task))) {
show_context(tc);
fprintf(fp, "\n");
- } else if ((mi->memtype == KVADDR) && (task = stkptr_to_task(vaddr)) &&
+ } else if ((mi->memtype & (KVADDR|PHYSADDR)) && (task = stkptr_to_task(vaddr)) &&
(tc = task_to_context(task))) {
show_context(tc);
fprintf(fp, "\n");
--
2.37.1

View File

@ -1,103 +0,0 @@
From e39a2560928ef6f7e8dad80cb9a7521bdf0495e3 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Tue, 4 Oct 2022 18:57:11 +0800
Subject: [PATCH 42/89] ppc64: still allow to move on if the emergency stacks
info fails to initialize
Currently crash will fail and then exit, if the initialization of
the emergency stacks information fails. In real customer environments,
sometimes, a vmcore may be partially damaged, although such vmcores
are rare. For example:
# ./crash ../3.10.0-1127.18.2.el7.ppc64le/vmcore ../3.10.0-1127.18.2.el7.ppc64le/vmlinux -s
crash: invalid kernel virtual address: 38 type: "paca->emergency_sp"
#
Lets try to keep loading vmcore if such issues happen, so call
the readmem() with the RETURN_ON_ERROR instead of FAULT_ON_ERROR,
which allows the crash move on.
Reported-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/ppc64.c b/ppc64.c
index 4ea1f7c0c6f8..b95a621d8fe4 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -1224,13 +1224,13 @@ ppc64_init_paca_info(void)
ulong paca_loc;
readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc, sizeof(void *),
- "paca double pointer", FAULT_ON_ERROR);
+ "paca double pointer", RETURN_ON_ERROR);
readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
- "paca pointers", FAULT_ON_ERROR);
+ "paca pointers", RETURN_ON_ERROR);
} else if (symbol_exists("paca") &&
(get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR)) {
readmem(symbol_value("paca"), KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
- "paca pointers", FAULT_ON_ERROR);
+ "paca pointers", RETURN_ON_ERROR);
} else {
free(paca_ptr);
return;
@@ -1245,7 +1245,7 @@ ppc64_init_paca_info(void)
for (i = 0; i < kt->cpus; i++)
readmem(paca_ptr[i] + offset, KVADDR, &ms->emergency_sp[i],
sizeof(void *), "paca->emergency_sp",
- FAULT_ON_ERROR);
+ RETURN_ON_ERROR);
}
if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) {
@@ -1256,7 +1256,7 @@ ppc64_init_paca_info(void)
for (i = 0; i < kt->cpus; i++)
readmem(paca_ptr[i] + offset, KVADDR, &ms->nmi_emergency_sp[i],
sizeof(void *), "paca->nmi_emergency_sp",
- FAULT_ON_ERROR);
+ RETURN_ON_ERROR);
}
if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) {
@@ -1267,7 +1267,7 @@ ppc64_init_paca_info(void)
for (i = 0; i < kt->cpus; i++)
readmem(paca_ptr[i] + offset, KVADDR, &ms->mc_emergency_sp[i],
sizeof(void *), "paca->mc_emergency_sp",
- FAULT_ON_ERROR);
+ RETURN_ON_ERROR);
}
free(paca_ptr);
@@ -1947,7 +1947,7 @@ ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
if (cpu < 0 || cpu >= kt->cpus)
return NONE_STACK;
- if (ms->emergency_sp) {
+ if (ms->emergency_sp && IS_KVADDR(ms->emergency_sp[cpu])) {
top = ms->emergency_sp[cpu];
base = top - STACKSIZE();
if (addr >= base && addr < top) {
@@ -1957,7 +1957,7 @@ ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
}
}
- if (ms->nmi_emergency_sp) {
+ if (ms->nmi_emergency_sp && IS_KVADDR(ms->nmi_emergency_sp[cpu])) {
top = ms->nmi_emergency_sp[cpu];
base = top - STACKSIZE();
if (addr >= base && addr < top) {
@@ -1967,7 +1967,7 @@ ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
}
}
- if (ms->mc_emergency_sp) {
+ if (ms->mc_emergency_sp && IS_KVADDR(ms->mc_emergency_sp[cpu])) {
top = ms->mc_emergency_sp[cpu];
base = top - STACKSIZE();
if (addr >= base && addr < top) {
--
2.37.1

View File

@ -1,39 +0,0 @@
From 975265a8056566ace8eaa6cf532fd42754e915a9 Mon Sep 17 00:00:00 2001
From: Matias Ezequiel Vara Larsen <matiasevara@gmail.com>
Date: Mon, 24 Oct 2022 11:35:29 +0200
Subject: [PATCH 43/89] Fix segmentation fault in
page_flags_init_from_pageflag_names()
When read_string() fails in page_flags_init_from_pageflag_names(),
error() dereferences the name variable to print the string that the
variable points to. However, name points to a string that is not in
crash's memory-space thus triggering a segmentation fault.
This patch replaces "%s" in the error message with "%lx" so the address
is printed instead. Also replaces "%ld" for mask with "%lx".
[ kh: changed the conversion specifiers and commit message ]
Signed-off-by: Matias Ezequiel Vara Larsen <matias.vara@vates.fr>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/memory.c b/memory.c
index c80ef61bdcf7..8724c4aa3d8a 100644
--- a/memory.c
+++ b/memory.c
@@ -6599,7 +6599,7 @@ page_flags_init_from_pageflag_names(void)
}
if (!read_string((ulong)name, namebuf, BUFSIZE-1)) {
- error(INFO, "failed to read pageflag_names entry (i: %d name: \"%s\" mask: %ld)\n",
+ error(INFO, "failed to read pageflag_names entry (i: %d name: %lx mask: %lx)\n",
i, name, mask);
goto pageflags_fail;
}
--
2.37.1

View File

@ -1,86 +0,0 @@
From 4a6f318a39f78648a3bb13d74e31e3a331254e30 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 9 Nov 2022 14:21:57 +0800
Subject: [PATCH 44/89] Fix for "ps/vm" commands to display correct %MEM and
RSS values
The ps/vm commands may print the bogus value of the %MEM and RSS, the
reason is that the counter of rss stat is updated in asynchronous manner
and may become negative, when the SPLIT_RSS_COUNTING is enabled in kernel.
As a result, crash will read it from memory and convert from negative to
unsigned long integer, eventually it overflows and gets a big integer. For
example:
crash> ps 1393
PID PPID CPU TASK ST %MEM VSZ RSS COMM
1393 1 24 ffff9584bb542100 RU 541298032135.9 4132 18014398509481908 enlinuxpc64
^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^
This is unexpected, crash needs to correct its value for this case.
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/memory.c b/memory.c
index 8724c4aa3d8a..9c15c1b745ef 100644
--- a/memory.c
+++ b/memory.c
@@ -4714,18 +4714,29 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
* Latest kernels have mm_struct.mm_rss_stat[].
*/
if (VALID_MEMBER(mm_struct_rss_stat)) {
- long anonpages, filepages;
+ long anonpages, filepages, count;
anonpages = tt->anonpages;
filepages = tt->filepages;
- rss += LONG(tt->mm_struct +
+ count = LONG(tt->mm_struct +
OFFSET(mm_struct_rss_stat) +
OFFSET(mm_rss_stat_count) +
(filepages * sizeof(long)));
- rss += LONG(tt->mm_struct +
+
+ /*
+ * The counter is updated in asynchronous manner
+ * and may become negative, see:
+ * include/linux/mm.h: get_mm_counter()
+ */
+ if (count > 0)
+ rss += count;
+
+ count = LONG(tt->mm_struct +
OFFSET(mm_struct_rss_stat) +
OFFSET(mm_rss_stat_count) +
(anonpages * sizeof(long)));
+ if (count > 0)
+ rss += count;
}
/* Check whether SPLIT_RSS_COUNTING is enabled */
@@ -4769,7 +4780,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
RETURN_ON_ERROR))
continue;
- rss_cache += sync_rss;
+ if (sync_rss > 0)
+ rss_cache += sync_rss;
/* count 1 -> anonpages */
if (!readmem(first->task +
@@ -4782,7 +4794,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
RETURN_ON_ERROR))
continue;
- rss_cache += sync_rss;
+ if (sync_rss > 0)
+ rss_cache += sync_rss;
if (first == last)
break;
--
2.37.1

View File

@ -1,72 +0,0 @@
From acd2b8a54e4532f859173fe9f0d1b92b44cbc848 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Tue, 29 Nov 2022 14:05:26 +0000
Subject: [PATCH 45/89] ps: Provide an option to display no header line
One might often find it useful to redirect/or filter the output
generated by the 'ps' command. This simple patch provides an option
(i.e. '-H') to display no header line so it does not need to be
considered e.g.
crash> ps -u -H | head -5
1 0 1 ffff956e8028d280 IN 0.0 174276 9272 systemd
1067 1 2 ffff956e81380000 IN 0.1 59480 15788 systemd-journal
1080 1 0 ffff956e8d152940 IN 0.0 36196 3548 systemd-udevd
1278 1 6 ffff956e8aa60000 IN 0.0 17664 3072 systemd-oomd
1366 1 7 ffff956e88548000 IN 0.0 10868 2328 dbus-broker-lau
Signed-off-by: Aaron Tomlin <atomlin@atomlin.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
help.c | 3 ++-
task.c | 6 +++++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/help.c b/help.c
index e1ac6f93fde2..2cb570b244f4 100644
--- a/help.c
+++ b/help.c
@@ -1385,7 +1385,7 @@ NULL
char *help_ps[] = {
"ps",
"display process status information",
-"[-k|-u|-G|-y policy] [-s] [-p|-c|-t|-[l|m][-C cpu]|-a|-g|-r|-S|-A]\n [pid | task | command] ...",
+"[-k|-u|-G|-y policy] [-s] [-p|-c|-t|-[l|m][-C cpu]|-a|-g|-r|-S|-A|-H]\n [pid | task | command] ...",
" This command displays process status for selected, or all, processes" ,
" in the system. If no arguments are entered, the process data is",
" is displayed for all processes. Specific processes may be selected",
@@ -1464,6 +1464,7 @@ char *help_ps[] = {
" -r display resource limits (rlimits) of selected, or all, tasks.",
" -S display a summary consisting of the number of tasks in a task state.",
" -A display only the active task on each cpu.",
+" -H display no header line.",
"\nEXAMPLES",
" Show the process status of all current tasks:\n",
" %s> ps",
diff --git a/task.c b/task.c
index db2abc8106a2..88941c7b0e4d 100644
--- a/task.c
+++ b/task.c
@@ -3504,7 +3504,7 @@ cmd_ps(void)
cpuspec = NULL;
flag = 0;
- while ((c = getopt(argcnt, args, "ASgstcpkuGlmarC:y:")) != EOF) {
+ while ((c = getopt(argcnt, args, "HASgstcpkuGlmarC:y:")) != EOF) {
switch(c)
{
case 'k':
@@ -3615,6 +3615,10 @@ cmd_ps(void)
flag |= PS_ACTIVE;
break;
+ case 'H':
+ flag |= PS_NO_HEADER;
+ break;
+
default:
argerrs++;
break;
--
2.37.1

View File

@ -1,150 +0,0 @@
From 6122f0d067ed5c522ad11fb91b6db3ea1d34f355 Mon Sep 17 00:00:00 2001
From: Ding Hui <dinghui@sangfor.com.cn>
Date: Thu, 1 Dec 2022 15:01:45 +0800
Subject: [PATCH 46/89] arm64: fix backtraces of KASAN kernel dumpfile
truncated
We met "bt" command on KASAN kernel vmcore display truncated backtraces
like this:
crash> bt
PID: 4131 TASK: ffff8001521df000 CPU: 3 COMMAND: "bash"
#0 [ffff2000224b0cb0] machine_kexec_prepare at ffff2000200bff4c
After digging the root cause, it turns out that arm64_in_kdump_text()
found wrong bt->bptr at "machine_kexec" branch.
Disassemble machine_kexec() of KASAN vmlinux (gcc 7.3.0):
crash> dis -x machine_kexec
0xffff2000200bff50 <machine_kexec>: stp x29, x30, [sp,#-208]!
0xffff2000200bff54 <machine_kexec+0x4>: mov x29, sp
0xffff2000200bff58 <machine_kexec+0x8>: stp x19, x20, [sp,#16]
0xffff2000200bff5c <machine_kexec+0xc>: str x24, [sp,#56]
0xffff2000200bff60 <machine_kexec+0x10>: str x26, [sp,#72]
0xffff2000200bff64 <machine_kexec+0x14>: mov x2, #0x8ab3
0xffff2000200bff68 <machine_kexec+0x18>: add x1, x29, #0x70
0xffff2000200bff6c <machine_kexec+0x1c>: lsr x1, x1, #3
0xffff2000200bff70 <machine_kexec+0x20>: movk x2, #0x41b5, lsl #16
0xffff2000200bff74 <machine_kexec+0x24>: mov x19, #0x200000000000
0xffff2000200bff78 <machine_kexec+0x28>: adrp x3, 0xffff2000224b0000
0xffff2000200bff7c <machine_kexec+0x2c>: movk x19, #0xdfff, lsl #48
0xffff2000200bff80 <machine_kexec+0x30>: add x3, x3, #0xcb0
0xffff2000200bff84 <machine_kexec+0x34>: add x4, x1, x19
0xffff2000200bff88 <machine_kexec+0x38>: stp x2, x3, [x29,#112]
0xffff2000200bff8c <machine_kexec+0x3c>: adrp x2, 0xffff2000200bf000 <swsusp_arch_resume+0x1e8>
0xffff2000200bff90 <machine_kexec+0x40>: add x2, x2, #0xf50
0xffff2000200bff94 <machine_kexec+0x44>: str x2, [x29,#128]
0xffff2000200bff98 <machine_kexec+0x48>: mov w2, #0xf1f1f1f1
0xffff2000200bff9c <machine_kexec+0x4c>: str w2, [x1,x19]
0xffff2000200bffa0 <machine_kexec+0x50>: mov w2, #0xf200
0xffff2000200bffa4 <machine_kexec+0x54>: mov w1, #0xf3f3f3f3
0xffff2000200bffa8 <machine_kexec+0x58>: movk w2, #0xf2f2, lsl #16
0xffff2000200bffac <machine_kexec+0x5c>: stp w2, w1, [x4,#4]
We notice that:
1. machine_kexec() start address is 0xffff2000200bff50
2. the instruction at machine_kexec+0x44 stores the same value
0xffff2000200bff50 (comes from 0xffff2000200bf000 + 0xf50)
into stack postion [x29,#128].
When arm64_in_kdump_text() searches for LR from stack, it met
0xffff2000200bff50 firstly, so got wrong bt->bptr.
We know that the real LR is always greater than the start address
of a function, so let's fix it by changing the search conditon to
(*ptr > xxx_start) && (*ptr < xxx_end).
Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/arm64.c b/arm64.c
index c3e26a371a61..7e8a7db1fcc4 100644
--- a/arm64.c
+++ b/arm64.c
@@ -3479,7 +3479,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
ms = machdep->machspec;
for (ptr = start - 8; ptr >= base; ptr--) {
if (bt->flags & BT_OPT_BACK_TRACE) {
- if ((*ptr >= ms->crash_kexec_start) &&
+ if ((*ptr > ms->crash_kexec_start) &&
(*ptr < ms->crash_kexec_end) &&
INSTACK(*(ptr - 1), bt)) {
bt->bptr = ((ulong)(ptr - 1) - (ulong)base)
@@ -3488,7 +3488,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
fprintf(fp, "%lx: %lx (crash_kexec)\n", bt->bptr, *ptr);
return TRUE;
}
- if ((*ptr >= ms->crash_save_cpu_start) &&
+ if ((*ptr > ms->crash_save_cpu_start) &&
(*ptr < ms->crash_save_cpu_end) &&
INSTACK(*(ptr - 1), bt)) {
bt->bptr = ((ulong)(ptr - 1) - (ulong)base)
@@ -3498,14 +3498,14 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
return TRUE;
}
} else {
- if ((*ptr >= ms->machine_kexec_start) && (*ptr < ms->machine_kexec_end)) {
+ if ((*ptr > ms->machine_kexec_start) && (*ptr < ms->machine_kexec_end)) {
bt->bptr = ((ulong)ptr - (ulong)base)
+ task_to_stackbase(bt->tc->task);
if (CRASHDEBUG(1))
fprintf(fp, "%lx: %lx (machine_kexec)\n", bt->bptr, *ptr);
return TRUE;
}
- if ((*ptr >= ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) {
+ if ((*ptr > ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) {
/*
* Stash the first crash_kexec frame in case the machine_kexec
* frame is not found.
@@ -3519,7 +3519,7 @@ arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
}
continue;
}
- if ((*ptr >= ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) {
+ if ((*ptr > ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) {
bt->bptr = ((ulong)ptr - (ulong)base)
+ task_to_stackbase(bt->tc->task);
if (CRASHDEBUG(1))
@@ -3566,7 +3566,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
for (ptr = start - 8; ptr >= base; ptr--) {
if (bt->flags & BT_OPT_BACK_TRACE) {
- if ((*ptr >= ms->crash_kexec_start) &&
+ if ((*ptr > ms->crash_kexec_start) &&
(*ptr < ms->crash_kexec_end) &&
INSTACK(*(ptr - 1), bt)) {
bt->bptr = ((ulong)(ptr - 1) - (ulong)base) + stackbase;
@@ -3576,7 +3576,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
FREEBUF(stackbuf);
return TRUE;
}
- if ((*ptr >= ms->crash_save_cpu_start) &&
+ if ((*ptr > ms->crash_save_cpu_start) &&
(*ptr < ms->crash_save_cpu_end) &&
INSTACK(*(ptr - 1), bt)) {
bt->bptr = ((ulong)(ptr - 1) - (ulong)base) + stackbase;
@@ -3587,7 +3587,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
return TRUE;
}
} else {
- if ((*ptr >= ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) {
+ if ((*ptr > ms->crash_kexec_start) && (*ptr < ms->crash_kexec_end)) {
bt->bptr = ((ulong)ptr - (ulong)base) + stackbase;
if (CRASHDEBUG(1))
fprintf(fp, "%lx: %lx (crash_kexec on IRQ stack)\n",
@@ -3595,7 +3595,7 @@ arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
FREEBUF(stackbuf);
return TRUE;
}
- if ((*ptr >= ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) {
+ if ((*ptr > ms->crash_save_cpu_start) && (*ptr < ms->crash_save_cpu_end)) {
bt->bptr = ((ulong)ptr - (ulong)base) + stackbase;
if (CRASHDEBUG(1))
fprintf(fp, "%lx: %lx (crash_save_cpu on IRQ stack)\n",
--
2.37.1

View File

@ -1,37 +0,0 @@
From 69786ee12c4842d2cbd39ea42df65c45fb043edd Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <quic_pkondeti@quicinc.com>
Date: Thu, 8 Dec 2022 09:55:07 +0530
Subject: [PATCH 47/89] arm64: handle vabits_actual symbol missing case
After kernel commit 0d9b1ffefabe ("arm64: mm: make vabits_actual
a build time constant if possible") introduced in Linux v5.19,
the crash will not find vabits_actual symbol if VA_BITS <= 48.
Add a fallback option to initialize VA_BITS based on the user
supplied machdep option.
Tested ramdumps loading in both 6.0 and 5.15 kernels.
Signed-off-by: Pavankumar Kondeti <quic_pkondeti@quicinc.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arm64.c b/arm64.c
index 7e8a7db1fcc4..56fb841f43f8 100644
--- a/arm64.c
+++ b/arm64.c
@@ -4671,6 +4671,10 @@ arm64_calc_VA_BITS(void)
return;
} else if (arm64_set_va_bits_by_tcr()) {
return;
+ } else if (machdep->machspec->VA_BITS_ACTUAL) {
+ machdep->machspec->VA_BITS = machdep->machspec->VA_BITS_ACTUAL;
+ machdep->machspec->VA_START = _VA_START(machdep->machspec->VA_BITS_ACTUAL);
+ return;
}
if (!(sp = symbol_search("swapper_pg_dir")) &&
--
2.37.1

View File

@ -1,144 +0,0 @@
From 1f6b58b323f669293b4ef8d497fe7867d1411143 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 7 Dec 2022 09:46:56 +0900
Subject: [PATCH 48/89] x86_64: Fix for move of per-cpu variables into struct
pcpu_hot
The following kernel commits, which are contained in Linux 6.2-rc1 and
later kernels, introduced struct pcpu_hot and moved several per-cpu
variables into it.
d7b6d709a76a x86/percpu: Move irq_stack variables next to current_task
7443b296e699 x86/percpu: Move cpu_number next to current_task
e57ef2ed97c1 x86: Put hot per CPU variables into a struct
Without the patch, crash fails to start session with the following
error:
$ crash vmlinux vmcore
...
bt: invalid size request: 0 type: "stack contents"
bt: read of stack at 0 failed
<segmentation violation in gdb>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
x86_64.c | 44 +++++++++++++++++++++++++++++++++-----------
1 file changed, 33 insertions(+), 11 deletions(-)
diff --git a/x86_64.c b/x86_64.c
index 292c240e887e..1113a1055f77 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -1287,12 +1287,15 @@ x86_64_per_cpu_init(void)
{
int i, cpus, cpunumber;
struct machine_specific *ms;
- struct syment *irq_sp, *curr_sp, *cpu_sp, *hardirq_stack_ptr_sp;
+ struct syment *irq_sp, *curr_sp, *cpu_sp, *hardirq_stack_ptr_sp, *pcpu_sp;
ulong hardirq_stack_ptr;
ulong __per_cpu_load = 0;
+ long hardirq_addr = 0, cpu_addr = 0, curr_addr = 0;
ms = machdep->machspec;
+ pcpu_sp = per_cpu_symbol_search("pcpu_hot");
+
hardirq_stack_ptr_sp = per_cpu_symbol_search("hardirq_stack_ptr");
irq_sp = per_cpu_symbol_search("per_cpu__irq_stack_union");
cpu_sp = per_cpu_symbol_search("per_cpu__cpu_number");
@@ -1321,7 +1324,7 @@ x86_64_per_cpu_init(void)
return;
}
- if (!cpu_sp || (!irq_sp && !hardirq_stack_ptr_sp))
+ if (!pcpu_sp && (!cpu_sp || (!irq_sp && !hardirq_stack_ptr_sp)))
return;
if (MEMBER_EXISTS("irq_stack_union", "irq_stack"))
@@ -1334,10 +1337,21 @@ x86_64_per_cpu_init(void)
if (kernel_symbol_exists("__per_cpu_load"))
__per_cpu_load = symbol_value("__per_cpu_load");
+ if (pcpu_sp) {
+ hardirq_addr = pcpu_sp->value + MEMBER_OFFSET("pcpu_hot", "hardirq_stack_ptr");
+ cpu_addr = pcpu_sp->value + MEMBER_OFFSET("pcpu_hot", "cpu_number");
+ curr_addr = pcpu_sp->value + MEMBER_OFFSET("pcpu_hot", "current_task");
+ } else {
+ if (hardirq_stack_ptr_sp)
+ hardirq_addr = hardirq_stack_ptr_sp->value;
+ cpu_addr = cpu_sp->value;
+ curr_addr = curr_sp->value;
+ }
+
for (i = cpus = 0; i < NR_CPUS; i++) {
if (__per_cpu_load && kt->__per_cpu_offset[i] == __per_cpu_load)
break;
- if (!readmem(cpu_sp->value + kt->__per_cpu_offset[i],
+ if (!readmem(cpu_addr + kt->__per_cpu_offset[i],
KVADDR, &cpunumber, sizeof(int),
"cpu number (per_cpu)", QUIET|RETURN_ON_ERROR))
break;
@@ -1346,8 +1360,8 @@ x86_64_per_cpu_init(void)
break;
cpus++;
- if (hardirq_stack_ptr_sp) {
- if (!readmem(hardirq_stack_ptr_sp->value + kt->__per_cpu_offset[i],
+ if (pcpu_sp || hardirq_stack_ptr_sp) {
+ if (!readmem(hardirq_addr + kt->__per_cpu_offset[i],
KVADDR, &hardirq_stack_ptr, sizeof(void *),
"hardirq_stack_ptr (per_cpu)", QUIET|RETURN_ON_ERROR))
continue;
@@ -1370,13 +1384,13 @@ x86_64_per_cpu_init(void)
else
kt->cpus = cpus;
- if (DUMPFILE() && curr_sp) {
+ if (DUMPFILE() && (pcpu_sp || curr_sp)) {
if ((ms->current = calloc(kt->cpus, sizeof(ulong))) == NULL)
error(FATAL,
"cannot calloc %d x86_64 current pointers!\n",
kt->cpus);
for (i = 0; i < kt->cpus; i++)
- if (!readmem(curr_sp->value + kt->__per_cpu_offset[i],
+ if (!readmem(curr_addr + kt->__per_cpu_offset[i],
KVADDR, &ms->current[i], sizeof(ulong),
"current_task (per_cpu)", RETURN_ON_ERROR))
continue;
@@ -5622,11 +5636,19 @@ x86_64_get_smp_cpus(void)
char *cpu_pda_buf;
ulong level4_pgt, cpu_pda_addr;
struct syment *sp;
- ulong __per_cpu_load = 0;
+ ulong __per_cpu_load = 0, cpu_addr;
if (!VALID_STRUCT(x8664_pda)) {
- if (!(sp = per_cpu_symbol_search("per_cpu__cpu_number")) ||
- !(kt->flags & PER_CPU_OFF))
+
+ if (!(kt->flags & PER_CPU_OFF))
+ return 1;
+
+ if ((sp = per_cpu_symbol_search("pcpu_hot")) &&
+ (cpu_addr = MEMBER_OFFSET("pcpu_hot", "cpu_number")) != INVALID_OFFSET)
+ cpu_addr += sp->value;
+ else if ((sp = per_cpu_symbol_search("per_cpu__cpu_number")))
+ cpu_addr = sp->value;
+ else
return 1;
if (kernel_symbol_exists("__per_cpu_load"))
@@ -5635,7 +5657,7 @@ x86_64_get_smp_cpus(void)
for (i = cpus = 0; i < NR_CPUS; i++) {
if (__per_cpu_load && kt->__per_cpu_offset[i] == __per_cpu_load)
break;
- if (!readmem(sp->value + kt->__per_cpu_offset[i],
+ if (!readmem(cpu_addr + kt->__per_cpu_offset[i],
KVADDR, &cpunumber, sizeof(int),
"cpu number (per_cpu)", QUIET|RETURN_ON_ERROR))
break;
--
2.37.1

View File

@ -1,163 +0,0 @@
From c05bbb36fcb468bce384ee4b8e30a0f5748bc4ae Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 15 Dec 2022 11:31:38 +0900
Subject: [PATCH 49/89] Fix for mm_struct.rss_stat conversion into
percpu_counter
Kernel commit f1a7941243c1 ("mm: convert mm's rss stats into
percpu_counter"), which is contained in Linux 6.2-rc1 and later
kernels, changed mm_struct.rss_stat from struct mm_rss_stat into an
array of struct percpu_counter.
Without the patch, "ps" and several commands fail with the following
error message:
ps: invalid structure member offset: mm_rss_stat_count
FILE: memory.c LINE: 4724 FUNCTION: get_task_mem_usage()
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 3 +++
kernel.c | 2 ++
memory.c | 14 +++++++++++++-
symbols.c | 6 ++++--
tools.c | 28 ++++++++++++++++++++++++++++
5 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index b7d76330141a..e4732c885371 100644
--- a/defs.h
+++ b/defs.h
@@ -2181,6 +2181,7 @@ struct offset_table { /* stash of commonly-used offsets */
long blk_mq_tags_nr_reserved_tags;
long blk_mq_tags_rqs;
long request_queue_hctx_table;
+ long percpu_counter_counters;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2351,6 +2352,7 @@ struct size_table { /* stash of commonly-used sizes */
long sbitmap_queue;
long sbq_wait_state;
long blk_mq_tags;
+ long percpu_counter;
};
struct array_table {
@@ -5307,6 +5309,7 @@ struct rb_node *rb_right(struct rb_node *, struct rb_node *);
struct rb_node *rb_left(struct rb_node *, struct rb_node *);
struct rb_node *rb_next(struct rb_node *);
struct rb_node *rb_last(struct rb_root *);
+long percpu_counter_sum_positive(ulong fbc);
/*
* symbols.c
diff --git a/kernel.c b/kernel.c
index 2a1c1c391414..3ca513962970 100644
--- a/kernel.c
+++ b/kernel.c
@@ -316,6 +316,8 @@ kernel_init()
}
MEMBER_OFFSET_INIT(percpu_counter_count, "percpu_counter", "count");
+ MEMBER_OFFSET_INIT(percpu_counter_counters, "percpu_counter", "counters");
+ STRUCT_SIZE_INIT(percpu_counter, "percpu_counter");
if (STRUCT_EXISTS("runqueue")) {
rqstruct = "runqueue";
diff --git a/memory.c b/memory.c
index 9c15c1b745ef..9d003713534b 100644
--- a/memory.c
+++ b/memory.c
@@ -4713,7 +4713,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
/*
* Latest kernels have mm_struct.mm_rss_stat[].
*/
- if (VALID_MEMBER(mm_struct_rss_stat)) {
+ if (VALID_MEMBER(mm_struct_rss_stat) && VALID_MEMBER(mm_rss_stat_count)) {
long anonpages, filepages, count;
anonpages = tt->anonpages;
@@ -4737,6 +4737,18 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
(anonpages * sizeof(long)));
if (count > 0)
rss += count;
+
+ } else if (VALID_MEMBER(mm_struct_rss_stat)) {
+ /* 6.2: struct percpu_counter rss_stat[NR_MM_COUNTERS] */
+ ulong fbc;
+
+ fbc = tc->mm_struct + OFFSET(mm_struct_rss_stat) +
+ (tt->filepages * SIZE(percpu_counter));
+ rss += percpu_counter_sum_positive(fbc);
+
+ fbc = tc->mm_struct + OFFSET(mm_struct_rss_stat) +
+ (tt->anonpages * SIZE(percpu_counter));
+ rss += percpu_counter_sum_positive(fbc);
}
/* Check whether SPLIT_RSS_COUNTING is enabled */
diff --git a/symbols.c b/symbols.c
index a94660538492..657f50706819 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10645,8 +10645,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(ktime_t_nsec));
fprintf(fp, " atomic_t_counter: %ld\n",
OFFSET(atomic_t_counter));
- fprintf(fp, " percpu_counter_count: %ld\n",
- OFFSET(percpu_counter_count));
+ fprintf(fp, " percpu_counter_count: %ld\n", OFFSET(percpu_counter_count));
+ fprintf(fp, " percpu_counter_counters: %ld\n", OFFSET(percpu_counter_counters));
fprintf(fp, " sk_buff_head_next: %ld\n",
OFFSET(sk_buff_head_next));
fprintf(fp, " sk_buff_head_qlen: %ld\n",
@@ -11040,6 +11040,8 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " sbq_wait_state: %ld\n", SIZE(sbq_wait_state));
fprintf(fp, " blk_mq_tags: %ld\n", SIZE(blk_mq_tags));
+ fprintf(fp, " percpu_counter: %ld\n", SIZE(percpu_counter));
+
fprintf(fp, "\n array_table:\n");
/*
* Use get_array_length() for those fields not set up at init-time;
diff --git a/tools.c b/tools.c
index 6fa3c70bac2b..7f64bd6328cf 100644
--- a/tools.c
+++ b/tools.c
@@ -6902,3 +6902,31 @@ rb_last(struct rb_root *root)
return node;
}
+
+long
+percpu_counter_sum_positive(ulong fbc)
+{
+ int i, count;
+ ulong addr;
+ long ret;
+
+ if (INVALID_MEMBER(percpu_counter_count))
+ return 0;
+
+ readmem(fbc + OFFSET(percpu_counter_count), KVADDR, &ret,
+ sizeof(long long), "percpu_counter.count", FAULT_ON_ERROR);
+
+ if (INVALID_MEMBER(percpu_counter_counters)) /* !CONFIG_SMP */
+ return (ret < 0) ? 0 : ret;
+
+ readmem(fbc + OFFSET(percpu_counter_counters), KVADDR, &addr,
+ sizeof(void *), "percpu_counter.counters", FAULT_ON_ERROR);
+
+ for (i = 0; i < kt->cpus; i++) {
+ readmem(addr + kt->__per_cpu_offset[i], KVADDR, &count,
+ sizeof(int), "percpu_counter.counters count", FAULT_ON_ERROR);
+ ret += count;
+ }
+
+ return (ret < 0) ? 0 : ret;
+}
--
2.37.1

View File

@ -1,48 +0,0 @@
From 1c56f2a37c79fa07c64dce34605d7b4a8a903243 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 21 Dec 2022 17:09:08 +0800
Subject: [PATCH 50/89] Fix "mount" command to appropriately display the mount
dumps
Recently the following failure has been observed on some vmcores when
using the mount command:
crash> mount
MOUNT SUPERBLK TYPE DEVNAME DIRNAME
ffff97a4818a3480 ffff979500013800 rootfs none /
ffff97e4846ca700 ffff97e484653000 sysfs sysfs /sys
...
ffff97b484753420 0 mount: invalid kernel virtual address: 0 type: "super_block buffer"
The kernel virtual address of the super_block is zero when the mount
command fails with the vfsmnt address 0xffff97b484753420. And the
remaining mount information will be discarded. That is not expected.
Check the address and skip it with a warning, if this is an invalid
kernel virtual address, that can avoid truncating the remaining mount
dumps.
Reported-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
filesys.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/filesys.c b/filesys.c
index c2ea78de821d..d64b54a9b822 100644
--- a/filesys.c
+++ b/filesys.c
@@ -1491,6 +1491,10 @@ show_mounts(ulong one_vfsmount, int flags, struct task_context *namespace_contex
}
sbp = ULONG(vfsmount_buf + OFFSET(vfsmount_mnt_sb));
+ if (!IS_KVADDR(sbp)) {
+ error(WARNING, "cannot get super_block from vfsmnt: 0x%lx\n", *vfsmnt);
+ continue;
+ }
if (flags)
fprintf(fp, "%s", mount_hdr);
--
2.37.1

View File

@ -1,692 +0,0 @@
From 8ac8ab4f0e2d81b51b79b30f410c09e394b17e9c Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:06 +0800
Subject: [PATCH 51/89] Add RISCV64 framework code support
This patch mainly added some environment configurations, macro definitions,
specific architecture structures and some function declarations supported
by the RISCV64 architecture.
We can use the build command to get the simplest version crash tool:
make target=RISCV64 -j2
Co-developed-by: Lifang Xia <lifang_xia@linux.alibaba.com>
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
Makefile | 7 +-
README | 6 +-
configure.c | 43 ++++++++++++-
defs.h | 154 +++++++++++++++++++++++++++++++++++++++++++-
diskdump.c | 11 +++-
help.c | 6 +-
lkcd_vmdump_v1.h | 8 +--
lkcd_vmdump_v2_v3.h | 8 +--
netdump.c | 9 ++-
ramdump.c | 2 +
riscv64.c | 54 ++++++++++++++++
symbols.c | 10 +++
12 files changed, 294 insertions(+), 24 deletions(-)
create mode 100644 riscv64.c
diff --git a/Makefile b/Makefile
index 2549d1d39273..c0de5ef8ff75 100644
--- a/Makefile
+++ b/Makefile
@@ -64,7 +64,7 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
kernel.c test.c gdb_interface.c configure.c net.c dev.c bpf.c \
printk.c \
alpha.c x86.c ppc.c ia64.c s390.c s390x.c s390dbf.c ppc64.c x86_64.c \
- arm.c arm64.c mips.c mips64.c sparc64.c \
+ arm.c arm64.c mips.c mips64.c riscv64.c sparc64.c \
extensions.c remote.c va_server.c va_server_v1.c symbols.c cmdline.c \
lkcd_common.c lkcd_v1.c lkcd_v2_v3.c lkcd_v5.c lkcd_v7.c lkcd_v8.c\
lkcd_fix_mem.c s390_dump.c lkcd_x86_trace.c \
@@ -84,7 +84,7 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
build_data.o kernel.o test.o gdb_interface.o net.o dev.o bpf.o \
printk.o \
alpha.o x86.o ppc.o ia64.o s390.o s390x.o s390dbf.o ppc64.o x86_64.o \
- arm.o arm64.o mips.o mips64.o sparc64.o \
+ arm.o arm64.o mips.o mips64.o riscv64.o sparc64.o \
extensions.o remote.o va_server.o va_server_v1.o symbols.o cmdline.o \
lkcd_common.o lkcd_v1.o lkcd_v2_v3.o lkcd_v5.o lkcd_v7.o lkcd_v8.o \
lkcd_fix_mem.o s390_dump.o netdump.o diskdump.o makedumpfile.o xendump.o \
@@ -447,6 +447,9 @@ mips.o: ${GENERIC_HFILES} ${REDHAT_HFILES} mips.c
mips64.o: ${GENERIC_HFILES} ${REDHAT_HFILES} mips64.c
${CC} -c ${CRASH_CFLAGS} mips64.c ${WARNING_OPTIONS} ${WARNING_ERROR}
+riscv64.o: ${GENERIC_HFILES} ${REDHAT_HFILES} riscv64.c
+ ${CC} -c ${CRASH_CFLAGS} riscv64.c ${WARNING_OPTIONS} ${WARNING_ERROR}
+
sparc64.o: ${GENERIC_HFILES} ${REDHAT_HFILES} sparc64.c
${CC} -c ${CRASH_CFLAGS} sparc64.c ${WARNING_OPTIONS} ${WARNING_ERROR}
diff --git a/README b/README
index 00386f6f8224..13a9de5d2ccb 100644
--- a/README
+++ b/README
@@ -37,8 +37,8 @@
These are the current prerequisites:
o At this point, x86, ia64, x86_64, ppc64, ppc, arm, arm64, alpha, mips,
- mips64, s390 and s390x-based kernels are supported. Other architectures
- may be addressed in the future.
+ mips64, riscv64, s390 and s390x-based kernels are supported. Other
+ architectures may be addressed in the future.
o One size fits all -- the utility can be run on any Linux kernel version
version dating back to 2.2.5-15. A primary design goal is to always
@@ -98,6 +98,8 @@
arm64 dumpfiles may be built by typing "make target=ARM64".
o On an x86_64 host, an x86_64 binary that can be used to analyze
ppc64le dumpfiles may be built by typing "make target=PPC64".
+ o On an x86_64 host, an x86_64 binary that can be used to analyze
+ riscv64 dumpfiles may be built by typing "make target=RISCV64".
Traditionally when vmcores are compressed via the makedumpfile(8) facility
the libz compression library is used, and by default the crash utility
diff --git a/configure.c b/configure.c
index b691a139b960..7de89553dbd3 100644
--- a/configure.c
+++ b/configure.c
@@ -107,6 +107,7 @@ void add_extra_lib(char *);
#undef MIPS
#undef SPARC64
#undef MIPS64
+#undef RISCV64
#define UNKNOWN 0
#define X86 1
@@ -122,6 +123,7 @@ void add_extra_lib(char *);
#define MIPS 11
#define SPARC64 12
#define MIPS64 13
+#define RISCV64 14
#define TARGET_X86 "TARGET=X86"
#define TARGET_ALPHA "TARGET=ALPHA"
@@ -136,6 +138,7 @@ void add_extra_lib(char *);
#define TARGET_MIPS "TARGET=MIPS"
#define TARGET_MIPS64 "TARGET=MIPS64"
#define TARGET_SPARC64 "TARGET=SPARC64"
+#define TARGET_RISCV64 "TARGET=RISCV64"
#define TARGET_CFLAGS_X86 "TARGET_CFLAGS=-D_FILE_OFFSET_BITS=64"
#define TARGET_CFLAGS_ALPHA "TARGET_CFLAGS="
@@ -158,6 +161,8 @@ void add_extra_lib(char *);
#define TARGET_CFLAGS_MIPS_ON_X86_64 "TARGET_CFLAGS=-m32 -D_FILE_OFFSET_BITS=64"
#define TARGET_CFLAGS_MIPS64 "TARGET_CFLAGS="
#define TARGET_CFLAGS_SPARC64 "TARGET_CFLAGS="
+#define TARGET_CFLAGS_RISCV64 "TARGET_CFLAGS="
+#define TARGET_CFLAGS_RISCV64_ON_X86_64 "TARGET_CFLAGS="
#define GDB_TARGET_DEFAULT "GDB_CONF_FLAGS="
#define GDB_TARGET_ARM_ON_X86 "GDB_CONF_FLAGS=--target=arm-elf-linux"
@@ -168,6 +173,7 @@ void add_extra_lib(char *);
#define GDB_TARGET_PPC64_ON_X86_64 "GDB_CONF_FLAGS=--target=powerpc64le-unknown-linux-gnu"
#define GDB_TARGET_MIPS_ON_X86 "GDB_CONF_FLAGS=--target=mipsel-elf-linux"
#define GDB_TARGET_MIPS_ON_X86_64 "GDB_CONF_FLAGS=--target=mipsel-elf-linux CFLAGS=-m32"
+#define GDB_TARGET_RISCV64_ON_X86_64 "GDB_CONF_FLAGS=--target=riscv64-unknown-linux-gnu"
/*
* The original plan was to allow the use of a particular version
@@ -394,6 +400,9 @@ get_current_configuration(struct supported_gdb_version *sp)
#ifdef __sparc_v9__
target_data.target = SPARC64;
#endif
+#if defined(__riscv) && (__riscv_xlen == 64)
+ target_data.target = RISCV64;
+#endif
set_initial_target(sp);
@@ -447,6 +456,12 @@ get_current_configuration(struct supported_gdb_version *sp)
if ((target_data.initial_gdb_target != UNKNOWN) &&
(target_data.host != target_data.initial_gdb_target))
arch_mismatch(sp);
+ } else if ((target_data.target == X86_64) &&
+ (name_to_target((char *)target_data.target_as_param) == RISCV64)) {
+ /*
+ * Build an RISCV64 crash binary on an X86_64 host.
+ */
+ target_data.target = RISCV64;
} else {
fprintf(stderr,
"\ntarget=%s is not supported on the %s host architecture\n\n",
@@ -487,6 +502,14 @@ get_current_configuration(struct supported_gdb_version *sp)
(target_data.target != MIPS64))
arch_mismatch(sp);
+ if ((target_data.initial_gdb_target == RISCV64) &&
+ (target_data.target != RISCV64)) {
+ if (target_data.target == X86_64)
+ target_data.target = RISCV64;
+ else
+ arch_mismatch(sp);
+ }
+
if ((target_data.initial_gdb_target == X86) &&
(target_data.target != X86)) {
if (target_data.target == X86_64)
@@ -650,6 +673,9 @@ show_configuration(void)
case SPARC64:
printf("TARGET: SPARC64\n");
break;
+ case RISCV64:
+ printf("TARGET: RISCV64\n");
+ break;
}
if (strlen(target_data.program)) {
@@ -767,6 +793,14 @@ build_configure(struct supported_gdb_version *sp)
target = TARGET_SPARC64;
target_CFLAGS = TARGET_CFLAGS_SPARC64;
break;
+ case RISCV64:
+ target = TARGET_RISCV64;
+ if (target_data.host == X86_64) {
+ target_CFLAGS = TARGET_CFLAGS_RISCV64_ON_X86_64;
+ gdb_conf_flags = GDB_TARGET_RISCV64_ON_X86_64;
+ } else
+ target_CFLAGS = TARGET_CFLAGS_RISCV64;
+ break;
}
ldflags = get_extra_flags("LDFLAGS.extra", NULL);
@@ -1364,7 +1398,7 @@ make_spec_file(struct supported_gdb_version *sp)
printf("Vendor: Red Hat, Inc.\n");
printf("Packager: Dave Anderson <anderson@redhat.com>\n");
printf("ExclusiveOS: Linux\n");
- printf("ExclusiveArch: %%{ix86} alpha ia64 ppc ppc64 ppc64pseries ppc64iseries x86_64 s390 s390x arm aarch64 ppc64le mips mipsel mips64el sparc64\n");
+ printf("ExclusiveArch: %%{ix86} alpha ia64 ppc ppc64 ppc64pseries ppc64iseries x86_64 s390 s390x arm aarch64 ppc64le mips mipsel mips64el sparc64 riscv64\n");
printf("Buildroot: %%{_tmppath}/%%{name}-root\n");
printf("BuildRequires: ncurses-devel zlib-devel bison\n");
printf("Requires: binutils\n");
@@ -1597,6 +1631,8 @@ set_initial_target(struct supported_gdb_version *sp)
target_data.initial_gdb_target = MIPS;
else if (strncmp(buf, "SPARC64", strlen("SPARC64")) == 0)
target_data.initial_gdb_target = SPARC64;
+ else if (strncmp(buf, "RISCV64", strlen("RISCV64")) == 0)
+ target_data.initial_gdb_target = RISCV64;
}
char *
@@ -1617,6 +1653,7 @@ target_to_name(int target)
case MIPS: return("MIPS");
case MIPS64: return("MIPS64");
case SPARC64: return("SPARC64");
+ case RISCV64: return("RISCV64");
}
return "UNKNOWN";
@@ -1681,6 +1718,10 @@ name_to_target(char *name)
return MIPS64;
else if (strncmp(name, "sparc64", strlen("sparc64")) == 0)
return SPARC64;
+ else if (strncmp(name, "RISCV64", strlen("RISCV64")) == 0)
+ return RISCV64;
+ else if (strncmp(name, "riscv64", strlen("riscv64")) == 0)
+ return RISCV64;
return UNKNOWN;
}
diff --git a/defs.h b/defs.h
index e4732c885371..f46b666dde13 100644
--- a/defs.h
+++ b/defs.h
@@ -76,7 +76,7 @@
#if !defined(X86) && !defined(X86_64) && !defined(ALPHA) && !defined(PPC) && \
!defined(IA64) && !defined(PPC64) && !defined(S390) && !defined(S390X) && \
!defined(ARM) && !defined(ARM64) && !defined(MIPS) && !defined(MIPS64) && \
- !defined(SPARC64)
+ !defined(RISCV64) && !defined(SPARC64)
#ifdef __alpha__
#define ALPHA
#endif
@@ -118,6 +118,9 @@
#ifdef __sparc_v9__
#define SPARC64
#endif
+#if defined(__riscv) && (__riscv_xlen == 64)
+#define RISCV64
+#endif
#endif
#ifdef X86
@@ -159,6 +162,9 @@
#ifdef SPARC64
#define NR_CPUS (4096)
#endif
+#ifdef RISCV64
+#define NR_CPUS (256)
+#endif
#define NR_DEVICE_DUMPS (64)
@@ -3486,6 +3492,63 @@ struct arm64_stackframe {
#define _MAX_PHYSMEM_BITS 48
#endif /* MIPS64 */
+#ifdef RISCV64
+#define _64BIT_
+#define MACHINE_TYPE "RISCV64"
+
+/*
+ * Direct memory mapping
+ */
+#define PTOV(X) \
+ (((unsigned long)(X)+(machdep->kvbase)) - machdep->machspec->phys_base)
+#define VTOP(X) ({ \
+ ulong _X = X; \
+ (THIS_KERNEL_VERSION >= LINUX(5,13,0) && \
+ (_X) >= machdep->machspec->kernel_link_addr) ? \
+ (((unsigned long)(_X)-(machdep->machspec->kernel_link_addr)) + \
+ machdep->machspec->phys_base): \
+ (((unsigned long)(_X)-(machdep->kvbase)) + \
+ machdep->machspec->phys_base); \
+ })
+#define PAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask)
+
+/*
+ * Stack size order
+ */
+#define THREAD_SIZE_ORDER 2
+
+#define PAGE_OFFSET (machdep->machspec->page_offset)
+#define VMALLOC_START (machdep->machspec->vmalloc_start_addr)
+#define VMALLOC_END (machdep->machspec->vmalloc_end)
+#define VMEMMAP_VADDR (machdep->machspec->vmemmap_vaddr)
+#define VMEMMAP_END (machdep->machspec->vmemmap_end)
+#define MODULES_VADDR (machdep->machspec->modules_vaddr)
+#define MODULES_END (machdep->machspec->modules_end)
+#define IS_VMALLOC_ADDR(X) riscv64_IS_VMALLOC_ADDR((ulong)(X))
+
+/* from arch/riscv/include/asm/pgtable.h */
+#define __SWP_TYPE_SHIFT 6
+#define __SWP_TYPE_BITS 5
+#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1)
+#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+
+#define SWP_TYPE(entry) (((entry) >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
+#define SWP_OFFSET(entry) ((entry) >> __SWP_OFFSET_SHIFT)
+#define __swp_type(entry) SWP_TYPE(entry)
+#define __swp_offset(entry) SWP_OFFSET(entry)
+
+#define TIF_SIGPENDING (THIS_KERNEL_VERSION >= LINUX(2,6,23) ? 1 : 2)
+
+/* from arch/riscv/include/asm/sparsemem.h */
+#define _SECTION_SIZE_BITS 27
+#define _MAX_PHYSMEM_BITS 56 /* 56-bit physical address supported */
+#define PHYS_MASK_SHIFT _MAX_PHYSMEM_BITS
+#define PHYS_MASK (((1UL) << PHYS_MASK_SHIFT) - 1)
+
+#endif /* RISCV64 */
+
#ifdef X86
#define _32BIT_
#define MACHINE_TYPE "X86"
@@ -4534,6 +4597,10 @@ struct machine_specific {
#define MAX_HEXADDR_STRLEN (16)
#define UVADDR_PRLEN (16)
#endif
+#ifdef RISCV64
+#define MAX_HEXADDR_STRLEN (16)
+#define UVADDR_PRLEN (16)
+#endif
#define BADADDR ((ulong)(-1))
#define BADVAL ((ulong)(-1))
@@ -5131,6 +5198,9 @@ void dump_build_data(void);
#ifdef MIPS64
#define machdep_init(X) mips64_init(X)
#endif
+#ifdef RISCV64
+#define machdep_init(X) riscv64_init(X)
+#endif
#ifdef SPARC64
#define machdep_init(X) sparc64_init(X)
#endif
@@ -5618,6 +5688,9 @@ void display_help_screen(char *);
#ifdef SPARC64
#define dump_machdep_table(X) sparc64_dump_machdep_table(X)
#endif
+#ifdef RISCV64
+#define dump_machdep_table(X) riscv64_dump_machdep_table(X)
+#endif
extern char *help_pointer[];
extern char *help_alias[];
extern char *help_ascii[];
@@ -6695,6 +6768,85 @@ struct machine_specific {
#endif /* MIPS64 */
+/*
+ * riscv64.c
+ */
+void riscv64_display_regs_from_elf_notes(int, FILE *);
+
+#ifdef RISCV64
+void riscv64_init(int);
+void riscv64_dump_machdep_table(ulong);
+int riscv64_IS_VMALLOC_ADDR(ulong);
+
+#define display_idt_table() \
+ error(FATAL, "-d option is not applicable to RISCV64 architecture\n")
+
+/* from arch/riscv/include/asm/ptrace.h */
+struct riscv64_register {
+ ulong regs[36];
+};
+
+struct riscv64_pt_regs {
+ ulong badvaddr;
+ ulong cause;
+ ulong epc;
+};
+
+struct riscv64_unwind_frame {
+ ulong fp;
+ ulong sp;
+ ulong pc;
+};
+
+#define KSYMS_START (0x1)
+
+struct machine_specific {
+ ulong phys_base;
+ ulong page_offset;
+ ulong vmalloc_start_addr;
+ ulong vmalloc_end;
+ ulong vmemmap_vaddr;
+ ulong vmemmap_end;
+ ulong modules_vaddr;
+ ulong modules_end;
+ ulong kernel_link_addr;
+
+ ulong _page_present;
+ ulong _page_read;
+ ulong _page_write;
+ ulong _page_exec;
+ ulong _page_user;
+ ulong _page_global;
+ ulong _page_accessed;
+ ulong _page_dirty;
+ ulong _page_soft;
+
+ ulong _pfn_shift;
+
+ struct riscv64_register *crash_task_regs;
+};
+/* from arch/riscv/include/asm/pgtable-bits.h */
+#define _PAGE_PRESENT (machdep->machspec->_page_present)
+#define _PAGE_READ (machdep->machspec->_page_read)
+#define _PAGE_WRITE (machdep->machspec->_page_write)
+#define _PAGE_EXEC (machdep->machspec->_page_exec)
+#define _PAGE_USER (machdep->machspec->_page_user)
+#define _PAGE_GLOBAL (machdep->machspec->_page_global)
+#define _PAGE_ACCESSED (machdep->machspec->_page_accessed)
+#define _PAGE_DIRTY (machdep->machspec->_page_dirty)
+#define _PAGE_SOFT (machdep->machspec->_page_soft)
+#define _PAGE_SEC (machdep->machspec->_page_sec)
+#define _PAGE_SHARE (machdep->machspec->_page_share)
+#define _PAGE_BUF (machdep->machspec->_page_buf)
+#define _PAGE_CACHE (machdep->machspec->_page_cache)
+#define _PAGE_SO (machdep->machspec->_page_so)
+#define _PAGE_SPECIAL _PAGE_SOFT
+#define _PAGE_TABLE _PAGE_PRESENT
+#define _PAGE_PROT_NONE _PAGE_READ
+#define _PAGE_PFN_SHIFT 10
+
+#endif /* RISCV64 */
+
/*
* sparc64.c
*/
diff --git a/diskdump.c b/diskdump.c
index 09fac543011a..136cbd63f799 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -622,6 +622,9 @@ restart:
else if (STRNEQ(header->utsname.machine, "aarch64") &&
machine_type_mismatch(file, "ARM64", NULL, 0))
goto err;
+ else if (STRNEQ(header->utsname.machine, "riscv64") &&
+ machine_type_mismatch(file, "RISCV64", NULL, 0))
+ goto err;
if (header->block_size != block_size) {
block_size = header->block_size;
@@ -780,6 +783,8 @@ restart:
dd->machine_type = EM_AARCH64;
else if (machine_type("SPARC64"))
dd->machine_type = EM_SPARCV9;
+ else if (machine_type("RISCV64"))
+ dd->machine_type = EM_RISCV;
else {
error(INFO, "%s: unsupported machine type: %s\n",
DISKDUMP_VALID() ? "diskdump" : "compressed kdump",
@@ -1751,7 +1756,8 @@ dump_note_offsets(FILE *fp)
qemu = FALSE;
if (machine_type("X86_64") || machine_type("S390X") ||
machine_type("ARM64") || machine_type("PPC64") ||
- machine_type("SPARC64") || machine_type("MIPS64")) {
+ machine_type("SPARC64") || machine_type("MIPS64") ||
+ machine_type("RISCV64")) {
note64 = (void *)dd->notes_buf + tot;
len = sizeof(Elf64_Nhdr);
if (STRNEQ((char *)note64 + len, "QEMU"))
@@ -2558,7 +2564,8 @@ dump_registers_for_compressed_kdump(void)
if (!KDUMP_CMPRS_VALID() || (dd->header->header_version < 4) ||
!(machine_type("X86") || machine_type("X86_64") ||
machine_type("ARM64") || machine_type("PPC64") ||
- machine_type("MIPS") || machine_type("MIPS64")))
+ machine_type("MIPS") || machine_type("MIPS64") ||
+ machine_type("RISCV64")))
error(FATAL, "-r option not supported for this dumpfile\n");
if (machine_type("ARM64") && (kt->cpus != dd->num_prstatus_notes))
diff --git a/help.c b/help.c
index 2cb570b244f4..bfba43f6e9fd 100644
--- a/help.c
+++ b/help.c
@@ -9518,8 +9518,8 @@ char *README[] = {
" These are the current prerequisites: ",
"",
" o At this point, x86, ia64, x86_64, ppc64, ppc, arm, arm64, alpha, mips,",
-" mips64, s390 and s390x-based kernels are supported. Other architectures",
-" may be addressed in the future.",
+" mips64, riscv64, s390 and s390x-based kernels are supported. Other",
+" architectures may be addressed in the future.",
"",
" o One size fits all -- the utility can be run on any Linux kernel version",
" version dating back to 2.2.5-15. A primary design goal is to always",
@@ -9578,6 +9578,8 @@ README_ENTER_DIRECTORY,
" arm64 dumpfiles may be built by typing \"make target=ARM64\".",
" o On an x86_64 host, an x86_64 binary that can be used to analyze",
" ppc64le dumpfiles may be built by typing \"make target=PPC64\".",
+" o On an x86_64 host, an x86_64 binary that can be used to analyze",
+" riscv64 dumpfiles may be built by typing \"make target=RISCV64\".",
"",
" Traditionally when vmcores are compressed via the makedumpfile(8) facility",
" the libz compression library is used, and by default the crash utility",
diff --git a/lkcd_vmdump_v1.h b/lkcd_vmdump_v1.h
index 4933427fc755..98ee09495869 100644
--- a/lkcd_vmdump_v1.h
+++ b/lkcd_vmdump_v1.h
@@ -114,14 +114,8 @@ typedef struct _dump_header_s {
struct new_utsname dh_utsname;
/* the dump registers */
-#ifndef IA64
-#ifndef S390
-#ifndef S390X
-#ifndef ARM64
+#if !defined(IA64) && !defined(S390) && !defined(S390X) && !defined(ARM64) && !defined(RISCV64)
struct pt_regs dh_regs;
-#endif
-#endif
-#endif
#endif
/* the address of the current task */
diff --git a/lkcd_vmdump_v2_v3.h b/lkcd_vmdump_v2_v3.h
index 984c2c25e3c6..ef3067f88e1e 100644
--- a/lkcd_vmdump_v2_v3.h
+++ b/lkcd_vmdump_v2_v3.h
@@ -37,7 +37,7 @@
#if defined(ARM) || defined(X86) || defined(PPC) || defined(S390) || \
defined(S390X) || defined(ARM64) || defined(MIPS) || \
- defined(MIPS64) || defined(SPARC64)
+ defined(MIPS64) || defined(SPARC64) || defined(RISCV64)
/*
* Kernel header file for Linux crash dumps.
@@ -84,13 +84,9 @@ typedef struct _dump_header_asm_s {
uint32_t dha_eip;
/* the dump registers */
-#ifndef S390
-#ifndef S390X
-#ifndef ARM64
+#if !defined(S390) && !defined(S390X) && !defined(ARM64) && !defined(RISCV64)
struct pt_regs dha_regs;
#endif
-#endif
-#endif
} dump_header_asm_t;
diff --git a/netdump.c b/netdump.c
index e8721d89f1a3..c4c8baae0d20 100644
--- a/netdump.c
+++ b/netdump.c
@@ -300,6 +300,12 @@ is_netdump(char *file, ulong source_query)
goto bailout;
break;
+ case EM_RISCV:
+ if (machine_type_mismatch(file, "RISCV64", NULL,
+ source_query))
+ goto bailout;
+ break;
+
default:
if (machine_type_mismatch(file, "(unknown)", NULL,
source_query))
@@ -2935,7 +2941,8 @@ dump_registers_for_elf_dumpfiles(void)
if (!(machine_type("X86") || machine_type("X86_64") ||
machine_type("ARM64") || machine_type("PPC64") ||
- machine_type("MIPS") || machine_type("MIPS64")))
+ machine_type("MIPS") || machine_type("MIPS64") ||
+ machine_type("RISCV64")))
error(FATAL, "-r option not supported for this dumpfile\n");
if (NETDUMP_DUMPFILE()) {
diff --git a/ramdump.c b/ramdump.c
index a206fcbbab3c..d2bd7ffb0b4b 100644
--- a/ramdump.c
+++ b/ramdump.c
@@ -188,6 +188,8 @@ char *ramdump_to_elf(void)
e_machine = EM_MIPS;
else if (machine_type("X86_64"))
e_machine = EM_X86_64;
+ else if (machine_type("RISCV64"))
+ e_machine = EM_RISCV;
else
error(FATAL, "ramdump: unsupported machine type: %s\n",
MACHINE_TYPE);
diff --git a/riscv64.c b/riscv64.c
new file mode 100644
index 000000000000..4f858a418a8c
--- /dev/null
+++ b/riscv64.c
@@ -0,0 +1,54 @@
+/* riscv64.c - core analysis suite
+ *
+ * Copyright (C) 2022 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include "defs.h"
+#ifdef RISCV64
+
+#include <elf.h>
+
+void
+riscv64_dump_machdep_table(ulong arg)
+{
+}
+
+/*
+ * Include both vmalloc'd and module address space as VMALLOC space.
+ */
+int
+riscv64_IS_VMALLOC_ADDR(ulong vaddr)
+{
+ return ((vaddr >= VMALLOC_START && vaddr <= VMALLOC_END) ||
+ (vaddr >= VMEMMAP_VADDR && vaddr <= VMEMMAP_END) ||
+ (vaddr >= MODULES_VADDR && vaddr <= MODULES_END));
+}
+
+void
+riscv64_init(int when)
+{
+}
+
+void
+riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
+{
+}
+
+#else /* !RISCV64 */
+
+void
+riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
+{
+ return;
+}
+
+#endif /* !RISCV64 */
diff --git a/symbols.c b/symbols.c
index 657f50706819..d1b35a56aa71 100644
--- a/symbols.c
+++ b/symbols.c
@@ -3745,6 +3745,11 @@ is_kernel(char *file)
goto bailout;
break;
+ case EM_RISCV:
+ if (machine_type_mismatch(file, "RISCV64", NULL, 0))
+ goto bailout;
+ break;
+
default:
if (machine_type_mismatch(file, "(unknown)", NULL, 0))
goto bailout;
@@ -4004,6 +4009,11 @@ is_shared_object(char *file)
if (machine_type("MIPS64"))
return TRUE;
break;
+
+ case EM_RISCV:
+ if (machine_type("RISCV64"))
+ return TRUE;
+ break;
}
if (CRASHDEBUG(1))
--
2.37.1

View File

@ -1,44 +0,0 @@
From 4876c73ca54a8ee2e02032d1b10832e0de9adde3 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:08 +0800
Subject: [PATCH 53/89] RISCV64: Add 'dis' command support
Use generic_dis_filter() function to support dis command implementation.
With this patch, we can get the disassembled code,
crash> dis __crash_kexec
0xffffffff80088580 <__crash_kexec>: addi sp,sp,-352
0xffffffff80088582 <__crash_kexec+2>: sd s0,336(sp)
0xffffffff80088584 <__crash_kexec+4>: sd s1,328(sp)
0xffffffff80088586 <__crash_kexec+6>: sd s2,320(sp)
0xffffffff80088588 <__crash_kexec+8>: addi s0,sp,352
0xffffffff8008858a <__crash_kexec+10>: sd ra,344(sp)
0xffffffff8008858c <__crash_kexec+12>: sd s3,312(sp)
0xffffffff8008858e <__crash_kexec+14>: sd s4,304(sp)
0xffffffff80088590 <__crash_kexec+16>: auipc s2,0x1057
0xffffffff80088594 <__crash_kexec+20>: addi s2,s2,-1256
0xffffffff80088598 <__crash_kexec+24>: ld a5,0(s2)
0xffffffff8008859c <__crash_kexec+28>: mv s1,a0
0xffffffff8008859e <__crash_kexec+30>: auipc a0,0xfff
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/riscv64.c b/riscv64.c
index d8de3d56e1b7..1e20a09a81ae 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -988,6 +988,7 @@ riscv64_init(int when)
machdep->is_task_addr = riscv64_is_task_addr;
machdep->get_smp_cpus = riscv64_get_smp_cpus;
machdep->value_to_symbol = generic_machdep_value_to_symbol;
+ machdep->dis_filter = generic_dis_filter;
machdep->show_interrupts = generic_show_interrupts;
machdep->get_irq_affinity = generic_get_irq_affinity;
machdep->init_kernel_pgd = NULL; /* pgd set by symbol_value("swapper_pg_dir") */
--
2.37.1

View File

@ -1,36 +0,0 @@
From b4d78e7f96564ecbdbfe4b4b70a1c6d65429bbb9 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:09 +0800
Subject: [PATCH 54/89] RISCV64: Add irq command support
With the patch, we can get the irq info,
crash> irq
IRQ IRQ_DESC/_DATA IRQACTION NAME
0 (unused) (unused)
1 ff60000001329600 ff60000001d17180 "101000.rtc"
2 ff60000001329800 ff60000001d17680 "ttyS0"
3 ff60000001329a00 ff60000001c33c00 "virtio0"
4 ff60000001329c00 ff60000001c33f80 "virtio1"
5 ff6000000120f400 ff60000001216000 "riscv-timer"
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/riscv64.c b/riscv64.c
index 1e20a09a81ae..2355daca7aac 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -989,6 +989,7 @@ riscv64_init(int when)
machdep->get_smp_cpus = riscv64_get_smp_cpus;
machdep->value_to_symbol = generic_machdep_value_to_symbol;
machdep->dis_filter = generic_dis_filter;
+ machdep->dump_irq = generic_dump_irq;
machdep->show_interrupts = generic_show_interrupts;
machdep->get_irq_affinity = generic_get_irq_affinity;
machdep->init_kernel_pgd = NULL; /* pgd set by symbol_value("swapper_pg_dir") */
--
2.37.1

View File

@ -1,413 +0,0 @@
From 75afbf54dd183b05a8b7363390df4a198155580a Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:10 +0800
Subject: [PATCH 55/89] RISCV64: Add 'bt' command support
1, Add the implementation to get stack frame from active & inactive
task's stack.
2, Add 'bt -l' command support get a line number associated with a
current pc address.
3, Add 'bt -f' command support to display all stack data contained
in a frame
With the patch, we can get the backtrace,
crash> bt
PID: 113 TASK: ff6000000226c200 CPU: 0 COMMAND: "sh"
#0 [ff20000010333b90] riscv_crash_save_regs at ffffffff800078f8
#1 [ff20000010333cf0] panic at ffffffff806578c6
#2 [ff20000010333d50] sysrq_reset_seq_param_set at ffffffff8038c03c
#3 [ff20000010333da0] __handle_sysrq at ffffffff8038c604
#4 [ff20000010333e00] write_sysrq_trigger at ffffffff8038cae4
#5 [ff20000010333e20] proc_reg_write at ffffffff801b7ee8
#6 [ff20000010333e40] vfs_write at ffffffff80152bb2
#7 [ff20000010333e80] ksys_write at ffffffff80152eda
#8 [ff20000010333ed0] sys_write at ffffffff80152f52
crash> bt -l
PID: 113 TASK: ff6000000226c200 CPU: 0 COMMAND: "sh"
#0 [ff20000010333b90] riscv_crash_save_regs at ffffffff800078f8
/buildroot/qemu_riscv64_virt_defconfig/build/linux-custom/arch/riscv/kernel/crash_save_regs.S: 47
#1 [ff20000010333cf0] panic at ffffffff806578c6
/buildroot/qemu_riscv64_virt_defconfig/build/linux-custom/kernel/panic.c: 276
... ...
crash> bt -f
PID: 113 TASK: ff6000000226c200 CPU: 0 COMMAND: "sh"
#0 [ff20000010333b90] riscv_crash_save_regs at ffffffff800078f8
[PC: ffffffff800078f8 RA: ffffffff806578c6 SP: ff20000010333b90 SIZE: 352]
ff20000010333b90: ff20000010333bb0 ffffffff800078f8
ff20000010333ba0: ffffffff8008862c ff20000010333b90
ff20000010333bb0: ffffffff810dde38 ff6000000226c200
ff20000010333bc0: ffffffff8032be68 0720072007200720
... ...
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
netdump.c | 13 +++
riscv64.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 296 insertions(+)
diff --git a/netdump.c b/netdump.c
index c4c8baae0d20..4ef5807a641b 100644
--- a/netdump.c
+++ b/netdump.c
@@ -42,6 +42,7 @@ static void get_netdump_regs_ppc64(struct bt_info *, ulong *, ulong *);
static void get_netdump_regs_arm(struct bt_info *, ulong *, ulong *);
static void get_netdump_regs_arm64(struct bt_info *, ulong *, ulong *);
static void get_netdump_regs_mips(struct bt_info *, ulong *, ulong *);
+static void get_netdump_regs_riscv(struct bt_info *, ulong *, ulong *);
static void check_dumpfile_size(char *);
static int proc_kcore_init_32(FILE *, int);
static int proc_kcore_init_64(FILE *, int);
@@ -2675,6 +2676,10 @@ get_netdump_regs(struct bt_info *bt, ulong *eip, ulong *esp)
return get_netdump_regs_mips(bt, eip, esp);
break;
+ case EM_RISCV:
+ get_netdump_regs_riscv(bt, eip, esp);
+ break;
+
default:
error(FATAL,
"support for ELF machine type %d not available\n",
@@ -2931,6 +2936,8 @@ display_regs_from_elf_notes(int cpu, FILE *ofp)
mips_display_regs_from_elf_notes(cpu, ofp);
} else if (machine_type("MIPS64")) {
mips64_display_regs_from_elf_notes(cpu, ofp);
+ } else if (machine_type("RISCV64")) {
+ riscv64_display_regs_from_elf_notes(cpu, ofp);
}
}
@@ -3877,6 +3884,12 @@ get_netdump_regs_mips(struct bt_info *bt, ulong *eip, ulong *esp)
machdep->get_stack_frame(bt, eip, esp);
}
+static void
+get_netdump_regs_riscv(struct bt_info *bt, ulong *eip, ulong *esp)
+{
+ machdep->get_stack_frame(bt, eip, esp);
+}
+
int
is_partial_netdump(void)
{
diff --git a/riscv64.c b/riscv64.c
index 2355daca7aac..4c9b35bb93f2 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -33,6 +33,17 @@ static int riscv64_uvtop(struct task_context *tc, ulong vaddr,
static int riscv64_kvtop(struct task_context *tc, ulong kvaddr,
physaddr_t *paddr, int verbose);
static void riscv64_cmd_mach(void);
+static void riscv64_stackframe_init(void);
+static void riscv64_back_trace_cmd(struct bt_info *bt);
+static int riscv64_get_dumpfile_stack_frame(struct bt_info *bt,
+ ulong *nip, ulong *ksp);
+static void riscv64_get_stack_frame(struct bt_info *bt, ulong *pcp,
+ ulong *spp);
+static int riscv64_get_frame(struct bt_info *bt, ulong *pcp,
+ ulong *spp);
+static void riscv64_display_full_frame(struct bt_info *bt,
+ struct riscv64_unwind_frame *current,
+ struct riscv64_unwind_frame *previous);
static int riscv64_translate_pte(ulong, void *, ulonglong);
static int riscv64_init_active_task_regs(void);
static int riscv64_get_crash_notes(void);
@@ -498,6 +509,275 @@ no_page:
return FALSE;
}
+/*
+ * 'bt -f' command output
+ * Display all stack data contained in a frame
+ */
+static void
+riscv64_display_full_frame(struct bt_info *bt, struct riscv64_unwind_frame *current,
+ struct riscv64_unwind_frame *previous)
+{
+ int i, u_idx;
+ ulong *up;
+ ulong words, addr;
+ char buf[BUFSIZE];
+
+ if (previous->sp < current->sp)
+ return;
+
+ if (!(INSTACK(previous->sp, bt) && INSTACK(current->sp, bt)))
+ return;
+
+ words = (previous->sp - current->sp) / sizeof(ulong) + 1;
+ addr = current->sp;
+ u_idx = (current->sp - bt->stackbase) / sizeof(ulong);
+
+ for (i = 0; i < words; i++, u_idx++) {
+ if (!(i & 1))
+ fprintf(fp, "%s %lx: ", i ? "\n" : "", addr);
+
+ up = (ulong *)(&bt->stackbuf[u_idx*sizeof(ulong)]);
+ fprintf(fp, "%s ", format_stack_entry(bt, buf, *up, 0));
+ addr += sizeof(ulong);
+ }
+ fprintf(fp, "\n");
+}
+
+static void
+riscv64_stackframe_init(void)
+{
+ long task_struct_thread = MEMBER_OFFSET("task_struct", "thread");
+
+ /* from arch/riscv/include/asm/processor.h */
+ long thread_reg_ra = MEMBER_OFFSET("thread_struct", "ra");
+ long thread_reg_sp = MEMBER_OFFSET("thread_struct", "sp");
+ long thread_reg_fp = MEMBER_OFFSET("thread_struct", "s");
+
+ if ((task_struct_thread == INVALID_OFFSET) ||
+ (thread_reg_ra == INVALID_OFFSET) ||
+ (thread_reg_sp == INVALID_OFFSET) ||
+ (thread_reg_fp == INVALID_OFFSET) )
+ error(FATAL,
+ "cannot determine thread_struct offsets\n");
+
+ ASSIGN_OFFSET(task_struct_thread_context_pc) =
+ task_struct_thread + thread_reg_ra;
+ ASSIGN_OFFSET(task_struct_thread_context_sp) =
+ task_struct_thread + thread_reg_sp;
+ ASSIGN_OFFSET(task_struct_thread_context_fp) =
+ task_struct_thread + thread_reg_fp;
+}
+
+static void
+riscv64_dump_backtrace_entry(struct bt_info *bt, struct syment *sym,
+ struct riscv64_unwind_frame *current,
+ struct riscv64_unwind_frame *previous, int level)
+{
+ const char *name = sym ? sym->name : "(invalid)";
+ struct load_module *lm;
+ char *name_plus_offset = NULL;
+ struct syment *symp;
+ ulong symbol_offset;
+ char buf[BUFSIZE];
+
+ if (bt->flags & BT_SYMBOL_OFFSET) {
+ symp = value_search(current->pc, &symbol_offset);
+
+ if (symp && symbol_offset)
+ name_plus_offset =
+ value_to_symstr(current->pc, buf, bt->radix);
+ }
+
+ fprintf(fp, "%s#%d [%016lx] %s at %016lx",
+ level < 10 ? " " : "",
+ level,
+ current->sp,
+ name_plus_offset ? name_plus_offset : name,
+ current->pc);
+
+ if (module_symbol(current->pc, NULL, &lm, NULL, 0))
+ fprintf(fp, " [%s]", lm->mod_name);
+
+ fprintf(fp, "\n");
+
+ /*
+ * 'bt -l', get a line number associated with a current pc address.
+ */
+ if (bt->flags & BT_LINE_NUMBERS) {
+ get_line_number(current->pc, buf, FALSE);
+ if (strlen(buf))
+ fprintf(fp, " %s\n", buf);
+ }
+
+ /* bt -f */
+ if (bt->flags & BT_FULL) {
+ fprintf(fp, " "
+ "[PC: %016lx RA: %016lx SP: %016lx SIZE: %ld]\n",
+ current->pc,
+ previous->pc,
+ current->sp,
+ previous->sp - current->sp);
+ riscv64_display_full_frame(bt, current, previous);
+ }
+}
+
+/*
+ * Unroll a kernel stack.
+ */
+static void
+riscv64_back_trace_cmd(struct bt_info *bt)
+{
+ struct riscv64_unwind_frame current, previous;
+ struct stackframe curr_frame;
+ int level = 0;
+
+ if (bt->flags & BT_REGS_NOT_FOUND)
+ return;
+
+ current.pc = bt->instptr;
+ current.sp = bt->stkptr;
+ current.fp = bt->frameptr;
+
+ if (!INSTACK(current.sp, bt))
+ return;
+
+ for (;;) {
+ struct syment *symbol = NULL;
+ struct stackframe *frameptr;
+ ulong low, high;
+ ulong offset;
+
+ if (CRASHDEBUG(8))
+ fprintf(fp, "level %d pc %#lx sp %lx fp 0x%lx\n",
+ level, current.pc, current.sp, current.fp);
+
+ /* Validate frame pointer */
+ low = current.sp + sizeof(struct stackframe);
+ high = bt->stacktop;
+ if (current.fp < low || current.fp > high || current.fp & 0x7) {
+ if (CRASHDEBUG(8))
+ fprintf(fp, "fp 0x%lx sp 0x%lx low 0x%lx high 0x%lx\n",
+ current.fp, current.sp, low, high);
+ return;
+ }
+
+ symbol = value_search(current.pc, &offset);
+ if (!symbol)
+ return;
+
+ frameptr = (struct stackframe *)current.fp - 1;
+ if (!readmem((ulong)frameptr, KVADDR, &curr_frame,
+ sizeof(curr_frame), "get stack frame", RETURN_ON_ERROR))
+ return;
+
+ previous.pc = curr_frame.ra;
+ previous.fp = curr_frame.fp;
+ previous.sp = current.fp;
+
+ riscv64_dump_backtrace_entry(bt, symbol, &current, &previous, level++);
+
+ current.pc = previous.pc;
+ current.fp = previous.fp;
+ current.sp = previous.sp;
+
+ if (CRASHDEBUG(8))
+ fprintf(fp, "next %d pc %#lx sp %#lx fp %lx\n",
+ level, current.pc, current.sp, current.fp);
+ }
+}
+
+/*
+ * Get a stack frame combination of pc and ra from the most relevant spot.
+ */
+static void
+riscv64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
+{
+ ulong ksp = 0, nip = 0;
+ int ret = 0;
+
+ if (DUMPFILE() && is_task_active(bt->task))
+ ret = riscv64_get_dumpfile_stack_frame(bt, &nip, &ksp);
+ else
+ ret = riscv64_get_frame(bt, &nip, &ksp);
+
+ if (!ret)
+ error(WARNING, "cannot determine starting stack frame for task %lx\n",
+ bt->task);
+
+ if (pcp)
+ *pcp = nip;
+ if (spp)
+ *spp = ksp;
+}
+
+/*
+ * Get the starting point for the active cpu in a diskdump.
+ */
+static int
+riscv64_get_dumpfile_stack_frame(struct bt_info *bt, ulong *nip, ulong *ksp)
+{
+ const struct machine_specific *ms = machdep->machspec;
+ struct riscv64_register *regs;
+ ulong epc, sp;
+
+ if (!ms->crash_task_regs) {
+ bt->flags |= BT_REGS_NOT_FOUND;
+ return FALSE;
+ }
+
+ /*
+ * We got registers for panic task from crash_notes. Just return them.
+ */
+ regs = &ms->crash_task_regs[bt->tc->processor];
+ epc = regs->regs[RISCV64_REGS_EPC];
+ sp = regs->regs[RISCV64_REGS_SP];
+
+ /*
+ * Set stack frame ptr.
+ */
+ bt->frameptr = regs->regs[RISCV64_REGS_FP];
+
+ if (nip)
+ *nip = epc;
+ if (ksp)
+ *ksp = sp;
+
+ bt->machdep = regs;
+
+ return TRUE;
+}
+
+/*
+ * Do the work for riscv64_get_stack_frame() for non-active tasks.
+ * Get SP and PC values for idle tasks.
+ */
+static int
+riscv64_get_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
+{
+ if (!bt->tc || !(tt->flags & THREAD_INFO))
+ return FALSE;
+
+ if (!readmem(bt->task + OFFSET(task_struct_thread_context_pc),
+ KVADDR, pcp, sizeof(*pcp),
+ "thread_struct.ra",
+ RETURN_ON_ERROR))
+ return FALSE;
+
+ if (!readmem(bt->task + OFFSET(task_struct_thread_context_sp),
+ KVADDR, spp, sizeof(*spp),
+ "thread_struct.sp",
+ RETURN_ON_ERROR))
+ return FALSE;
+
+ if (!readmem(bt->task + OFFSET(task_struct_thread_context_fp),
+ KVADDR, &bt->frameptr, sizeof(bt->frameptr),
+ "thread_struct.fp",
+ RETURN_ON_ERROR))
+ return FALSE;
+
+ return TRUE;
+}
+
static int
riscv64_vtop_4level_4k(ulong *pgd, ulong vaddr, physaddr_t *paddr, int verbose)
{
@@ -978,6 +1258,8 @@ riscv64_init(int when)
machdep->uvtop = riscv64_uvtop;
machdep->kvtop = riscv64_kvtop;
machdep->cmd_mach = riscv64_cmd_mach;
+ machdep->get_stack_frame = riscv64_get_stack_frame;
+ machdep->back_trace = riscv64_back_trace_cmd;
machdep->vmalloc_start = riscv64_vmalloc_start;
machdep->processor_speed = riscv64_processor_speed;
@@ -998,6 +1280,7 @@ riscv64_init(int when)
case POST_GDB:
machdep->section_size_bits = _SECTION_SIZE_BITS;
machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
+ riscv64_stackframe_init();
riscv64_page_type_init();
if (!machdep->hz)
--
2.37.1

View File

@ -1,80 +0,0 @@
From 15ac3968a929adebc27985be77fe90d3847abd57 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:11 +0800
Subject: [PATCH 56/89] RISCV64: Add 'help -r' command support
Add support form printing out the registers from the dump file.
With the patch, we can get the regs,
crash> help -r
CPU 0:
epc : 00ffffffa5537400 ra : ffffffff80088620 sp : ff2000001039bb90
gp : ffffffff810dde38 tp : ff60000002269600 t0 : ffffffff8032be5c
t1 : 0720072007200720 t2 : 666666666666663c s0 : ff2000001039bcf0
s1 : 0000000000000000 a0 : ff2000001039bb98 a1 : 0000000000000001
a2 : 0000000000000010 a3 : 0000000000000000 a4 : 0000000000000000
a5 : ff60000001c7d000 a6 : 000000000000003c a7 : ffffffff8035c998
s2 : ffffffff810df0a8 s3 : ffffffff810df718 s4 : ff2000001039bb98
s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80c4a468
s8 : 00fffffffde45410 s9 : 0000000000000007 s10: 00aaaaaad1640700
s11: 0000000000000001 t3 : ff60000001218f00 t4 : ff60000001218f00
t5 : ff60000001218000 t6 : ff2000001039b988
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/riscv64.c b/riscv64.c
index 4c9b35bb93f2..6d1d3b5f36d1 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -1320,6 +1320,44 @@ riscv64_init(int when)
void
riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
{
+ const struct machine_specific *ms = machdep->machspec;
+ struct riscv64_register *regs;
+
+ if (!ms->crash_task_regs) {
+ error(INFO, "registers not collected for cpu %d\n", cpu);
+ return;
+ }
+
+ regs = &ms->crash_task_regs[cpu];
+ if (!regs->regs[RISCV64_REGS_SP] && !regs->regs[RISCV64_REGS_EPC]) {
+ error(INFO, "registers not collected for cpu %d\n", cpu);
+ return;
+ }
+
+ /* Print riscv64 32 regs */
+ fprintf(ofp,
+ "epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n"
+ " gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n"
+ " t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n"
+ " s1 : " REG_FMT " a0 : " REG_FMT " a1 : " REG_FMT "\n"
+ " a2 : " REG_FMT " a3 : " REG_FMT " a4 : " REG_FMT "\n"
+ " a5 : " REG_FMT " a6 : " REG_FMT " a7 : " REG_FMT "\n"
+ " s2 : " REG_FMT " s3 : " REG_FMT " s4 : " REG_FMT "\n"
+ " s5 : " REG_FMT " s6 : " REG_FMT " s7 : " REG_FMT "\n"
+ " s8 : " REG_FMT " s9 : " REG_FMT " s10: " REG_FMT "\n"
+ " s11: " REG_FMT " t3 : " REG_FMT " t4 : " REG_FMT "\n"
+ " t5 : " REG_FMT " t6 : " REG_FMT "\n",
+ regs->regs[0], regs->regs[1], regs->regs[2],
+ regs->regs[3], regs->regs[4], regs->regs[5],
+ regs->regs[6], regs->regs[7], regs->regs[8],
+ regs->regs[9], regs->regs[10], regs->regs[11],
+ regs->regs[12], regs->regs[13], regs->regs[14],
+ regs->regs[15], regs->regs[16], regs->regs[17],
+ regs->regs[18], regs->regs[19], regs->regs[20],
+ regs->regs[21], regs->regs[22], regs->regs[23],
+ regs->regs[24], regs->regs[25], regs->regs[26],
+ regs->regs[27], regs->regs[28], regs->regs[29],
+ regs->regs[30], regs->regs[31]);
}
#else /* !RISCV64 */
--
2.37.1

View File

@ -1,141 +0,0 @@
From 012c8ebc3d6d71c995522dcf8d8aad530cdb4d02 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:12 +0800
Subject: [PATCH 57/89] RISCV64: Add 'help -m/M' command support
Add riscv64_dump_machdep_table() implementation, display machdep_table.
crash> help -m
flags: 80 ()
kvbase: ff60000000000000
identity_map_base: ff60000000000000
pagesize: 4096
pageshift: 12
pagemask: fffffffffffff000
pageoffset: fff
pgdir_shift: 48
ptrs_per_pgd: 512
ptrs_per_pte: 512
stacksize: 16384
hz: 250
memsize: 1071644672 (0x3fe00000)
bits: 64
back_trace: riscv64_back_trace_cmd()
processor_speed: riscv64_processor_speed()
uvtop: riscv64_uvtop()
kvtop: riscv64_kvtop()
get_stack_frame: riscv64_get_stack_frame()
get_stackbase: generic_get_stackbase()
get_stacktop: generic_get_stacktop()
translate_pte: riscv64_translate_pte()
memory_size: generic_memory_size()
vmalloc_start: riscv64_vmalloc_start()
is_task_addr: riscv64_is_task_addr()
verify_symbol: riscv64_verify_symbol()
dis_filter: generic_dis_filter()
dump_irq: generic_dump_irq()
show_interrupts: generic_show_interrupts()
get_irq_affinity: generic_get_irq_affinity()
cmd_mach: riscv64_cmd_mach()
get_smp_cpus: riscv64_get_smp_cpus()
is_kvaddr: riscv64_is_kvaddr()
is_uvaddr: riscv64_is_uvaddr()
verify_paddr: generic_verify_paddr()
init_kernel_pgd: NULL
value_to_symbol: generic_machdep_value_to_symbol()
line_number_hooks: NULL
last_pgd_read: ffffffff810e9000
last_p4d_read: 81410000
last_pud_read: 81411000
last_pmd_read: 81412000
last_ptbl_read: 81415000
pgd: 560d586f3ab0
p4d: 560d586f4ac0
pud: 560d586f5ad0
pmd: 560d586f6ae0
ptbl: 560d586f7af0
section_size_bits: 27
max_physmem_bits: 56
sections_per_root: 0
machspec: 560d57d204a0
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/riscv64.c b/riscv64.c
index 6d1d3b5f36d1..5e8c7d12227c 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -132,7 +132,65 @@ riscv64_verify_symbol(const char *name, ulong value, char type)
void
riscv64_dump_machdep_table(ulong arg)
{
- /* TODO: */
+ int others = 0;
+
+ fprintf(fp, " flags: %lx (", machdep->flags);
+ if (machdep->flags & KSYMS_START)
+ fprintf(fp, "%sKSYMS_START", others++ ? "|" : "");
+ fprintf(fp, ")\n");
+
+ fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
+ fprintf(fp, " identity_map_base: %lx\n", machdep->identity_map_base);
+ fprintf(fp, " pagesize: %d\n", machdep->pagesize);
+ fprintf(fp, " pageshift: %d\n", machdep->pageshift);
+ fprintf(fp, " pagemask: %llx\n", machdep->pagemask);
+ fprintf(fp, " pageoffset: %lx\n", machdep->pageoffset);
+ fprintf(fp, " pgdir_shift: %ld\n", machdep->machspec->va_bits - 9);
+ fprintf(fp, " ptrs_per_pgd: %u\n", PTRS_PER_PGD);
+ fprintf(fp, " ptrs_per_pte: %d\n", PTRS_PER_PTE);
+ fprintf(fp, " stacksize: %ld\n", machdep->stacksize);
+ fprintf(fp, " hz: %d\n", machdep->hz);
+ fprintf(fp, " memsize: %ld (0x%lx)\n",
+ machdep->memsize, machdep->memsize);
+ fprintf(fp, " bits: %d\n", machdep->bits);
+ fprintf(fp, " back_trace: riscv64_back_trace_cmd()\n");
+ fprintf(fp, " processor_speed: riscv64_processor_speed()\n");
+ fprintf(fp, " uvtop: riscv64_uvtop()\n");
+ fprintf(fp, " kvtop: riscv64_kvtop()\n");
+ fprintf(fp, " get_stack_frame: riscv64_get_stack_frame()\n");
+ fprintf(fp, " get_stackbase: generic_get_stackbase()\n");
+ fprintf(fp, " get_stacktop: generic_get_stacktop()\n");
+ fprintf(fp, " translate_pte: riscv64_translate_pte()\n");
+ fprintf(fp, " memory_size: generic_memory_size()\n");
+ fprintf(fp, " vmalloc_start: riscv64_vmalloc_start()\n");
+ fprintf(fp, " is_task_addr: riscv64_is_task_addr()\n");
+ fprintf(fp, " verify_symbol: riscv64_verify_symbol()\n");
+ fprintf(fp, " dis_filter: generic_dis_filter()\n");
+ fprintf(fp, " dump_irq: generic_dump_irq()\n");
+ fprintf(fp, " show_interrupts: generic_show_interrupts()\n");
+ fprintf(fp, " get_irq_affinity: generic_get_irq_affinity()\n");
+ fprintf(fp, " cmd_mach: riscv64_cmd_mach()\n");
+ fprintf(fp, " get_smp_cpus: riscv64_get_smp_cpus()\n");
+ fprintf(fp, " is_kvaddr: riscv64_is_kvaddr()\n");
+ fprintf(fp, " is_uvaddr: riscv64_is_uvaddr()\n");
+ fprintf(fp, " verify_paddr: generic_verify_paddr()\n");
+ fprintf(fp, " init_kernel_pgd: NULL\n");
+ fprintf(fp, " value_to_symbol: generic_machdep_value_to_symbol()\n");
+ fprintf(fp, " line_number_hooks: NULL\n");
+ fprintf(fp, " last_pgd_read: %lx\n", machdep->last_pgd_read);
+ fprintf(fp, " last_p4d_read: %lx\n", machdep->machspec->last_p4d_read);
+ fprintf(fp, " last_pud_read: %lx\n", machdep->last_pud_read);
+ fprintf(fp, " last_pmd_read: %lx\n", machdep->last_pmd_read);
+ fprintf(fp, " last_ptbl_read: %lx\n", machdep->last_ptbl_read);
+ fprintf(fp, " pgd: %lx\n", (ulong)machdep->pgd);
+ fprintf(fp, " p4d: %lx\n", (ulong)machdep->machspec->p4d);
+ fprintf(fp, " pud: %lx\n", (ulong)machdep->pud);
+ fprintf(fp, " pmd: %lx\n", (ulong)machdep->pmd);
+ fprintf(fp, " ptbl: %lx\n", (ulong)machdep->ptbl);
+ fprintf(fp, " section_size_bits: %ld\n", machdep->section_size_bits);
+ fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits);
+ fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root);
+ fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec);
}
static ulong
--
2.37.1

View File

@ -1,83 +0,0 @@
From 7a942ad69e1a4270dba89de9bff4f93cd6d87578 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:13 +0800
Subject: [PATCH 58/89] RISCV64: Add 'mach' command support
With the patch we can get some basic machine state information,
crash> mach
MACHINE TYPE: riscv64
MEMORY SIZE: 1 GB
CPUS: 1
PROCESSOR SPEED: (unknown)
HZ: 250
PAGE SIZE: 4096
KERNEL STACK SIZE: 16384
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/riscv64.c b/riscv64.c
index 5e8c7d12227c..ff77e41b9407 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -116,10 +116,53 @@ static void riscv64_get_struct_page_size(struct machine_specific *ms)
}
}
+/*
+ * "mach" command output.
+ */
+static void
+riscv64_display_machine_stats(void)
+{
+ struct new_utsname *uts;
+ char buf[BUFSIZE];
+ ulong mhz;
+
+ uts = &kt->utsname;
+
+ fprintf(fp, " MACHINE TYPE: %s\n", uts->machine);
+ fprintf(fp, " MEMORY SIZE: %s\n", get_memory_size(buf));
+ fprintf(fp, " CPUS: %d\n", get_cpus_to_display());
+ fprintf(fp, " PROCESSOR SPEED: ");
+ if ((mhz = machdep->processor_speed()))
+ fprintf(fp, "%ld Mhz\n", mhz);
+ else
+ fprintf(fp, "(unknown)\n");
+ fprintf(fp, " HZ: %d\n", machdep->hz);
+ fprintf(fp, " PAGE SIZE: %d\n", PAGESIZE());
+ fprintf(fp, " KERNEL STACK SIZE: %ld\n", STACKSIZE());
+}
+
static void
riscv64_cmd_mach(void)
{
- /* TODO: */
+ int c;
+
+ while ((c = getopt(argcnt, args, "cmo")) != EOF) {
+ switch (c) {
+ case 'c':
+ case 'm':
+ case 'o':
+ option_not_supported(c);
+ break;
+ default:
+ argerrs++;
+ break;
+ }
+ }
+
+ if (argerrs)
+ cmd_usage(pc->curcmd, SYNOPSIS);
+
+ riscv64_display_machine_stats();
}
static int
--
2.37.1

View File

@ -1,45 +0,0 @@
From ba2d96e12463157fe2ed9c134ad9cf42481427e8 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Thu, 20 Oct 2022 09:50:14 +0800
Subject: [PATCH 59/89] RISCV64: Add the implementation of symbol verify
Verify the symbol to accept or reject a symbol from the kernel namelist.
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
riscv64.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/riscv64.c b/riscv64.c
index ff77e41b9407..6b9a68840d4c 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -165,10 +165,23 @@ riscv64_cmd_mach(void)
riscv64_display_machine_stats();
}
+/*
+ * Accept or reject a symbol from the kernel namelist.
+ */
static int
riscv64_verify_symbol(const char *name, ulong value, char type)
{
- /* TODO: */
+ if (CRASHDEBUG(8) && name && strlen(name))
+ fprintf(fp, "%08lx %s\n", value, name);
+
+ if (!(machdep->flags & KSYMS_START)) {
+ if (STREQ(name, "_text") || STREQ(name, "_stext"))
+ machdep->flags |= KSYMS_START;
+
+ return (name && strlen(name) && !STRNEQ(name, "__func__.") &&
+ !STRNEQ(name, "__crc_"));
+ }
+
return TRUE;
}
--
2.37.1

View File

@ -1,149 +0,0 @@
From 47d375ac822d413999c92520402c4868ce2275cc Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 16 Dec 2022 14:03:46 +0900
Subject: [PATCH 60/89] SLUB: Fix for offset change of struct slab members on
Linux 6.2-rc1
The following kernel commits split slab info from struct page into
struct slab in Linux 5.17.
d122019bf061 ("mm: Split slab into its own type")
07f910f9b729 ("mm: Remove slab from struct page")
Crash commit 5f390ed811b0 followed the change for SLUB, but crash still
uses the offset of page.lru inappropriately. Luckily, it could work
because it was the same value as the offset of slab.slab_list until
Linux 6.1.
However, kernel commit 130d4df57390 ("mm/sl[au]b: rearrange struct slab
fields to allow larger rcu_head") in Linux 6.2-rc1 changed the offset of
slab.slab_list. As a result, without the patch, "kmem -s|-S" options
print the following errors and fail to print values correctly for
kernels configured with CONFIG_SLUB.
crash> kmem -S filp
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
kmem: filp: partial list slab: ffffcc650405ab88 invalid page.inuse: -1
ffff8fa0401eca00 232 1267 1792 56 8k filp
...
KMEM_CACHE_NODE NODE SLABS PARTIAL PER-CPU
ffff8fa0401cb8c0 0 56 24 8
NODE 0 PARTIAL:
SLAB MEMORY NODE TOTAL ALLOCATED FREE
kmem: filp: invalid partial list slab pointer: ffffcc650405ab88
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 16 ++++++++++------
symbols.c | 1 +
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/defs.h b/defs.h
index 9c91f38328d0..31702e707bee 100644
--- a/defs.h
+++ b/defs.h
@@ -2188,6 +2188,7 @@ struct offset_table { /* stash of commonly-used offsets */
long blk_mq_tags_rqs;
long request_queue_hctx_table;
long percpu_counter_counters;
+ long slab_slab_list;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index 9d003713534b..d05737cc1429 100644
--- a/memory.c
+++ b/memory.c
@@ -781,6 +781,8 @@ vm_init(void)
if (INVALID_MEMBER(page_slab))
MEMBER_OFFSET_INIT(page_slab, "slab", "slab_cache");
+ MEMBER_OFFSET_INIT(slab_slab_list, "slab", "slab_list");
+
MEMBER_OFFSET_INIT(page_slab_page, "page", "slab_page");
if (INVALID_MEMBER(page_slab_page))
ANON_MEMBER_OFFSET_INIT(page_slab_page, "page", "slab_page");
@@ -19474,6 +19476,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
{
ulong next, last, list_head, flags;
int first;
+ long list_off = VALID_MEMBER(slab_slab_list) ? OFFSET(slab_slab_list) : OFFSET(page_lru);
if (!node_ptr)
return;
@@ -19487,7 +19490,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
next == list_head ? " (empty)\n" : "");
first = 0;
while (next != list_head) {
- si->slab = last = next - OFFSET(page_lru);
+ si->slab = last = next - list_off;
if (first++ == 0)
fprintf(fp, " %s", slab_hdr);
@@ -19510,7 +19513,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
if (!IS_KVADDR(next) ||
((next != list_head) &&
- !is_page_ptr(next - OFFSET(page_lru), NULL))) {
+ !is_page_ptr(next - list_off, NULL))) {
error(INFO,
"%s: partial list slab: %lx invalid page.lru.next: %lx\n",
si->curname, last, next);
@@ -19537,7 +19540,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
next == list_head ? " (empty)\n" : "");
first = 0;
while (next != list_head) {
- si->slab = next - OFFSET(page_lru);
+ si->slab = next - list_off;
if (first++ == 0)
fprintf(fp, " %s", slab_hdr);
@@ -19754,6 +19757,7 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
short inuse, objects;
ulong total_inuse;
ulong count = 0;
+ long list_off = VALID_MEMBER(slab_slab_list) ? OFFSET(slab_slab_list) : OFFSET(page_lru);
count = 0;
total_inuse = 0;
@@ -19765,12 +19769,12 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
hq_open();
while (next != list_head) {
- if (!readmem(next - OFFSET(page_lru) + OFFSET(page_inuse),
+ if (!readmem(next - list_off + OFFSET(page_inuse),
KVADDR, &inuse, sizeof(ushort), "page.inuse", RETURN_ON_ERROR)) {
hq_close();
return -1;
}
- last = next - OFFSET(page_lru);
+ last = next - list_off;
if (inuse == -1) {
error(INFO,
@@ -19796,7 +19800,7 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
}
if (!IS_KVADDR(next) ||
((next != list_head) &&
- !is_page_ptr(next - OFFSET(page_lru), NULL))) {
+ !is_page_ptr(next - list_off, NULL))) {
error(INFO, "%s: partial list slab: %lx invalid page.lru.next: %lx\n",
si->curname, last, next);
break;
diff --git a/symbols.c b/symbols.c
index d1b35a56aa71..9e6fca73eaf9 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9722,6 +9722,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(slab_inuse));
fprintf(fp, " slab_free: %ld\n",
OFFSET(slab_free));
+ fprintf(fp, " slab_slab_list: %ld\n", OFFSET(slab_slab_list));
fprintf(fp, " kmem_cache_size: %ld\n",
OFFSET(kmem_cache_size));
--
2.37.1

View File

@ -1,49 +0,0 @@
From 699717c7720cb2a397ce59d0a0380c65d2693033 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Fri, 23 Dec 2022 18:42:35 +0800
Subject: [PATCH 61/89] Fix for "kmem -i" to display correct SLAB statistics on
Linux 5.9 and later
Kernel commit d42f3245c7e2 ("mm: memcg: convert vmstat slab counters to
bytes"), which is contained in Linux v5.9-rc1 and later kernels, renamed
NR_SLAB_{RECLAIMABLE,UNRECLAIMABLE} to NR_SLAB_{RECLAIMABLE,UNRECLAIMABLE}_B.
Without the patch, "kmem -i" command will display incorrect SLAB
statistics:
crash> kmem -i | grep -e PAGES -e SLAB
PAGES TOTAL PERCENTAGE
SLAB 89458 349.4 MB 0% of TOTAL MEM
^^^^^ ^^^^^
With the patch, the actual result is:
crash> kmem -i | grep -e PAGES -e SLAB
PAGES TOTAL PERCENTAGE
SLAB 261953 1023.3 MB 0% of TOTAL MEM
Reported-by: Buland Kumar Singh <bsingh@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
memory.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/memory.c b/memory.c
index d05737cc1429..625a94b7d7d4 100644
--- a/memory.c
+++ b/memory.c
@@ -8388,6 +8388,11 @@ dump_kmeminfo(void)
get_slabs = nr_slab;
if (dump_vm_stat("NR_SLAB_UNRECLAIMABLE", &nr_slab, 0))
get_slabs += nr_slab;
+ } else if (dump_vm_stat("NR_SLAB_RECLAIMABLE_B", &nr_slab, 0)) {
+ /* 5.9 and later */
+ get_slabs = nr_slab;
+ if (dump_vm_stat("NR_SLAB_UNRECLAIMABLE_B", &nr_slab, 0))
+ get_slabs += nr_slab;
}
}
--
2.37.1

View File

@ -1,49 +0,0 @@
From 4003c972c5ff0814847865a9f5487c8561a598ad Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Tue, 27 Dec 2022 09:53:46 +0900
Subject: [PATCH 62/89] Fix build failure due to no EM_RISCV with glibc-2.23
and earlier
With glibc-2.23 and earlier (e.g. RHEL7), crash build fails with errors
like this due to EM_RISCV undeclared:
$ make -j 24 warn
TARGET: X86_64
CRASH: 8.0.2++
GDB: 10.2
...
symbols.c: In function 'is_kernel':
symbols.c:3746:8: error: 'EM_RISCV' undeclared (first use in this function)
case EM_RISCV:
^
...
Define EM_RISCV as 243 [1][2] if not defined.
[1] https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=94e73c95d9b5
[2] http://www.sco.com/developers/gabi/latest/ch4.eheader.html
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/defs.h b/defs.h
index 31702e707bee..cb04562d0d92 100644
--- a/defs.h
+++ b/defs.h
@@ -3493,6 +3493,10 @@ struct arm64_stackframe {
#define _MAX_PHYSMEM_BITS 48
#endif /* MIPS64 */
+#ifndef EM_RISCV
+#define EM_RISCV 243
+#endif
+
#ifdef RISCV64
#define _64BIT_
#define MACHINE_TYPE "RISCV64"
--
2.37.1

View File

@ -1,46 +0,0 @@
From 652681dd7fa37a20d7ef3a51a27afa7d6d3f1872 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 5 Jan 2023 17:18:51 +0900
Subject: [PATCH 63/89] SLAB: Fix for "kmem -s|-S" options on Linux 6.1 and
later
Kernel commit e36ce448a08d ("mm/slab: use kmalloc_node() for off slab
freelist_idx_t array allocation"), which is contained in Linux 6.1 and
later kernels, removed kmem_cache.freelist_cache member on kernels
configured with CONFIG_SLAB=y.
Without the patch, crash does not set SLAB_OVERLOAD_PAGE and
"kmem -s|-S" options fail with the following error:
kmem: invalid structure member offset: slab_list
FILE: memory.c LINE: 12156 FUNCTION: verify_slab_v2()
Use kmem_cache.freelist_size instead, which was introduced together
with kmem_cache.freelist_cache by kernel commit 8456a648cf44.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/memory.c b/memory.c
index 625a94b7d7d4..71ded688206f 100644
--- a/memory.c
+++ b/memory.c
@@ -535,8 +535,11 @@ vm_init(void)
/*
* slab: overload struct slab over struct page
* https://lkml.org/lkml/2013/10/16/155
+ *
+ * commit e36ce448a08d removed kmem_cache.freelist_cache in 6.1,
+ * so use freelist_size instead.
*/
- if (MEMBER_EXISTS("kmem_cache", "freelist_cache")) {
+ if (MEMBER_EXISTS("kmem_cache", "freelist_size")) {
vt->flags |= SLAB_OVERLOAD_PAGE;
ANON_MEMBER_OFFSET_INIT(page_s_mem, "page", "s_mem");
ANON_MEMBER_OFFSET_INIT(page_freelist, "page", "freelist");
--
2.37.1

View File

@ -1,233 +0,0 @@
From 4060546b051751e7e593100027d2a160723a17ee Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 5 Jan 2023 17:36:42 +0900
Subject: [PATCH 64/89] SLAB: Fix for "kmem -s|-S" options on Linux 6.2-rc1 and
later
Kernel commit 130d4df57390 ("mm/sl[au]b: rearrange struct slab fields to
allow larger rcu_head"), which is contained in Linux 6.2-rc1 and later
kernels, changed the offset of slab.slab_list and now it's not equal to
the offset of page.lru.
Without the patch, "kmem -s|-S" options print errors and zeros for slab
counters like this for kernels configured with CONFIG_SLAB=y.
crash> kmem -s
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
kmem: rpc_inode_cache: partial list: page/slab: fffff31ac4125190 bad active counter: 99476865
kmem: rpc_inode_cache: partial list: page/slab: fffff31ac4125190 bad s_mem pointer: 100000003
kmem: rpc_inode_cache: full list: page/slab: fffff31ac4125150 bad active counter: 99476225
kmem: rpc_inode_cache: full list: page/slab: fffff31ac4125150 bad active counter: 99476225
kmem: rpc_inode_cache: full list: page/slab: fffff31ac4125150 bad s_mem pointer: 100000005
ffff930202adfb40 704 0 0 0 4k rpc_inode_cache
...
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 45 +++++++++++++++++++++++++--------------------
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/memory.c b/memory.c
index 71ded688206f..156de2f7b5a3 100644
--- a/memory.c
+++ b/memory.c
@@ -78,6 +78,7 @@ struct meminfo { /* general purpose memory information structure */
int *freelist;
int freelist_index_size;
ulong random;
+ ulong list_offset;
};
/*
@@ -553,6 +554,8 @@ vm_init(void)
MEMBER_OFFSET_INIT(page_freelist, "slab", "freelist");
if (INVALID_MEMBER(page_active))
MEMBER_OFFSET_INIT(page_active, "slab", "active");
+
+ MEMBER_OFFSET_INIT(slab_slab_list, "slab", "slab_list");
}
if (!VALID_STRUCT(kmem_slab_s) && VALID_STRUCT(slab_s)) {
@@ -10767,6 +10770,8 @@ dump_kmem_cache_percpu_v2(struct meminfo *si)
if (vt->flags & SLAB_OVERLOAD_PAGE) {
si->freelist = si->kmem_bufctl;
si->freelist_index_size = slab_freelist_index_size();
+ si->list_offset = VALID_MEMBER(slab_slab_list) ?
+ OFFSET(slab_slab_list) : OFFSET(page_lru);
}
for (i = 0; i < vt->kmem_max_cpus; i++)
si->cpudata[i] = (ulong *)
@@ -11983,7 +11988,7 @@ do_slab_chain_slab_overload_page(long cmd, struct meminfo *si)
}
last = si->slab;
- readmem(si->slab - OFFSET(page_lru), KVADDR, page_buf,
+ readmem(si->slab - si->list_offset, KVADDR, page_buf,
SIZE(page), "page (slab) buffer",
FAULT_ON_ERROR);
@@ -11996,8 +12001,7 @@ do_slab_chain_slab_overload_page(long cmd, struct meminfo *si)
si->num_slabs++;
- si->slab = ULONG(page_buf +
- OFFSET(page_lru));
+ si->slab = ULONG(page_buf + si->list_offset);
/*
* Check for slab transition. (Tony Dziedzic)
@@ -12024,11 +12028,11 @@ do_slab_chain_slab_overload_page(long cmd, struct meminfo *si)
case SLAB_WALKTHROUGH:
if (si->flags & SLAB_OVERLOAD_PAGE_PTR) {
specified_slab = si->spec_addr;
- si->slab = si->spec_addr + OFFSET(page_lru);
+ si->slab = si->spec_addr + si->list_offset;
} else {
specified_slab = si->slab;
if (si->slab)
- si->slab += OFFSET(page_lru);
+ si->slab += si->list_offset;
}
si->flags |= (SLAB_WALKTHROUGH|SLAB_FIRST_NODE);
si->flags &= ~SLAB_GET_COUNTS;
@@ -12082,7 +12086,7 @@ do_slab_chain_slab_overload_page(long cmd, struct meminfo *si)
if (si->slab == slab_chains[s])
continue;
- readmem(si->slab - OFFSET(page_lru), KVADDR, page_buf,
+ readmem(si->slab - si->list_offset, KVADDR, page_buf,
SIZE(page), "page (slab) buffer",
FAULT_ON_ERROR);
@@ -12242,7 +12246,7 @@ verify_slab_overload_page(struct meminfo *si, ulong last, int s)
errcnt = 0;
- if (!readmem(si->slab - OFFSET(page_lru), KVADDR, page_buf,
+ if (!readmem(si->slab - si->list_offset, KVADDR, page_buf,
SIZE(page), "page (slab) buffer", QUIET|RETURN_ON_ERROR)) {
error(INFO, "%s: %s list: bad slab pointer: %lx\n",
si->curname, list, si->slab);
@@ -12250,7 +12254,7 @@ verify_slab_overload_page(struct meminfo *si, ulong last, int s)
return FALSE;
}
- list_head = (struct kernel_list_head *)(page_buf + OFFSET(page_lru));
+ list_head = (struct kernel_list_head *)(page_buf + si->list_offset);
if (!IS_KVADDR((ulong)list_head->next) ||
!accessible((ulong)list_head->next)) {
error(INFO, "%s: %s list: page/slab: %lx bad next pointer: %lx\n",
@@ -12569,7 +12573,7 @@ dump_slab_overload_page(struct meminfo *si)
int tmp;
ulong slab_overload_page, freelist;
- slab_overload_page = si->slab - OFFSET(page_lru);
+ slab_overload_page = si->slab - si->list_offset;
readmem(slab_overload_page + OFFSET(page_s_mem),
KVADDR, &si->s_mem, sizeof(ulong),
@@ -12796,12 +12800,12 @@ gather_slab_free_list_slab_overload_page(struct meminfo *si)
if (CRASHDEBUG(1))
fprintf(fp, "slab page: %lx active: %ld si->c_num: %ld\n",
- si->slab - OFFSET(page_lru), si->s_inuse, si->c_num);
+ si->slab - si->list_offset, si->s_inuse, si->c_num);
if (si->s_inuse == si->c_num )
return;
- slab_overload_page = si->slab - OFFSET(page_lru);
+ slab_overload_page = si->slab - si->list_offset;
readmem(slab_overload_page + OFFSET(page_freelist),
KVADDR, &freelist, sizeof(void *), "page freelist",
FAULT_ON_ERROR);
@@ -13099,7 +13103,7 @@ dump_slab_objects_percpu(struct meminfo *si)
if ((si->flags & ADDRESS_SPECIFIED) &&
(vt->flags & SLAB_OVERLOAD_PAGE)) {
- readmem(si->slab - OFFSET(page_lru) + OFFSET(page_freelist),
+ readmem(si->slab - si->list_offset + OFFSET(page_freelist),
KVADDR, &freelist, sizeof(ulong), "page.freelist",
FAULT_ON_ERROR);
@@ -18713,6 +18717,9 @@ dump_kmem_cache_slub(struct meminfo *si)
si->cache_buf = GETBUF(SIZE(kmem_cache));
+ si->list_offset = VALID_MEMBER(slab_slab_list) ?
+ OFFSET(slab_slab_list) : OFFSET(page_lru);
+
if (VALID_MEMBER(page_objects) &&
OFFSET(page_objects) == OFFSET(page_inuse))
si->flags |= SLAB_BITFIELD;
@@ -19484,7 +19491,6 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
{
ulong next, last, list_head, flags;
int first;
- long list_off = VALID_MEMBER(slab_slab_list) ? OFFSET(slab_slab_list) : OFFSET(page_lru);
if (!node_ptr)
return;
@@ -19498,7 +19504,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
next == list_head ? " (empty)\n" : "");
first = 0;
while (next != list_head) {
- si->slab = last = next - list_off;
+ si->slab = last = next - si->list_offset;
if (first++ == 0)
fprintf(fp, " %s", slab_hdr);
@@ -19521,7 +19527,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
if (!IS_KVADDR(next) ||
((next != list_head) &&
- !is_page_ptr(next - list_off, NULL))) {
+ !is_page_ptr(next - si->list_offset, NULL))) {
error(INFO,
"%s: partial list slab: %lx invalid page.lru.next: %lx\n",
si->curname, last, next);
@@ -19548,7 +19554,7 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node)
next == list_head ? " (empty)\n" : "");
first = 0;
while (next != list_head) {
- si->slab = next - list_off;
+ si->slab = next - si->list_offset;
if (first++ == 0)
fprintf(fp, " %s", slab_hdr);
@@ -19765,7 +19771,6 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
short inuse, objects;
ulong total_inuse;
ulong count = 0;
- long list_off = VALID_MEMBER(slab_slab_list) ? OFFSET(slab_slab_list) : OFFSET(page_lru);
count = 0;
total_inuse = 0;
@@ -19777,12 +19782,12 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
hq_open();
while (next != list_head) {
- if (!readmem(next - list_off + OFFSET(page_inuse),
+ if (!readmem(next - si->list_offset + OFFSET(page_inuse),
KVADDR, &inuse, sizeof(ushort), "page.inuse", RETURN_ON_ERROR)) {
hq_close();
return -1;
}
- last = next - list_off;
+ last = next - si->list_offset;
if (inuse == -1) {
error(INFO,
@@ -19808,7 +19813,7 @@ count_partial(ulong node, struct meminfo *si, ulong *free)
}
if (!IS_KVADDR(next) ||
((next != list_head) &&
- !is_page_ptr(next - list_off, NULL))) {
+ !is_page_ptr(next - si->list_offset, NULL))) {
error(INFO, "%s: partial list slab: %lx invalid page.lru.next: %lx\n",
si->curname, last, next);
break;
--
2.37.1

View File

@ -1,633 +0,0 @@
From 5b61f450d213cda90dd0f50a02594a0b87151c89 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 10 Jan 2023 14:56:27 +0800
Subject: [PATCH 65/89] Port the maple tree data structures and functions
There have been two ways to iterate vm_area_struct until Linux 6.0:
1) by rbtree, aka vma.vm_rb;
2) by linked list, aka vma.vm_{next,prev}.
However with the maple tree patches[1][2] in Linux 6.1, vm_rb and
vm_{next,prev} are removed from vm_area_struct. The vm_area_dump()
in crash mainly uses the linked list for vma iteration, which will
not work for this case. So the maple tree iteration needs to be
ported to crash.
For crash, currently it only iteratively reads the maple tree,
no more rcu safe or maple tree modification features needed.
So we only port a subset of kernel maple tree features.
In addition, we need to modify the ported kernel source code,
making it compatible with crash.
This patch deals with the two issues:
1) Poring mt_dump() function and all its dependencies from
kernel source to crash, to enable crash maple tree iteration,
2) adapting the ported code with crash.
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=524e00b36e8c547f5582eef3fb645a8d9fc5e3df
[2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=763ecb035029f500d7e6dc99acd1ad299b7726a1
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
Makefile | 10 +-
defs.h | 19 +++
maple_tree.c | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++
maple_tree.h | 82 +++++++++++
4 files changed, 515 insertions(+), 3 deletions(-)
create mode 100644 maple_tree.c
create mode 100644 maple_tree.h
diff --git a/Makefile b/Makefile
index c0de5ef8ff75..b290836ffc83 100644
--- a/Makefile
+++ b/Makefile
@@ -59,6 +59,7 @@ IBM_HFILES=ibm_common.h
SADUMP_HFILES=sadump.h
UNWIND_HFILES=unwind.h unwind_i.h rse.h unwind_x86.h unwind_x86_64.h
VMWARE_HFILES=vmware_vmss.h
+MAPLE_TREE_HFILES=maple_tree.h
CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
kernel.c test.c gdb_interface.c configure.c net.c dev.c bpf.c \
@@ -73,12 +74,12 @@ CFILES=main.c tools.c global_data.c memory.c filesys.c help.c task.c \
xen_hyper.c xen_hyper_command.c xen_hyper_global_data.c \
xen_hyper_dump_tables.c kvmdump.c qemu.c qemu-load.c sadump.c ipcs.c \
ramdump.c vmware_vmss.c vmware_guestdump.c \
- xen_dom0.c kaslr_helper.c sbitmap.c
+ xen_dom0.c kaslr_helper.c sbitmap.c maple_tree.c
SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \
${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \
${LKCD_DUMP_HFILES} ${LKCD_TRACE_HFILES} ${LKCD_OBSOLETE_HFILES}\
- ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES}
+ ${IBM_HFILES} ${SADUMP_HFILES} ${VMWARE_HFILES} ${MAPLE_TREE_HFILES}
OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
build_data.o kernel.o test.o gdb_interface.o net.o dev.o bpf.o \
@@ -93,7 +94,7 @@ OBJECT_FILES=main.o tools.o global_data.o memory.o filesys.o help.o task.o \
xen_hyper.o xen_hyper_command.o xen_hyper_global_data.o \
xen_hyper_dump_tables.o kvmdump.o qemu.o qemu-load.o sadump.o ipcs.o \
ramdump.o vmware_vmss.o vmware_guestdump.o \
- xen_dom0.o kaslr_helper.o sbitmap.o
+ xen_dom0.o kaslr_helper.o sbitmap.o maple_tree.o
MEMORY_DRIVER_FILES=memory_driver/Makefile memory_driver/crash.c memory_driver/README
@@ -548,6 +549,9 @@ kaslr_helper.o: ${GENERIC_HFILES} kaslr_helper.c
bpf.o: ${GENERIC_HFILES} bpf.c
${CC} -c ${CRASH_CFLAGS} bpf.c ${WARNING_OPTIONS} ${WARNING_ERROR}
+maple_tree.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} maple_tree.c
+ ${CC} -c ${CRASH_CFLAGS} maple_tree.c ${WARNING_OPTIONS} ${WARNING_ERROR}
+
${PROGRAM}: force
@$(MAKE) all
diff --git a/defs.h b/defs.h
index cb04562d0d92..46ed10e17cd2 100644
--- a/defs.h
+++ b/defs.h
@@ -2189,6 +2189,21 @@ struct offset_table { /* stash of commonly-used offsets */
long request_queue_hctx_table;
long percpu_counter_counters;
long slab_slab_list;
+ long mm_struct_mm_mt;
+ long maple_tree_ma_root;
+ long maple_tree_ma_flags;
+ long maple_node_parent;
+ long maple_node_ma64;
+ long maple_node_mr64;
+ long maple_node_slot;
+ long maple_arange_64_pivot;
+ long maple_arange_64_slot;
+ long maple_arange_64_gap;
+ long maple_arange_64_meta;
+ long maple_range_64_pivot;
+ long maple_range_64_slot;
+ long maple_metadata_end;
+ long maple_metadata_gap;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2360,6 +2375,8 @@ struct size_table { /* stash of commonly-used sizes */
long sbq_wait_state;
long blk_mq_tags;
long percpu_counter;
+ long maple_tree;
+ long maple_node;
};
struct array_table {
@@ -5730,6 +5747,8 @@ int same_file(char *, char *);
#ifndef GDB_COMMON
int cleanup_memory_driver(void);
+void maple_init(void);
+int do_mptree(struct tree_data *);
/*
* help.c
diff --git a/maple_tree.c b/maple_tree.c
new file mode 100644
index 000000000000..474faeda6252
--- /dev/null
+++ b/maple_tree.c
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Maple Tree implementation
+ * Copyright (c) 2018-2022 Oracle Corporation
+ * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
+ * Matthew Wilcox <willy@infradead.org>
+ *
+ * The following are copied and modified from lib/maple_tree.c
+ */
+
+#include "maple_tree.h"
+#include "defs.h"
+
+unsigned char *mt_slots = NULL;
+unsigned char *mt_pivots = NULL;
+ulong mt_max[4] = {0};
+
+#define MAPLE_BUFSIZE 512
+
+static inline ulong mte_to_node(ulong maple_enode_entry)
+{
+ return maple_enode_entry & ~MAPLE_NODE_MASK;
+}
+
+static inline enum maple_type mte_node_type(ulong maple_enode_entry)
+{
+ return (maple_enode_entry >> MAPLE_NODE_TYPE_SHIFT) &
+ MAPLE_NODE_TYPE_MASK;
+}
+
+static inline ulong mt_slot(void **slots, unsigned char offset)
+{
+ return (ulong)slots[offset];
+}
+
+static inline bool ma_is_leaf(const enum maple_type type)
+{
+ return type < maple_range_64;
+}
+
+/*************** For cmd_tree ********************/
+
+struct maple_tree_ops {
+ void (*entry)(ulong node, ulong slot, const char *path,
+ ulong index, void *private);
+ void *private;
+ bool is_td;
+};
+
+static const char spaces[] = " ";
+
+static void do_mt_range64(ulong, ulong, ulong, uint, char *, ulong *,
+ struct maple_tree_ops *);
+static void do_mt_arange64(ulong, ulong, ulong, uint, char *, ulong *,
+ struct maple_tree_ops *);
+static void do_mt_entry(ulong, ulong, ulong, uint, uint, char *, ulong *,
+ struct maple_tree_ops *);
+static void do_mt_node(ulong, ulong, ulong, uint, char *, ulong *,
+ struct maple_tree_ops *);
+struct req_entry *fill_member_offsets(char *);
+void dump_struct_members_fast(struct req_entry *, int, ulong);
+void dump_struct_members_for_tree(struct tree_data *, int, ulong);
+
+static void mt_dump_range(ulong min, ulong max, uint depth)
+{
+ if (min == max)
+ fprintf(fp, "%.*s%lu: ", depth * 2, spaces, min);
+ else
+ fprintf(fp, "%.*s%lu-%lu: ", depth * 2, spaces, min, max);
+}
+
+static inline bool mt_is_reserved(ulong entry)
+{
+ return (entry < MAPLE_RESERVED_RANGE) && xa_is_internal(entry);
+}
+
+static inline bool mte_is_leaf(ulong maple_enode_entry)
+{
+ return ma_is_leaf(mte_node_type(maple_enode_entry));
+}
+
+static uint mt_height(char *mt_buf)
+{
+ return (UINT(mt_buf + OFFSET(maple_tree_ma_flags)) &
+ MT_FLAGS_HEIGHT_MASK)
+ >> MT_FLAGS_HEIGHT_OFFSET;
+}
+
+static void dump_mt_range64(char *mr64_buf)
+{
+ int i;
+
+ fprintf(fp, " contents: ");
+ for (i = 0; i < mt_slots[maple_range_64] - 1; i++)
+ fprintf(fp, "%p %lu ",
+ VOID_PTR(mr64_buf + OFFSET(maple_range_64_slot)
+ + sizeof(void *) * i),
+ ULONG(mr64_buf + OFFSET(maple_range_64_pivot)
+ + sizeof(ulong) * i));
+ fprintf(fp, "%p\n", VOID_PTR(mr64_buf + OFFSET(maple_range_64_slot)
+ + sizeof(void *) * i));
+}
+
+static void dump_mt_arange64(char *ma64_buf)
+{
+ int i;
+
+ fprintf(fp, " contents: ");
+ for (i = 0; i < mt_slots[maple_arange_64]; i++)
+ fprintf(fp, "%lu ", ULONG(ma64_buf + OFFSET(maple_arange_64_gap)
+ + sizeof(ulong) * i));
+
+ fprintf(fp, "| %02X %02X| ",
+ UCHAR(ma64_buf + OFFSET(maple_arange_64_meta) +
+ OFFSET(maple_metadata_end)),
+ UCHAR(ma64_buf + OFFSET(maple_arange_64_meta) +
+ OFFSET(maple_metadata_gap)));
+
+ for (i = 0; i < mt_slots[maple_arange_64] - 1; i++)
+ fprintf(fp, "%p %lu ",
+ VOID_PTR(ma64_buf + OFFSET(maple_arange_64_slot) +
+ sizeof(void *) * i),
+ ULONG(ma64_buf + OFFSET(maple_arange_64_pivot) +
+ sizeof(ulong) * i));
+ fprintf(fp, "%p\n", VOID_PTR(ma64_buf + OFFSET(maple_arange_64_slot) +
+ sizeof(void *) * i));
+}
+
+static void dump_mt_entry(ulong entry, ulong min, ulong max, uint depth)
+{
+ mt_dump_range(min, max, depth);
+
+ if (xa_is_value(entry))
+ fprintf(fp, "value %ld (0x%lx) [0x%lx]\n", xa_to_value(entry),
+ xa_to_value(entry), entry);
+ else if (xa_is_zero(entry))
+ fprintf(fp, "zero (%ld)\n", xa_to_internal(entry));
+ else if (mt_is_reserved(entry))
+ fprintf(fp, "UNKNOWN ENTRY (0x%lx)\n", entry);
+ else
+ fprintf(fp, "0x%lx\n", entry);
+}
+
+static void dump_mt_node(ulong maple_node, char *node_data, uint type,
+ ulong min, ulong max, uint depth)
+{
+ mt_dump_range(min, max, depth);
+
+ fprintf(fp, "node 0x%lx depth %d type %d parent %p",
+ maple_node, depth, type,
+ maple_node ? VOID_PTR(node_data + OFFSET(maple_node_parent)) :
+ NULL);
+}
+
+static void do_mt_range64(ulong entry, ulong min, ulong max,
+ uint depth, char *path, ulong *global_index,
+ struct maple_tree_ops *ops)
+{
+ ulong maple_node_m_node = mte_to_node(entry);
+ char node_buf[MAPLE_BUFSIZE];
+ bool leaf = mte_is_leaf(entry);
+ ulong first = min, last;
+ int i;
+ int len = strlen(path);
+ struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
+ char *mr64_buf;
+
+ if (SIZE(maple_node) > MAPLE_BUFSIZE)
+ error(FATAL, "MAPLE_BUFSIZE should be larger than maple_node struct");
+
+ readmem(maple_node_m_node, KVADDR, node_buf, SIZE(maple_node),
+ "mt_dump_range64 read maple_node", FAULT_ON_ERROR);
+
+ mr64_buf = node_buf + OFFSET(maple_node_mr64);
+
+ for (i = 0; i < mt_slots[maple_range_64]; i++) {
+ last = max;
+
+ if (i < (mt_slots[maple_range_64] - 1))
+ last = ULONG(mr64_buf + OFFSET(maple_range_64_pivot) +
+ sizeof(ulong) * i);
+
+ else if (!VOID_PTR(mr64_buf + OFFSET(maple_range_64_slot) +
+ sizeof(void *) * i) &&
+ max != mt_max[mte_node_type(entry)])
+ break;
+ if (last == 0 && i > 0)
+ break;
+ if (leaf)
+ do_mt_entry(mt_slot((void **)(mr64_buf +
+ OFFSET(maple_range_64_slot)), i),
+ first, last, depth + 1, i, path, global_index, ops);
+ else if (VOID_PTR(mr64_buf + OFFSET(maple_range_64_slot) +
+ sizeof(void *) * i)) {
+ sprintf(path + len, "/%d", i);
+ do_mt_node(mt_slot((void **)(mr64_buf +
+ OFFSET(maple_range_64_slot)), i),
+ first, last, depth + 1, path, global_index, ops);
+ }
+
+ if (last == max)
+ break;
+ if (last > max) {
+ fprintf(fp, "node %p last (%lu) > max (%lu) at pivot %d!\n",
+ mr64_buf, last, max, i);
+ break;
+ }
+ first = last + 1;
+ }
+}
+
+static void do_mt_arange64(ulong entry, ulong min, ulong max,
+ uint depth, char *path, ulong *global_index,
+ struct maple_tree_ops *ops)
+{
+ ulong maple_node_m_node = mte_to_node(entry);
+ char node_buf[MAPLE_BUFSIZE];
+ bool leaf = mte_is_leaf(entry);
+ ulong first = min, last;
+ int i;
+ int len = strlen(path);
+ struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
+ char *ma64_buf;
+
+ if (SIZE(maple_node) > MAPLE_BUFSIZE)
+ error(FATAL, "MAPLE_BUFSIZE should be larger than maple_node struct");
+
+ readmem(maple_node_m_node, KVADDR, node_buf, SIZE(maple_node),
+ "mt_dump_arange64 read maple_node", FAULT_ON_ERROR);
+
+ ma64_buf = node_buf + OFFSET(maple_node_ma64);
+
+ for (i = 0; i < mt_slots[maple_arange_64]; i++) {
+ last = max;
+
+ if (i < (mt_slots[maple_arange_64] - 1))
+ last = ULONG(ma64_buf + OFFSET(maple_arange_64_pivot) +
+ sizeof(ulong) * i);
+ else if (!VOID_PTR(ma64_buf + OFFSET(maple_arange_64_slot) +
+ sizeof(void *) * i))
+ break;
+ if (last == 0 && i > 0)
+ break;
+
+ if (leaf)
+ do_mt_entry(mt_slot((void **)(ma64_buf +
+ OFFSET(maple_arange_64_slot)), i),
+ first, last, depth + 1, i, path, global_index, ops);
+ else if (VOID_PTR(ma64_buf + OFFSET(maple_arange_64_slot) +
+ sizeof(void *) * i)) {
+ sprintf(path + len, "/%d", i);
+ do_mt_node(mt_slot((void **)(ma64_buf +
+ OFFSET(maple_arange_64_slot)), i),
+ first, last, depth + 1, path, global_index, ops);
+ }
+
+ if (last == max)
+ break;
+ if (last > max) {
+ fprintf(fp, "node %p last (%lu) > max (%lu) at pivot %d!\n",
+ ma64_buf, last, max, i);
+ break;
+ }
+ first = last + 1;
+ }
+}
+
+static void do_mt_entry(ulong entry, ulong min, ulong max, uint depth,
+ uint index, char *path, ulong *global_index,
+ struct maple_tree_ops *ops)
+{
+ int print_radix = 0, i;
+ static struct req_entry **e = NULL;
+ struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
+
+ if (!td)
+ return;
+}
+
+static void do_mt_node(ulong entry, ulong min, ulong max,
+ uint depth, char *path, ulong *global_index,
+ struct maple_tree_ops *ops)
+{
+ ulong maple_node = mte_to_node(entry);
+ uint type = mte_node_type(entry);
+ uint i;
+ char node_buf[MAPLE_BUFSIZE];
+ struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
+
+ if (SIZE(maple_node) > MAPLE_BUFSIZE)
+ error(FATAL, "MAPLE_BUFSIZE should be larger than maple_node struct");
+
+ readmem(maple_node, KVADDR, node_buf, SIZE(maple_node),
+ "mt_dump_node read maple_node", FAULT_ON_ERROR);
+
+ switch (type) {
+ case maple_dense:
+ for (i = 0; i < mt_slots[maple_dense]; i++) {
+ if (min + i > max)
+ fprintf(fp, "OUT OF RANGE: ");
+ do_mt_entry(mt_slot((void **)(node_buf + OFFSET(maple_node_slot)), i),
+ min + i, min + i, depth, i, path, global_index, ops);
+ }
+ break;
+ case maple_leaf_64:
+ case maple_range_64:
+ do_mt_range64(entry, min, max, depth, path, global_index, ops);
+ break;
+ case maple_arange_64:
+ do_mt_arange64(entry, min, max, depth, path, global_index, ops);
+ break;
+ default:
+ fprintf(fp, " UNKNOWN TYPE\n");
+ }
+}
+
+static int do_maple_tree_traverse(ulong ptr, int is_root,
+ struct maple_tree_ops *ops)
+{
+ char path[BUFSIZE] = {0};
+ char tree_buf[MAPLE_BUFSIZE];
+ ulong entry;
+ struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
+ ulong global_index = 0;
+
+ if (SIZE(maple_tree) > MAPLE_BUFSIZE)
+ error(FATAL, "MAPLE_BUFSIZE should be larger than maple_tree struct");
+
+ if (!is_root) {
+ strcpy(path, "direct");
+ do_mt_node(ptr, 0, mt_max[mte_node_type(ptr)],
+ 0, path, &global_index, ops);
+ } else {
+ readmem(ptr, KVADDR, tree_buf, SIZE(maple_tree),
+ "mt_dump read maple_tree", FAULT_ON_ERROR);
+ entry = ULONG(tree_buf + OFFSET(maple_tree_ma_root));
+
+ if (!xa_is_node(entry))
+ do_mt_entry(entry, 0, 0, 0, 0, path, &global_index, ops);
+ else if (entry) {
+ strcpy(path, "root");
+ do_mt_node(entry, 0, mt_max[mte_node_type(entry)], 0,
+ path, &global_index, ops);
+ }
+ }
+ return 0;
+}
+
+int do_mptree(struct tree_data *td)
+{
+ struct maple_tree_ops ops = {
+ .entry = NULL,
+ .private = td,
+ .is_td = true,
+ };
+
+ int is_root = !(td->flags & TREE_NODE_POINTER);
+
+ do_maple_tree_traverse(td->start, is_root, &ops);
+
+ return 0;
+}
+
+/***********************************************/
+void maple_init(void)
+{
+ int array_len;
+
+ STRUCT_SIZE_INIT(maple_tree, "maple_tree");
+ STRUCT_SIZE_INIT(maple_node, "maple_node");
+
+ MEMBER_OFFSET_INIT(maple_tree_ma_root, "maple_tree", "ma_root");
+ MEMBER_OFFSET_INIT(maple_tree_ma_flags, "maple_tree", "ma_flags");
+
+ MEMBER_OFFSET_INIT(maple_node_parent, "maple_node", "parent");
+ MEMBER_OFFSET_INIT(maple_node_ma64, "maple_node", "ma64");
+ MEMBER_OFFSET_INIT(maple_node_mr64, "maple_node", "mr64");
+ MEMBER_OFFSET_INIT(maple_node_slot, "maple_node", "slot");
+
+ MEMBER_OFFSET_INIT(maple_arange_64_pivot, "maple_arange_64", "pivot");
+ MEMBER_OFFSET_INIT(maple_arange_64_slot, "maple_arange_64", "slot");
+ MEMBER_OFFSET_INIT(maple_arange_64_gap, "maple_arange_64", "gap");
+ MEMBER_OFFSET_INIT(maple_arange_64_meta, "maple_arange_64", "meta");
+
+ MEMBER_OFFSET_INIT(maple_range_64_pivot, "maple_range_64", "pivot");
+ MEMBER_OFFSET_INIT(maple_range_64_slot, "maple_range_64", "slot");
+
+ MEMBER_OFFSET_INIT(maple_metadata_end, "maple_metadata", "end");
+ MEMBER_OFFSET_INIT(maple_metadata_gap, "maple_metadata", "gap");
+
+ array_len = get_array_length("mt_slots", NULL, sizeof(char));
+ mt_slots = calloc(array_len, sizeof(char));
+ readmem(symbol_value("mt_slots"), KVADDR, mt_slots,
+ array_len * sizeof(char), "maple_init read mt_slots",
+ RETURN_ON_ERROR);
+
+ array_len = get_array_length("mt_pivots", NULL, sizeof(char));
+ mt_pivots = calloc(array_len, sizeof(char));
+ readmem(symbol_value("mt_pivots"), KVADDR, mt_pivots,
+ array_len * sizeof(char), "maple_init read mt_pivots",
+ RETURN_ON_ERROR);
+
+ mt_max[maple_dense] = mt_slots[maple_dense];
+ mt_max[maple_leaf_64] = ULONG_MAX;
+ mt_max[maple_range_64] = ULONG_MAX;
+ mt_max[maple_arange_64] = ULONG_MAX;
+}
diff --git a/maple_tree.h b/maple_tree.h
new file mode 100644
index 000000000000..f53d5aaffd2e
--- /dev/null
+++ b/maple_tree.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _MAPLE_TREE_H
+#define _MAPLE_TREE_H
+/*
+ * Maple Tree - An RCU-safe adaptive tree for storing ranges
+ * Copyright (c) 2018-2022 Oracle
+ * Authors: Liam R. Howlett <Liam.Howlett@Oracle.com>
+ * Matthew Wilcox <willy@infradead.org>
+ *
+ * eXtensible Arrays
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ *
+ * See Documentation/core-api/xarray.rst for how to use the XArray.
+ */
+#include <stdbool.h>
+#include <limits.h>
+#include <sys/types.h>
+
+/*
+ * The following are copied and modified from include/linux/maple_tree.h
+ */
+
+enum maple_type {
+ maple_dense,
+ maple_leaf_64,
+ maple_range_64,
+ maple_arange_64,
+};
+
+#define MAPLE_NODE_MASK 255UL
+
+#define MT_FLAGS_HEIGHT_OFFSET 0x02
+#define MT_FLAGS_HEIGHT_MASK 0x7C
+
+#define MAPLE_NODE_TYPE_MASK 0x0F
+#define MAPLE_NODE_TYPE_SHIFT 0x03
+
+#define MAPLE_RESERVED_RANGE 4096
+
+/*
+ * The following are copied and modified from include/linux/xarray.h
+ */
+
+#define XA_ZERO_ENTRY xa_mk_internal(257)
+
+static inline ulong xa_mk_internal(ulong v)
+{
+ return (v << 2) | 2;
+}
+
+static inline bool xa_is_internal(ulong entry)
+{
+ return (entry & 3) == 2;
+}
+
+static inline bool xa_is_node(ulong entry)
+{
+ return xa_is_internal(entry) && entry > 4096;
+}
+
+static inline bool xa_is_value(ulong entry)
+{
+ return entry & 1;
+}
+
+static inline bool xa_is_zero(ulong entry)
+{
+ return entry == XA_ZERO_ENTRY;
+}
+
+static inline unsigned long xa_to_internal(ulong entry)
+{
+ return entry >> 2;
+}
+
+static inline unsigned long xa_to_value(ulong entry)
+{
+ return entry >> 1;
+}
+
+#endif /* _MAPLE_TREE_H */
--
2.37.1

View File

@ -1,335 +0,0 @@
From 4a6aba5dc85a2f491f752fb37dd2838c1141375d Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 10 Jan 2023 14:56:28 +0800
Subject: [PATCH 66/89] Add maple tree support to "tree" command
The maple tree is a new data structure for crash, so "tree" command
needs to support it for users to dump and view the content of maple
trees. This patch achieves this by using ported mt_dump() and its
related functions from kernel and adapting them with "tree" command.
Also introduce a new -v arg specifically for dumping the complete
content of a maple tree:
crash> tree -t maple 0xffff9034c006aec0 -v
maple_tree(ffff9034c006aec0) flags 309, height 2 root 0xffff9034de70041e
0-18446744073709551615: node 0xffff9034de700400 depth 0 type 3 parent 0xffff9034c006aec1 contents:...
0-140112331583487: node 0xffff9034c01e8800 depth 1 type 1 parent 0xffff9034de700406 contents:...
0-94643156942847: (nil)
94643156942848-94643158024191: 0xffff9035131754c0
94643158024192-94643160117247: (nil)
...
The existing options of "tree" command can work as well:
crash> tree -t maple -r mm_struct.mm_mt 0xffff9034c006aec0 -p
ffff9035131754c0
index: 1 position: root/0/1
ffff9035131751c8
index: 2 position: root/0/3
ffff9035131757b8
index: 3 position: root/0/4
...
crash> tree -t maple 0xffff9034c006aec0 -p -x -s vm_area_struct.vm_start,vm_end
ffff9035131754c0
index: 1 position: root/0/1
vm_start = 0x5613d3c00000,
vm_end = 0x5613d3d08000,
ffff9035131751c8
index: 2 position: root/0/3
vm_start = 0x5613d3f07000,
vm_end = 0x5613d3f0b000,
ffff9035131757b8
index: 3 position: root/0/4
vm_start = 0x5613d3f0b000,
vm_end = 0x5613d3f14000,
....
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
maple_tree.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
tools.c | 67 +++++++++++++++++++++++++++++++++++++++-------------
3 files changed, 114 insertions(+), 17 deletions(-)
diff --git a/defs.h b/defs.h
index 46ed10e17cd2..cc9af21a6b64 100644
--- a/defs.h
+++ b/defs.h
@@ -2713,6 +2713,7 @@ struct tree_data {
#define TREE_PARSE_MEMBER (VERBOSE << 7)
#define TREE_READ_MEMBER (VERBOSE << 8)
#define TREE_LINEAR_ORDER (VERBOSE << 9)
+#define TREE_STRUCT_VERBOSE (VERBOSE << 10)
#define ALIAS_RUNTIME (1)
#define ALIAS_RCLOCAL (2)
diff --git a/maple_tree.c b/maple_tree.c
index 474faeda6252..471136f3eb1d 100644
--- a/maple_tree.c
+++ b/maple_tree.c
@@ -173,6 +173,10 @@ static void do_mt_range64(ulong entry, ulong min, ulong max,
mr64_buf = node_buf + OFFSET(maple_node_mr64);
+ if (td && td->flags & TREE_STRUCT_VERBOSE) {
+ dump_mt_range64(mr64_buf);
+ }
+
for (i = 0; i < mt_slots[maple_range_64]; i++) {
last = max;
@@ -230,6 +234,10 @@ static void do_mt_arange64(ulong entry, ulong min, ulong max,
ma64_buf = node_buf + OFFSET(maple_node_ma64);
+ if (td && td->flags & TREE_STRUCT_VERBOSE) {
+ dump_mt_arange64(ma64_buf);
+ }
+
for (i = 0; i < mt_slots[maple_arange_64]; i++) {
last = max;
@@ -275,6 +283,51 @@ static void do_mt_entry(ulong entry, ulong min, ulong max, uint depth,
if (!td)
return;
+
+ if (!td->count && td->structname_args) {
+ /*
+ * Retrieve all members' info only once (count == 0)
+ * After last iteration all memory will be freed up
+ */
+ e = (struct req_entry **)GETBUF(sizeof(*e) * td->structname_args);
+ for (i = 0; i < td->structname_args; i++)
+ e[i] = fill_member_offsets(td->structname[i]);
+ }
+
+ td->count++;
+
+ if (td->flags & TREE_STRUCT_VERBOSE) {
+ dump_mt_entry(entry, min, max, depth);
+ } else if (td->flags & VERBOSE && entry)
+ fprintf(fp, "%lx\n", entry);
+ if (td->flags & TREE_POSITION_DISPLAY && entry)
+ fprintf(fp, " index: %ld position: %s/%u\n",
+ ++(*global_index), path, index);
+
+ if (td->structname) {
+ if (td->flags & TREE_STRUCT_RADIX_10)
+ print_radix = 10;
+ else if (td->flags & TREE_STRUCT_RADIX_16)
+ print_radix = 16;
+ else
+ print_radix = 0;
+
+ for (i = 0; i < td->structname_args; i++) {
+ switch (count_chars(td->structname[i], '.')) {
+ case 0:
+ dump_struct(td->structname[i], entry, print_radix);
+ break;
+ default:
+ if (td->flags & TREE_PARSE_MEMBER)
+ dump_struct_members_for_tree(td, i, entry);
+ else if (td->flags & TREE_READ_MEMBER)
+ dump_struct_members_fast(e[i], print_radix, entry);
+ }
+ }
+ }
+
+ if (e)
+ FREEBUF(e);
}
static void do_mt_node(ulong entry, ulong min, ulong max,
@@ -293,6 +346,10 @@ static void do_mt_node(ulong entry, ulong min, ulong max,
readmem(maple_node, KVADDR, node_buf, SIZE(maple_node),
"mt_dump_node read maple_node", FAULT_ON_ERROR);
+ if (td && td->flags & TREE_STRUCT_VERBOSE) {
+ dump_mt_node(maple_node, node_buf, type, min, max, depth);
+ }
+
switch (type) {
case maple_dense:
for (i = 0; i < mt_slots[maple_dense]; i++) {
@@ -335,6 +392,12 @@ static int do_maple_tree_traverse(ulong ptr, int is_root,
"mt_dump read maple_tree", FAULT_ON_ERROR);
entry = ULONG(tree_buf + OFFSET(maple_tree_ma_root));
+ if (td && td->flags & TREE_STRUCT_VERBOSE) {
+ fprintf(fp, "maple_tree(%lx) flags %X, height %u root 0x%lx\n\n",
+ ptr, UINT(tree_buf + OFFSET(maple_tree_ma_flags)),
+ mt_height(tree_buf), entry);
+ }
+
if (!xa_is_node(entry))
do_mt_entry(entry, 0, 0, 0, 0, path, &global_index, ops);
else if (entry) {
diff --git a/tools.c b/tools.c
index 7f64bd6328cf..cc5c3448b93e 100644
--- a/tools.c
+++ b/tools.c
@@ -30,7 +30,7 @@ static void dealloc_hq_entry(struct hq_entry *);
static void show_options(void);
static void dump_struct_members(struct list_data *, int, ulong);
static void rbtree_iteration(ulong, struct tree_data *, char *);
-static void dump_struct_members_for_tree(struct tree_data *, int, ulong);
+void dump_struct_members_for_tree(struct tree_data *, int, ulong);
struct req_entry {
char *arg, *name, **member;
@@ -40,8 +40,8 @@ struct req_entry {
};
static void print_value(struct req_entry *, unsigned int, ulong, unsigned int);
-static struct req_entry *fill_member_offsets(char *);
-static void dump_struct_members_fast(struct req_entry *, int, ulong);
+struct req_entry *fill_member_offsets(char *);
+void dump_struct_members_fast(struct req_entry *, int, ulong);
FILE *
set_error(char *target)
@@ -3666,7 +3666,7 @@ dump_struct_members_fast(struct req_entry *e, int radix, ulong p)
}
}
-static struct req_entry *
+struct req_entry *
fill_member_offsets(char *arg)
{
int j;
@@ -4307,6 +4307,7 @@ dump_struct_members(struct list_data *ld, int idx, ulong next)
#define RADIXTREE_REQUEST (0x1)
#define RBTREE_REQUEST (0x2)
#define XARRAY_REQUEST (0x4)
+#define MAPLE_REQUEST (0x8)
void
cmd_tree()
@@ -4317,6 +4318,7 @@ cmd_tree()
struct datatype_member struct_member, *sm;
struct syment *sp;
ulong value;
+ char *type_name = NULL;
type_flag = 0;
root_offset = 0;
@@ -4324,25 +4326,33 @@ cmd_tree()
td = &tree_data;
BZERO(td, sizeof(struct tree_data));
- while ((c = getopt(argcnt, args, "xdt:r:o:s:S:plN")) != EOF) {
+ while ((c = getopt(argcnt, args, "xdt:r:o:s:S:plNv")) != EOF) {
switch (c)
{
case 't':
- if (type_flag & (RADIXTREE_REQUEST|RBTREE_REQUEST|XARRAY_REQUEST)) {
+ if (type_flag & (RADIXTREE_REQUEST|RBTREE_REQUEST|XARRAY_REQUEST|MAPLE_REQUEST)) {
error(INFO, "multiple tree types may not be entered\n");
cmd_usage(pc->curcmd, SYNOPSIS);
}
if (STRNEQ(optarg, "ra"))
- if (MEMBER_EXISTS("radix_tree_root", "xa_head"))
+ if (MEMBER_EXISTS("radix_tree_root", "xa_head")) {
type_flag = XARRAY_REQUEST;
- else
+ type_name = "Xarrays";
+ } else {
type_flag = RADIXTREE_REQUEST;
- else if (STRNEQ(optarg, "rb"))
+ type_name = "radix trees";
+ }
+ else if (STRNEQ(optarg, "rb")) {
type_flag = RBTREE_REQUEST;
- else if (STRNEQ(optarg, "x"))
+ type_name = "rbtrees";
+ } else if (STRNEQ(optarg, "x")) {
type_flag = XARRAY_REQUEST;
- else {
+ type_name = "Xarrays";
+ } else if (STRNEQ(optarg, "m")) {
+ type_flag = MAPLE_REQUEST;
+ type_name = "maple trees";
+ } else {
error(INFO, "invalid tree type: %s\n", optarg);
cmd_usage(pc->curcmd, SYNOPSIS);
}
@@ -4417,6 +4427,9 @@ cmd_tree()
"-d and -x are mutually exclusive\n");
td->flags |= TREE_STRUCT_RADIX_10;
break;
+ case 'v':
+ td->flags |= TREE_STRUCT_VERBOSE;
+ break;
default:
argerrs++;
break;
@@ -4426,13 +4439,17 @@ cmd_tree()
if (argerrs)
cmd_usage(pc->curcmd, SYNOPSIS);
- if ((type_flag & (XARRAY_REQUEST|RADIXTREE_REQUEST)) && (td->flags & TREE_LINEAR_ORDER))
- error(FATAL, "-l option is not applicable to %s\n",
- type_flag & RADIXTREE_REQUEST ? "radix trees" : "Xarrays");
+ if ((type_flag & (XARRAY_REQUEST|RADIXTREE_REQUEST|MAPLE_REQUEST)) &&
+ (td->flags & TREE_LINEAR_ORDER))
+ error(FATAL, "-l option is not applicable to %s\n", type_name);
- if ((type_flag & (XARRAY_REQUEST|RADIXTREE_REQUEST)) && (td->flags & TREE_NODE_OFFSET_ENTERED))
- error(FATAL, "-o option is not applicable to %s\n",
- type_flag & RADIXTREE_REQUEST ? "radix trees" : "Xarrays");
+ if ((type_flag & (XARRAY_REQUEST|RADIXTREE_REQUEST|MAPLE_REQUEST)) &&
+ (td->flags & TREE_NODE_OFFSET_ENTERED))
+ error(FATAL, "-o option is not applicable to %s\n", type_name);
+
+ if ((type_flag & (RBTREE_REQUEST|XARRAY_REQUEST|RADIXTREE_REQUEST)) &&
+ (td->flags & TREE_STRUCT_VERBOSE))
+ error(FATAL, "-v option is not applicable to %s\n", type_name);
if ((td->flags & TREE_ROOT_OFFSET_ENTERED) &&
(td->flags & TREE_NODE_POINTER))
@@ -4506,12 +4523,26 @@ next_arg:
if (td->flags & TREE_STRUCT_RADIX_16)
fprintf(fp, "%sTREE_STRUCT_RADIX_16",
others++ ? "|" : "");
+ if (td->flags & TREE_PARSE_MEMBER)
+ fprintf(fp, "%sTREE_PARSE_MEMBER",
+ others++ ? "|" : "");
+ if (td->flags & TREE_READ_MEMBER)
+ fprintf(fp, "%sTREE_READ_MEMBER",
+ others++ ? "|" : "");
+ if (td->flags & TREE_LINEAR_ORDER)
+ fprintf(fp, "%sTREE_LINEAR_ORDER",
+ others++ ? "|" : "");
+ if (td->flags & TREE_STRUCT_VERBOSE)
+ fprintf(fp, "%sTREE_STRUCT_VERBOSE",
+ others++ ? "|" : "");
fprintf(fp, ")\n");
fprintf(fp, " type: ");
if (type_flag & RADIXTREE_REQUEST)
fprintf(fp, "radix\n");
else if (type_flag & XARRAY_REQUEST)
fprintf(fp, "xarray\n");
+ else if (type_flag & MAPLE_REQUEST)
+ fprintf(fp, "maple\n");
else
fprintf(fp, "red-black%s",
type_flag & RBTREE_REQUEST ?
@@ -4532,6 +4563,8 @@ next_arg:
do_rdtree(td);
else if (type_flag & XARRAY_REQUEST)
do_xatree(td);
+ else if (type_flag & MAPLE_REQUEST)
+ do_mptree(td);
else
do_rbtree(td);
hq_close();
--
2.37.1

View File

@ -1,205 +0,0 @@
From 7955002f1292f0c0d02076440b441ab7ebaf650d Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 10 Jan 2023 14:56:29 +0800
Subject: [PATCH 67/89] Add do_maple_tree() for maple tree operations
do_maple_tree() is similar to do_radix_tree() and do_xarray(), which
takes the same do_maple_tree_traverse entry as tree command.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 6 +++
maple_tree.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 151 insertions(+)
diff --git a/defs.h b/defs.h
index cc9af21a6b64..b2389cd82fae 100644
--- a/defs.h
+++ b/defs.h
@@ -5750,6 +5750,12 @@ int cleanup_memory_driver(void);
void maple_init(void);
int do_mptree(struct tree_data *);
+ulong do_maple_tree(ulong, int, struct list_pair *);
+#define MAPLE_TREE_COUNT (1)
+#define MAPLE_TREE_SEARCH (2)
+#define MAPLE_TREE_DUMP (3)
+#define MAPLE_TREE_GATHER (4)
+#define MAPLE_TREE_DUMP_CB (5)
/*
* help.c
diff --git a/maple_tree.c b/maple_tree.c
index 471136f3eb1d..807c17f7dfa0 100644
--- a/maple_tree.c
+++ b/maple_tree.c
@@ -40,6 +40,12 @@ static inline bool ma_is_leaf(const enum maple_type type)
/*************** For cmd_tree ********************/
+struct do_maple_tree_info {
+ ulong maxcount;
+ ulong count;
+ void *data;
+};
+
struct maple_tree_ops {
void (*entry)(ulong node, ulong slot, const char *path,
ulong index, void *private);
@@ -281,6 +287,9 @@ static void do_mt_entry(ulong entry, ulong min, ulong max, uint depth,
static struct req_entry **e = NULL;
struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
+ if (ops->entry)
+ ops->entry(entry, entry, path, max, ops->private);
+
if (!td)
return;
@@ -424,6 +433,142 @@ int do_mptree(struct tree_data *td)
return 0;
}
+/************* For do_maple_tree *****************/
+static void do_maple_tree_count(ulong node, ulong slot, const char *path,
+ ulong index, void *private)
+{
+ struct do_maple_tree_info *info = private;
+ info->count++;
+}
+
+static void do_maple_tree_search(ulong node, ulong slot, const char *path,
+ ulong index, void *private)
+{
+ struct do_maple_tree_info *info = private;
+ struct list_pair *lp = info->data;
+
+ if (lp->index == index) {
+ lp->value = (void *)slot;
+ info->count = 1;
+ }
+}
+
+static void do_maple_tree_dump(ulong node, ulong slot, const char *path,
+ ulong index, void *private)
+{
+ struct do_maple_tree_info *info = private;
+ fprintf(fp, "[%lu] %lx\n", index, slot);
+ info->count++;
+}
+
+static void do_maple_tree_gather(ulong node, ulong slot, const char *path,
+ ulong index, void *private)
+{
+ struct do_maple_tree_info *info = private;
+ struct list_pair *lp = info->data;
+
+ if (info->maxcount) {
+ lp[info->count].index = index;
+ lp[info->count].value = (void *)slot;
+
+ info->count++;
+ info->maxcount--;
+ }
+}
+
+static void do_maple_tree_dump_cb(ulong node, ulong slot, const char *path,
+ ulong index, void *private)
+{
+ struct do_maple_tree_info *info = private;
+ struct list_pair *lp = info->data;
+ int (*cb)(ulong) = lp->value;
+
+ /* Caller defined operation */
+ if (!cb(slot)) {
+ error(FATAL, "do_maple_tree: callback "
+ "operation failed: entry: %ld item: %lx\n",
+ info->count, slot);
+ }
+ info->count++;
+}
+
+/*
+ * do_maple_tree argument usage:
+ *
+ * root: Address of a maple_tree_root structure
+ *
+ * flag: MAPLE_TREE_COUNT - Return the number of entries in the tree.
+ * MAPLE_TREE_SEARCH - Search for an entry at lp->index; if found,
+ * store the entry in lp->value and return a count of 1; otherwise
+ * return a count of 0.
+ * MAPLE_TREE_DUMP - Dump all existing index/value pairs.
+ * MAPLE_TREE_GATHER - Store all existing index/value pairs in the
+ * passed-in array of list_pair structs starting at lp,
+ * returning the count of entries stored; the caller can/should
+ * limit the number of returned entries by putting the array size
+ * (max count) in the lp->index field of the first structure
+ * in the passed-in array.
+ * MAPLE_TREE_DUMP_CB - Similar with MAPLE_TREE_DUMP, but for each
+ * maple tree entry, a user defined callback at lp->value will
+ * be invoked.
+ *
+ * lp: Unused by MAPLE_TREE_COUNT and MAPLE_TREE_DUMP.
+ * A pointer to a list_pair structure for MAPLE_TREE_SEARCH.
+ * A pointer to an array of list_pair structures for
+ * MAPLE_TREE_GATHER; the dimension (max count) of the array may
+ * be stored in the index field of the first structure to avoid
+ * any chance of an overrun.
+ * For MAPLE_TREE_DUMP_CB, the lp->value must be initialized as a
+ * callback function. The callback prototype must be: int (*)(ulong);
+ */
+ulong
+do_maple_tree(ulong root, int flag, struct list_pair *lp)
+{
+ struct do_maple_tree_info info = {
+ .count = 0,
+ .data = lp,
+ };
+ struct maple_tree_ops ops = {
+ .private = &info,
+ .is_td = false,
+ };
+
+ switch (flag)
+ {
+ case MAPLE_TREE_COUNT:
+ ops.entry = do_maple_tree_count;
+ break;
+
+ case MAPLE_TREE_SEARCH:
+ ops.entry = do_maple_tree_search;
+ break;
+
+ case MAPLE_TREE_DUMP:
+ ops.entry = do_maple_tree_dump;
+ break;
+
+ case MAPLE_TREE_GATHER:
+ if (!(info.maxcount = lp->index))
+ info.maxcount = (ulong)(-1); /* caller beware */
+
+ ops.entry = do_maple_tree_gather;
+ break;
+
+ case MAPLE_TREE_DUMP_CB:
+ if (lp->value == NULL) {
+ error(FATAL, "do_maple_tree: need set callback function");
+ }
+ ops.entry = do_maple_tree_dump_cb;
+ break;
+
+ default:
+ error(FATAL, "do_maple_tree: invalid flag: %lx\n", flag);
+ }
+
+ do_maple_tree_traverse(root, true, &ops);
+ return info.count;
+}
+
/***********************************************/
void maple_init(void)
{
--
2.37.1

View File

@ -1,455 +0,0 @@
From 77e9fc571e2782b0eafe1d376cfd7f30fb08fecf Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 10 Jan 2023 14:56:30 +0800
Subject: [PATCH 68/89] Introduce maple tree vma iteration to vm_area_dump()
Since memory.c:vm_area_dump() will iterate all vma, this patch mainly
introduces maple tree vma iteration to it.
We extract the code which handles each vma into a function. If
mm_struct_mmap exist, aka the linked list of vma iteration available,
we goto the original way; if not and mm_struct_mm_mt exist, aka
maple tree is available, then we goto the maple tree vma iteration.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
Makefile | 2 +-
memory.c | 321 +++++++++++++++++++++++++++++++++----------------------
2 files changed, 193 insertions(+), 130 deletions(-)
diff --git a/Makefile b/Makefile
index b290836ffc83..2af4dd0ae904 100644
--- a/Makefile
+++ b/Makefile
@@ -364,7 +364,7 @@ filesys.o: ${GENERIC_HFILES} filesys.c
help.o: ${GENERIC_HFILES} help.c
${CC} -c ${CRASH_CFLAGS} help.c ${WARNING_OPTIONS} ${WARNING_ERROR}
-memory.o: ${GENERIC_HFILES} memory.c
+memory.o: ${GENERIC_HFILES} ${MAPLE_TREE_HFILES} memory.c
${CC} -c ${CRASH_CFLAGS} memory.c ${WARNING_OPTIONS} ${WARNING_ERROR}
test.o: ${GENERIC_HFILES} test.c
diff --git a/memory.c b/memory.c
index 156de2f7b5a3..5141fbea4b40 100644
--- a/memory.c
+++ b/memory.c
@@ -21,6 +21,7 @@
#include <ctype.h>
#include <netinet/in.h>
#include <byteswap.h>
+#include "maple_tree.h"
struct meminfo { /* general purpose memory information structure */
ulong cache; /* used by the various memory searching/dumping */
@@ -137,6 +138,27 @@ struct searchinfo {
char buf[BUFSIZE];
};
+struct handle_each_vm_area_args {
+ ulong task;
+ ulong flag;
+ ulong vaddr;
+ struct reference *ref;
+ char *vma_header;
+ char *buf1;
+ char *buf2;
+ char *buf3;
+ char *buf4;
+ char *buf5;
+ ulong vma;
+ char **vma_buf;
+ struct task_mem_usage *tm;
+ int *found;
+ int *single_vma_found;
+ unsigned int radix;
+ struct task_context *tc;
+ ulong *single_vma;
+};
+
static char *memtype_string(int, int);
static char *error_handle_string(ulong);
static void collect_page_member_data(char *, struct meminfo *);
@@ -299,6 +321,7 @@ static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
+static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
/*
* Memory display modes specific to this file.
@@ -363,6 +386,10 @@ vm_init(void)
MEMBER_OFFSET_INIT(task_struct_mm, "task_struct", "mm");
MEMBER_OFFSET_INIT(mm_struct_mmap, "mm_struct", "mmap");
+ MEMBER_OFFSET_INIT(mm_struct_mm_mt, "mm_struct", "mm_mt");
+ if (VALID_MEMBER(mm_struct_mm_mt)) {
+ maple_init();
+ }
MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd");
MEMBER_OFFSET_INIT(mm_struct_rss, "mm_struct", "rss");
if (!VALID_MEMBER(mm_struct_rss))
@@ -3874,7 +3901,7 @@ bailout:
* for references -- and only then does a display
*/
-#define PRINT_VM_DATA() \
+#define PRINT_VM_DATA(buf4, buf5, tm) \
{ \
fprintf(fp, "%s %s ", \
mkstring(buf4, VADDR_PRLEN, CENTER|LJUST, "MM"), \
@@ -3896,9 +3923,9 @@ bailout:
mkstring(buf5, 8, CENTER|LJUST, NULL)); \
}
-#define PRINT_VMA_DATA() \
+#define PRINT_VMA_DATA(buf1, buf2, buf3, buf4, vma) \
fprintf(fp, "%s%s%s%s%s %6llx%s%s\n", \
- mkstring(buf4, VADDR_PRLEN, CENTER|LJUST|LONG_HEX, MKSTR(vma)), \
+ mkstring(buf4, VADDR_PRLEN, CENTER|LJUST|LONG_HEX, MKSTR(vma)),\
space(MINSPACE), \
mkstring(buf2, UVADDR_PRLEN, RJUST|LONG_HEX, MKSTR(vm_start)), \
space(MINSPACE), \
@@ -3925,18 +3952,137 @@ bailout:
(DO_REF_SEARCH(X) && (string_exists(S)) && FILENAME_COMPONENT((S),(X)->str))
#define VM_REF_FOUND(X) ((X) && ((X)->cmdflags & VM_REF_HEADER))
-ulong
-vm_area_dump(ulong task, ulong flag, ulong vaddr, struct reference *ref)
+static ulong handle_each_vm_area(struct handle_each_vm_area_args *args)
{
- struct task_context *tc;
- ulong vma;
+ char *dentry_buf, *file_buf;
ulong vm_start;
ulong vm_end;
- ulong vm_next, vm_mm;
- char *dentry_buf, *vma_buf, *file_buf;
+ ulong vm_mm;
ulonglong vm_flags;
ulong vm_file, inode;
ulong dentry, vfsmnt;
+
+ if ((args->flag & PHYSADDR) && !DO_REF_SEARCH(args->ref))
+ fprintf(fp, "%s", args->vma_header);
+
+ inode = 0;
+ BZERO(args->buf1, BUFSIZE);
+ *(args->vma_buf) = fill_vma_cache(args->vma);
+
+ vm_mm = ULONG(*(args->vma_buf) + OFFSET(vm_area_struct_vm_mm));
+ vm_end = ULONG(*(args->vma_buf) + OFFSET(vm_area_struct_vm_end));
+ vm_start = ULONG(*(args->vma_buf) + OFFSET(vm_area_struct_vm_start));
+ vm_flags = get_vm_flags(*(args->vma_buf));
+ vm_file = ULONG(*(args->vma_buf) + OFFSET(vm_area_struct_vm_file));
+
+ if (args->flag & PRINT_SINGLE_VMA) {
+ if (args->vma != *(args->single_vma))
+ return 0;
+ fprintf(fp, "%s", args->vma_header);
+ *(args->single_vma_found) = TRUE;
+ }
+
+ if (args->flag & PRINT_VMA_STRUCTS) {
+ dump_struct("vm_area_struct", args->vma, args->radix);
+ return 0;
+ }
+
+ if (vm_file && !(args->flag & VERIFY_ADDR)) {
+ file_buf = fill_file_cache(vm_file);
+ dentry = ULONG(file_buf + OFFSET(file_f_dentry));
+ dentry_buf = NULL;
+ if (dentry) {
+ dentry_buf = fill_dentry_cache(dentry);
+ if (VALID_MEMBER(file_f_vfsmnt)) {
+ vfsmnt = ULONG(file_buf + OFFSET(file_f_vfsmnt));
+ get_pathname(dentry, args->buf1, BUFSIZE, 1, vfsmnt);
+ } else
+ get_pathname(dentry, args->buf1, BUFSIZE, 1, 0);
+ }
+ if ((args->flag & PRINT_INODES) && dentry)
+ inode = ULONG(dentry_buf + OFFSET(dentry_d_inode));
+ }
+
+ if (!(args->flag & UVADDR) || ((args->flag & UVADDR) &&
+ ((args->vaddr >= vm_start) && (args->vaddr < vm_end)))) {
+ *(args->found) = TRUE;
+
+ if (args->flag & VERIFY_ADDR)
+ return args->vma;
+
+ if (DO_REF_SEARCH(args->ref)) {
+ if (VM_REF_CHECK_HEXVAL(args->ref, args->vma) ||
+ VM_REF_CHECK_HEXVAL(args->ref, (ulong)vm_flags) ||
+ VM_REF_CHECK_STRING(args->ref, args->buf1)) {
+ if (!(args->ref->cmdflags & VM_REF_HEADER)) {
+ print_task_header(fp, args->tc, 0);
+ PRINT_VM_DATA(args->buf4, args->buf5, args->tm);
+ args->ref->cmdflags |= VM_REF_HEADER;
+ }
+ if (!(args->ref->cmdflags & VM_REF_VMA) ||
+ (args->ref->cmdflags & VM_REF_PAGE)) {
+ fprintf(fp, "%s", args->vma_header);
+ args->ref->cmdflags |= VM_REF_VMA;
+ args->ref->cmdflags &= ~VM_REF_PAGE;
+ args->ref->ref1 = args->vma;
+ }
+ PRINT_VMA_DATA(args->buf1, args->buf2,
+ args->buf3, args->buf4, args->vma);
+ }
+
+ if (vm_area_page_dump(args->vma, args->task,
+ vm_start, vm_end, vm_mm, args->ref)) {
+ if (!(args->ref->cmdflags & VM_REF_HEADER)) {
+ print_task_header(fp, args->tc, 0);
+ PRINT_VM_DATA(args->buf4, args->buf5, args->tm);
+ args->ref->cmdflags |= VM_REF_HEADER;
+ }
+ if (!(args->ref->cmdflags & VM_REF_VMA) ||
+ (args->ref->ref1 != args->vma)) {
+ fprintf(fp, "%s", args->vma_header);
+ PRINT_VMA_DATA(args->buf1, args->buf2,
+ args->buf3, args->buf4, args->vma);
+ args->ref->cmdflags |= VM_REF_VMA;
+ args->ref->ref1 = args->vma;
+ }
+
+ args->ref->cmdflags |= VM_REF_DISPLAY;
+ vm_area_page_dump(args->vma, args->task,
+ vm_start, vm_end, vm_mm, args->ref);
+ args->ref->cmdflags &= ~VM_REF_DISPLAY;
+ }
+
+ return 0;
+ }
+
+ if (inode) {
+ fprintf(fp, "%lx%s%s%s%s%s%6llx%s%lx %s\n",
+ args->vma, space(MINSPACE),
+ mkstring(args->buf2, UVADDR_PRLEN, RJUST|LONG_HEX,
+ MKSTR(vm_start)), space(MINSPACE),
+ mkstring(args->buf3, UVADDR_PRLEN, RJUST|LONG_HEX,
+ MKSTR(vm_end)), space(MINSPACE),
+ vm_flags, space(MINSPACE), inode, args->buf1);
+ } else {
+ PRINT_VMA_DATA(args->buf1, args->buf2,
+ args->buf3, args->buf4, args->vma);
+
+ if (args->flag & (PHYSADDR|PRINT_SINGLE_VMA))
+ vm_area_page_dump(args->vma, args->task,
+ vm_start, vm_end, vm_mm, args->ref);
+ }
+
+ if (args->flag & UVADDR)
+ return args->vma;
+ }
+ return 0;
+}
+
+ulong
+vm_area_dump(ulong task, ulong flag, ulong vaddr, struct reference *ref)
+{
+ struct task_context *tc;
+ ulong vma;
ulong single_vma;
unsigned int radix;
int single_vma_found;
@@ -3948,6 +4094,10 @@ vm_area_dump(ulong task, ulong flag, ulong vaddr, struct reference *ref)
char buf4[BUFSIZE];
char buf5[BUFSIZE];
char vma_header[BUFSIZE];
+ char *vma_buf;
+ int i;
+ ulong mm_mt, entry_num;
+ struct list_pair *entry_list;
tc = task_to_context(task);
tm = &task_mem_usage;
@@ -3981,14 +4131,14 @@ vm_area_dump(ulong task, ulong flag, ulong vaddr, struct reference *ref)
if (VM_REF_CHECK_HEXVAL(ref, tm->mm_struct_addr) ||
VM_REF_CHECK_HEXVAL(ref, tm->pgd_addr)) {
print_task_header(fp, tc, 0);
- PRINT_VM_DATA();
+ PRINT_VM_DATA(buf4, buf5, tm);
fprintf(fp, "\n");
return (ulong)NULL;
}
if (!(flag & (UVADDR|PRINT_MM_STRUCT|PRINT_VMA_STRUCTS|PRINT_SINGLE_VMA)) &&
!DO_REF_SEARCH(ref))
- PRINT_VM_DATA();
+ PRINT_VM_DATA(buf4, buf5, tm);
if (!tm->mm_struct_addr) {
if (pc->curcmd_flags & MM_STRUCT_FORCE) {
@@ -4012,9 +4162,6 @@ vm_area_dump(ulong task, ulong flag, ulong vaddr, struct reference *ref)
return (ulong)NULL;
}
- readmem(tm->mm_struct_addr + OFFSET(mm_struct_mmap), KVADDR,
- &vma, sizeof(void *), "mm_struct mmap", FAULT_ON_ERROR);
-
sprintf(vma_header, "%s%s%s%s%s FLAGS%sFILE\n",
mkstring(buf1, VADDR_PRLEN, CENTER|LJUST, "VMA"),
space(MINSPACE),
@@ -4027,125 +4174,41 @@ vm_area_dump(ulong task, ulong flag, ulong vaddr, struct reference *ref)
!DO_REF_SEARCH(ref))
fprintf(fp, "%s", vma_header);
- for (found = FALSE; vma; vma = vm_next) {
-
- if ((flag & PHYSADDR) && !DO_REF_SEARCH(ref))
- fprintf(fp, "%s", vma_header);
-
- inode = 0;
- BZERO(buf1, BUFSIZE);
- vma_buf = fill_vma_cache(vma);
-
- vm_mm = ULONG(vma_buf + OFFSET(vm_area_struct_vm_mm));
- vm_end = ULONG(vma_buf + OFFSET(vm_area_struct_vm_end));
- vm_next = ULONG(vma_buf + OFFSET(vm_area_struct_vm_next));
- vm_start = ULONG(vma_buf + OFFSET(vm_area_struct_vm_start));
- vm_flags = get_vm_flags(vma_buf);
- vm_file = ULONG(vma_buf + OFFSET(vm_area_struct_vm_file));
-
- if (flag & PRINT_SINGLE_VMA) {
- if (vma != single_vma)
- continue;
- fprintf(fp, "%s", vma_header);
- single_vma_found = TRUE;
- }
-
- if (flag & PRINT_VMA_STRUCTS) {
- dump_struct("vm_area_struct", vma, radix);
- continue;
- }
+ found = FALSE;
- if (vm_file && !(flag & VERIFY_ADDR)) {
- file_buf = fill_file_cache(vm_file);
- dentry = ULONG(file_buf + OFFSET(file_f_dentry));
- dentry_buf = NULL;
- if (dentry) {
- dentry_buf = fill_dentry_cache(dentry);
- if (VALID_MEMBER(file_f_vfsmnt)) {
- vfsmnt = ULONG(file_buf +
- OFFSET(file_f_vfsmnt));
- get_pathname(dentry, buf1, BUFSIZE,
- 1, vfsmnt);
- } else {
- get_pathname(dentry, buf1, BUFSIZE,
- 1, 0);
- }
- }
- if ((flag & PRINT_INODES) && dentry) {
- inode = ULONG(dentry_buf +
- OFFSET(dentry_d_inode));
+ struct handle_each_vm_area_args args = {
+ .task = task, .flag = flag, .vaddr = vaddr,
+ .ref = ref, .tc = tc, .radix = radix,
+ .tm = tm, .buf1 = buf1, .buf2 = buf2,
+ .buf3 = buf3, .buf4 = buf4, .buf5 = buf5,
+ .vma_header = vma_header, .single_vma = &single_vma,
+ .single_vma_found = &single_vma_found, .found = &found,
+ .vma_buf = &vma_buf,
+ };
+
+ if (INVALID_MEMBER(mm_struct_mmap) && VALID_MEMBER(mm_struct_mm_mt)) {
+ mm_mt = tm->mm_struct_addr + OFFSET(mm_struct_mm_mt);
+ entry_num = do_maple_tree(mm_mt, MAPLE_TREE_COUNT, NULL);
+ entry_list = (struct list_pair *)GETBUF(entry_num * sizeof(struct list_pair));
+ do_maple_tree(mm_mt, MAPLE_TREE_GATHER, entry_list);
+
+ for (i = 0; i < entry_num; i++) {
+ if (!!(args.vma = (ulong)entry_list[i].value) &&
+ handle_each_vm_area(&args)) {
+ FREEBUF(entry_list);
+ return args.vma;
}
}
-
- if (!(flag & UVADDR) || ((flag & UVADDR) &&
- ((vaddr >= vm_start) && (vaddr < vm_end)))) {
- found = TRUE;
-
- if (flag & VERIFY_ADDR)
- return vma;
-
- if (DO_REF_SEARCH(ref)) {
- if (VM_REF_CHECK_HEXVAL(ref, vma) ||
- VM_REF_CHECK_HEXVAL(ref, (ulong)vm_flags) ||
- VM_REF_CHECK_STRING(ref, buf1)) {
- if (!(ref->cmdflags & VM_REF_HEADER)) {
- print_task_header(fp, tc, 0);
- PRINT_VM_DATA();
- ref->cmdflags |= VM_REF_HEADER;
- }
- if (!(ref->cmdflags & VM_REF_VMA) ||
- (ref->cmdflags & VM_REF_PAGE)) {
- fprintf(fp, "%s", vma_header);
- ref->cmdflags |= VM_REF_VMA;
- ref->cmdflags &= ~VM_REF_PAGE;
- ref->ref1 = vma;
- }
- PRINT_VMA_DATA();
- }
-
- if (vm_area_page_dump(vma, task,
- vm_start, vm_end, vm_mm, ref)) {
- if (!(ref->cmdflags & VM_REF_HEADER)) {
- print_task_header(fp, tc, 0);
- PRINT_VM_DATA();
- ref->cmdflags |= VM_REF_HEADER;
- }
- if (!(ref->cmdflags & VM_REF_VMA) ||
- (ref->ref1 != vma)) {
- fprintf(fp, "%s", vma_header);
- PRINT_VMA_DATA();
- ref->cmdflags |= VM_REF_VMA;
- ref->ref1 = vma;
- }
-
- ref->cmdflags |= VM_REF_DISPLAY;
- vm_area_page_dump(vma, task,
- vm_start, vm_end, vm_mm, ref);
- ref->cmdflags &= ~VM_REF_DISPLAY;
- }
-
- continue;
- }
-
- if (inode) {
- fprintf(fp, "%lx%s%s%s%s%s%6llx%s%lx %s\n",
- vma, space(MINSPACE),
- mkstring(buf2, UVADDR_PRLEN, RJUST|LONG_HEX,
- MKSTR(vm_start)), space(MINSPACE),
- mkstring(buf3, UVADDR_PRLEN, RJUST|LONG_HEX,
- MKSTR(vm_end)), space(MINSPACE),
- vm_flags, space(MINSPACE), inode, buf1);
- } else {
- PRINT_VMA_DATA();
-
- if (flag & (PHYSADDR|PRINT_SINGLE_VMA))
- vm_area_page_dump(vma, task,
- vm_start, vm_end, vm_mm, ref);
- }
-
- if (flag & UVADDR)
+ FREEBUF(entry_list);
+ } else {
+ readmem(tm->mm_struct_addr + OFFSET(mm_struct_mmap), KVADDR,
+ &vma, sizeof(void *), "mm_struct mmap", FAULT_ON_ERROR);
+ while (vma) {
+ args.vma = vma;
+ if (handle_each_vm_area(&args))
return vma;
- }
+ vma = ULONG(vma_buf + OFFSET(vm_area_struct_vm_next));
+ }
}
if (flag & VERIFY_ADDR)
--
2.37.1

View File

@ -1,135 +0,0 @@
From a3dbd5c455992baf6bfc196b9059a2b485823b83 Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 10 Jan 2023 14:56:31 +0800
Subject: [PATCH 69/89] Update the help text of "tree" command for maple tree
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
help.c | 86 +++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 64 insertions(+), 22 deletions(-)
diff --git a/help.c b/help.c
index bfba43f6e9fd..7ceefa06732b 100644
--- a/help.c
+++ b/help.c
@@ -6313,19 +6313,20 @@ NULL
char *help_tree[] = {
"tree",
-"display radix tree, XArray or red-black tree",
-"[-t [radix|xarray|rbtree]] [-r offset] [-[s|S] struct[.member[,member]]]\n -[x|d] [-o offset] [-l] [-p] [-N] start",
-" This command dumps the contents of a radix tree, an XAarray, or a red-black",
-" tree. The arguments are as follows:\n",
+"display radix tree, XArray, red-black tree or maple tree",
+"[-t [radix|xarray|rbtree|maple]] [-r offset] [-[s|S] struct[.member[,member]]]\n"
+" -[x|d] [-o offset] [-l] [-p] [-v] [-N] start",
+" This command dumps the contents of a radix tree, an XAarray, a red-black",
+" tree, or a maple tree. The arguments are as follows:\n",
" -t type The type of tree to dump; the type string can be one of ",
-" \"radix\", \"rbtree\", or \"xarray\", or alternatively, \"ra\",",
-" \"rb\" or \"x\" are acceptable. If not specified, rbtree is the",
-" default type.",
+" \"radix\", \"rbtree\", \"xarray\", or \"maple\", or alternatively,",
+" \"ra\", \"rb\", \"x\" or \"m\" are acceptable. If not specified,",
+" rbtree is the default type.",
" -r offset If the \"start\" argument is the address of a data structure that",
-" contains an radix_tree_root, xarray or rb_root structure, then this",
-" is the offset to that structure member. If the offset is non-zero,",
-" then this option is required. The offset may be entered in either",
-" of two manners:",
+" contains an radix_tree_root, maple_tree, xarray or rb_root",
+" structure, then this is the offset to that structure member. If",
+" the offset is non-zero, then this option is required. The offset",
+" may be entered in either of two manners:",
" 1. In \"structure.member\" format.",
" 2. A number of bytes.",
" -o offset For red-black trees only, the offset of the rb_node within its ",
@@ -6354,25 +6355,26 @@ char *help_tree[] = {
" -p Display the node's position information, showing the relationship",
" between it and the root. For red-black trees, a position that",
" indicates \"root/l/r\" means that the node is the right child",
-" of the left child of the root node. For radix trees and xarrays,",
-" the index, the height, and the slot index values are shown with",
-" respect to the root.",
+" of the left child of the root node. For radix trees, xarrays and",
+" maple trees, the index, the height, and the slot index values are",
+" shown with respect to the root.",
" -x Override default output format with hexadecimal format.",
" -d Override default output format with decimal format.",
+" -v For maple trees only, dump the contents of each maple tree node.",
" ",
" The meaning of the \"start\" argument, which can be expressed either in",
" hexadecimal format or symbolically, depends upon whether the -N option",
" is prepended:",
" ",
-" start The address of a radix_tree_root, xarray or rb_root structure, or",
-" the address of a structure containing the radix_tree_root, xarray",
-" or rb_root structure; if the latter, then the \"-r offset\" option",
-" must be used if the member offset of the root structure is ",
-" non-zero.",
+" start The address of a radix_tree_root, maple_tree, xarray or rb_root",
+" structure, or the address of a structure containing the",
+" radix_tree_root, maple_tree, xarray or rb_root structure; if the",
+" latter, then the \"-r offset\" option must be used if the member",
+" offset of the root structure is non-zero.",
" ",
-" -N start The address of a radix_tree_node, xa_node or rb_node structure,",
-" bypassing the radix_tree_root, xarray, or rb_root that points",
-" to it.",
+" -N start The address of a radix_tree_node, maple_node, xa_node or rb_node",
+" structure, bypassing the radix_tree_root, maple_tree, xarray, or",
+" rb_root that points to it.",
"",
"\nEXAMPLES",
" The vmap_area_root is a standalone rb_root structure. Display the ",
@@ -6709,6 +6711,46 @@ char *help_tree[] = {
" _refcount = {",
" counter = 0x1",
" }",
+"",
+" The -v option is introduced specifically for dumping the complete content of",
+" maple tree:",
+"",
+" %s> tree -t maple 0xffff9034c006aec0 -v",
+"",
+" maple_tree(ffff9034c006aec0) flags 309, height 2 root 0xffff9034de70041e",
+"",
+" 0-18446744073709551615: node 0xffff9034de700400 depth 0 type 3 parent ...",
+" 0-140112331583487: node 0xffff9034c01e8800 depth 1 type 1 parent ...",
+" 0-94643156942847: 0x0",
+" 94643156942848-94643158024191: 0xffff9035131754c0",
+" 94643158024192-94643160117247: 0x0",
+" ...",
+"",
+" The existing options can work as well for maple tree:",
+"",
+" %s> tree -t maple -r mm_struct.mm_mt 0xffff9034c006aec0 -p",
+" ffff9035131754c0",
+" index: 1 position: root/0/1",
+" ffff9035131751c8",
+" index: 2 position: root/0/3",
+" ffff9035131757b8",
+" index: 3 position: root/0/4",
+" ...",
+"",
+" %s> tree -t maple 0xffff9034c006aec0 -p -x -s vm_area_struct.vm_start,vm_end",
+" ffff9035131754c0",
+" index: 1 position: root/0/1",
+" vm_start = 0x5613d3c00000,",
+" vm_end = 0x5613d3d08000,",
+" ffff9035131751c8",
+" index: 2 position: root/0/3",
+" vm_start = 0x5613d3f07000,",
+" vm_end = 0x5613d3f0b000,",
+" ffff9035131757b8",
+" index: 3 position: root/0/4",
+" vm_start = 0x5613d3f0b000,",
+" vm_end = 0x5613d3f14000,",
+" ....",
NULL
};
--
2.37.1

View File

@ -1,52 +0,0 @@
From 198ce92c480baa71e3c70694b7049432f8a5a6ec Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Tue, 10 Jan 2023 14:56:32 +0800
Subject: [PATCH 70/89] Dump maple tree offset variables by "help -o"
In the previous patches, some variables are added to offset_table and
size_table, print them out with "help -o" command.
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
symbols.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/symbols.c b/symbols.c
index 9e6fca73eaf9..20bfac35c1f6 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10778,6 +10778,21 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sbq_wait_state_wait_cnt));
fprintf(fp, " sbq_wait_state_wait: %ld\n",
OFFSET(sbq_wait_state_wait));
+ fprintf(fp, " mm_struct_mm_mt: %ld\n", OFFSET(mm_struct_mm_mt));
+ fprintf(fp, " maple_tree_ma_root: %ld\n", OFFSET(maple_tree_ma_root));
+ fprintf(fp, " maple_tree_ma_flags: %ld\n", OFFSET(maple_tree_ma_flags));
+ fprintf(fp, " maple_node_parent: %ld\n", OFFSET(maple_node_parent));
+ fprintf(fp, " maple_node_ma64: %ld\n", OFFSET(maple_node_ma64));
+ fprintf(fp, " maple_node_mr64: %ld\n", OFFSET(maple_node_mr64));
+ fprintf(fp, " maple_node_slot: %ld\n", OFFSET(maple_node_slot));
+ fprintf(fp, " maple_arange_64_pivot: %ld\n", OFFSET(maple_arange_64_pivot));
+ fprintf(fp, " maple_arange_64_slot: %ld\n", OFFSET(maple_arange_64_slot));
+ fprintf(fp, " maple_arange_64_gap: %ld\n", OFFSET(maple_arange_64_gap));
+ fprintf(fp, " maple_arange_64_meta: %ld\n", OFFSET(maple_arange_64_meta));
+ fprintf(fp, " maple_range_64_pivot: %ld\n", OFFSET(maple_range_64_pivot));
+ fprintf(fp, " maple_range_64_slot: %ld\n", OFFSET(maple_range_64_slot));
+ fprintf(fp, " maple_metadata_end: %ld\n", OFFSET(maple_metadata_end));
+ fprintf(fp, " maple_metadata_gap: %ld\n", OFFSET(maple_metadata_gap));
fprintf(fp, "\n size_table:\n");
fprintf(fp, " page: %ld\n", SIZE(page));
@@ -11050,6 +11065,8 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " sbitmap_queue: %ld\n", SIZE(sbitmap_queue));
fprintf(fp, " sbq_wait_state: %ld\n", SIZE(sbq_wait_state));
fprintf(fp, " blk_mq_tags: %ld\n", SIZE(blk_mq_tags));
+ fprintf(fp, " maple_tree: %ld\n", SIZE(maple_tree));
+ fprintf(fp, " maple_node: %ld\n", SIZE(maple_node));
fprintf(fp, " percpu_counter: %ld\n", SIZE(percpu_counter));
--
2.37.1

View File

@ -1,80 +0,0 @@
From a39b03603eba70d630121390f50abbc186bc8f56 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 13 Feb 2023 11:12:12 +0800
Subject: [PATCH 71/89] Fix for "bt" command printing "bogus exception frame"
warning
Currently, the "bt" command may print a bogus exception frame
and the remaining frame will be truncated on x86_64 when using the
"virsh send-key <kvm guest> KEY_LEFTALT KEY_SYSRQ KEY_C" command
to trigger a panic from the KVM host. For example:
crash> bt
PID: 0 TASK: ffff9e7a47e32f00 CPU: 3 COMMAND: "swapper/3"
#0 [ffffba7900118bb8] machine_kexec at ffffffff87e5c2c7
#1 [ffffba7900118c08] __crash_kexec at ffffffff87f9500d
#2 [ffffba7900118cd0] panic at ffffffff87edfff9
#3 [ffffba7900118d50] sysrq_handle_crash at ffffffff883ce2c1
...
#16 [ffffba7900118fd8] handle_edge_irq at ffffffff87f559f2
#17 [ffffba7900118ff0] asm_call_on_stack at ffffffff88800fa2
--- <IRQ stack> ---
#18 [ffffba790008bda0] asm_call_on_stack at ffffffff88800fa2
RIP: ffffffffffffffff RSP: 0000000000000124 RFLAGS: 00000003
RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
RDX: ffffffff88800c1e RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000001 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000000 R11: ffffffff88760555 R12: ffffba790008be08
R13: ffffffff87f18002 R14: ffff9e7a47e32f00 R15: ffff9e7bb6198e00
ORIG_RAX: 0000000000000000 CS: 0003 SS: 0000
bt: WARNING: possibly bogus exception frame
crash>
The following related kernel commits cause the current issue, crash
needs to adjust the value of irq_eframe_link.
Related kernel commits:
[1] v5.8: 931b94145981 ("x86/entry: Provide helpers for executing on the irqstack")
[2] v5.8: fa5e5c409213 ("x86/entry: Use idtentry for interrupts")
[3] v5.12: 52d743f3b712 ("x86/softirq: Remove indirection in do_softirq_own_stack()")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
x86_64.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/x86_64.c b/x86_64.c
index 1113a1055f77..27b1167ec630 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3935,6 +3935,11 @@ in_exception_stack:
if (irq_eframe) {
bt->flags |= BT_EXCEPTION_FRAME;
i = (irq_eframe - bt->stackbase)/sizeof(ulong);
+ if (symbol_exists("asm_common_interrupt")) {
+ i -= 1;
+ up = (ulong *)(&bt->stackbuf[i*sizeof(ulong)]);
+ bt->instptr = *up;
+ }
x86_64_print_stack_entry(bt, ofp, level, i, bt->instptr);
bt->flags &= ~(ulonglong)BT_EXCEPTION_FRAME;
cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
@@ -6513,6 +6518,14 @@ x86_64_irq_eframe_link_init(void)
else
return;
+ if (symbol_exists("asm_common_interrupt")) {
+ if (symbol_exists("asm_call_on_stack"))
+ machdep->machspec->irq_eframe_link = -64;
+ else
+ machdep->machspec->irq_eframe_link = -32;
+ return;
+ }
+
if (THIS_KERNEL_VERSION < LINUX(2,6,9))
return;
--
2.37.1

View File

@ -1,144 +0,0 @@
From 3c9c16c545319958d7fa14ef5ab8934fc5449d83 Mon Sep 17 00:00:00 2001
From: "Aureau, Georges (Kernel Tools ERT)" <georges.aureau@hpe.com>
Date: Wed, 8 Feb 2023 12:09:03 +0000
Subject: [PATCH 72/89] Fix "kmem -s|-S" not working properly on RHEL8.6 and
later
For CONFIG_SLAB_FREELIST_HARDENED, the crash memory.c:freelist_ptr()
code is checking for an additional bswap using a simple release test eg.
THIS_KERNEL_VERSION >= LINUX(5,7,0), basically checking for RHEL9 and
beyond.
However, for RHEL8.6 and later, we have CONFIG_SLAB_FREELIST_HARDENED=y,
and we also have the additional bswap, but the current crash is not
handling this case, hence "kmem -s|-S" will not work properly, and free
objects will not be counted nor reported properly.
An example from a RHEL8.6 x86_64 kdump, a kmem cache with a single slab
having 42 objects, only the freelist head is seen as free as crash can't
walk freelist next pointers, and crash is wrongly reporting 41 allocated
objects:
crash> sys | grep RELEASE
RELEASE: 4.18.0-372.9.1.el8.x86_64
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 41 42 1 32k nfs_commit_data
When properly accounting for the additional bswap, we can walk the
freelist and find 38 free objects, and crash is now reporting only 4
allocated objects:
crash> kmem -s nfs_commit_data
CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME
ffff9ad40c7cb2c0 728 4 42 1 32k nfs_commit_data
Signed-off-by: Georges Aureau <georges.aureau@hpe.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index b2389cd82fae..ae5d1244e8b3 100644
--- a/defs.h
+++ b/defs.h
@@ -2638,6 +2638,7 @@ struct vm_table { /* kernel VM-related data */
#define SLAB_OVERLOAD_PAGE (0x8000000)
#define SLAB_CPU_CACHE (0x10000000)
#define SLAB_ROOT_CACHES (0x20000000)
+#define FREELIST_PTR_BSWAP (0x40000000)
#define IS_FLATMEM() (vt->flags & FLATMEM)
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
diff --git a/memory.c b/memory.c
index 5141fbea4b40..e0742c1bd3a4 100644
--- a/memory.c
+++ b/memory.c
@@ -320,6 +320,7 @@ static void dump_per_cpu_offsets(void);
static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
+static void freelist_ptr_init(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
@@ -789,6 +790,8 @@ vm_init(void)
MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name");
MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags");
MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random");
+ if (VALID_MEMBER(kmem_cache_random))
+ freelist_ptr_init();
MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist");
MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page");
if (INVALID_MEMBER(kmem_cache_cpu_page))
@@ -13932,6 +13935,8 @@ dump_vm_table(int verbose)
fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
if (vt->flags & SLAB_ROOT_CACHES)
fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
+ if (vt->flags & FREELIST_PTR_BSWAP)
+ fprintf(fp, "%sFREELIST_PTR_BSWAP", others++ ? "|" : "");\
if (vt->flags & USE_VMAP_AREA)
fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
if (vt->flags & CONFIG_NUMA)
@@ -19519,13 +19524,55 @@ count_free_objects(struct meminfo *si, ulong freelist)
return c;
}
+/*
+ * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are crypted with xor's,
+ * and for recent release with an additionnal bswap. Some releases prio to 5.7.0
+ * may be using the additionnal bswap. The only easy and reliable way to tell is
+ * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
+ */
+static int
+freelist_ptr_bswap_x86(void)
+{
+ char buf1[BUFSIZE];
+ char buf2[BUFSIZE];
+ char *arglist[MAXARGS];
+ int found;
+
+ sprintf(buf1, "disassemble __slab_free");
+ open_tmpfile();
+ if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
+ close_tmpfile();
+ return FALSE;
+ }
+ rewind(pc->tmpfile);
+ found = FALSE;
+ while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
+ if (parse_line(buf2, arglist) < 3)
+ continue;
+ if (STREQ(arglist[2], "bswap")) {
+ found = TRUE;
+ break;
+ }
+ }
+ close_tmpfile();
+ return found;
+}
+
+static void
+freelist_ptr_init(void)
+{
+ if (THIS_KERNEL_VERSION >= LINUX(5,7,0) ||
+ ((machine_type("X86_64") || machine_type("X86")) && freelist_ptr_bswap_x86()))
+ vt->flags |= FREELIST_PTR_BSWAP;
+}
+
static ulong
freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
{
if (VALID_MEMBER(kmem_cache_random)) {
/* CONFIG_SLAB_FREELIST_HARDENED */
- if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
+ if (vt->flags & FREELIST_PTR_BSWAP)
ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
: bswap_32(ptr_addr);
return (ptr ^ si->random ^ ptr_addr);
--
2.37.1

View File

@ -1,106 +0,0 @@
From 49f689eda0bc3331beb70df17ac4990034460f2b Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Thu, 9 Feb 2023 20:15:46 +0800
Subject: [PATCH 73/89] Fix for "net -s" option to show IPv6 addresses on Linux
3.13 and later
Currently, the "net -s" option fails to show IPv6 addresses and ports
for the SOURCE-PORT and DESTINATION-PORT columns on Linux 3.13 and later
kernels, which have kernel commit efe4208f47f907 ("ipv6: make lookups
simpler and faster"). For example:
crash> net -s
PID: 305524 TASK: ffff9bc449895580 CPU: 6 COMMAND: "sshd"
FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT
3 ffff9bc446e9a680 ffff9bc4455b5940 UNIX:DGRAM
4 ffff9bc446e9c600 ffff9bc3b2b24e00 INET6:STREAM
With the patch:
crash> net -s
PID: 305524 TASK: ffff9bc449895580 CPU: 6 COMMAND: "sshd"
FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT
3 ffff9bc446e9a680 ffff9bc4455b5940 UNIX:DGRAM
4 ffff9bc446e9c600 ffff9bc3b2b24e00 INET6:STREAM xxxx:xx:x:xxxx:xxxx:xxxx:xxxx:xxxx-22 yyyy:yy:y:yyyy:yyyy:yyyy:yyyy:yyyy-44870
Reported-by: Buland Kumar Singh <bsingh@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
defs.h | 3 +++
net.c | 20 +++++++++++++++-----
symbols.c | 3 +++
3 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/defs.h b/defs.h
index ae5d1244e8b3..801781749666 100644
--- a/defs.h
+++ b/defs.h
@@ -2204,6 +2204,9 @@ struct offset_table { /* stash of commonly-used offsets */
long maple_range_64_slot;
long maple_metadata_end;
long maple_metadata_gap;
+ long sock_sk_common;
+ long sock_common_skc_v6_daddr;
+ long sock_common_skc_v6_rcv_saddr;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/net.c b/net.c
index 7c9c8bd9c98d..aa445ab7ee13 100644
--- a/net.c
+++ b/net.c
@@ -199,6 +199,9 @@ net_init(void)
MEMBER_OFFSET_INIT(sock_common_skc_family,
"sock_common", "skc_family");
MEMBER_OFFSET_INIT(sock_sk_type, "sock", "sk_type");
+ MEMBER_OFFSET_INIT(sock_sk_common, "sock", "__sk_common");
+ MEMBER_OFFSET_INIT(sock_common_skc_v6_daddr, "sock_common", "skc_v6_daddr");
+ MEMBER_OFFSET_INIT(sock_common_skc_v6_rcv_saddr, "sock_common", "skc_v6_rcv_saddr");
/*
* struct inet_sock {
* struct sock sk;
@@ -1104,12 +1107,19 @@ get_sock_info(ulong sock, char *buf)
break;
case SOCK_V2:
- if (INVALID_MEMBER(ipv6_pinfo_rcv_saddr) ||
- INVALID_MEMBER(ipv6_pinfo_daddr))
+ if (VALID_MEMBER(ipv6_pinfo_rcv_saddr) &&
+ VALID_MEMBER(ipv6_pinfo_daddr)) {
+ ipv6_rcv_saddr = ipv6_pinfo + OFFSET(ipv6_pinfo_rcv_saddr);
+ ipv6_daddr = ipv6_pinfo + OFFSET(ipv6_pinfo_daddr);
+ } else if (VALID_MEMBER(sock_sk_common) &&
+ VALID_MEMBER(sock_common_skc_v6_daddr) &&
+ VALID_MEMBER(sock_common_skc_v6_rcv_saddr)) {
+ ipv6_rcv_saddr = sock + OFFSET(sock_sk_common) + OFFSET(sock_common_skc_v6_rcv_saddr);
+ ipv6_daddr = sock + OFFSET(sock_sk_common) + OFFSET(sock_common_skc_v6_daddr);
+ } else {
+ sprintf(&buf[strlen(buf)], "%s", "(cannot get IPv6 addresses)");
break;
-
- ipv6_rcv_saddr = ipv6_pinfo + OFFSET(ipv6_pinfo_rcv_saddr);
- ipv6_daddr = ipv6_pinfo + OFFSET(ipv6_pinfo_daddr);
+ }
if (!readmem(ipv6_rcv_saddr, KVADDR, u6_addr16_src, SIZE(in6_addr),
"ipv6_rcv_saddr buffer", QUIET|RETURN_ON_ERROR))
diff --git a/symbols.c b/symbols.c
index 20bfac35c1f6..54115d753601 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9830,8 +9830,11 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " sock_sk_type: %ld\n",
OFFSET(sock_sk_type));
+ fprintf(fp, " sock_sk_common: %ld\n", OFFSET(sock_sk_common));
fprintf(fp, " sock_common_skc_family: %ld\n",
OFFSET(sock_common_skc_family));
+ fprintf(fp, " sock_common_skc_v6_daddr: %ld\n", OFFSET(sock_common_skc_v6_daddr));
+ fprintf(fp, " sock_common_skc_v6_rcv_saddr: %ld\n", OFFSET(sock_common_skc_v6_rcv_saddr));
fprintf(fp, " socket_alloc_vfs_inode: %ld\n",
OFFSET(socket_alloc_vfs_inode));
fprintf(fp, " inet_sock_inet: %ld\n",
--
2.37.1

Some files were not shown because too many files have changed in this diff Show More