Compare commits

...

No commits in common. 'c8' and 'a9-beta' have entirely different histories.
c8 ... a9-beta

@ -1,2 +1,2 @@
aab889c6471bfc42cf2b1d065a881ea33d8ba0b7 SOURCES/crash-7.3.2.tar.gz
026f4c9e1c8152a2773354551c523acd32d7f00e SOURCES/gdb-7.6.tar.gz
50fe939615cc034273e255e4903edec17dc6ee68 SOURCES/crash-8.0.2.tar.gz
6bf5ee7877a4740835745ed97ce525a00bb2232c SOURCES/gdb-10.2.tar.gz

4
.gitignore vendored

@ -1,2 +1,2 @@
SOURCES/crash-7.3.2.tar.gz
SOURCES/gdb-7.6.tar.gz
SOURCES/crash-8.0.2.tar.gz
SOURCES/gdb-10.2.tar.gz

@ -1,146 +0,0 @@
From f623cad20b092002d627a03451ea256add2e53d0 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 15 Jun 2022 10:50:13 +0900
Subject: [PATCH 01/28] Fix for "dev" command on Linux 5.11 and later
The following kernel commits eventually removed the bdev_map array in
Linux v5.11 kernel:
e418de3abcda ("block: switch gendisk lookup to a simple xarray")
22ae8ce8b892 ("block: simplify bdev/disk lookup in blkdev_get")
Without the patch, the "dev" command fails to dump block device data
with the following error:
crash> dev
...
dev: blkdevs or all_bdevs: symbols do not exist
To get block device's gendisk, search blockdev_superblock.s_inodes
instead of bdev_map.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
dev.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 72 insertions(+), 5 deletions(-)
diff --git a/dev.c b/dev.c
index db97f8aebdc2..75d30bd022a1 100644
--- a/dev.c
+++ b/dev.c
@@ -24,6 +24,7 @@ static void dump_blkdevs_v2(ulong);
static void dump_blkdevs_v3(ulong);
static ulong search_cdev_map_probes(char *, int, int, ulong *);
static ulong search_bdev_map_probes(char *, int, int, ulong *);
+static ulong search_blockdev_inodes(int, ulong *);
static void do_pci(void);
static void do_pci2(void);
static void do_io(void);
@@ -493,9 +494,10 @@ dump_blkdevs(ulong flags)
ulong ops;
} blkdevs[MAX_DEV], *bp;
- if (kernel_symbol_exists("major_names") &&
- kernel_symbol_exists("bdev_map")) {
- dump_blkdevs_v3(flags);
+ if (kernel_symbol_exists("major_names") &&
+ (kernel_symbol_exists("bdev_map") ||
+ kernel_symbol_exists("blockdev_superblock"))) {
+ dump_blkdevs_v3(flags);
return;
}
@@ -717,6 +719,7 @@ dump_blkdevs_v3(ulong flags)
char buf[BUFSIZE];
uint major;
ulong gendisk, addr, fops;
+ int use_bdev_map = kernel_symbol_exists("bdev_map");
if (!(len = get_array_length("major_names", NULL, 0)))
len = MAX_DEV;
@@ -745,8 +748,11 @@ dump_blkdevs_v3(ulong flags)
strncpy(buf, blk_major_name_buf +
OFFSET(blk_major_name_name), 16);
- fops = search_bdev_map_probes(buf, major == i ? major : i,
- UNUSED, &gendisk);
+ if (use_bdev_map)
+ fops = search_bdev_map_probes(buf, major == i ? major : i,
+ UNUSED, &gendisk);
+ else /* v5.11 and later */
+ fops = search_blockdev_inodes(major, &gendisk);
if (CRASHDEBUG(1))
fprintf(fp, "blk_major_name: %lx block major: %d name: %s gendisk: %lx fops: %lx\n",
@@ -829,6 +835,67 @@ search_bdev_map_probes(char *name, int major, int minor, ulong *gendisk)
return fops;
}
+/* For bdev_inode. See block/bdev.c */
+#define I_BDEV(inode) (inode - SIZE(block_device))
+
+static ulong
+search_blockdev_inodes(int major, ulong *gendisk)
+{
+ struct list_data list_data, *ld;
+ ulong addr, bd_sb, disk, fops = 0;
+ int i, inode_count, gendisk_major;
+ char *gendisk_buf;
+
+ ld = &list_data;
+ BZERO(ld, sizeof(struct list_data));
+
+ get_symbol_data("blockdev_superblock", sizeof(void *), &bd_sb);
+
+ addr = bd_sb + OFFSET(super_block_s_inodes);
+ if (!readmem(addr, KVADDR, &ld->start, sizeof(ulong),
+ "blockdev_superblock.s_inodes", QUIET|RETURN_ON_ERROR))
+ return 0;
+
+ if (empty_list(ld->start))
+ return 0;
+
+ ld->flags |= LIST_ALLOCATE;
+ ld->end = bd_sb + OFFSET(super_block_s_inodes);
+ ld->list_head_offset = OFFSET(inode_i_sb_list);
+
+ inode_count = do_list(ld);
+
+ gendisk_buf = GETBUF(SIZE(gendisk));
+
+ for (i = 0; i < inode_count; i++) {
+ addr = I_BDEV(ld->list_ptr[i]) + OFFSET(block_device_bd_disk);
+ if (!readmem(addr, KVADDR, &disk, sizeof(ulong),
+ "block_device.bd_disk", QUIET|RETURN_ON_ERROR))
+ continue;
+
+ if (!disk)
+ continue;
+
+ if (!readmem(disk, KVADDR, gendisk_buf, SIZE(gendisk),
+ "gendisk buffer", QUIET|RETURN_ON_ERROR))
+ continue;
+
+ gendisk_major = INT(gendisk_buf + OFFSET(gendisk_major));
+ if (gendisk_major != major)
+ continue;
+
+ fops = ULONG(gendisk_buf + OFFSET(gendisk_fops));
+ if (fops) {
+ *gendisk = disk;
+ break;
+ }
+ }
+
+ FREEBUF(ld->list_ptr);
+ FREEBUF(gendisk_buf);
+ return fops;
+}
+
void
dump_dev_table(void)
{
--
2.37.1

@ -0,0 +1,75 @@
From d941266da5fb9c386128a180f39281ec9d4aa242 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 20 Feb 2023 15:57:04 +0800
Subject: [PATCH] gdb: Fix an assertion failure in the gdb's copy_type()
This is a backported patch from gdb. Without the patch, the following
crash command may abort due to an assertion failure in the gdb's
copy_type():
crash> px __per_cpu_start:0
gdbtypes.c:5505: internal-error: type* copy_type(const type*): Assertion `TYPE_OBJFILE_OWNED (type)' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n)
The gdb commit 8e2da1651879 ("Fix assertion failure in copy_type")
solved the current issue.
Reported-by: Buland Kumar Singh <bsingh@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
gdb-10.2.patch | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/gdb-10.2.patch b/gdb-10.2.patch
index 91edfb338445..8c3b5a7fdf77 100644
--- a/gdb-10.2.patch
+++ b/gdb-10.2.patch
@@ -1737,3 +1737,43 @@ exit 0
struct field *nextfield;
short nfields;
struct type *typedef_type, *target_type;
+--- gdb-10.2/gdb/gdbtypes.c.orig
++++ gdb-10.2/gdb/gdbtypes.c
+@@ -5492,27 +5492,25 @@ copy_type_recursive (struct objfile *objfile,
+ }
+
+ /* Make a copy of the given TYPE, except that the pointer & reference
+- types are not preserved.
+-
+- This function assumes that the given type has an associated objfile.
+- This objfile is used to allocate the new type. */
++ types are not preserved. */
+
+ struct type *
+ copy_type (const struct type *type)
+ {
+- struct type *new_type;
+-
+- gdb_assert (TYPE_OBJFILE_OWNED (type));
++ struct type *new_type = alloc_type_copy (type);
+
+- new_type = alloc_type_copy (type);
+ TYPE_INSTANCE_FLAGS (new_type) = TYPE_INSTANCE_FLAGS (type);
+ TYPE_LENGTH (new_type) = TYPE_LENGTH (type);
+ memcpy (TYPE_MAIN_TYPE (new_type), TYPE_MAIN_TYPE (type),
+ sizeof (struct main_type));
+ if (type->main_type->dyn_prop_list != NULL)
+- new_type->main_type->dyn_prop_list
+- = copy_dynamic_prop_list (&TYPE_OBJFILE (type) -> objfile_obstack,
+- type->main_type->dyn_prop_list);
++ {
++ struct obstack *storage = (TYPE_OBJFILE_OWNED (type)
++ ? &TYPE_OBJFILE (type)->objfile_obstack
++ : gdbarch_obstack (TYPE_OWNER (type).gdbarch));
++ new_type->main_type->dyn_prop_list
++ = copy_dynamic_prop_list (storage, type->main_type->dyn_prop_list);
++ }
+
+ return new_type;
+ }
+
--
2.37.1

@ -1,31 +0,0 @@
From ae52398a13fa9a238279114ed671c7c514c154ee Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain@linux.ibm.com>
Date: Mon, 9 May 2022 12:49:56 +0530
Subject: [PATCH 01/18] ppc64: update the NR_CPUS to 8192
Since the kernel commit 2d8ae638bb86 ("powerpc: Make the NR_CPUS max 8192")
the NR_CPUS on Linux kernel ranges from 1-8192. So let's match NR_CPUS with
the max NR_CPUS count on the Linux kernel.
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 1e8360d65a3b..a6735d07b32f 100644
--- a/defs.h
+++ b/defs.h
@@ -136,7 +136,7 @@
#define NR_CPUS (4096)
#endif
#ifdef PPC64
-#define NR_CPUS (2048)
+#define NR_CPUS (8192)
#endif
#ifdef S390
#define NR_CPUS (512)
--
2.30.2

@ -1,84 +0,0 @@
From 6bc60e8cc87701c8f68c1cda56dd7120b5565700 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 22 Jun 2022 08:32:59 +0900
Subject: [PATCH 02/28] Extend field length of task attributes
Nowadays, some machines have many CPU cores and memory, and some
distributions have a larger kernel.pid_max parameter, e.g. 7 digits.
This impairs the readability of a few commands, especially "ps" and
"ps -l|-m" options.
Let's extend the field length of the task attributes, PID, CPU, VSZ,
and RSS to improve the readability.
Without the patch:
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
...
2802197 2699997 2 ffff916f63c40000 IN 0.0 307212 10688 timer
2802277 1 0 ffff9161a25bb080 IN 0.0 169040 2744 gpg-agent
2806711 3167854 10 ffff9167fc498000 IN 0.0 127208 6508 su
2806719 2806711 1 ffff91633c3a48c0 IN 0.0 29452 6416 bash
2988346 1 5 ffff916f7c629840 IN 2.8 9342476 1917384 qemu-kvm
With the patch:
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
...
2802197 2699997 2 ffff916f63c40000 IN 0.0 307212 10688 timer
2802277 1 0 ffff9161a25bb080 IN 0.0 169040 2744 gpg-agent
2806711 3167854 10 ffff9167fc498000 IN 0.0 127208 6508 su
2806719 2806711 1 ffff91633c3a48c0 IN 0.0 29452 6416 bash
2988346 1 5 ffff916f7c629840 IN 2.8 9342476 1917384 qemu-kvm
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
task.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/task.c b/task.c
index 864c838637ee..071c787fbfa5 100644
--- a/task.c
+++ b/task.c
@@ -3828,7 +3828,7 @@ show_ps_data(ulong flag, struct task_context *tc, struct psinfo *psi)
} else
fprintf(fp, " ");
- fprintf(fp, "%5ld %5ld %2s %s %3s",
+ fprintf(fp, "%7ld %7ld %3s %s %3s",
tc->pid, task_to_pid(tc->ptask),
task_cpu(tc->processor, buf2, !VERBOSE),
task_pointer_string(tc, flag & PS_KSTACKP, buf3),
@@ -3838,8 +3838,8 @@ show_ps_data(ulong flag, struct task_context *tc, struct psinfo *psi)
if (strlen(buf1) == 3)
mkstring(buf1, 4, CENTER|RJUST, NULL);
fprintf(fp, "%s ", buf1);
- fprintf(fp, "%7ld ", (tm->total_vm * PAGESIZE())/1024);
- fprintf(fp, "%6ld ", (tm->rss * PAGESIZE())/1024);
+ fprintf(fp, "%8ld ", (tm->total_vm * PAGESIZE())/1024);
+ fprintf(fp, "%8ld ", (tm->rss * PAGESIZE())/1024);
if (is_kernel_thread(tc->task))
fprintf(fp, "[%s]\n", tc->comm);
else
@@ -3856,7 +3856,7 @@ show_ps(ulong flag, struct psinfo *psi)
if (!(flag & ((PS_EXCLUSIVE & ~PS_ACTIVE)|PS_NO_HEADER)))
fprintf(fp,
- " PID PPID CPU %s ST %%MEM VSZ RSS COMM\n",
+ " PID PPID CPU %s ST %%MEM VSZ RSS COMM\n",
flag & PS_KSTACKP ?
mkstring(buf, VADDR_PRLEN, CENTER|RJUST, "KSTACKP") :
mkstring(buf, VADDR_PRLEN, CENTER, "TASK"));
@@ -7713,7 +7713,7 @@ print_task_header(FILE *out, struct task_context *tc, int newline)
char buf[BUFSIZE];
char buf1[BUFSIZE];
- fprintf(out, "%sPID: %-5ld TASK: %s CPU: %-2s COMMAND: \"%s\"\n",
+ fprintf(out, "%sPID: %-7ld TASK: %s CPU: %-3s COMMAND: \"%s\"\n",
newline ? "\n" : "", tc->pid,
mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, MKSTR(tc->task)),
task_cpu(tc->processor, buf, !VERBOSE), tc->comm);
--
2.37.1

@ -0,0 +1,79 @@
From 92de7c34b1f910abff4d77522f74454ea0263a90 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 13 Feb 2023 11:12:12 +0800
Subject: [PATCH] Fix for "bt" command printing "bogus exception frame" warning
Currently, the "bt" command may print a bogus exception frame
and the remaining frame will be truncated on x86_64 when using the
"virsh send-key <kvm guest> KEY_LEFTALT KEY_SYSRQ KEY_C" command
to trigger a panic from the KVM host. For example:
crash> bt
PID: 0 TASK: ffff9e7a47e32f00 CPU: 3 COMMAND: "swapper/3"
#0 [ffffba7900118bb8] machine_kexec at ffffffff87e5c2c7
#1 [ffffba7900118c08] __crash_kexec at ffffffff87f9500d
#2 [ffffba7900118cd0] panic at ffffffff87edfff9
#3 [ffffba7900118d50] sysrq_handle_crash at ffffffff883ce2c1
...
#16 [ffffba7900118fd8] handle_edge_irq at ffffffff87f559f2
#17 [ffffba7900118ff0] asm_call_on_stack at ffffffff88800fa2
--- <IRQ stack> ---
#18 [ffffba790008bda0] asm_call_on_stack at ffffffff88800fa2
RIP: ffffffffffffffff RSP: 0000000000000124 RFLAGS: 00000003
RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
RDX: ffffffff88800c1e RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000001 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000000 R11: ffffffff88760555 R12: ffffba790008be08
R13: ffffffff87f18002 R14: ffff9e7a47e32f00 R15: ffff9e7bb6198e00
ORIG_RAX: 0000000000000000 CS: 0003 SS: 0000
bt: WARNING: possibly bogus exception frame
crash>
The following related kernel commits cause the current issue, crash
needs to adjust the value of irq_eframe_link.
Related kernel commits:
[1] v5.8: 931b94145981 ("x86/entry: Provide helpers for executing on the irqstack")
[2] v5.8: fa5e5c409213 ("x86/entry: Use idtentry for interrupts")
[3] v5.12: 52d743f3b712 ("x86/softirq: Remove indirection in do_softirq_own_stack()")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
---
x86_64.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/x86_64.c b/x86_64.c
index 7a5d6f050c89..5b671bd97775 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3938,6 +3938,11 @@ in_exception_stack:
if (irq_eframe) {
bt->flags |= BT_EXCEPTION_FRAME;
i = (irq_eframe - bt->stackbase)/sizeof(ulong);
+ if (symbol_exists("asm_common_interrupt")) {
+ i -= 1;
+ up = (ulong *)(&bt->stackbuf[i*sizeof(ulong)]);
+ bt->instptr = *up;
+ }
x86_64_print_stack_entry(bt, ofp, level, i, bt->instptr);
bt->flags &= ~(ulonglong)BT_EXCEPTION_FRAME;
cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
@@ -6521,6 +6526,14 @@ x86_64_irq_eframe_link_init(void)
else
return;
+ if (symbol_exists("asm_common_interrupt")) {
+ if (symbol_exists("asm_call_on_stack"))
+ machdep->machspec->irq_eframe_link = -64;
+ else
+ machdep->machspec->irq_eframe_link = -32;
+ return;
+ }
+
if (THIS_KERNEL_VERSION < LINUX(2,6,9))
return;
--
2.37.1

@ -1,62 +0,0 @@
From 364b2e413c69daf189d2bc0238e3ba9b0dcbd937 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:13 +0800
Subject: [PATCH 02/18] sbitmapq: remove struct and member validation in
sbitmapq_init()
Let's remove the struct and member validation from sbitmapq_init(), which
will help the crash to display the actual error when the sbitmapq fails.
Without the patch:
crash> sbitmapq ffff8e99d0dc8010
sbitmapq: command not supported or applicable on this architecture or kernel
With the patch:
crash> sbitmapq ffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_queue_alloc_hint
FILE: sbitmap.c LINE: 365 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 24 ------------------------
1 file changed, 24 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index 96a61e6c2c71..7693eef6cebd 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -525,30 +525,6 @@ void sbitmapq_init(void)
MEMBER_OFFSET_INIT(sbq_wait_state_wait_cnt, "sbq_wait_state", "wait_cnt");
MEMBER_OFFSET_INIT(sbq_wait_state_wait, "sbq_wait_state", "wait");
- if (!VALID_SIZE(sbitmap_word) ||
- !VALID_SIZE(sbitmap) ||
- !VALID_SIZE(sbitmap_queue) ||
- !VALID_SIZE(sbq_wait_state) ||
- INVALID_MEMBER(sbitmap_word_depth) ||
- INVALID_MEMBER(sbitmap_word_word) ||
- INVALID_MEMBER(sbitmap_word_cleared) ||
- INVALID_MEMBER(sbitmap_depth) ||
- INVALID_MEMBER(sbitmap_shift) ||
- INVALID_MEMBER(sbitmap_map_nr) ||
- INVALID_MEMBER(sbitmap_map) ||
- INVALID_MEMBER(sbitmap_queue_sb) ||
- INVALID_MEMBER(sbitmap_queue_alloc_hint) ||
- INVALID_MEMBER(sbitmap_queue_wake_batch) ||
- INVALID_MEMBER(sbitmap_queue_wake_index) ||
- INVALID_MEMBER(sbitmap_queue_ws) ||
- INVALID_MEMBER(sbitmap_queue_ws_active) ||
- INVALID_MEMBER(sbitmap_queue_round_robin) ||
- INVALID_MEMBER(sbitmap_queue_min_shallow_depth) ||
- INVALID_MEMBER(sbq_wait_state_wait_cnt) ||
- INVALID_MEMBER(sbq_wait_state_wait)) {
- command_not_supported();
- }
-
sb_flags |= SB_FLAG_INIT;
}
--
2.30.2

@ -0,0 +1,78 @@
From e0e6e4a7ee03b3d00b50a9e4db2f2ea6f7da0da3 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 15 Feb 2023 16:24:57 +0800
Subject: [PATCH] Fix for "bt" command unnecessarily printing an exception
frame
Kernel commit 7d65f4a65532 ("irq: Consolidate do_softirq() arch overriden
implementations") renamed the call_softirq to do_softirq_own_stack, and
there is no exception frame also when coming from do_softirq_own_stack.
Without the patch, crash may unnecessarily output an exception frame with
a warning as below:
crash> foreach bt
...
PID: 0 TASK: ffff914f820a8000 CPU: 25 COMMAND: "swapper/25"
#0 [fffffe0000504e48] crash_nmi_callback at ffffffffa665d763
#1 [fffffe0000504e50] nmi_handle at ffffffffa662a423
#2 [fffffe0000504ea8] default_do_nmi at ffffffffa6fe7dc9
#3 [fffffe0000504ec8] do_nmi at ffffffffa662a97f
#4 [fffffe0000504ef0] end_repeat_nmi at ffffffffa70015e8
[exception RIP: clone_endio+172]
RIP: ffffffffc005c1ec RSP: ffffa1d403d08e98 RFLAGS: 00000246
RAX: 0000000000000000 RBX: ffff915326fba230 RCX: 0000000000000018
RDX: ffffffffc0075400 RSI: 0000000000000000 RDI: ffff915326fba230
RBP: ffff915326fba1c0 R8: 0000000000001000 R9: ffff915308d6d2a0
R10: 000000a97dfe5e10 R11: ffffa1d40038fe98 R12: ffff915302babc40
R13: ffff914f94360000 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
--- <NMI exception stack> ---
#5 [ffffa1d403d08e98] clone_endio at ffffffffc005c1ec [dm_mod]
#6 [ffffa1d403d08ed0] blk_update_request at ffffffffa6a96954
#7 [ffffa1d403d08f10] scsi_end_request at ffffffffa6c9b968
#8 [ffffa1d403d08f48] scsi_io_completion at ffffffffa6c9bb3e
#9 [ffffa1d403d08f90] blk_complete_reqs at ffffffffa6aa0e95
#10 [ffffa1d403d08fa0] __softirqentry_text_start at ffffffffa72000dc
#11 [ffffa1d403d08ff0] do_softirq_own_stack at ffffffffa7000f9a
--- <IRQ stack> ---
#12 [ffffa1d40038fe70] do_softirq_own_stack at ffffffffa7000f9a
[exception RIP: unknown or invalid address]
RIP: 0000000000000000 RSP: 0000000000000000 RFLAGS: 00000000
RAX: ffffffffa672eae5 RBX: ffffffffa83b34e0 RCX: ffffffffa672eb12
RDX: 0000000000000010 RSI: 8b7d6c8869010c00 RDI: 0000000000000085
RBP: 0000000000000286 R8: ffff914f820a8000 R9: ffffffffa67a94e0
R10: 0000000000000286 R11: ffffffffa66fb4c5 R12: ffffffffa67a898b
R13: 0000000000000000 R14: fffffffffffffff8 R15: ffffffffa67a1e68
ORIG_RAX: 0000000000000000 CS: 0000 SS: ffffffffa672edff
bt: WARNING: possibly bogus exception frame
#13 [ffffa1d40038ff30] start_secondary at ffffffffa665fa2c
#14 [ffffa1d40038ff50] secondary_startup_64_no_verify at ffffffffa6600116
...
Reported-by: Marco Patalano <mpatalan@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
x86_64.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/x86_64.c b/x86_64.c
index 5b671bd97775..6cac3936b33d 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3825,10 +3825,11 @@ in_exception_stack:
up -= 1;
bt->instptr = *up;
/*
- * No exception frame when coming from call_softirq.
+ * No exception frame when coming from do_softirq_own_stack
+ * or call_softirq.
*/
if ((sp = value_search(bt->instptr, &offset)) &&
- STREQ(sp->name, "call_softirq"))
+ (STREQ(sp->name, "do_softirq_own_stack") || STREQ(sp->name, "call_softirq")))
irq_eframe = 0;
bt->frameptr = 0;
done = FALSE;
--
2.37.1

@ -1,45 +0,0 @@
From 1c918c621e48f53ea69a143aabc59c8366102236 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:41 +0530
Subject: [PATCH 03/28] ppc64: fix bt for '-S' case
Passing '-S' option to 'bt' command was intended to specify the stack
pointer manually. But get_stack_frame() handling on ppc64 is ignoring
this option altogether. Fix it.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/ppc64.c b/ppc64.c
index 975caa53b812..0e1d8678eef5 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -2330,6 +2330,22 @@ ppc64_vmcore_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
pt_regs = (struct ppc64_pt_regs *)bt_in->machdep;
if (!pt_regs || !pt_regs->gpr[1]) {
+ if (bt_in->hp) {
+ if (bt_in->hp->esp) {
+ *ksp = bt_in->hp->esp;
+ if (!bt_in->hp->eip) {
+ if (IS_KVADDR(*ksp)) {
+ readmem(*ksp+16, KVADDR, &unip, sizeof(ulong),
+ "Regs NIP value", FAULT_ON_ERROR);
+ *nip = unip;
+ }
+ } else
+ *nip = bt_in->hp->eip;
+
+ }
+ return TRUE;
+ }
+
/*
* Not collected regs. May be the corresponding CPU not
* responded to an IPI in case of KDump OR f/w has not
--
2.37.1

@ -1,118 +0,0 @@
From a295cb40cd5d24fb5995cc78d29c5def3843d285 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:14 +0800
Subject: [PATCH 03/18] sbitmapq: fix invalid offset for
"sbitmap_queue_alloc_hint" on Linux v5.13-rc1
Kernel commit c548e62bcf6a ("scsi: sbitmap: Move allocation hint
into sbitmap") moved the alloc_hint member from struct sbitmap_queue
to struct sbitmap. Without the patch, the sbitmapq will fail:
crash> sbitmapq 0xffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_queue_alloc_hint
FILE: sbitmap.c LINE: 365 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 ++
sbitmap.c | 14 ++++++++++++--
symbols.c | 2 ++
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index a6735d07b32f..0aeb98c4f654 100644
--- a/defs.h
+++ b/defs.h
@@ -2168,6 +2168,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sbitmap_queue_min_shallow_depth;
long sbq_wait_state_wait_cnt;
long sbq_wait_state_wait;
+ long sbitmap_alloc_hint;
};
struct size_table { /* stash of commonly-used sizes */
@@ -5907,6 +5908,7 @@ struct sbitmap_context {
unsigned shift;
unsigned map_nr;
ulong map_addr;
+ ulong alloc_hint;
};
typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p);
diff --git a/sbitmap.c b/sbitmap.c
index 7693eef6cebd..2921d5447c65 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -285,6 +285,7 @@ void sbitmap_for_each_set(const struct sbitmap_context *sc,
static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
const struct sbitmap_context *sc)
{
+ ulong alloc_hint_addr = 0;
int cpus = get_cpus_possible();
int sbq_wait_state_size, wait_cnt_off, wait_off, list_head_off;
char *sbq_wait_state_buf;
@@ -297,6 +298,11 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
fprintf(fp, "bits_per_word = %u\n", 1U << sc->shift);
fprintf(fp, "map_nr = %u\n", sc->map_nr);
+ if (VALID_MEMBER(sbitmap_queue_alloc_hint))
+ alloc_hint_addr = sqc->alloc_hint;
+ else if (VALID_MEMBER(sbitmap_alloc_hint)) /* 5.13 and later */
+ alloc_hint_addr = sc->alloc_hint;
+
fputs("alloc_hint = {", fp);
first = true;
for (i = 0; i < cpus; i++) {
@@ -307,7 +313,7 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
fprintf(fp, ", ");
first = false;
- ptr = kt->__per_cpu_offset[i] + sqc->alloc_hint;
+ ptr = kt->__per_cpu_offset[i] + alloc_hint_addr;
readmem(ptr, KVADDR, &val, sizeof(val), "alloc_hint", FAULT_ON_ERROR);
fprintf(fp, "%u", val);
@@ -362,7 +368,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context
error(FATAL, "cannot read sbitmap_queue\n");
}
- sqc->alloc_hint = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_alloc_hint));
+ if (VALID_MEMBER(sbitmap_queue_alloc_hint))
+ sqc->alloc_hint = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_alloc_hint));
sqc->wake_batch = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_batch));
sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index));
sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws));
@@ -387,6 +394,8 @@ void sbitmap_context_load(ulong addr, struct sbitmap_context *sc)
sc->shift = UINT(sbitmap_buf + OFFSET(sbitmap_shift));
sc->map_nr = UINT(sbitmap_buf + OFFSET(sbitmap_map_nr));
sc->map_addr = ULONG(sbitmap_buf + OFFSET(sbitmap_map));
+ if (VALID_MEMBER(sbitmap_alloc_hint))
+ sc->alloc_hint = ULONG(sbitmap_buf + OFFSET(sbitmap_alloc_hint));
FREEBUF(sbitmap_buf);
}
@@ -512,6 +521,7 @@ void sbitmapq_init(void)
MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
+ MEMBER_OFFSET_INIT(sbitmap_alloc_hint, "sbitmap", "alloc_hint");
MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
MEMBER_OFFSET_INIT(sbitmap_queue_alloc_hint, "sbitmap_queue", "alloc_hint");
diff --git a/symbols.c b/symbols.c
index ba5e2741347d..fd0eb06899f0 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10708,6 +10708,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sbitmap_map_nr));
fprintf(fp, " sbitmap_map: %ld\n",
OFFSET(sbitmap_map));
+ fprintf(fp, " sbitmap_alloc_hint: %ld\n",
+ OFFSET(sbitmap_alloc_hint));
fprintf(fp, " sbitmap_queue_sb: %ld\n",
OFFSET(sbitmap_queue_sb));
fprintf(fp, " sbitmap_queue_alloc_hint: %ld\n",
--
2.30.2

@ -1,147 +0,0 @@
From 6a89173a25450b679e4a713793b2ed36b077fe56 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:42 +0530
Subject: [PATCH 04/28] ppc64: dynamically allocate h/w interrupt stack
Only older kernel (v2.4) used h/w interrupt stack to store frames when
CPU received IPI. Memory used for this in 'struct machine_specific' is
useless for later kernels. For the sake of backward compatibility keep
h/w interrupt stack but dynamically allocate memory for it and save
some bytes from being wasted.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
ppc64.c | 51 +++++++++++++++++++++------------------------------
2 files changed, 22 insertions(+), 31 deletions(-)
diff --git a/defs.h b/defs.h
index c524a05d8105..d8fbeb89e335 100644
--- a/defs.h
+++ b/defs.h
@@ -6311,7 +6311,7 @@ struct ppc64_vmemmap {
* Used to store the HW interrupt stack. It is only for 2.4.
*/
struct machine_specific {
- ulong hwintrstack[NR_CPUS];
+ ulong *hwintrstack;
char *hwstackbuf;
uint hwstacksize;
diff --git a/ppc64.c b/ppc64.c
index 0e1d8678eef5..272eb207074a 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -256,7 +256,7 @@ static int set_ppc64_max_physmem_bits(void)
}
struct machine_specific ppc64_machine_specific = {
- .hwintrstack = { 0 },
+ .hwintrstack = NULL,
.hwstackbuf = 0,
.hwstacksize = 0,
.pte_rpn_shift = PTE_RPN_SHIFT_DEFAULT,
@@ -275,7 +275,7 @@ struct machine_specific ppc64_machine_specific = {
};
struct machine_specific book3e_machine_specific = {
- .hwintrstack = { 0 },
+ .hwintrstack = NULL,
.hwstackbuf = 0,
.hwstacksize = 0,
.pte_rpn_shift = PTE_RPN_SHIFT_L4_BOOK3E_64K,
@@ -676,6 +676,9 @@ ppc64_init(int when)
*/
offset = MEMBER_OFFSET("paca_struct", "xHrdIntStack");
paca_sym = symbol_value("paca");
+ if (!(machdep->machspec->hwintrstack =
+ (ulong *)calloc(NR_CPUS, sizeof(ulong))))
+ error(FATAL, "cannot malloc hwintrstack space.");
for (cpu = 0; cpu < kt->cpus; cpu++) {
readmem(paca_sym + (paca_size * cpu) + offset,
KVADDR,
@@ -686,14 +689,9 @@ ppc64_init(int when)
machdep->machspec->hwstacksize = 8 * machdep->pagesize;
if ((machdep->machspec->hwstackbuf = (char *)
malloc(machdep->machspec->hwstacksize)) == NULL)
- error(FATAL, "cannot malloc hwirqstack space.");
- } else
- /*
- * 'xHrdIntStack' member in "paca_struct" is not
- * available for 2.6 kernel.
- */
- BZERO(&machdep->machspec->hwintrstack,
- NR_CPUS*sizeof(ulong));
+ error(FATAL, "cannot malloc hwirqstack buffer space.");
+ }
+
if (!machdep->hz) {
machdep->hz = HZ;
if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
@@ -846,23 +844,15 @@ ppc64_dump_machdep_table(ulong arg)
fprintf(fp, " is_vmaddr: %s\n",
machdep->machspec->is_vmaddr == book3e_is_vmaddr ?
"book3e_is_vmaddr()" : "ppc64_is_vmaddr()");
- fprintf(fp, " hwintrstack[%d]: ", NR_CPUS);
- for (c = 0; c < NR_CPUS; c++) {
- for (others = 0, i = c; i < NR_CPUS; i++) {
- if (machdep->machspec->hwintrstack[i])
- others++;
+ if (machdep->machspec->hwintrstack) {
+ fprintf(fp, " hwintrstack[%d]: ", NR_CPUS);
+ for (c = 0; c < NR_CPUS; c++) {
+ fprintf(fp, "%s%016lx ",
+ ((c % 4) == 0) ? "\n " : "",
+ machdep->machspec->hwintrstack[c]);
}
- if (!others) {
- fprintf(fp, "%s%s",
- c && ((c % 4) == 0) ? "\n " : "",
- c ? "(remainder unused)" : "(unused)");
- break;
- }
-
- fprintf(fp, "%s%016lx ",
- ((c % 4) == 0) ? "\n " : "",
- machdep->machspec->hwintrstack[c]);
- }
+ } else
+ fprintf(fp, " hwintrstack: (unused)");
fprintf(fp, "\n");
fprintf(fp, " hwstackbuf: %lx\n", (ulong)machdep->machspec->hwstackbuf);
fprintf(fp, " hwstacksize: %d\n", machdep->machspec->hwstacksize);
@@ -1683,9 +1673,10 @@ ppc64_check_sp_in_HWintrstack(ulong sp, struct bt_info *bt)
*
* Note: HW Interrupt stack is used only in 2.4 kernel.
*/
- if (is_task_active(bt->task) && (tt->panic_task != bt->task) &&
- machdep->machspec->hwintrstack[bt->tc->processor]) {
+ if (machdep->machspec->hwintrstack && is_task_active(bt->task) &&
+ (bt->task != tt->panic_task)) {
ulong newsp;
+
readmem(machdep->machspec->hwintrstack[bt->tc->processor],
KVADDR, &newsp, sizeof(ulong),
"stack pointer", FAULT_ON_ERROR);
@@ -1958,7 +1949,7 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
bt->stackbase = irqstack;
bt->stacktop = bt->stackbase + STACKSIZE();
alter_stackbuf(bt);
- } else if (ms->hwintrstack[bt->tc->processor]) {
+ } else if (ms->hwintrstack) {
bt->stacktop = ms->hwintrstack[bt->tc->processor] +
sizeof(ulong);
bt->stackbase = ms->hwintrstack[bt->tc->processor] -
@@ -2555,7 +2546,7 @@ retry:
goto retry;
}
- if (check_intrstack && ms->hwintrstack[bt->tc->processor]) {
+ if (check_intrstack && ms->hwintrstack) {
bt->stacktop = ms->hwintrstack[bt->tc->processor] +
sizeof(ulong);
bt->stackbase = ms->hwintrstack[bt->tc->processor] -
--
2.37.1

@ -1,103 +0,0 @@
From 530fe6ad7e4d7ff6254596c1219d25ed929e3867 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:15 +0800
Subject: [PATCH 04/18] sbitmapq: fix invalid offset for
"sbitmap_queue_round_robin" on Linux v5.13-rc1
Kernel commit efe1f3a1d583 ("scsi: sbitmap: Maintain allocation
round_robin in sbitmap") moved the round_robin member from struct
sbitmap_queue to struct sbitmap. Without the patch, the sbitmapq
will fail:
crash> sbitmapq 0xffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_queue_round_robin
FILE: sbitmap.c LINE: 378 FUNCTION: sbitmap_queue_context_load()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 ++
sbitmap.c | 12 ++++++++++--
symbols.c | 2 ++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 0aeb98c4f654..ecbced24d2e3 100644
--- a/defs.h
+++ b/defs.h
@@ -2169,6 +2169,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sbq_wait_state_wait_cnt;
long sbq_wait_state_wait;
long sbitmap_alloc_hint;
+ long sbitmap_round_robin;
};
struct size_table { /* stash of commonly-used sizes */
@@ -5909,6 +5910,7 @@ struct sbitmap_context {
unsigned map_nr;
ulong map_addr;
ulong alloc_hint;
+ bool round_robin;
};
typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p);
diff --git a/sbitmap.c b/sbitmap.c
index 2921d5447c65..7b318b533702 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -352,7 +352,11 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
FREEBUF(sbq_wait_state_buf);
- fprintf(fp, "round_robin = %d\n", sqc->round_robin);
+ if (VALID_MEMBER(sbitmap_queue_round_robin))
+ fprintf(fp, "round_robin = %d\n", sqc->round_robin);
+ else if (VALID_MEMBER(sbitmap_round_robin)) /* 5.13 and later */
+ fprintf(fp, "round_robin = %d\n", sc->round_robin);
+
fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth);
}
@@ -374,7 +378,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context
sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index));
sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws));
sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active));
- sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin));
+ if (VALID_MEMBER(sbitmap_queue_round_robin))
+ sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin));
sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth));
FREEBUF(sbitmap_queue_buf);
@@ -396,6 +401,8 @@ void sbitmap_context_load(ulong addr, struct sbitmap_context *sc)
sc->map_addr = ULONG(sbitmap_buf + OFFSET(sbitmap_map));
if (VALID_MEMBER(sbitmap_alloc_hint))
sc->alloc_hint = ULONG(sbitmap_buf + OFFSET(sbitmap_alloc_hint));
+ if (VALID_MEMBER(sbitmap_round_robin))
+ sc->round_robin = BOOL(sbitmap_buf + OFFSET(sbitmap_round_robin));
FREEBUF(sbitmap_buf);
}
@@ -522,6 +529,7 @@ void sbitmapq_init(void)
MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
MEMBER_OFFSET_INIT(sbitmap_alloc_hint, "sbitmap", "alloc_hint");
+ MEMBER_OFFSET_INIT(sbitmap_round_robin, "sbitmap", "round_robin");
MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
MEMBER_OFFSET_INIT(sbitmap_queue_alloc_hint, "sbitmap_queue", "alloc_hint");
diff --git a/symbols.c b/symbols.c
index fd0eb06899f0..5d12a021c769 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10710,6 +10710,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(sbitmap_map));
fprintf(fp, " sbitmap_alloc_hint: %ld\n",
OFFSET(sbitmap_alloc_hint));
+ fprintf(fp, " sbitmap_round_robin: %ld\n",
+ OFFSET(sbitmap_round_robin));
fprintf(fp, " sbitmap_queue_sb: %ld\n",
OFFSET(sbitmap_queue_sb));
fprintf(fp, " sbitmap_queue_alloc_hint: %ld\n",
--
2.30.2

@ -1,56 +0,0 @@
From 4dbf7e296f6fde05894a55e23fbaf0d50e3b38b9 Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:43 +0530
Subject: [PATCH 05/28] ppc64: rename ppc64_paca_init to
ppc64_paca_percpu_offset_init
ppc64_paca_init() function is specifically used to initialize percpu
data_offset for kernels older than v2.6.36. So, the name is slightly
misleading. Rename it to ppc64_paca_percpu_offset_init to reflect its
purpose.
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/ppc64.c b/ppc64.c
index 272eb207074a..0a3aa5f7af91 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -52,7 +52,7 @@ static char * ppc64_check_eframe(struct ppc64_pt_regs *);
static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
struct bt_info *);
static void parse_cmdline_args(void);
-static int ppc64_paca_init(int);
+static int ppc64_paca_percpu_offset_init(int);
static void ppc64_init_cpu_info(void);
static int ppc64_get_cpu_map(void);
static void ppc64_clear_machdep_cache(void);
@@ -3285,7 +3285,7 @@ parse_cmdline_args(void)
* Initialize the per cpu data_offset values from paca structure.
*/
static int
-ppc64_paca_init(int map)
+ppc64_paca_percpu_offset_init(int map)
{
int i, cpus, nr_paca;
char *cpu_paca_buf;
@@ -3387,10 +3387,11 @@ ppc64_init_cpu_info(void)
* which was removed post v2.6.15 ppc64 and now we get the per cpu
* data_offset from __per_cpu_offset symbol during kernel_init()
* call. Hence for backward (pre-2.6.36) compatibility, call
- * ppc64_paca_init() only if symbol __per_cpu_offset does not exist.
+ * ppc64_paca_percpu_offset_init() only if symbol __per_cpu_offset
+ * does not exist.
*/
if (!symbol_exists("__per_cpu_offset"))
- cpus = ppc64_paca_init(map);
+ cpus = ppc64_paca_percpu_offset_init(map);
else {
if (!(nr_cpus = get_array_length("__per_cpu_offset", NULL, 0)))
nr_cpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS :
--
2.37.1

@ -1,101 +0,0 @@
From 3750803f6ae5f5ad071f86ca916dbbb17b7a83a5 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 23 May 2022 18:04:16 +0800
Subject: [PATCH 05/18] sbitmapq: fix invalid offset for "sbitmap_word_depth"
on Linux v5.18-rc1
Kernel commit 3301bc53358a ("lib/sbitmap: kill 'depth' from sbitmap_word")
removed the depth member from struct sbitmap_word. Without the patch, the
sbitmapq will fail:
crash> sbitmapq 0xffff8e99d0dc8010
sbitmapq: invalid structure member offset: sbitmap_word_depth
FILE: sbitmap.c LINE: 84 FUNCTION: __sbitmap_weight()
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
sbitmap.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/sbitmap.c b/sbitmap.c
index 7b318b533702..e8ebd62fe01c 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -78,10 +78,16 @@ static unsigned long bitmap_weight(unsigned long bitmap, unsigned int bits)
return w;
}
+static inline unsigned int __map_depth(const struct sbitmap_context *sc, int index)
+{
+ if (index == sc->map_nr - 1)
+ return sc->depth - (index << sc->shift);
+ return 1U << sc->shift;
+}
+
static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
- const ulong w_depth_off = OFFSET(sbitmap_word_depth);
const ulong w_word_off = OFFSET(sbitmap_word_word);
const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
@@ -99,7 +105,7 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
error(FATAL, "cannot read sbitmap_word\n");
}
- depth = ULONG(sbitmap_word_buf + w_depth_off);
+ depth = __map_depth(sc, i);
if (set) {
word = ULONG(sbitmap_word_buf + w_word_off);
@@ -142,7 +148,6 @@ static void sbitmap_emit_byte(unsigned int offset, uint8_t byte)
static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
- const ulong w_depth_off = OFFSET(sbitmap_word_depth);
const ulong w_word_off = OFFSET(sbitmap_word_word);
const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
@@ -165,7 +170,7 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
word = ULONG(sbitmap_word_buf + w_word_off);
cleared = ULONG(sbitmap_word_buf + w_cleared_off);
- word_bits = ULONG(sbitmap_word_buf + w_depth_off);
+ word_bits = __map_depth(sc, i);
word &= ~cleared;
@@ -213,7 +218,6 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
unsigned int start, sbitmap_for_each_fn fn, void *data)
{
const ulong sbitmap_word_size = SIZE(sbitmap_word);
- const ulong w_depth_off = OFFSET(sbitmap_word_depth);
const ulong w_word_off = OFFSET(sbitmap_word_word);
const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
@@ -232,7 +236,7 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
while (scanned < sc->depth) {
unsigned long w_addr = sc->map_addr + (sbitmap_word_size * index);
- unsigned long w_depth, w_word, w_cleared;
+ unsigned long w_word, w_cleared;
unsigned long word, depth;
if (!readmem(w_addr, KVADDR, sbitmap_word_buf, sbitmap_word_size, "sbitmap_word", RETURN_ON_ERROR)) {
@@ -240,11 +244,10 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
error(FATAL, "cannot read sbitmap_word\n");
}
- w_depth = ULONG(sbitmap_word_buf + w_depth_off);
w_word = ULONG(sbitmap_word_buf + w_word_off);
w_cleared = ULONG(sbitmap_word_buf + w_cleared_off);
- depth = min(w_depth - nr, sc->depth - scanned);
+ depth = min(__map_depth(sc, index) - nr, sc->depth - scanned);
scanned += depth;
word = w_word & ~w_cleared;
--
2.30.2

@ -1,352 +0,0 @@
From f256095c61355d8db11502709ab3a084343f2bec Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Mon, 4 Jul 2022 10:55:44 +0530
Subject: [PATCH 06/28] ppc64: handle backtrace when CPU is in an emergency
stack
A CPU could be in an emergency stack when it is running in real mode
or any special scenario like TM bad thing. Also, there are dedicated
emergency stacks for machine check and system reset interrupt. Right
now, no backtrace is provided if a CPU is in any of these stacks.
This change ensures backtrace is processed appropriately even when
a CPU is in any one of these emergency stacks. Also, if stack info
cannot be found, print that message always instead of only when
verbose logs are enabled.
Related kernel commits:
729b0f715371 ("powerpc/book3s: Introduce exclusive emergency stack for machine check exception.")
b1ee8a3de579 ("powerpc/64s: Dedicated system reset interrupt stack")
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 12 ++++
ppc64.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 203 insertions(+), 12 deletions(-)
diff --git a/defs.h b/defs.h
index d8fbeb89e335..6a1b6f8a16a8 100644
--- a/defs.h
+++ b/defs.h
@@ -6296,6 +6296,13 @@ struct ppc64_elf_prstatus {
#ifdef PPC64
+enum emergency_stack_type {
+ NONE_STACK = 0,
+ EMERGENCY_STACK,
+ NMI_EMERGENCY_STACK,
+ MC_EMERGENCY_STACK
+};
+
struct ppc64_opal {
uint64_t base;
uint64_t entry;
@@ -6315,6 +6322,11 @@ struct machine_specific {
char *hwstackbuf;
uint hwstacksize;
+ /* Emergency stacks */
+ ulong *emergency_sp;
+ ulong *nmi_emergency_sp;
+ ulong *mc_emergency_sp;
+
uint l4_index_size;
uint l3_index_size;
uint l2_index_size;
diff --git a/ppc64.c b/ppc64.c
index 0a3aa5f7af91..03047a85955d 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -48,6 +48,10 @@ static ulong ppc64_get_stackbase(ulong);
static ulong ppc64_get_stacktop(ulong);
void ppc64_compiler_warning_stub(void);
static ulong ppc64_in_irqstack(ulong);
+static enum emergency_stack_type ppc64_in_emergency_stack(int cpu, ulong addr,
+ bool verbose);
+static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
+ struct bt_info *bt);
static char * ppc64_check_eframe(struct ppc64_pt_regs *);
static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
struct bt_info *);
@@ -56,6 +60,7 @@ static int ppc64_paca_percpu_offset_init(int);
static void ppc64_init_cpu_info(void);
static int ppc64_get_cpu_map(void);
static void ppc64_clear_machdep_cache(void);
+static void ppc64_init_paca_info(void);
static void ppc64_vmemmap_init(void);
static int ppc64_get_kvaddr_ranges(struct vaddr_range *);
static uint get_ptetype(ulong pte);
@@ -692,6 +697,8 @@ ppc64_init(int when)
error(FATAL, "cannot malloc hwirqstack buffer space.");
}
+ ppc64_init_paca_info();
+
if (!machdep->hz) {
machdep->hz = HZ;
if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
@@ -1204,6 +1211,70 @@ ppc64_kvtop(struct task_context *tc, ulong kvaddr,
return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose);
}
+static void
+ppc64_init_paca_info(void)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong *paca_ptr;
+ int i;
+
+ if (!(paca_ptr = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc paca pointers space.\n");
+
+ /* Get paca pointers for all CPUs. */
+ if (symbol_exists("paca_ptrs")) {
+ ulong paca_loc;
+
+ readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc, sizeof(void *),
+ "paca double pointer", FAULT_ON_ERROR);
+ readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
+ "paca pointers", FAULT_ON_ERROR);
+ } else if (symbol_exists("paca") &&
+ (get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR)) {
+ readmem(symbol_value("paca"), KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
+ "paca pointers", FAULT_ON_ERROR);
+ } else {
+ free(paca_ptr);
+ return;
+ }
+
+ /* Initialize emergency stacks info. */
+ if (MEMBER_EXISTS("paca_struct", "emergency_sp")) {
+ ulong offset = MEMBER_OFFSET("paca_struct", "emergency_sp");
+
+ if (!(ms->emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc emergency stack space.\n");
+ for (i = 0; i < kt->cpus; i++)
+ readmem(paca_ptr[i] + offset, KVADDR, &ms->emergency_sp[i],
+ sizeof(void *), "paca->emergency_sp",
+ FAULT_ON_ERROR);
+ }
+
+ if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) {
+ ulong offset = MEMBER_OFFSET("paca_struct", "nmi_emergency_sp");
+
+ if (!(ms->nmi_emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc NMI emergency stack space.\n");
+ for (i = 0; i < kt->cpus; i++)
+ readmem(paca_ptr[i] + offset, KVADDR, &ms->nmi_emergency_sp[i],
+ sizeof(void *), "paca->nmi_emergency_sp",
+ FAULT_ON_ERROR);
+ }
+
+ if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) {
+ ulong offset = MEMBER_OFFSET("paca_struct", "mc_emergency_sp");
+
+ if (!(ms->mc_emergency_sp = (ulong *)calloc(kt->cpus, sizeof(ulong))))
+ error(FATAL, "cannot malloc machine check emergency stack space.\n");
+ for (i = 0; i < kt->cpus; i++)
+ readmem(paca_ptr[i] + offset, KVADDR, &ms->mc_emergency_sp[i],
+ sizeof(void *), "paca->mc_emergency_sp",
+ FAULT_ON_ERROR);
+ }
+
+ free(paca_ptr);
+}
+
/*
* Verify that the kernel has made the vmemmap list available,
* and if so, stash the relevant data required to make vtop
@@ -1755,6 +1826,11 @@ ppc64_eframe_search(struct bt_info *bt_in)
addr = bt->stackbase +
roundup(SIZE(thread_info), sizeof(ulong));
} else if (!INSTACK(addr, bt)) {
+ enum emergency_stack_type estype;
+
+ if ((estype = ppc64_in_emergency_stack(bt->tc->processor, addr, false)))
+ ppc64_set_bt_emergency_stack(estype, bt);
+
/*
* If the user specified SP is in HW interrupt stack
* (only for tasks running on other CPUs and in 2.4
@@ -1856,6 +1932,84 @@ ppc64_in_irqstack(ulong addr)
return 0;
}
+/*
+ * Check if the CPU is running in any of its emergency stacks.
+ * Returns
+ * NONE_STACK : if input is invalid or addr is not within any emergency stack.
+ * EMERGENCY_STACK : if the addr is within emergency stack.
+ * NMI_EMERGENCY_STACK : if the addr is within NMI emergency stack.
+ * MC_EMERGENCY_STACK : if the addr is within machine check emergency stack.
+ */
+static enum emergency_stack_type
+ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong base, top;
+
+ if (cpu < 0 || cpu >= kt->cpus)
+ return NONE_STACK;
+
+ if (ms->emergency_sp) {
+ top = ms->emergency_sp[cpu];
+ base = top - STACKSIZE();
+ if (addr >= base && addr < top) {
+ if (verbose)
+ fprintf(fp, "---<Emergency Stack>---\n");
+ return EMERGENCY_STACK;
+ }
+ }
+
+ if (ms->nmi_emergency_sp) {
+ top = ms->nmi_emergency_sp[cpu];
+ base = top - STACKSIZE();
+ if (addr >= base && addr < top) {
+ if (verbose)
+ fprintf(fp, "---<NMI Emergency Stack>---\n");
+ return NMI_EMERGENCY_STACK;
+ }
+ }
+
+ if (ms->mc_emergency_sp) {
+ top = ms->mc_emergency_sp[cpu];
+ base = top - STACKSIZE();
+ if (addr >= base && addr < top) {
+ if (verbose)
+ fprintf(fp, "---<Machine Check Emergency Stack>---\n");
+ return MC_EMERGENCY_STACK;
+ }
+ }
+
+ return NONE_STACK;
+}
+
+static void
+ppc64_set_bt_emergency_stack(enum emergency_stack_type type, struct bt_info *bt)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong top;
+
+ switch (type) {
+ case EMERGENCY_STACK:
+ top = ms->emergency_sp[bt->tc->processor];
+ break;
+ case NMI_EMERGENCY_STACK:
+ top = ms->nmi_emergency_sp[bt->tc->processor];
+ break;
+ case MC_EMERGENCY_STACK:
+ top = ms->mc_emergency_sp[bt->tc->processor];
+ break;
+ default:
+ top = 0;
+ break;
+ }
+
+ if (top) {
+ bt->stackbase = top - STACKSIZE();
+ bt->stacktop = top;
+ alter_stackbuf(bt);
+ }
+}
+
/*
* Unroll a kernel stack.
*/
@@ -1936,10 +2090,13 @@ ppc64_back_trace_cmd(struct bt_info *bt)
static void
ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
{
- int frame = 0;
- ulong lr = 0; /* hack...need to pass in initial lr reg */
+ enum emergency_stack_type estype;
ulong newpc = 0, newsp, marker;
+ int c = bt->tc->processor;
+ ulong nmi_sp = 0;
int eframe_found;
+ int frame = 0;
+ ulong lr = 0; /* hack...need to pass in initial lr reg */
if (!INSTACK(req->sp, bt)) {
ulong irqstack;
@@ -1949,6 +2106,10 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
bt->stackbase = irqstack;
bt->stacktop = bt->stackbase + STACKSIZE();
alter_stackbuf(bt);
+ } else if ((estype = ppc64_in_emergency_stack(c, req->sp, true))) {
+ if (estype == NMI_EMERGENCY_STACK)
+ nmi_sp = req->sp;
+ ppc64_set_bt_emergency_stack(estype, bt);
} else if (ms->hwintrstack) {
bt->stacktop = ms->hwintrstack[bt->tc->processor] +
sizeof(ulong);
@@ -1957,9 +2118,7 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
bt->stackbuf = ms->hwstackbuf;
alter_stackbuf(bt);
} else {
- if (CRASHDEBUG(1)) {
- fprintf(fp, "cannot find the stack info.\n");
- }
+ fprintf(fp, "cannot find the stack info.\n");
return;
}
}
@@ -1989,13 +2148,20 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
newsp =
*(ulong *)&bt->stackbuf[newsp - bt->stackbase];
if (!INSTACK(newsp, bt)) {
- /*
- * Switch HW interrupt stack to process's stack.
- */
- bt->stackbase = GET_STACKBASE(bt->task);
- bt->stacktop = GET_STACKTOP(bt->task);
- alter_stackbuf(bt);
- }
+ if ((estype = ppc64_in_emergency_stack(c, newsp, true))) {
+ if (!nmi_sp && estype == NMI_EMERGENCY_STACK)
+ nmi_sp = newsp;
+ ppc64_set_bt_emergency_stack(estype, bt);
+ } else {
+ /*
+ * Switch HW interrupt stack or emergency stack
+ * to process's stack.
+ */
+ bt->stackbase = GET_STACKBASE(bt->task);
+ bt->stacktop = GET_STACKTOP(bt->task);
+ alter_stackbuf(bt);
+ }
+ }
if (IS_KVADDR(newsp) && INSTACK(newsp, bt))
newpc = *(ulong *)&bt->stackbuf[newsp + 16 -
bt->stackbase];
@@ -2039,6 +2205,16 @@ ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
}
}
+ /*
+ * NMI stack may not be re-entrant. In so, an SP in the NMI stack
+ * is likely to point back to an SP within the NMI stack, in case
+ * of a nested NMI.
+ */
+ if (nmi_sp && nmi_sp == newsp) {
+ fprintf(fp, "---<Nested NMI>---\n");
+ break;
+ }
+
/*
* Some Linux 3.7 kernel threads have been seen to have
* their end-of-trace stack linkage pointer pointing
@@ -2416,6 +2592,9 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
pt_regs = (struct ppc64_pt_regs *)bt->machdep;
ur_nip = pt_regs->nip;
ur_ksp = pt_regs->gpr[1];
+ /* Print the collected regs for panic task. */
+ ppc64_print_regs(pt_regs);
+ ppc64_print_nip_lr(pt_regs, 1);
} else if ((pc->flags & KDUMP) ||
((pc->flags & DISKDUMP) &&
(*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
--
2.37.1

@ -1,205 +0,0 @@
From 6833262bf87177d8affe4f91b2e7d2c76ecdf636 Mon Sep 17 00:00:00 2001
From: Qi Zheng <zhengqi.arch@bytedance.com>
Date: Tue, 24 May 2022 20:25:53 +0800
Subject: [PATCH 07/18] bt: x86_64: filter out idle task stack
When we use crash to troubleshoot softlockup and other problems,
we often use the 'bt -a' command to print the stacks of running
processes on all CPUs. But now some servers have hundreds of CPUs
(such as AMD machines), which causes the 'bt -a' command to output
a lot of process stacks. And many of these stacks are the stacks
of the idle process, which are not needed by us.
Therefore, in order to reduce this part of the interference information,
this patch adds the -n option to the bt command. When we specify
'-n idle' (meaning no idle), the stack of the idle process will be
filtered out, thus speeding up our troubleshooting.
And the option works only for crash dumps captured by kdump.
The command output is as follows:
crash> bt -a -n idle
[...]
PID: 0 TASK: ffff889ff8c34380 CPU: 8 COMMAND: "swapper/8"
PID: 0 TASK: ffff889ff8c32d00 CPU: 9 COMMAND: "swapper/9"
PID: 0 TASK: ffff889ff8c31680 CPU: 10 COMMAND: "swapper/10"
PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: "swapper/11"
PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: "swapper/12"
PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: "bash"
#0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407
#1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d
#2 [ffffc9000d35bdf0] panic at ffffffff81081930
#3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1
#4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175
#5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b
#6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86
#7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5
#8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579
#9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259
RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274
RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001
RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80
R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760
R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002
ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
[...]
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Acked-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
help.c | 33 ++++++++++++++++++++++++++++++++-
kernel.c | 13 ++++++++++++-
x86_64.c | 8 ++++++++
4 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index ecbced24d2e3..c8444b4e54eb 100644
--- a/defs.h
+++ b/defs.h
@@ -5832,6 +5832,7 @@ ulong cpu_map_addr(const char *type);
#define BT_SHOW_ALL_REGS (0x2000000000000ULL)
#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
#define BT_OVERFLOW_STACK (0x8000000000000ULL)
+#define BT_SKIP_IDLE (0x10000000000000ULL)
#define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
#define BT_REF_HEXVAL (0x1)
diff --git a/help.c b/help.c
index 51a0fe3d687c..e1bbc5abe029 100644
--- a/help.c
+++ b/help.c
@@ -1909,12 +1909,14 @@ char *help_bt[] = {
"bt",
"backtrace",
"[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s [-x|d]]"
-"\n [-I ip] [-S sp] [pid | task]",
+"\n [-I ip] [-S sp] [-n idle] [pid | task]",
" Display a kernel stack backtrace. If no arguments are given, the stack",
" trace of the current context will be displayed.\n",
" -a displays the stack traces of the active task on each CPU.",
" (only applicable to crash dumps)",
" -A same as -a, but also displays vector registers (S390X only).",
+" -n idle filter the stack of idle tasks (x86_64).",
+" (only applicable to crash dumps)",
" -p display the stack trace of the panic task only.",
" (only applicable to crash dumps)",
" -c cpu display the stack trace of the active task on one or more CPUs,",
@@ -2004,6 +2006,35 @@ char *help_bt[] = {
" DS: 002b ESI: bfffc8a0 ES: 002b EDI: 00000000 ",
" SS: 002b ESP: bfffc82c EBP: bfffd224 ",
" CS: 0023 EIP: 400d032e ERR: 0000008e EFLAGS: 00000246 ",
+" ",
+" Display the stack trace of the active task(s) when the kernel panicked,",
+" and filter out the stack of the idle tasks:",
+" ",
+" %s> bt -a -n idle",
+" ...",
+" PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: \"swapper/11\"",
+" ",
+" PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: \"swapper/12\"",
+" ",
+" PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: \"bash\"",
+" #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407",
+" #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d",
+" #2 [ffffc9000d35bdf0] panic at ffffffff81081930",
+" #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1",
+" #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175",
+" #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b",
+" #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86",
+" #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5",
+" #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579",
+" #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259",
+" RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246",
+" RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274",
+" RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001",
+" RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80",
+" R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760",
+" R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002",
+" ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b",
+" ...",
"\n Display the stack trace of the active task on CPU 0 and 1:\n",
" %s> bt -c 0,1",
" PID: 0 TASK: ffffffff81a8d020 CPU: 0 COMMAND: \"swapper\"",
diff --git a/kernel.c b/kernel.c
index d0921cf567d9..411e9da1e54f 100644
--- a/kernel.c
+++ b/kernel.c
@@ -2503,7 +2503,7 @@ cmd_bt(void)
if (kt->flags & USE_OPT_BT)
bt->flags |= BT_OPT_BACK_TRACE;
- while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF) {
+ while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) != EOF) {
switch (c)
{
case 'f':
@@ -2672,6 +2672,13 @@ cmd_bt(void)
active++;
break;
+ case 'n':
+ if (machine_type("X86_64") && STREQ(optarg, "idle"))
+ bt->flags |= BT_SKIP_IDLE;
+ else
+ option_not_supported(c);
+ break;
+
case 'r':
bt->flags |= BT_RAW;
break;
@@ -3092,6 +3099,10 @@ back_trace(struct bt_info *bt)
} else
machdep->get_stack_frame(bt, &eip, &esp);
+ /* skip idle task stack */
+ if (bt->flags & BT_SKIP_IDLE)
+ return;
+
if (bt->flags & BT_KSTACKP) {
bt->stkptr = esp;
return;
diff --git a/x86_64.c b/x86_64.c
index ecaefd2f46a8..cfafbcc4dabe 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -4918,6 +4918,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
if (bt->flags & BT_DUMPFILE_SEARCH)
return x86_64_get_dumpfile_stack_frame(bt, pcp, spp);
+ if (bt->flags & BT_SKIP_IDLE)
+ bt->flags &= ~BT_SKIP_IDLE;
+
if (pcp)
*pcp = x86_64_get_pc(bt);
if (spp)
@@ -4960,6 +4963,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp)
estack = -1;
panic = FALSE;
+ if (bt_in->flags & BT_SKIP_IDLE)
+ bt_in->flags &= ~BT_SKIP_IDLE;
+
panic_task = tt->panic_task == bt->task ? TRUE : FALSE;
if (panic_task && bt->machdep) {
@@ -5098,6 +5104,8 @@ next_sysrq:
if (!panic_task && STREQ(sym, "crash_nmi_callback")) {
*rip = *up;
*rsp = bt->stackbase + ((char *)(up) - bt->stackbuf);