Update to the latest commit <28891d112754>

Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
This commit is contained in:
Lianbo Jiang 2024-01-23 15:40:23 +08:00
parent f9d2243c38
commit 2167108126
7 changed files with 1371 additions and 0 deletions

View File

@ -0,0 +1,151 @@
From edb2bd52885ccc2fbe3e0825efe0ac55951a7710 Mon Sep 17 00:00:00 2001
From: "qiwu.chen@transsion.com" <qiwu.chen@transsion.com>
Date: Fri, 22 Dec 2023 03:30:33 +0000
Subject: [PATCH 1/6] arm64: support HW Tag-Based KASAN (MTE) mode
Kernel commit 2e903b914797 ("kasan, arm64: implement HW_TAGS runtime")
introduced Hardware Tag-Based KASAN (MTE) mode for ARMv8.5 and later
CPUs, which uses the Top Byte Ignore (TBI) feature of arm64 CPUs to
store a pointer tag in the top byte of kernel pointers.
Currently, the crash utility cannot load an MTE ramdump because it accesses
HW Tag-Based kernel virtual addresses that it treats as invalid. Here's an example error message:
please wait... (gathering kmem slab cache data)
crash: invalid kernel virtual address: f1ffff80c000201c type: "kmem_cache objsize/object_size"
please wait... (gathering task table data)
crash: invalid kernel virtual address: f9ffff8239c2cde0 type: "xa_node shift"
This patch replaces the original generic_is_kvaddr() with arm64_is_kvaddr(),
which checks the validity of a HW Tag-Based kvaddr. mte_tag_reset() is
used to convert a Tag-Based kvaddr to an untagged kvaddr in arm64_VTOP()
and arm64_IS_VMALLOC_ADDR().
Signed-off-by: chenqiwu <qiwu.chen@transsion.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++---
defs.h | 1 +
2 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/arm64.c b/arm64.c
index 57965c6cb3c8..6ab10ca9b5be 100644
--- a/arm64.c
+++ b/arm64.c
@@ -102,6 +102,41 @@ struct kernel_range {
static struct kernel_range *arm64_get_va_range(struct machine_specific *ms);
static void arm64_get_struct_page_size(struct machine_specific *ms);
+/* mte tag shift bit */
+#define MTE_TAG_SHIFT 56
+/* native kernel pointers tag */
+#define KASAN_TAG_KERNEL 0xFF
+/* minimum value for random tags */
+#define KASAN_TAG_MIN 0xF0
+/* left shift the tag to the MTE_TAG_SHIFT bit position */
+#define mte_tag_shifted(tag) ((ulong)(tag) << MTE_TAG_SHIFT)
+/* get the top byte value of the original kvaddr */
+#define mte_tag_get(addr) (unsigned char)((ulong)(addr) >> MTE_TAG_SHIFT)
+/* reset the top byte to get an untagged kvaddr */
+#define mte_tag_reset(addr) (((ulong)addr & ~mte_tag_shifted(KASAN_TAG_KERNEL)) | \
+ mte_tag_shifted(KASAN_TAG_KERNEL))
+
+static inline bool is_mte_kvaddr(ulong addr)
+{
+ /* check for ARM64_MTE enabled */
+ if (!(machdep->flags & ARM64_MTE))
+ return false;
+
+ /* check the validity of HW Tag-Based kvaddr */
+ if (mte_tag_get(addr) >= KASAN_TAG_MIN && mte_tag_get(addr) < KASAN_TAG_KERNEL)
+ return true;
+
+ return false;
+}
+
+static int arm64_is_kvaddr(ulong addr)
+{
+ if (is_mte_kvaddr(addr))
+ return (mte_tag_reset(addr) >= (ulong)(machdep->kvbase));
+
+ return (addr >= (ulong)(machdep->kvbase));
+}
+
static void arm64_calc_kernel_start(void)
{
struct machine_specific *ms = machdep->machspec;
@@ -182,6 +217,9 @@ arm64_init(int when)
if (kernel_symbol_exists("kimage_voffset"))
machdep->flags |= NEW_VMEMMAP;
+ if (kernel_symbol_exists("cpu_enable_mte"))
+ machdep->flags |= ARM64_MTE;
+
if (!machdep->pagesize && arm64_get_vmcoreinfo(&value, "PAGESIZE", NUM_DEC))
machdep->pagesize = (unsigned int)value;
@@ -262,7 +300,7 @@ arm64_init(int when)
machdep->kvbase = ARM64_VA_START;
ms->userspace_top = ARM64_USERSPACE_TOP;
}
- machdep->is_kvaddr = generic_is_kvaddr;
+ machdep->is_kvaddr = arm64_is_kvaddr;
machdep->kvtop = arm64_kvtop;
/* The defaults */
@@ -975,6 +1013,8 @@ arm64_dump_machdep_table(ulong arg)
fprintf(fp, "%sFLIPPED_VM", others++ ? "|" : "");
if (machdep->flags & HAS_PHYSVIRT_OFFSET)
fprintf(fp, "%sHAS_PHYSVIRT_OFFSET", others++ ? "|" : "");
+ if (machdep->flags & ARM64_MTE)
+ fprintf(fp, "%sARM64_MTE", others++ ? "|" : "");
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
@@ -1023,7 +1063,7 @@ arm64_dump_machdep_table(ulong arg)
fprintf(fp, " dis_filter: arm64_dis_filter()\n");
fprintf(fp, " cmd_mach: arm64_cmd_mach()\n");
fprintf(fp, " get_smp_cpus: arm64_get_smp_cpus()\n");
- fprintf(fp, " is_kvaddr: generic_is_kvaddr()\n");
+ fprintf(fp, " is_kvaddr: arm64_is_kvaddr()\n");
fprintf(fp, " is_uvaddr: arm64_is_uvaddr()\n");
fprintf(fp, " value_to_symbol: generic_machdep_value_to_symbol()\n");
fprintf(fp, " init_kernel_pgd: arm64_init_kernel_pgd\n");
@@ -1633,6 +1673,9 @@ ulong arm64_PTOV(ulong paddr)
ulong
arm64_VTOP(ulong addr)
{
+ if (is_mte_kvaddr(addr))
+ addr = mte_tag_reset(addr);
+
if (machdep->flags & NEW_VMEMMAP) {
if (machdep->machspec->VA_START &&
(addr >= machdep->machspec->kimage_text) &&
@@ -4562,7 +4605,10 @@ int
arm64_IS_VMALLOC_ADDR(ulong vaddr)
{
struct machine_specific *ms = machdep->machspec;
-
+
+ if (is_mte_kvaddr(vaddr))
+ vaddr = mte_tag_reset(vaddr);
+
if ((machdep->flags & NEW_VMEMMAP) &&
(vaddr >= machdep->machspec->kimage_text) &&
(vaddr <= machdep->machspec->kimage_end))
diff --git a/defs.h b/defs.h
index 20237b72a10b..aa8eba83b7f4 100644
--- a/defs.h
+++ b/defs.h
@@ -3348,6 +3348,7 @@ typedef signed int s32;
#define FLIPPED_VM (0x400)
#define HAS_PHYSVIRT_OFFSET (0x800)
#define OVERFLOW_STACKS (0x1000)
+#define ARM64_MTE (0x2000)
/*
* Get kimage_voffset from /dev/crash
--
2.41.0

View File

@ -0,0 +1,334 @@
From d86dc6901ce76a0fc29022ed448a4baa83a47dd7 Mon Sep 17 00:00:00 2001
From: Song Shuai <songshuaishuai@tinylab.org>
Date: Wed, 13 Dec 2023 17:45:06 +0800
Subject: [PATCH 2/6] RISCV64: Add support for 'bt -e' option
With this patch we can search the stack for possible kernel and user
mode exception frames via 'bt -e' command.
TEST: a lkdtm DIRECT EXCEPTION vmcore
crash> bt -e
PID: 1 TASK: ff600000000e0000 CPU: 1 COMMAND: "sh"
KERNEL-MODE EXCEPTION FRAME AT: ff200000000138d8
PC: ffffffff805303c0 [lkdtm_EXCEPTION+6]
RA: ffffffff8052fe36 [lkdtm_do_action+16]
SP: ff20000000013cf0 CAUSE: 000000000000000f
epc : ffffffff805303c0 ra : ffffffff8052fe36 sp : ff20000000013cf0
gp : ffffffff814ef848 tp : ff600000000e0000 t0 : 6500000000000000
t1 : 000000000000006c t2 : 6550203a6d74646b s0 : ff20000000013d00
s1 : 000000000000000a a0 : ffffffff814aef40 a1 : c0000000ffffefff
a2 : 0000000000000010 a3 : 0000000000000001 a4 : 5d53ea10ca096e00
a5 : ffffffff805303ba a6 : 0000000000000008 a7 : 0000000000000038
s2 : ff60000001324000 s3 : ffffffff814aef40 s4 : ff20000000013e30
s5 : 000000000000000a s6 : ff20000000013e30 s7 : ff600000000ce000
s8 : 0000555560f0f8a8 s9 : 00007ffff497f6b4 s10: 00007ffff497f6b0
s11: 0000555560fa30e0 t3 : ffffffff81502197 t4 : ffffffff81502197
t5 : ffffffff81502198 t6 : ff20000000013b28
status: 0000000200000120 badaddr: 0000000000000000
cause: 000000000000000f orig_a0: 0000000000000000
USER-MODE EXCEPTION FRAME AT: ff20000000013ee0
PC: 007fff8780431aff RA: 007fff877b168400 SP: 007ffff497f5b000
ORIG_A0: 0000000000000100 SYSCALLNO: 0000000000004000
epc : 007fff8780431aff ra : 007fff877b168400 sp : 007ffff497f5b000
gp : 00555560f5134800 tp : 007fff8774378000 t0 : 0000000000100000
t1 : 00555560e427bc00 t2 : 0000000000271000 s0 : 007ffff497f5e000
s1 : 0000000000000a00 a0 : 0000000000000100 a1 : 00555560faa68000
a2 : 0000000000000a00 a3 : 4000000000000000 a4 : 20000000000000a8
a5 : 0000000000000054 a6 : 0000000000000400 a7 : 0000000000004000
s2 : 00555560faa68000 s3 : 007fff878b33f800 s4 : 0000000000000a00
s5 : 00555560faa68000 s6 : 0000000000000a00 s7 : 00555560f5131400
s8 : 00555560f0f8a800 s9 : 007ffff497f6b400 s10: 007ffff497f6b000
s11: 00555560fa30e000 t3 : 007fff877af1fe00 t4 : 00555560fa6f2000
t5 : 0000000000000100 t6 : 9e1fea5bf8683300
status: 00000200004020b9 badaddr: 0000000000000000
cause: 0000000000000800 orig_a0: 0000000000000100
crash>
Signed-off-by: Song Shuai <songshuaishuai@tinylab.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 15 +++--
riscv64.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 181 insertions(+), 25 deletions(-)
diff --git a/defs.h b/defs.h
index aa8eba83b7f4..9cf9501348ed 100644
--- a/defs.h
+++ b/defs.h
@@ -7011,17 +7011,16 @@ int riscv64_IS_VMALLOC_ADDR(ulong);
#define display_idt_table() \
error(FATAL, "-d option is not applicable to RISCV64 architecture\n")
-/* from arch/riscv/include/asm/ptrace.h */
+/*
+ * regs[0,31] : struct user_regs_struct
+ * from arch/riscv/include/uapi/asm/ptrace.h
+ * regs[0,35] : struct pt_regs
+ * from arch/riscv/include/asm/ptrace.h
+ */
struct riscv64_register {
ulong regs[36];
};
-struct riscv64_pt_regs {
- ulong badvaddr;
- ulong cause;
- ulong epc;
-};
-
struct riscv64_unwind_frame {
ulong fp;
ulong sp;
@@ -7085,6 +7084,8 @@ struct machine_specific {
#define RISCV64_REGS_RA 1
#define RISCV64_REGS_SP 2
#define RISCV64_REGS_FP 8
+#define RISCV64_REGS_STATUS 32
+#define RISCV64_REGS_CAUSE 34
#endif /* RISCV64 */
diff --git a/riscv64.c b/riscv64.c
index 872be594d72b..6097c0029ccc 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -35,6 +35,7 @@ static int riscv64_kvtop(struct task_context *tc, ulong kvaddr,
static void riscv64_cmd_mach(void);
static void riscv64_stackframe_init(void);
static void riscv64_back_trace_cmd(struct bt_info *bt);
+static int riscv64_eframe_search(struct bt_info *bt);
static int riscv64_get_dumpfile_stack_frame(struct bt_info *bt,
ulong *nip, ulong *ksp);
static void riscv64_get_stack_frame(struct bt_info *bt, ulong *pcp,
@@ -51,6 +52,8 @@ static int riscv64_get_elf_notes(void);
static void riscv64_get_va_range(struct machine_specific *ms);
static void riscv64_get_va_bits(struct machine_specific *ms);
static void riscv64_get_struct_page_size(struct machine_specific *ms);
+static void riscv64_print_exception_frame(struct bt_info *, ulong , int );
+static int riscv64_is_kernel_exception_frame(struct bt_info *, ulong );
#define REG_FMT "%016lx"
#define SZ_2G 0x80000000
@@ -210,6 +213,7 @@ riscv64_dump_machdep_table(ulong arg)
machdep->memsize, machdep->memsize);
fprintf(fp, " bits: %d\n", machdep->bits);
fprintf(fp, " back_trace: riscv64_back_trace_cmd()\n");
+ fprintf(fp, " eframe_search: riscv64_eframe_search()\n");
fprintf(fp, " processor_speed: riscv64_processor_speed()\n");
fprintf(fp, " uvtop: riscv64_uvtop()\n");
fprintf(fp, " kvtop: riscv64_kvtop()\n");
@@ -1398,6 +1402,7 @@ riscv64_init(int when)
machdep->cmd_mach = riscv64_cmd_mach;
machdep->get_stack_frame = riscv64_get_stack_frame;
machdep->back_trace = riscv64_back_trace_cmd;
+ machdep->eframe_search = riscv64_eframe_search;
machdep->vmalloc_start = riscv64_vmalloc_start;
machdep->processor_speed = riscv64_processor_speed;
@@ -1452,25 +1457,10 @@ riscv64_init(int when)
}
}
-/*
- * 'help -r' command output
- */
-void
-riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
+/* bool pt_regs : pass 1 to dump pt_regs , pass 0 to dump user_regs_struct */
+static void
+riscv64_dump_pt_regs(struct riscv64_register *regs, FILE *ofp, bool pt_regs)
{
- const struct machine_specific *ms = machdep->machspec;
- struct riscv64_register *regs;
-
- if (!ms->crash_task_regs) {
- error(INFO, "registers not collected for cpu %d\n", cpu);
- return;
- }
-
- regs = &ms->crash_task_regs[cpu];
- if (!regs->regs[RISCV64_REGS_SP] && !regs->regs[RISCV64_REGS_EPC]) {
- error(INFO, "registers not collected for cpu %d\n", cpu);
- return;
- }
/* Print riscv64 32 regs */
fprintf(ofp,
@@ -1496,6 +1486,171 @@ riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
regs->regs[24], regs->regs[25], regs->regs[26],
regs->regs[27], regs->regs[28], regs->regs[29],
regs->regs[30], regs->regs[31]);
+
+ if (pt_regs)
+ fprintf(ofp,
+ " status: " REG_FMT " badaddr: " REG_FMT "\n"
+ " cause: " REG_FMT " orig_a0: " REG_FMT "\n",
+ regs->regs[32], regs->regs[33], regs->regs[34],
+ regs->regs[35]);
+}
+
+/*
+ * 'help -r' command output
+ */
+void
+riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
+{
+ const struct machine_specific *ms = machdep->machspec;
+ struct riscv64_register *regs;
+
+ if (!ms->crash_task_regs) {
+ error(INFO, "registers not collected for cpu %d\n", cpu);
+ return;
+ }
+
+ regs = &ms->crash_task_regs[cpu];
+ if (!regs->regs[RISCV64_REGS_SP] && !regs->regs[RISCV64_REGS_EPC]) {
+ error(INFO, "registers not collected for cpu %d\n", cpu);
+ return;
+ }
+
+ riscv64_dump_pt_regs(regs, ofp, 0);
+}
+
+#define USER_MODE (0)
+#define KERNEL_MODE (1)
+
+static void
+riscv64_print_exception_frame(struct bt_info *bt, ulong ptr, int mode)
+{
+
+ struct syment *sp;
+ ulong PC, RA, SP, offset;
+ struct riscv64_register *regs;
+
+ regs = (struct riscv64_register *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(ptr))];
+
+ PC = regs->regs[RISCV64_REGS_EPC];
+ RA = regs->regs[RISCV64_REGS_RA];
+ SP = regs->regs[RISCV64_REGS_SP];
+
+ switch (mode) {
+ case USER_MODE:
+ fprintf(fp,
+ " PC: %016lx RA: %016lx SP: %016lx\n"
+ " ORIG_A0: %016lx SYSCALLNO: %016lx\n",
+ PC, RA, SP, regs->regs[35], regs->regs[17]);
+
+ break;
+
+ case KERNEL_MODE:
+ fprintf(fp, " PC: %016lx ", PC);
+ if (is_kernel_text(PC) && (sp = value_search(PC, &offset))) {
+ fprintf(fp, "[%s", sp->name);
+ if (offset)
+ fprintf(fp, (*gdb_output_radix == 16) ?
+ "+0x%lx" : "+%ld", offset);
+ fprintf(fp, "]\n");
+ } else
+ fprintf(fp, "[unknown or invalid address]\n");
+
+ fprintf(fp, " RA: %016lx ", RA);
+ if (is_kernel_text(RA) && (sp = value_search(RA, &offset))) {
+ fprintf(fp, "[%s", sp->name);
+ if (offset)
+ fprintf(fp, (*gdb_output_radix == 16) ?
+ "+0x%lx" : "+%ld", offset);
+ fprintf(fp, "]\n");
+ } else
+ fprintf(fp, "[unknown or invalid address]\n");
+
+ fprintf(fp, " SP: %016lx CAUSE: %016lx\n",
+ SP, regs->regs[RISCV64_REGS_CAUSE]);
+
+ break;
+ }
+
+ riscv64_dump_pt_regs(regs, fp, 1);
+
+}
+
+static int
+riscv64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
+{
+ struct riscv64_register *regs;
+
+ if (stkptr > STACKSIZE() && !INSTACK(stkptr, bt)) {
+ if (CRASHDEBUG(1))
+ error(WARNING, "stkptr: %lx is outside the kernel stack range\n", stkptr);
+ return FALSE;
+ }
+
+ regs = (struct riscv64_register *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
+
+ if (INSTACK(regs->regs[RISCV64_REGS_SP], bt) &&
+ INSTACK(regs->regs[RISCV64_REGS_FP], bt) &&
+ is_kernel_text(regs->regs[RISCV64_REGS_RA]) &&
+ is_kernel_text(regs->regs[RISCV64_REGS_EPC]) &&
+ ((regs->regs[RISCV64_REGS_STATUS] >> 8) & 0x1) && // sstatus.SPP != 0
+ !((regs->regs[RISCV64_REGS_CAUSE] >> 63) & 0x1 ) && // scause.Interrupt != 1
+ !(regs->regs[RISCV64_REGS_CAUSE] == 0x00000008UL)) { // scause != ecall from U-mode
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static int
+riscv64_dump_kernel_eframes(struct bt_info *bt)
+{
+ ulong ptr;
+ int count;
+
+ /*
+ * use old_regs to avoid the identical contiguous kernel exception frames
+ * created by Linux handle_exception() path ending at riscv_crash_save_regs()
+ */
+ struct riscv64_register *regs, *old_regs;
+
+ count = 0;
+ old_regs = NULL;
+
+ for (ptr = bt->stackbase; ptr < bt->stacktop - SIZE(pt_regs); ptr++) {
+
+ regs = (struct riscv64_register *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(ptr))];
+
+ if (riscv64_is_kernel_exception_frame(bt, ptr)){
+ if (!old_regs || (old_regs &&
+ memcmp(old_regs, regs, sizeof(struct riscv64_register))) != 0){
+ old_regs = regs;
+ fprintf(fp, "\nKERNEL-MODE EXCEPTION FRAME AT: %lx\n", ptr);
+ riscv64_print_exception_frame(bt, ptr, KERNEL_MODE);
+ count++;
+ }
+ }
+ }
+
+ return count;
+}
+
+static int
+riscv64_eframe_search(struct bt_info *bt)
+{
+ ulong ptr;
+ int count;
+
+ count = riscv64_dump_kernel_eframes(bt);
+
+ if (is_kernel_thread(bt->tc->task))
+ return count;
+
+ ptr = bt->stacktop - SIZE(pt_regs);
+ fprintf(fp, "%sUSER-MODE EXCEPTION FRAME AT: %lx\n", count++ ? "\n" : "", ptr);
+ riscv64_print_exception_frame(bt, ptr, USER_MODE);
+
+ return count;
}
#else /* !RISCV64 */
--
2.41.0

View File

@ -0,0 +1,436 @@
From 12fbed3280a147a40e572808b660aa838f3ca372 Mon Sep 17 00:00:00 2001
From: Song Shuai <songshuaishuai@tinylab.org>
Date: Wed, 13 Dec 2023 17:45:07 +0800
Subject: [PATCH 3/6] RISCV64: Add per-cpu IRQ stacks support
This patch introduces per-cpu IRQ stacks for RISCV64 to let
"bt" do backtrace on it and 'bt -E' search eframes on it,
and the 'help -m' command displays the addresses of each
per-cpu IRQ stack.
TEST: a vmcore dumped via hacking the handle_irq_event_percpu()
(Why not use the lkdtm INT_HW_IRQ_EN EXCEPTION?
There is a deadlock[1] in the crash_kexec path if that is used.)
crash> bt
PID: 0 TASK: ffffffff8140db00 CPU: 0 COMMAND: "swapper/0"
#0 [ff20000000003e60] __handle_irq_event_percpu at ffffffff8006462e
#1 [ff20000000003ed0] handle_irq_event_percpu at ffffffff80064702
#2 [ff20000000003ef0] handle_irq_event at ffffffff8006477c
#3 [ff20000000003f20] handle_fasteoi_irq at ffffffff80068664
#4 [ff20000000003f50] generic_handle_domain_irq at ffffffff80063988
#5 [ff20000000003f60] plic_handle_irq at ffffffff8046633e
#6 [ff20000000003fb0] generic_handle_domain_irq at ffffffff80063988
#7 [ff20000000003fc0] riscv_intc_irq at ffffffff80465f8e
#8 [ff20000000003fd0] handle_riscv_irq at ffffffff808361e8
PC: ffffffff80837314 [default_idle_call+50]
RA: ffffffff80837310 [default_idle_call+46]
SP: ffffffff81403da0 CAUSE: 8000000000000009
epc : ffffffff80837314 ra : ffffffff80837310 sp : ffffffff81403da0
gp : ffffffff814ef848 tp : ffffffff8140db00 t0 : ff2000000004bb18
t1 : 0000000000032c73 t2 : ffffffff81200a48 s0 : ffffffff81403db0
s1 : 0000000000000000 a0 : 0000000000000004 a1 : 0000000000000000
a2 : ff6000009f1e7000 a3 : 0000000000002304 a4 : ffffffff80c1c2d8
a5 : 0000000000000000 a6 : ff6000001fe01958 a7 : 00002496ea89dbf1
s2 : ffffffff814f0220 s3 : 0000000000000001 s4 : 000000000000003f
s5 : ffffffff814f03d8 s6 : 0000000000000000 s7 : ffffffff814f00d0
s8 : ffffffff81526f10 s9 : ffffffff80c1d880 s10: 0000000000000000
s11: 0000000000000001 t3 : 0000000000003392 t4 : 0000000000000000
t5 : 0000000000000000 t6 : 0000000000000040
status: 0000000200000120 badaddr: 0000000000000000
cause: 8000000000000009 orig_a0: ffffffff80837310
--- <IRQ stack> ---
#9 [ffffffff81403da0] default_idle_call at ffffffff80837314
#10 [ffffffff81403db0] do_idle at ffffffff8004d0a0
#11 [ffffffff81403e40] cpu_startup_entry at ffffffff8004d21e
#12 [ffffffff81403e60] kernel_init at ffffffff8083746a
#13 [ffffffff81403e70] arch_post_acpi_subsys_init at ffffffff80a006d8
#14 [ffffffff81403e80] console_on_rootfs at ffffffff80a00c92
crash>
crash> bt -E
CPU 0 IRQ STACK:
KERNEL-MODE EXCEPTION FRAME AT: ff20000000003a48
PC: ffffffff8006462e [__handle_irq_event_percpu+30]
RA: ffffffff80064702 [handle_irq_event_percpu+18]
SP: ff20000000003e60 CAUSE: 000000000000000d
epc : ffffffff8006462e ra : ffffffff80064702 sp : ff20000000003e60
gp : ffffffff814ef848 tp : ffffffff8140db00 t0 : 0000000000046600
t1 : ffffffff80836464 t2 : ffffffff81200a48 s0 : ff20000000003ed0
s1 : 0000000000000000 a0 : 0000000000000000 a1 : 0000000000000118
a2 : 0000000000000052 a3 : 0000000000000000 a4 : 0000000000000000
a5 : 0000000000010001 a6 : ff6000001fe01958 a7 : 00002496ea89dbf1
s2 : ff60000000941ab0 s3 : ffffffff814a0658 s4 : ff60000000089230
s5 : ffffffff814a0518 s6 : ffffffff814a0620 s7 : ffffffff80e5f0f8
s8 : ffffffff80fc50b0 s9 : ffffffff80c1d880 s10: 0000000000000000
s11: 0000000000000001 t3 : 0000000000003392 t4 : 0000000000000000
t5 : 0000000000000000 t6 : 0000000000000040
status: 0000000200000100 badaddr: 0000000000000078
cause: 000000000000000d orig_a0: ff20000000003ea0
CPU 1 IRQ STACK: (none found)
crash>
crash> help -m
<snip>
machspec: ced1e0
irq_stack_size: 16384
irq_stacks[0]: ff20000000000000
irq_stacks[1]: ff20000000008000
crash>
[1]: https://lore.kernel.org/linux-riscv/20231208111015.173237-1-songshuaishuai@tinylab.org/
Signed-off-by: Song Shuai <songshuaishuai@tinylab.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 7 +-
help.c | 8 +--
riscv64.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 198 insertions(+), 12 deletions(-)
diff --git a/defs.h b/defs.h
index 9cf9501348ed..b71cdbd01b8d 100644
--- a/defs.h
+++ b/defs.h
@@ -3643,6 +3643,8 @@ typedef signed int s32;
#define pmd_index_l5_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define pte_index_l5_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+/* machdep->flags */
+#define KSYMS_START (0x1)
#define VM_L3_4K (0x2)
#define VM_L3_2M (0x4)
#define VM_L3_1G (0x8)
@@ -3652,6 +3654,7 @@ typedef signed int s32;
#define VM_L5_4K (0x80)
#define VM_L5_2M (0x100)
#define VM_L5_1G (0x200)
+#define IRQ_STACKS (0x400)
#define VM_FLAGS (VM_L3_4K | VM_L3_2M | VM_L3_1G | \
VM_L4_4K | VM_L4_2M | VM_L4_1G | \
@@ -7027,8 +7030,6 @@ struct riscv64_unwind_frame {
ulong pc;
};
-#define KSYMS_START (0x1)
-
struct machine_specific {
ulong phys_base;
ulong page_offset;
@@ -7058,6 +7059,8 @@ struct machine_specific {
ulong struct_page_size;
struct riscv64_register *crash_task_regs;
+ ulong irq_stack_size;
+ ulong *irq_stacks;
};
/* from arch/riscv/include/asm/pgtable-bits.h */
#define _PAGE_PRESENT (machdep->machspec->_page_present)
diff --git a/help.c b/help.c
index d80e843703c1..a4319dd2a717 100644
--- a/help.c
+++ b/help.c
@@ -1938,10 +1938,10 @@ char *help_bt[] = {
" fails or the -t option starts too high in the process stack).",
" -l show file and line number of each stack trace text location.",
" -e search the stack for possible kernel and user mode exception frames.",
-" -E search the IRQ stacks (x86, x86_64, arm64, and ppc64), and the",
-" exception stacks (x86_64) for possible exception frames; all other",
-" arguments except for -c will be ignored since this is not a context-",
-" sensitive operation.",
+" -E search the IRQ stacks (x86, x86_64, arm64, riscv64 and ppc64), and",
+" the exception stacks (x86_64) for possible exception frames; all",
+" other arguments except for -c will be ignored since this is not a",
+" context-sensitive operation.",
" -f display all stack data contained in a frame; this option can be",
" used to determine the arguments passed to each function; on ia64,",
" the argument register contents are dumped.",
diff --git a/riscv64.c b/riscv64.c
index 6097c0029ccc..a26b8a43cb29 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -33,6 +33,7 @@ static int riscv64_uvtop(struct task_context *tc, ulong vaddr,
static int riscv64_kvtop(struct task_context *tc, ulong kvaddr,
physaddr_t *paddr, int verbose);
static void riscv64_cmd_mach(void);
+static void riscv64_irq_stack_init(void);
static void riscv64_stackframe_init(void);
static void riscv64_back_trace_cmd(struct bt_info *bt);
static int riscv64_eframe_search(struct bt_info *bt);
@@ -54,9 +55,15 @@ static void riscv64_get_va_bits(struct machine_specific *ms);
static void riscv64_get_struct_page_size(struct machine_specific *ms);
static void riscv64_print_exception_frame(struct bt_info *, ulong , int );
static int riscv64_is_kernel_exception_frame(struct bt_info *, ulong );
+static int riscv64_on_irq_stack(int , ulong);
+static int riscv64_on_process_stack(struct bt_info *, ulong );
+static void riscv64_set_process_stack(struct bt_info *);
+static void riscv64_set_irq_stack(struct bt_info *);
#define REG_FMT "%016lx"
#define SZ_2G 0x80000000
+#define USER_MODE (0)
+#define KERNEL_MODE (1)
/*
* Holds registers during the crash.
@@ -191,11 +198,14 @@ riscv64_verify_symbol(const char *name, ulong value, char type)
void
riscv64_dump_machdep_table(ulong arg)
{
- int others = 0;
+ const struct machine_specific *ms = machdep->machspec;
+ int others = 0, i = 0;
fprintf(fp, " flags: %lx (", machdep->flags);
if (machdep->flags & KSYMS_START)
fprintf(fp, "%sKSYMS_START", others++ ? "|" : "");
+ if (machdep->flags & IRQ_STACKS)
+ fprintf(fp, "%sIRQ_STACKS", others++ ? "|" : "");
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
@@ -251,6 +261,15 @@ riscv64_dump_machdep_table(ulong arg)
fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits);
fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root);
fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec);
+ if (machdep->flags & IRQ_STACKS) {
+ fprintf(fp, " irq_stack_size: %ld\n", ms->irq_stack_size);
+ for (i = 0; i < kt->cpus; i++)
+ fprintf(fp, " irq_stacks[%d]: %lx\n",
+ i, ms->irq_stacks[i]);
+ } else {
+ fprintf(fp, " irq_stack_size: (unused)\n");
+ fprintf(fp, " irq_stacks: (unused)\n");
+ }
}
static ulong
@@ -665,6 +684,111 @@ riscv64_display_full_frame(struct bt_info *bt, struct riscv64_unwind_frame *curr
fprintf(fp, "\n");
}
+/*
+ * Gather IRQ stack values.
+ */
+static void
+riscv64_irq_stack_init(void)
+{
+ int i;
+ struct syment *sp;
+ struct gnu_request request, *req;
+ struct machine_specific *ms = machdep->machspec;
+ ulong p, sz;
+ req = &request;
+
+ if (symbol_exists("irq_stack_ptr") &&
+ (sp = per_cpu_symbol_search("irq_stack_ptr")) &&
+ get_symbol_type("irq_stack_ptr", NULL, req)) {
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "irq_stack_ptr: \n");
+ fprintf(fp, " type: %x, %s\n",
+ (int)req->typecode,
+ (req->typecode == TYPE_CODE_PTR) ?
+ "TYPE_CODE_PTR" : "other");
+ fprintf(fp, " target_typecode: %x, %s\n",
+ (int)req->target_typecode,
+ req->target_typecode == TYPE_CODE_INT ?
+ "TYPE_CODE_INT" : "other");
+ fprintf(fp, " target_length: %ld\n",
+ req->target_length);
+ fprintf(fp, " length: %ld\n", req->length);
+ }
+
+ if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong)))))
+ error(FATAL, "cannot malloc irq_stack addresses\n");
+
+ /*
+ * find IRQ_STACK_SIZE (i.e. THREAD_SIZE) via thread_union.stack
+ * or set STACKSIZE() as default.
+ */
+ if (MEMBER_EXISTS("thread_union", "stack")) {
+ if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
+ ms->irq_stack_size = sz;
+ } else
+ ms->irq_stack_size = machdep->stacksize;
+
+ machdep->flags |= IRQ_STACKS;
+
+ for (i = 0; i < kt->cpus; i++) {
+ p = kt->__per_cpu_offset[i] + sp->value;
+ if (CRASHDEBUG(1))
+ fprintf(fp, " IRQ stack pointer[%d] is %lx\n", i, p);
+ readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong),
+ "IRQ stack pointer", RETURN_ON_ERROR);
+ }
+ }
+}
+
+static int
+riscv64_on_irq_stack(int cpu, ulong stkptr)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong * stacks = ms->irq_stacks;
+ ulong stack_size = ms->irq_stack_size;
+
+ if ((cpu >= kt->cpus) || (stacks == NULL) || !stack_size)
+ return FALSE;
+
+ if ((stkptr >= stacks[cpu]) &&
+ (stkptr < (stacks[cpu] + stack_size)))
+ return TRUE;
+
+ return FALSE;
+}
+
+static int
+riscv64_on_process_stack(struct bt_info *bt, ulong stkptr)
+{
+ ulong stackbase, stacktop;
+
+ stackbase = GET_STACKBASE(bt->task);
+ stacktop = GET_STACKTOP(bt->task);
+
+ if ((stkptr >= stackbase) && (stkptr < stacktop))
+ return TRUE;
+
+ return FALSE;
+}
+
+static void
+riscv64_set_irq_stack(struct bt_info *bt)
+{
+ struct machine_specific *ms = machdep->machspec;
+
+ bt->stackbase = ms->irq_stacks[bt->tc->processor];
+ bt->stacktop = bt->stackbase + ms->irq_stack_size;
+ alter_stackbuf(bt);
+}
+
+static void
+riscv64_set_process_stack(struct bt_info *bt)
+{
+ bt->stackbase = GET_STACKBASE(bt->task);
+ bt->stacktop = GET_STACKTOP(bt->task);
+ alter_stackbuf(bt);
+}
+
static void
riscv64_stackframe_init(void)
{
@@ -751,7 +875,7 @@ riscv64_back_trace_cmd(struct bt_info *bt)
{
struct riscv64_unwind_frame current, previous;
struct stackframe curr_frame;
- struct riscv64_register * regs;
+ struct riscv64_register *regs, *irq_regs;
int level = 0;
if (bt->flags & BT_REGS_NOT_FOUND)
@@ -759,6 +883,11 @@ riscv64_back_trace_cmd(struct bt_info *bt)
regs = (struct riscv64_register *) bt->machdep;
+ if (riscv64_on_irq_stack(bt->tc->processor, bt->frameptr)) {
+ riscv64_set_irq_stack(bt);
+ bt->flags |= BT_IRQSTACK;
+ }
+
current.pc = bt->instptr;
current.sp = bt->stkptr;
current.fp = bt->frameptr;
@@ -813,6 +942,35 @@ riscv64_back_trace_cmd(struct bt_info *bt)
current.fp = previous.fp;
current.sp = previous.sp;
+ /*
+ * When backtracing to do_irq(), find the original FP of do_irq()
+ * and then use the saved pt_regs in process stack to continue
+ */
+ if ((bt->flags & BT_IRQSTACK) &&
+ !riscv64_on_irq_stack(bt->tc->processor, current.fp)){
+ if (riscv64_on_process_stack(bt, current.fp)){
+
+ frameptr = (struct stackframe *)current.fp - 1;
+
+ if (!readmem((ulong)frameptr, KVADDR, &curr_frame,
+ sizeof(curr_frame), "get do_irq stack frame", RETURN_ON_ERROR))
+ return;
+
+ riscv64_set_process_stack(bt);
+
+ irq_regs = (struct riscv64_register *)
+ &bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(curr_frame.fp))];
+
+ current.pc = irq_regs->regs[RISCV64_REGS_EPC];
+ current.fp = irq_regs->regs[RISCV64_REGS_FP];
+ current.sp = irq_regs->regs[RISCV64_REGS_SP];
+
+ bt->flags &= ~BT_IRQSTACK;
+ riscv64_print_exception_frame(bt, curr_frame.fp, KERNEL_MODE);
+ fprintf(fp, "--- <IRQ stack> ---\n");
+ }
+ }
+
if (CRASHDEBUG(8))
fprintf(fp, "next %d pc %#lx sp %#lx fp %lx\n",
level, current.pc, current.sp, current.fp);
@@ -1423,6 +1581,8 @@ riscv64_init(int when)
case POST_GDB:
machdep->section_size_bits = _SECTION_SIZE_BITS;
machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
+
+ riscv64_irq_stack_init();
riscv64_stackframe_init();
riscv64_page_type_init();
@@ -1518,9 +1678,6 @@ riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp)
riscv64_dump_pt_regs(regs, ofp, 0);
}
-#define USER_MODE (0)
-#define KERNEL_MODE (1)
-
static void
riscv64_print_exception_frame(struct bt_info *bt, ulong ptr, int mode)
{
@@ -1639,7 +1796,33 @@ static int
riscv64_eframe_search(struct bt_info *bt)
{
ulong ptr;
- int count;
+ int count, c;
+ struct machine_specific *ms = machdep->machspec;
+
+ if (bt->flags & BT_EFRAME_SEARCH2) {
+ if (!(machdep->flags & IRQ_STACKS))
+ error(FATAL, "IRQ stacks do not exist in this kernel\n");
+
+ for (c = 0; c < kt->cpus; c++) {
+ if ((bt->flags & BT_CPUMASK) &&
+ !(NUM_IN_BITMAP(bt->cpumask, c)))
+ continue;
+
+ fprintf(fp, "CPU %d IRQ STACK: ", c);
+ bt->stackbase = ms->irq_stacks[c];
+ bt->stacktop = bt->stackbase + ms->irq_stack_size;
+ alter_stackbuf(bt);
+
+ count = riscv64_dump_kernel_eframes(bt);
+
+ if (count)
+ fprintf(fp, "\n");
+ else
+ fprintf(fp, "(none found)\n\n");
+ }
+
+ return 0;
+ }
count = riscv64_dump_kernel_eframes(bt);
--
2.41.0

View File

@ -0,0 +1,287 @@
From a69496279133705f095f790a9b3425266f88b1d4 Mon Sep 17 00:00:00 2001
From: Song Shuai <songshuaishuai@tinylab.org>
Date: Wed, 13 Dec 2023 17:45:08 +0800
Subject: [PATCH 4/6] RISCV64: Add per-cpu overflow stacks support
The patch introduces per-cpu overflow stacks for RISCV64 to let
"bt" do backtrace on it and the 'help -m' command displays the
address of each per-cpu overflow stack.
TEST: a lkdtm DIRECT EXHAUST_STACK vmcore
crash> bt
PID: 1 TASK: ff600000000d8000 CPU: 1 COMMAND: "sh"
#0 [ff6000001fc501c0] riscv_crash_save_regs at ffffffff8000a1dc
#1 [ff6000001fc50320] panic at ffffffff808773ec
#2 [ff6000001fc50380] walk_stackframe at ffffffff800056da
PC: ffffffff80876a34 [memset+96]
RA: ffffffff80563dc0 [recursive_loop+68]
SP: ff2000000000fd50 CAUSE: 000000000000000f
epc : ffffffff80876a34 ra : ffffffff80563dc0 sp : ff2000000000fd50
gp : ffffffff81515d38 tp : 0000000000000000 t0 : ff2000000000fd58
t1 : ff600000000d88c8 t2 : 6143203a6d74646b s0 : ff20000000010190
s1 : 0000000000000012 a0 : ff2000000000fd58 a1 : 1212121212121212
a2 : 0000000000000400 a3 : ff20000000010158 a4 : 0000000000000000
a5 : 725bedba92260900 a6 : 000000000130e0f0 a7 : 0000000000000000
s2 : ff2000000000fd58 s3 : ffffffff815170d8 s4 : ff20000000013e60
s5 : 000000000000000e s6 : ff20000000013e60 s7 : 0000000000000000
s8 : ff60000000861000 s9 : 00007fffc3641694 s10: 00007fffc3641690
s11: 00005555796ed240 t3 : 0000000000010297 t4 : ffffffff80c17810
t5 : ffffffff8195e7b8 t6 : ff20000000013b18
status: 0000000200000120 badaddr: ff2000000000fd58
cause: 000000000000000f orig_a0: 0000000000000000
--- <OVERFLOW stack> ---
#3 [ff2000000000fd50] memset at ffffffff80876a34
#4 [ff20000000010190] recursive_loop at ffffffff80563e16
#5 [ff200000000105d0] recursive_loop at ffffffff80563e16
< recursive_loop ...>
#16 [ff20000000013490] recursive_loop at ffffffff80563e16
#17 [ff200000000138d0] recursive_loop at ffffffff80563e16
#18 [ff20000000013d10] lkdtm_EXHAUST_STACK at ffffffff8088005e
#19 [ff20000000013d30] lkdtm_do_action at ffffffff80563292
#20 [ff20000000013d40] direct_entry at ffffffff80563474
#21 [ff20000000013d70] full_proxy_write at ffffffff8032fb3a
#22 [ff20000000013db0] vfs_write at ffffffff801d6414
#23 [ff20000000013e60] ksys_write at ffffffff801d67b8
#24 [ff20000000013eb0] __riscv_sys_write at ffffffff801d6832
#25 [ff20000000013ec0] do_trap_ecall_u at ffffffff80884a20
crash>
crash> help -m
<snip>
irq_stack_size: 16384
irq_stacks[0]: ff20000000000000
irq_stacks[1]: ff20000000008000
overflow_stack_size: 4096
overflow_stacks[0]: ff6000001fa7a510
overflow_stacks[1]: ff6000001fc4f510
crash>
Signed-off-by: Song Shuai <songshuaishuai@tinylab.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 6 +++
riscv64.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 118 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index b71cdbd01b8d..2a29c07305f2 100644
--- a/defs.h
+++ b/defs.h
@@ -3655,6 +3655,9 @@ typedef signed int s32;
#define VM_L5_2M (0x100)
#define VM_L5_1G (0x200)
#define IRQ_STACKS (0x400)
+#define OVERFLOW_STACKS (0x800)
+
+#define RISCV64_OVERFLOW_STACK_SIZE (1 << 12)
#define VM_FLAGS (VM_L3_4K | VM_L3_2M | VM_L3_1G | \
VM_L4_4K | VM_L4_2M | VM_L4_1G | \
@@ -7061,6 +7064,9 @@ struct machine_specific {
struct riscv64_register *crash_task_regs;
ulong irq_stack_size;
ulong *irq_stacks;
+
+ ulong overflow_stack_size;
+ ulong *overflow_stacks;
};
/* from arch/riscv/include/asm/pgtable-bits.h */
#define _PAGE_PRESENT (machdep->machspec->_page_present)
diff --git a/riscv64.c b/riscv64.c
index a26b8a43cb29..98bf02a59b12 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -34,6 +34,7 @@ static int riscv64_kvtop(struct task_context *tc, ulong kvaddr,
physaddr_t *paddr, int verbose);
static void riscv64_cmd_mach(void);
static void riscv64_irq_stack_init(void);
+static void riscv64_overflow_stack_init(void);
static void riscv64_stackframe_init(void);
static void riscv64_back_trace_cmd(struct bt_info *bt);
static int riscv64_eframe_search(struct bt_info *bt);
@@ -59,6 +60,8 @@ static int riscv64_on_irq_stack(int , ulong);
static int riscv64_on_process_stack(struct bt_info *, ulong );
static void riscv64_set_process_stack(struct bt_info *);
static void riscv64_set_irq_stack(struct bt_info *);
+static int riscv64_on_overflow_stack(int, ulong);
+static void riscv64_set_overflow_stack(struct bt_info *);
#define REG_FMT "%016lx"
#define SZ_2G 0x80000000
@@ -206,6 +209,8 @@ riscv64_dump_machdep_table(ulong arg)
fprintf(fp, "%sKSYMS_START", others++ ? "|" : "");
if (machdep->flags & IRQ_STACKS)
fprintf(fp, "%sIRQ_STACKS", others++ ? "|" : "");
+ if (machdep->flags & OVERFLOW_STACKS)
+ fprintf(fp, "%sOVERFLOW_STACKS", others++ ? "|" : "");
fprintf(fp, ")\n");
fprintf(fp, " kvbase: %lx\n", machdep->kvbase);
@@ -270,6 +275,15 @@ riscv64_dump_machdep_table(ulong arg)
fprintf(fp, " irq_stack_size: (unused)\n");
fprintf(fp, " irq_stacks: (unused)\n");
}
+ if (machdep->flags & OVERFLOW_STACKS) {
+ fprintf(fp, " overflow_stack_size: %ld\n", ms->overflow_stack_size);
+ for (i = 0; i < kt->cpus; i++)
+ fprintf(fp, " overflow_stacks[%d]: %lx\n",
+ i, ms->overflow_stacks[i]);
+ } else {
+ fprintf(fp, " overflow_stack_size: (unused)\n");
+ fprintf(fp, " overflow_stacks: (unused)\n");
+ }
}
static ulong
@@ -684,6 +698,48 @@ riscv64_display_full_frame(struct bt_info *bt, struct riscv64_unwind_frame *curr
fprintf(fp, "\n");
}
+
+/*
+ * Gather Overflow stack values.
+ */
+static void
+riscv64_overflow_stack_init(void)
+{
+ int i;
+ struct syment *sp;
+ struct gnu_request request, *req;
+ struct machine_specific *ms = machdep->machspec;
+ req = &request;
+
+ if (symbol_exists("overflow_stack") &&
+ (sp = per_cpu_symbol_search("overflow_stack")) &&
+ get_symbol_type("overflow_stack", NULL, req)) {
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "overflow_stack: \n");
+ fprintf(fp, " type: %x, %s\n",
+ (int)req->typecode,
+ (req->typecode == TYPE_CODE_ARRAY) ?
+ "TYPE_CODE_ARRAY" : "other");
+ fprintf(fp, " target_typecode: %x, %s\n",
+ (int)req->target_typecode,
+ req->target_typecode == TYPE_CODE_INT ?
+ "TYPE_CODE_INT" : "other");
+ fprintf(fp, " target_length: %ld\n",
+ req->target_length);
+ fprintf(fp, " length: %ld\n", req->length);
+ }
+
+ if (!(ms->overflow_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong)))))
+ error(FATAL, "cannot malloc overflow_stack addresses\n");
+
+ ms->overflow_stack_size = RISCV64_OVERFLOW_STACK_SIZE;
+ machdep->flags |= OVERFLOW_STACKS;
+
+ for (i = 0; i < kt->cpus; i++)
+ ms->overflow_stacks[i] = kt->__per_cpu_offset[i] + sp->value;
+ }
+}
+
/*
* Gather IRQ stack values.
*/
@@ -757,6 +813,23 @@ riscv64_on_irq_stack(int cpu, ulong stkptr)
return FALSE;
}
+static int
+riscv64_on_overflow_stack(int cpu, ulong stkptr)
+{
+ struct machine_specific *ms = machdep->machspec;
+ ulong * stacks = ms->overflow_stacks;
+ ulong stack_size = ms->overflow_stack_size;
+
+ if ((cpu >= kt->cpus) || (stacks == NULL) || !stack_size)
+ return FALSE;
+
+ if ((stkptr >= stacks[cpu]) &&
+ (stkptr < (stacks[cpu] + stack_size)))
+ return TRUE;
+
+ return FALSE;
+}
+
static int
riscv64_on_process_stack(struct bt_info *bt, ulong stkptr)
{
@@ -781,6 +854,16 @@ riscv64_set_irq_stack(struct bt_info *bt)
alter_stackbuf(bt);
}
+static void
+riscv64_set_overflow_stack(struct bt_info *bt)
+{
+ struct machine_specific *ms = machdep->machspec;
+
+ bt->stackbase = ms->overflow_stacks[bt->tc->processor];
+ bt->stacktop = bt->stackbase + ms->overflow_stack_size;
+ alter_stackbuf(bt);
+}
+
static void
riscv64_set_process_stack(struct bt_info *bt)
{
@@ -875,7 +958,7 @@ riscv64_back_trace_cmd(struct bt_info *bt)
{
struct riscv64_unwind_frame current, previous;
struct stackframe curr_frame;
- struct riscv64_register *regs, *irq_regs;
+ struct riscv64_register *regs, *irq_regs, *overflow_regs;
int level = 0;
if (bt->flags & BT_REGS_NOT_FOUND)
@@ -888,6 +971,11 @@ riscv64_back_trace_cmd(struct bt_info *bt)
bt->flags |= BT_IRQSTACK;
}
+ if (riscv64_on_overflow_stack(bt->tc->processor, bt->frameptr)) {
+ riscv64_set_overflow_stack(bt);
+ bt->flags |= BT_OVERFLOW_STACK;
+ }
+
current.pc = bt->instptr;
current.sp = bt->stkptr;
current.fp = bt->frameptr;
@@ -971,6 +1059,28 @@ riscv64_back_trace_cmd(struct bt_info *bt)
}
}
+ /*
+ * When backtracing to handle_kernel_stack_overflow()
+ * use pt_regs saved in overflow stack to continue
+ */
+ if ((bt->flags & BT_OVERFLOW_STACK) &&
+ !riscv64_on_overflow_stack(bt->tc->processor, current.fp)) {
+
+ overflow_regs = (struct riscv64_register *)
+ &bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(current.sp))];
+
+ riscv64_print_exception_frame(bt, current.sp, KERNEL_MODE);
+
+ current.pc = overflow_regs->regs[RISCV64_REGS_EPC];
+ current.fp = overflow_regs->regs[RISCV64_REGS_FP];
+ current.sp = overflow_regs->regs[RISCV64_REGS_SP];
+
+ riscv64_set_process_stack(bt);
+
+ bt->flags &= ~BT_OVERFLOW_STACK;
+ fprintf(fp, "--- <OVERFLOW stack> ---\n");
+ }
+
if (CRASHDEBUG(8))
fprintf(fp, "next %d pc %#lx sp %#lx fp %lx\n",
level, current.pc, current.sp, current.fp);
@@ -1583,6 +1693,7 @@ riscv64_init(int when)
machdep->max_physmem_bits = _MAX_PHYSMEM_BITS;
riscv64_irq_stack_init();
+ riscv64_overflow_stack_init();
riscv64_stackframe_init();
riscv64_page_type_init();
--
2.41.0

View File

@ -0,0 +1,63 @@
From aed1b7d3a064112d5c34eff81fa9ca0c50c5c782 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Tue, 16 Jan 2024 17:00:48 +0900
Subject: [PATCH 5/6] x86_64: Fix "bt" command not printing stack trace enough
On recent x86_64 kernels, the check of caller function (BT_CHECK_CALLER)
does not work correctly due to inappropriate direct_call_targets. As a
result, the correct frame is ignored and the remaining frames will be
truncated.
Skip the caller check if ORC unwinder is available, as the check is not
necessary with it.
Without the patch:
crash> bt 493113
PID: 493113 TASK: ff2e34ecbd3ca2c0 CPU: 27 COMMAND: "sriov_fec_daemo"
#0 [ff77abc4e81cfb08] __schedule at ffffffff81b239cb
#1 [ff77abc4e81cfb70] schedule at ffffffff81b23e2d
#2 [ff77abc4e81cfb88] schedule_timeout at ffffffff81b2c9e8
RIP: 000000000047cdbb RSP: 000000c0000975a8 RFLAGS: 00000216
...
With the patch:
crash> bt 493113
PID: 493113 TASK: ff2e34ecbd3ca2c0 CPU: 27 COMMAND: "sriov_fec_daemo"
#0 [ff77abc4e81cfb08] __schedule at ffffffff81b239cb
#1 [ff77abc4e81cfb70] schedule at ffffffff81b23e2d
#2 [ff77abc4e81cfb88] schedule_timeout at ffffffff81b2c9e8
#3 [ff77abc4e81cfbf0] __wait_for_common at ffffffff81b24abb
#4 [ff77abc4e81cfc68] vfio_unregister_group_dev at ffffffffc10e76ae [vfio]
#5 [ff77abc4e81cfca8] vfio_pci_core_unregister_device at ffffffffc11bb599 [vfio_pci_core]
#6 [ff77abc4e81cfcc0] vfio_pci_remove at ffffffffc103e045 [vfio_pci]
#7 [ff77abc4e81cfcd0] pci_device_remove at ffffffff815d7513
...
Reported-by: Crystal Wood <crwood@redhat.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
x86_64.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/x86_64.c b/x86_64.c
index f59991f8c4c5..502817d3b2bd 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -3342,6 +3342,13 @@ x86_64_print_stack_entry(struct bt_info *bt, FILE *ofp, int level,
bt->call_target = name;
+ /*
+ * The caller check below does not work correctly for some kernels,
+ * so skip it if ORC unwinder is available.
+ */
+ if (machdep->flags & ORC)
+ return result;
+
if (is_direct_call_target(bt)) {
if (CRASHDEBUG(2))
fprintf(ofp, "< enable BT_CHECK_CALLER for %s >\n",
--
2.41.0

View File

@ -0,0 +1,88 @@
From 28891d1127542dbb2d5ba16c575e14e741ed73ef Mon Sep 17 00:00:00 2001
From: Tao Liu <ltao@redhat.com>
Date: Thu, 4 Jan 2024 09:20:27 +0800
Subject: [PATCH 6/6] symbols: skip the module if the given address is not
within its address range
Previously, to find a module symbol and its offset by an arbitrary address,
all symbols within the module were iterated in ascending address order
until the last symbol with a smaller address was found.
However, if the address is not within the module address range, e.g.
the address is higher than the module's last symbol's address, then
the module can safely be skipped, because iterating over its symbols is
unnecessary. This speeds up kernel module symbol lookup and improves
the overall performance.
Without the patch:
$ time echo "bt 8993" | ~/crash-dev/crash vmcore vmlinux
crash> bt 8993
PID: 8993 TASK: ffff927569cc2100 CPU: 2 COMMAND: "WriterPool0"
#0 [ffff927569cd76f0] __schedule at ffffffffb3db78d8
#1 [ffff927569cd7758] schedule_preempt_disabled at ffffffffb3db8bf9
#2 [ffff927569cd7768] __mutex_lock_slowpath at ffffffffb3db6ca7
#3 [ffff927569cd77c0] mutex_lock at ffffffffb3db602f
#4 [ffff927569cd77d8] ucache_retrieve at ffffffffc0cf4409 [secfs2]
...snip the stacktrace of the same module...
#11 [ffff927569cd7ba0] cskal_path_vfs_getattr_nosec at ffffffffc05cae76 [falcon_kal]
...snip...
#13 [ffff927569cd7c40] _ZdlPv at ffffffffc086e751 [falcon_lsm_serviceable]
...snip...
#20 [ffff927569cd7ef8] unload_network_ops_symbols at ffffffffc06f11c0 [falcon_lsm_pinned_14713]
#21 [ffff927569cd7f50] system_call_fastpath at ffffffffb3dc539a
RIP: 00007f2b28ed4023 RSP: 00007f2a45fe7f80 RFLAGS: 00000206
RAX: 0000000000000012 RBX: 00007f2a68302e00 RCX: 00007f2a682546d8
RDX: 0000000000000826 RSI: 00007eb57ea6a000 RDI: 00000000000000e3
RBP: 00007eb57ea6a000 R8: 0000000000000826 R9: 00000002670bdfd2
R10: 00000002670bdfd2 R11: 0000000000000293 R12: 00000002670bdfd2
R13: 00007f29d501a480 R14: 0000000000000826 R15: 00000002670bdfd2
ORIG_RAX: 0000000000000012 CS: 0033 SS: 002b
crash>
real 7m14.826s
user 7m12.502s
sys 0m1.091s
With the patch:
$ time echo "bt 8993" | ~/crash-dev/crash vmcore vmlinux
crash> bt 8993
PID: 8993 TASK: ffff927569cc2100 CPU: 2 COMMAND: "WriterPool0"
#0 [ffff927569cd76f0] __schedule at ffffffffb3db78d8
#1 [ffff927569cd7758] schedule_preempt_disabled at ffffffffb3db8bf9
...snip the same output...
crash>
real 0m8.827s
user 0m7.896s
sys 0m0.938s
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
symbols.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/symbols.c b/symbols.c
index 5d919910164e..88a3fd156cb5 100644
--- a/symbols.c
+++ b/symbols.c
@@ -5561,7 +5561,7 @@ value_search_module_6_4(ulong value, ulong *offset)
sp = lm->symtable[t];
sp_end = lm->symend[t];
- if (value < sp->value)
+ if (value < sp->value || value > sp_end->value)
continue;
splast = NULL;
@@ -5646,6 +5646,9 @@ retry:
if (sp->value > value) /* invalid -- between modules */
break;
+ if (sp_end->value < value) /* not within the module */
+ continue;
+
/*
* splast will contain the last module symbol encountered.
* Note: "__insmod_"-type symbols will be set in splast only
--
2.41.0

View File

@ -33,6 +33,12 @@ Patch12: 0011-RISCV64-Fix-bt-output-when-no-ra-on-the-stack-top.patch
Patch13: 0012-arm64-rewrite-the-arm64_get_vmcoreinfo_ul-to-arm64_g.patch Patch13: 0012-arm64-rewrite-the-arm64_get_vmcoreinfo_ul-to-arm64_g.patch
Patch14: 0013-help.c-Remove-kmem-l-help-messages.patch Patch14: 0013-help.c-Remove-kmem-l-help-messages.patch
Patch15: 0014-x86_64-check-bt-bptr-before-calculate-framesize.patch Patch15: 0014-x86_64-check-bt-bptr-before-calculate-framesize.patch
Patch16: 0001-arm64-support-HW-Tag-Based-KASAN-MTE-mode.patch
Patch17: 0002-RISCV64-Add-support-for-bt-e-option.patch
Patch18: 0003-RISCV64-Add-per-cpu-IRQ-stacks-support.patch
Patch19: 0004-RISCV64-Add-per-cpu-overflow-stacks-support.patch
Patch20: 0005-x86_64-Fix-bt-command-not-printing-stack-trace-enoug.patch
Patch21: 0006-symbols-skip-the-module-if-the-given-address-is-not-.patch
%description %description
The core analysis suite is a self-contained tool that can be used to The core analysis suite is a self-contained tool that can be used to
@ -68,6 +74,12 @@ offered by Mission Critical Linux, or the LKCD kernel patch.
%patch -P 13 -p1 %patch -P 13 -p1
%patch -P 14 -p1 %patch -P 14 -p1
%patch -P 15 -p1 %patch -P 15 -p1
%patch -P 16 -p1
%patch -P 17 -p1
%patch -P 18 -p1
%patch -P 19 -p1
%patch -P 20 -p1
%patch -P 21 -p1
%build %build