From 21671081261962aace4085ba18ec369e941f2df4 Mon Sep 17 00:00:00 2001 From: Lianbo Jiang Date: Tue, 23 Jan 2024 15:40:23 +0800 Subject: [PATCH] Update to the latest commit <28891d112754> Signed-off-by: Lianbo Jiang --- ...-support-HW-Tag-Based-KASAN-MTE-mode.patch | 151 ++++++ ...-RISCV64-Add-support-for-bt-e-option.patch | 334 ++++++++++++++ ...SCV64-Add-per-cpu-IRQ-stacks-support.patch | 436 ++++++++++++++++++ ...-Add-per-cpu-overflow-stacks-support.patch | 287 ++++++++++++ ...mmand-not-printing-stack-trace-enoug.patch | 63 +++ ...-module-if-the-given-address-is-not-.patch | 88 ++++ crash.spec | 12 + 7 files changed, 1371 insertions(+) create mode 100644 0001-arm64-support-HW-Tag-Based-KASAN-MTE-mode.patch create mode 100644 0002-RISCV64-Add-support-for-bt-e-option.patch create mode 100644 0003-RISCV64-Add-per-cpu-IRQ-stacks-support.patch create mode 100644 0004-RISCV64-Add-per-cpu-overflow-stacks-support.patch create mode 100644 0005-x86_64-Fix-bt-command-not-printing-stack-trace-enoug.patch create mode 100644 0006-symbols-skip-the-module-if-the-given-address-is-not-.patch diff --git a/0001-arm64-support-HW-Tag-Based-KASAN-MTE-mode.patch b/0001-arm64-support-HW-Tag-Based-KASAN-MTE-mode.patch new file mode 100644 index 0000000..063f936 --- /dev/null +++ b/0001-arm64-support-HW-Tag-Based-KASAN-MTE-mode.patch @@ -0,0 +1,151 @@ +From edb2bd52885ccc2fbe3e0825efe0ac55951a7710 Mon Sep 17 00:00:00 2001 +From: "qiwu.chen@transsion.com" +Date: Fri, 22 Dec 2023 03:30:33 +0000 +Subject: [PATCH 1/6] arm64: support HW Tag-Based KASAN (MTE) mode + +Kernel commit 2e903b914797 ("kasan, arm64: implement HW_TAGS runtime") +introduced Hardware Tag-Based KASAN (MTE) mode for ARMv8.5 and later +CPUs, which uses the Top Byte Ignore (TBI) feature of arm64 CPUs to +store a pointer tag in the top byte of kernel pointers. + +Currently, crash utility cannot load MTE ramdump due to access invalid +HW Tag-Based kernel virtual addresses. Here's the example error message: + + please wait... 
(gathering kmem slab cache data) + crash: invalid kernel virtual address: f1ffff80c000201c type: "kmem_cache objsize/object_size" + please wait... (gathering task table data) + crash: invalid kernel virtual address: f9ffff8239c2cde0 type: "xa_node shift" + +This patch replaces the original generic_is_kvaddr() with arm64_is_kvaddr(), +which checks the validity for a HW Tag-Based kvaddr. mte_tag_reset() is +used to convert a Tag-Based kvaddr to untagged kvaddr in arm64_VTOP() +and arm64_IS_VMALLOC_ADDR(). + +Signed-off-by: chenqiwu +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + arm64.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- + defs.h | 1 + + 2 files changed, 50 insertions(+), 3 deletions(-) + +diff --git a/arm64.c b/arm64.c +index 57965c6cb3c8..6ab10ca9b5be 100644 +--- a/arm64.c ++++ b/arm64.c +@@ -102,6 +102,41 @@ struct kernel_range { + static struct kernel_range *arm64_get_va_range(struct machine_specific *ms); + static void arm64_get_struct_page_size(struct machine_specific *ms); + ++/* mte tag shift bit */ ++#define MTE_TAG_SHIFT 56 ++/* native kernel pointers tag */ ++#define KASAN_TAG_KERNEL 0xFF ++/* minimum value for random tags */ ++#define KASAN_TAG_MIN 0xF0 ++/* left shift the tag to MTE_TAG_SHIFT bit */ ++#define mte_tag_shifted(tag) ((ulong)(tag) << MTE_TAG_SHIFT) ++/* get the top byte value of the original kvaddr */ ++#define mte_tag_get(addr) (unsigned char)((ulong)(addr) >> MTE_TAG_SHIFT) ++/* reset the top byte to get an untagged kvaddr */ ++#define mte_tag_reset(addr) (((ulong)addr & ~mte_tag_shifted(KASAN_TAG_KERNEL)) | \ ++ mte_tag_shifted(KASAN_TAG_KERNEL)) ++ ++static inline bool is_mte_kvaddr(ulong addr) ++{ ++ /* check for ARM64_MTE enabled */ ++ if (!(machdep->flags & ARM64_MTE)) ++ return false; ++ ++ /* check the validity of HW Tag-Based kvaddr */ ++ if (mte_tag_get(addr) >= KASAN_TAG_MIN && mte_tag_get(addr) < KASAN_TAG_KERNEL) ++ return true; ++ ++ return false; ++} ++ ++static int 
arm64_is_kvaddr(ulong addr) ++{ ++ if (is_mte_kvaddr(addr)) ++ return (mte_tag_reset(addr) >= (ulong)(machdep->kvbase)); ++ ++ return (addr >= (ulong)(machdep->kvbase)); ++} ++ + static void arm64_calc_kernel_start(void) + { + struct machine_specific *ms = machdep->machspec; +@@ -182,6 +217,9 @@ arm64_init(int when) + if (kernel_symbol_exists("kimage_voffset")) + machdep->flags |= NEW_VMEMMAP; + ++ if (kernel_symbol_exists("cpu_enable_mte")) ++ machdep->flags |= ARM64_MTE; ++ + if (!machdep->pagesize && arm64_get_vmcoreinfo(&value, "PAGESIZE", NUM_DEC)) + machdep->pagesize = (unsigned int)value; + +@@ -262,7 +300,7 @@ arm64_init(int when) + machdep->kvbase = ARM64_VA_START; + ms->userspace_top = ARM64_USERSPACE_TOP; + } +- machdep->is_kvaddr = generic_is_kvaddr; ++ machdep->is_kvaddr = arm64_is_kvaddr; + machdep->kvtop = arm64_kvtop; + + /* The defaults */ +@@ -975,6 +1013,8 @@ arm64_dump_machdep_table(ulong arg) + fprintf(fp, "%sFLIPPED_VM", others++ ? "|" : ""); + if (machdep->flags & HAS_PHYSVIRT_OFFSET) + fprintf(fp, "%sHAS_PHYSVIRT_OFFSET", others++ ? "|" : ""); ++ if (machdep->flags & ARM64_MTE) ++ fprintf(fp, "%sARM64_MTE", others++ ? 
"|" : ""); + fprintf(fp, ")\n"); + + fprintf(fp, " kvbase: %lx\n", machdep->kvbase); +@@ -1023,7 +1063,7 @@ arm64_dump_machdep_table(ulong arg) + fprintf(fp, " dis_filter: arm64_dis_filter()\n"); + fprintf(fp, " cmd_mach: arm64_cmd_mach()\n"); + fprintf(fp, " get_smp_cpus: arm64_get_smp_cpus()\n"); +- fprintf(fp, " is_kvaddr: generic_is_kvaddr()\n"); ++ fprintf(fp, " is_kvaddr: arm64_is_kvaddr()\n"); + fprintf(fp, " is_uvaddr: arm64_is_uvaddr()\n"); + fprintf(fp, " value_to_symbol: generic_machdep_value_to_symbol()\n"); + fprintf(fp, " init_kernel_pgd: arm64_init_kernel_pgd\n"); +@@ -1633,6 +1673,9 @@ ulong arm64_PTOV(ulong paddr) + ulong + arm64_VTOP(ulong addr) + { ++ if (is_mte_kvaddr(addr)) ++ addr = mte_tag_reset(addr); ++ + if (machdep->flags & NEW_VMEMMAP) { + if (machdep->machspec->VA_START && + (addr >= machdep->machspec->kimage_text) && +@@ -4562,7 +4605,10 @@ int + arm64_IS_VMALLOC_ADDR(ulong vaddr) + { + struct machine_specific *ms = machdep->machspec; +- ++ ++ if (is_mte_kvaddr(vaddr)) ++ vaddr = mte_tag_reset(vaddr); ++ + if ((machdep->flags & NEW_VMEMMAP) && + (vaddr >= machdep->machspec->kimage_text) && + (vaddr <= machdep->machspec->kimage_end)) +diff --git a/defs.h b/defs.h +index 20237b72a10b..aa8eba83b7f4 100644 +--- a/defs.h ++++ b/defs.h +@@ -3348,6 +3348,7 @@ typedef signed int s32; + #define FLIPPED_VM (0x400) + #define HAS_PHYSVIRT_OFFSET (0x800) + #define OVERFLOW_STACKS (0x1000) ++#define ARM64_MTE (0x2000) + + /* + * Get kimage_voffset from /dev/crash +-- +2.41.0 + diff --git a/0002-RISCV64-Add-support-for-bt-e-option.patch b/0002-RISCV64-Add-support-for-bt-e-option.patch new file mode 100644 index 0000000..4c3ac5d --- /dev/null +++ b/0002-RISCV64-Add-support-for-bt-e-option.patch @@ -0,0 +1,334 @@ +From d86dc6901ce76a0fc29022ed448a4baa83a47dd7 Mon Sep 17 00:00:00 2001 +From: Song Shuai +Date: Wed, 13 Dec 2023 17:45:06 +0800 +Subject: [PATCH 2/6] RISCV64: Add support for 'bt -e' option + +With this patch we can search the stack for 
possible kernel and user +mode exception frames via 'bt -e' command. + +TEST: a lkdtm DIRECT EXCEPTION vmcore + + crash> bt -e + PID: 1 TASK: ff600000000e0000 CPU: 1 COMMAND: "sh" + + KERNEL-MODE EXCEPTION FRAME AT: ff200000000138d8 + PC: ffffffff805303c0 [lkdtm_EXCEPTION+6] + RA: ffffffff8052fe36 [lkdtm_do_action+16] + SP: ff20000000013cf0 CAUSE: 000000000000000f + epc : ffffffff805303c0 ra : ffffffff8052fe36 sp : ff20000000013cf0 + gp : ffffffff814ef848 tp : ff600000000e0000 t0 : 6500000000000000 + t1 : 000000000000006c t2 : 6550203a6d74646b s0 : ff20000000013d00 + s1 : 000000000000000a a0 : ffffffff814aef40 a1 : c0000000ffffefff + a2 : 0000000000000010 a3 : 0000000000000001 a4 : 5d53ea10ca096e00 + a5 : ffffffff805303ba a6 : 0000000000000008 a7 : 0000000000000038 + s2 : ff60000001324000 s3 : ffffffff814aef40 s4 : ff20000000013e30 + s5 : 000000000000000a s6 : ff20000000013e30 s7 : ff600000000ce000 + s8 : 0000555560f0f8a8 s9 : 00007ffff497f6b4 s10: 00007ffff497f6b0 + s11: 0000555560fa30e0 t3 : ffffffff81502197 t4 : ffffffff81502197 + t5 : ffffffff81502198 t6 : ff20000000013b28 + status: 0000000200000120 badaddr: 0000000000000000 + cause: 000000000000000f orig_a0: 0000000000000000 + + USER-MODE EXCEPTION FRAME AT: ff20000000013ee0 + PC: 007fff8780431aff RA: 007fff877b168400 SP: 007ffff497f5b000 + ORIG_A0: 0000000000000100 SYSCALLNO: 0000000000004000 + epc : 007fff8780431aff ra : 007fff877b168400 sp : 007ffff497f5b000 + gp : 00555560f5134800 tp : 007fff8774378000 t0 : 0000000000100000 + t1 : 00555560e427bc00 t2 : 0000000000271000 s0 : 007ffff497f5e000 + s1 : 0000000000000a00 a0 : 0000000000000100 a1 : 00555560faa68000 + a2 : 0000000000000a00 a3 : 4000000000000000 a4 : 20000000000000a8 + a5 : 0000000000000054 a6 : 0000000000000400 a7 : 0000000000004000 + s2 : 00555560faa68000 s3 : 007fff878b33f800 s4 : 0000000000000a00 + s5 : 00555560faa68000 s6 : 0000000000000a00 s7 : 00555560f5131400 + s8 : 00555560f0f8a800 s9 : 007ffff497f6b400 s10: 007ffff497f6b000 + s11: 
00555560fa30e000 t3 : 007fff877af1fe00 t4 : 00555560fa6f2000 + t5 : 0000000000000100 t6 : 9e1fea5bf8683300 + status: 00000200004020b9 badaddr: 0000000000000000 + cause: 0000000000000800 orig_a0: 0000000000000100 + crash> + +Signed-off-by: Song Shuai +Signed-off-by: Lianbo Jiang +--- + defs.h | 15 +++-- + riscv64.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++----- + 2 files changed, 181 insertions(+), 25 deletions(-) + +diff --git a/defs.h b/defs.h +index aa8eba83b7f4..9cf9501348ed 100644 +--- a/defs.h ++++ b/defs.h +@@ -7011,17 +7011,16 @@ int riscv64_IS_VMALLOC_ADDR(ulong); + #define display_idt_table() \ + error(FATAL, "-d option is not applicable to RISCV64 architecture\n") + +-/* from arch/riscv/include/asm/ptrace.h */ ++/* ++ * regs[0,31] : struct user_regs_struct ++ * from arch/riscv/include/uapi/asm/ptrace.h ++ * regs[0,35] : struct pt_regs ++ * from arch/riscv/include/asm/ptrace.h ++ */ + struct riscv64_register { + ulong regs[36]; + }; + +-struct riscv64_pt_regs { +- ulong badvaddr; +- ulong cause; +- ulong epc; +-}; +- + struct riscv64_unwind_frame { + ulong fp; + ulong sp; +@@ -7085,6 +7084,8 @@ struct machine_specific { + #define RISCV64_REGS_RA 1 + #define RISCV64_REGS_SP 2 + #define RISCV64_REGS_FP 8 ++#define RISCV64_REGS_STATUS 32 ++#define RISCV64_REGS_CAUSE 34 + + #endif /* RISCV64 */ + +diff --git a/riscv64.c b/riscv64.c +index 872be594d72b..6097c0029ccc 100644 +--- a/riscv64.c ++++ b/riscv64.c +@@ -35,6 +35,7 @@ static int riscv64_kvtop(struct task_context *tc, ulong kvaddr, + static void riscv64_cmd_mach(void); + static void riscv64_stackframe_init(void); + static void riscv64_back_trace_cmd(struct bt_info *bt); ++static int riscv64_eframe_search(struct bt_info *bt); + static int riscv64_get_dumpfile_stack_frame(struct bt_info *bt, + ulong *nip, ulong *ksp); + static void riscv64_get_stack_frame(struct bt_info *bt, ulong *pcp, +@@ -51,6 +52,8 @@ static int riscv64_get_elf_notes(void); + static void riscv64_get_va_range(struct 
machine_specific *ms); + static void riscv64_get_va_bits(struct machine_specific *ms); + static void riscv64_get_struct_page_size(struct machine_specific *ms); ++static void riscv64_print_exception_frame(struct bt_info *, ulong , int ); ++static int riscv64_is_kernel_exception_frame(struct bt_info *, ulong ); + + #define REG_FMT "%016lx" + #define SZ_2G 0x80000000 +@@ -210,6 +213,7 @@ riscv64_dump_machdep_table(ulong arg) + machdep->memsize, machdep->memsize); + fprintf(fp, " bits: %d\n", machdep->bits); + fprintf(fp, " back_trace: riscv64_back_trace_cmd()\n"); ++ fprintf(fp, " eframe_search: riscv64_eframe_search()\n"); + fprintf(fp, " processor_speed: riscv64_processor_speed()\n"); + fprintf(fp, " uvtop: riscv64_uvtop()\n"); + fprintf(fp, " kvtop: riscv64_kvtop()\n"); +@@ -1398,6 +1402,7 @@ riscv64_init(int when) + machdep->cmd_mach = riscv64_cmd_mach; + machdep->get_stack_frame = riscv64_get_stack_frame; + machdep->back_trace = riscv64_back_trace_cmd; ++ machdep->eframe_search = riscv64_eframe_search; + + machdep->vmalloc_start = riscv64_vmalloc_start; + machdep->processor_speed = riscv64_processor_speed; +@@ -1452,25 +1457,10 @@ riscv64_init(int when) + } + } + +-/* +- * 'help -r' command output +- */ +-void +-riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp) ++/* bool pt_regs : pass 1 to dump pt_regs , pass 0 to dump user_regs_struct */ ++static void ++riscv64_dump_pt_regs(struct riscv64_register *regs, FILE *ofp, bool pt_regs) + { +- const struct machine_specific *ms = machdep->machspec; +- struct riscv64_register *regs; +- +- if (!ms->crash_task_regs) { +- error(INFO, "registers not collected for cpu %d\n", cpu); +- return; +- } +- +- regs = &ms->crash_task_regs[cpu]; +- if (!regs->regs[RISCV64_REGS_SP] && !regs->regs[RISCV64_REGS_EPC]) { +- error(INFO, "registers not collected for cpu %d\n", cpu); +- return; +- } + + /* Print riscv64 32 regs */ + fprintf(ofp, +@@ -1496,6 +1486,171 @@ riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp) + 
regs->regs[24], regs->regs[25], regs->regs[26], + regs->regs[27], regs->regs[28], regs->regs[29], + regs->regs[30], regs->regs[31]); ++ ++ if (pt_regs) ++ fprintf(ofp, ++ " status: " REG_FMT " badaddr: " REG_FMT "\n" ++ " cause: " REG_FMT " orig_a0: " REG_FMT "\n", ++ regs->regs[32], regs->regs[33], regs->regs[34], ++ regs->regs[35]); ++} ++ ++/* ++ * 'help -r' command output ++ */ ++void ++riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp) ++{ ++ const struct machine_specific *ms = machdep->machspec; ++ struct riscv64_register *regs; ++ ++ if (!ms->crash_task_regs) { ++ error(INFO, "registers not collected for cpu %d\n", cpu); ++ return; ++ } ++ ++ regs = &ms->crash_task_regs[cpu]; ++ if (!regs->regs[RISCV64_REGS_SP] && !regs->regs[RISCV64_REGS_EPC]) { ++ error(INFO, "registers not collected for cpu %d\n", cpu); ++ return; ++ } ++ ++ riscv64_dump_pt_regs(regs, ofp, 0); ++} ++ ++#define USER_MODE (0) ++#define KERNEL_MODE (1) ++ ++static void ++riscv64_print_exception_frame(struct bt_info *bt, ulong ptr, int mode) ++{ ++ ++ struct syment *sp; ++ ulong PC, RA, SP, offset; ++ struct riscv64_register *regs; ++ ++ regs = (struct riscv64_register *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(ptr))]; ++ ++ PC = regs->regs[RISCV64_REGS_EPC]; ++ RA = regs->regs[RISCV64_REGS_RA]; ++ SP = regs->regs[RISCV64_REGS_SP]; ++ ++ switch (mode) { ++ case USER_MODE: ++ fprintf(fp, ++ " PC: %016lx RA: %016lx SP: %016lx\n" ++ " ORIG_A0: %016lx SYSCALLNO: %016lx\n", ++ PC, RA, SP, regs->regs[35], regs->regs[17]); ++ ++ break; ++ ++ case KERNEL_MODE: ++ fprintf(fp, " PC: %016lx ", PC); ++ if (is_kernel_text(PC) && (sp = value_search(PC, &offset))) { ++ fprintf(fp, "[%s", sp->name); ++ if (offset) ++ fprintf(fp, (*gdb_output_radix == 16) ? 
++ "+0x%lx" : "+%ld", offset); ++ fprintf(fp, "]\n"); ++ } else ++ fprintf(fp, "[unknown or invalid address]\n"); ++ ++ fprintf(fp, " RA: %016lx ", RA); ++ if (is_kernel_text(RA) && (sp = value_search(RA, &offset))) { ++ fprintf(fp, "[%s", sp->name); ++ if (offset) ++ fprintf(fp, (*gdb_output_radix == 16) ? ++ "+0x%lx" : "+%ld", offset); ++ fprintf(fp, "]\n"); ++ } else ++ fprintf(fp, "[unknown or invalid address]\n"); ++ ++ fprintf(fp, " SP: %016lx CAUSE: %016lx\n", ++ SP, regs->regs[RISCV64_REGS_CAUSE]); ++ ++ break; ++ } ++ ++ riscv64_dump_pt_regs(regs, fp, 1); ++ ++} ++ ++static int ++riscv64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr) ++{ ++ struct riscv64_register *regs; ++ ++ if (stkptr > STACKSIZE() && !INSTACK(stkptr, bt)) { ++ if (CRASHDEBUG(1)) ++ error(WARNING, "stkptr: %lx is outside the kernel stack range\n", stkptr); ++ return FALSE; ++ } ++ ++ regs = (struct riscv64_register *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))]; ++ ++ if (INSTACK(regs->regs[RISCV64_REGS_SP], bt) && ++ INSTACK(regs->regs[RISCV64_REGS_FP], bt) && ++ is_kernel_text(regs->regs[RISCV64_REGS_RA]) && ++ is_kernel_text(regs->regs[RISCV64_REGS_EPC]) && ++ ((regs->regs[RISCV64_REGS_STATUS] >> 8) & 0x1) && // sstatus.SPP != 0 ++ !((regs->regs[RISCV64_REGS_CAUSE] >> 63) & 0x1 ) && // scause.Interrupt != 1 ++ !(regs->regs[RISCV64_REGS_CAUSE] == 0x00000008UL)) { // scause != ecall from U-mode ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++static int ++riscv64_dump_kernel_eframes(struct bt_info *bt) ++{ ++ ulong ptr; ++ int count; ++ ++ /* ++ * use old_regs to avoid the identical contiguous kernel exception frames ++ * created by Linux handle_exception() path ending at riscv_crash_save_regs() ++ */ ++ struct riscv64_register *regs, *old_regs; ++ ++ count = 0; ++ old_regs = NULL; ++ ++ for (ptr = bt->stackbase; ptr < bt->stacktop - SIZE(pt_regs); ptr++) { ++ ++ regs = (struct riscv64_register *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(ptr))]; ++ ++ if 
(riscv64_is_kernel_exception_frame(bt, ptr)){ ++ if (!old_regs || (old_regs && ++ memcmp(old_regs, regs, sizeof(struct riscv64_register))) != 0){ ++ old_regs = regs; ++ fprintf(fp, "\nKERNEL-MODE EXCEPTION FRAME AT: %lx\n", ptr); ++ riscv64_print_exception_frame(bt, ptr, KERNEL_MODE); ++ count++; ++ } ++ } ++ } ++ ++ return count; ++} ++ ++static int ++riscv64_eframe_search(struct bt_info *bt) ++{ ++ ulong ptr; ++ int count; ++ ++ count = riscv64_dump_kernel_eframes(bt); ++ ++ if (is_kernel_thread(bt->tc->task)) ++ return count; ++ ++ ptr = bt->stacktop - SIZE(pt_regs); ++ fprintf(fp, "%sUSER-MODE EXCEPTION FRAME AT: %lx\n", count++ ? "\n" : "", ptr); ++ riscv64_print_exception_frame(bt, ptr, USER_MODE); ++ ++ return count; + } + + #else /* !RISCV64 */ +-- +2.41.0 + diff --git a/0003-RISCV64-Add-per-cpu-IRQ-stacks-support.patch b/0003-RISCV64-Add-per-cpu-IRQ-stacks-support.patch new file mode 100644 index 0000000..53068bf --- /dev/null +++ b/0003-RISCV64-Add-per-cpu-IRQ-stacks-support.patch @@ -0,0 +1,436 @@ +From 12fbed3280a147a40e572808b660aa838f3ca372 Mon Sep 17 00:00:00 2001 +From: Song Shuai +Date: Wed, 13 Dec 2023 17:45:07 +0800 +Subject: [PATCH 3/6] RISCV64: Add per-cpu IRQ stacks support + +This patch introduces per-cpu IRQ stacks for RISCV64 to let +"bt" do backtrace on it and 'bt -E' search eframes on it, +and the 'help -m' command displays the addresses of each +per-cpu IRQ stack. + +TEST: a vmcore dumped via hacking the handle_irq_event_percpu() +( Why not using lkdtm INT_HW_IRQ_EN EXCEPTION ? 
+ There is a deadlock[1] in crash_kexec path if use that) + + crash> bt + PID: 0 TASK: ffffffff8140db00 CPU: 0 COMMAND: "swapper/0" + #0 [ff20000000003e60] __handle_irq_event_percpu at ffffffff8006462e + #1 [ff20000000003ed0] handle_irq_event_percpu at ffffffff80064702 + #2 [ff20000000003ef0] handle_irq_event at ffffffff8006477c + #3 [ff20000000003f20] handle_fasteoi_irq at ffffffff80068664 + #4 [ff20000000003f50] generic_handle_domain_irq at ffffffff80063988 + #5 [ff20000000003f60] plic_handle_irq at ffffffff8046633e + #6 [ff20000000003fb0] generic_handle_domain_irq at ffffffff80063988 + #7 [ff20000000003fc0] riscv_intc_irq at ffffffff80465f8e + #8 [ff20000000003fd0] handle_riscv_irq at ffffffff808361e8 + PC: ffffffff80837314 [default_idle_call+50] + RA: ffffffff80837310 [default_idle_call+46] + SP: ffffffff81403da0 CAUSE: 8000000000000009 + epc : ffffffff80837314 ra : ffffffff80837310 sp : ffffffff81403da0 + gp : ffffffff814ef848 tp : ffffffff8140db00 t0 : ff2000000004bb18 + t1 : 0000000000032c73 t2 : ffffffff81200a48 s0 : ffffffff81403db0 + s1 : 0000000000000000 a0 : 0000000000000004 a1 : 0000000000000000 + a2 : ff6000009f1e7000 a3 : 0000000000002304 a4 : ffffffff80c1c2d8 + a5 : 0000000000000000 a6 : ff6000001fe01958 a7 : 00002496ea89dbf1 + s2 : ffffffff814f0220 s3 : 0000000000000001 s4 : 000000000000003f + s5 : ffffffff814f03d8 s6 : 0000000000000000 s7 : ffffffff814f00d0 + s8 : ffffffff81526f10 s9 : ffffffff80c1d880 s10: 0000000000000000 + s11: 0000000000000001 t3 : 0000000000003392 t4 : 0000000000000000 + t5 : 0000000000000000 t6 : 0000000000000040 + status: 0000000200000120 badaddr: 0000000000000000 + cause: 8000000000000009 orig_a0: ffffffff80837310 + --- --- + #9 [ffffffff81403da0] default_idle_call at ffffffff80837314 + #10 [ffffffff81403db0] do_idle at ffffffff8004d0a0 + #11 [ffffffff81403e40] cpu_startup_entry at ffffffff8004d21e + #12 [ffffffff81403e60] kernel_init at ffffffff8083746a + #13 [ffffffff81403e70] arch_post_acpi_subsys_init at 
ffffffff80a006d8 + #14 [ffffffff81403e80] console_on_rootfs at ffffffff80a00c92 + crash> + + crash> bt -E + CPU 0 IRQ STACK: + KERNEL-MODE EXCEPTION FRAME AT: ff20000000003a48 + PC: ffffffff8006462e [__handle_irq_event_percpu+30] + RA: ffffffff80064702 [handle_irq_event_percpu+18] + SP: ff20000000003e60 CAUSE: 000000000000000d + epc : ffffffff8006462e ra : ffffffff80064702 sp : ff20000000003e60 + gp : ffffffff814ef848 tp : ffffffff8140db00 t0 : 0000000000046600 + t1 : ffffffff80836464 t2 : ffffffff81200a48 s0 : ff20000000003ed0 + s1 : 0000000000000000 a0 : 0000000000000000 a1 : 0000000000000118 + a2 : 0000000000000052 a3 : 0000000000000000 a4 : 0000000000000000 + a5 : 0000000000010001 a6 : ff6000001fe01958 a7 : 00002496ea89dbf1 + s2 : ff60000000941ab0 s3 : ffffffff814a0658 s4 : ff60000000089230 + s5 : ffffffff814a0518 s6 : ffffffff814a0620 s7 : ffffffff80e5f0f8 + s8 : ffffffff80fc50b0 s9 : ffffffff80c1d880 s10: 0000000000000000 + s11: 0000000000000001 t3 : 0000000000003392 t4 : 0000000000000000 + t5 : 0000000000000000 t6 : 0000000000000040 + status: 0000000200000100 badaddr: 0000000000000078 + cause: 000000000000000d orig_a0: ff20000000003ea0 + + CPU 1 IRQ STACK: (none found) + + crash> + + crash> help -m + + machspec: ced1e0 + irq_stack_size: 16384 + irq_stacks[0]: ff20000000000000 + irq_stacks[1]: ff20000000008000 + crash> + +[1]: https://lore.kernel.org/linux-riscv/20231208111015.173237-1-songshuaishuai@tinylab.org/ + +Signed-off-by: Song Shuai +Signed-off-by: Lianbo Jiang +--- + defs.h | 7 +- + help.c | 8 +-- + riscv64.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- + 3 files changed, 198 insertions(+), 12 deletions(-) + +diff --git a/defs.h b/defs.h +index 9cf9501348ed..b71cdbd01b8d 100644 +--- a/defs.h ++++ b/defs.h +@@ -3643,6 +3643,8 @@ typedef signed int s32; + #define pmd_index_l5_4k(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) + #define pte_index_l5_4k(addr) (((addr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) + ++/* machdep->flags */ 
++#define KSYMS_START (0x1) + #define VM_L3_4K (0x2) + #define VM_L3_2M (0x4) + #define VM_L3_1G (0x8) +@@ -3652,6 +3654,7 @@ typedef signed int s32; + #define VM_L5_4K (0x80) + #define VM_L5_2M (0x100) + #define VM_L5_1G (0x200) ++#define IRQ_STACKS (0x400) + + #define VM_FLAGS (VM_L3_4K | VM_L3_2M | VM_L3_1G | \ + VM_L4_4K | VM_L4_2M | VM_L4_1G | \ +@@ -7027,8 +7030,6 @@ struct riscv64_unwind_frame { + ulong pc; + }; + +-#define KSYMS_START (0x1) +- + struct machine_specific { + ulong phys_base; + ulong page_offset; +@@ -7058,6 +7059,8 @@ struct machine_specific { + ulong struct_page_size; + + struct riscv64_register *crash_task_regs; ++ ulong irq_stack_size; ++ ulong *irq_stacks; + }; + /* from arch/riscv/include/asm/pgtable-bits.h */ + #define _PAGE_PRESENT (machdep->machspec->_page_present) +diff --git a/help.c b/help.c +index d80e843703c1..a4319dd2a717 100644 +--- a/help.c ++++ b/help.c +@@ -1938,10 +1938,10 @@ char *help_bt[] = { + " fails or the -t option starts too high in the process stack).", + " -l show file and line number of each stack trace text location.", + " -e search the stack for possible kernel and user mode exception frames.", +-" -E search the IRQ stacks (x86, x86_64, arm64, and ppc64), and the", +-" exception stacks (x86_64) for possible exception frames; all other", +-" arguments except for -c will be ignored since this is not a context-", +-" sensitive operation.", ++" -E search the IRQ stacks (x86, x86_64, arm64, riscv64 and ppc64), and", ++" the exception stacks (x86_64) for possible exception frames; all", ++" other arguments except for -c will be ignored since this is not a", ++" context-sensitive operation.", + " -f display all stack data contained in a frame; this option can be", + " used to determine the arguments passed to each function; on ia64,", + " the argument register contents are dumped.", +diff --git a/riscv64.c b/riscv64.c +index 6097c0029ccc..a26b8a43cb29 100644 +--- a/riscv64.c ++++ b/riscv64.c +@@ -33,6 +33,7 @@ static 
int riscv64_uvtop(struct task_context *tc, ulong vaddr, + static int riscv64_kvtop(struct task_context *tc, ulong kvaddr, + physaddr_t *paddr, int verbose); + static void riscv64_cmd_mach(void); ++static void riscv64_irq_stack_init(void); + static void riscv64_stackframe_init(void); + static void riscv64_back_trace_cmd(struct bt_info *bt); + static int riscv64_eframe_search(struct bt_info *bt); +@@ -54,9 +55,15 @@ static void riscv64_get_va_bits(struct machine_specific *ms); + static void riscv64_get_struct_page_size(struct machine_specific *ms); + static void riscv64_print_exception_frame(struct bt_info *, ulong , int ); + static int riscv64_is_kernel_exception_frame(struct bt_info *, ulong ); ++static int riscv64_on_irq_stack(int , ulong); ++static int riscv64_on_process_stack(struct bt_info *, ulong ); ++static void riscv64_set_process_stack(struct bt_info *); ++static void riscv64_set_irq_stack(struct bt_info *); + + #define REG_FMT "%016lx" + #define SZ_2G 0x80000000 ++#define USER_MODE (0) ++#define KERNEL_MODE (1) + + /* + * Holds registers during the crash. +@@ -191,11 +198,14 @@ riscv64_verify_symbol(const char *name, ulong value, char type) + void + riscv64_dump_machdep_table(ulong arg) + { +- int others = 0; ++ const struct machine_specific *ms = machdep->machspec; ++ int others = 0, i = 0; + + fprintf(fp, " flags: %lx (", machdep->flags); + if (machdep->flags & KSYMS_START) + fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); ++ if (machdep->flags & IRQ_STACKS) ++ fprintf(fp, "%sIRQ_STACKS", others++ ? 
"|" : ""); + fprintf(fp, ")\n"); + + fprintf(fp, " kvbase: %lx\n", machdep->kvbase); +@@ -251,6 +261,15 @@ riscv64_dump_machdep_table(ulong arg) + fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits); + fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root); + fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec); ++ if (machdep->flags & IRQ_STACKS) { ++ fprintf(fp, " irq_stack_size: %ld\n", ms->irq_stack_size); ++ for (i = 0; i < kt->cpus; i++) ++ fprintf(fp, " irq_stacks[%d]: %lx\n", ++ i, ms->irq_stacks[i]); ++ } else { ++ fprintf(fp, " irq_stack_size: (unused)\n"); ++ fprintf(fp, " irq_stacks: (unused)\n"); ++ } + } + + static ulong +@@ -665,6 +684,111 @@ riscv64_display_full_frame(struct bt_info *bt, struct riscv64_unwind_frame *curr + fprintf(fp, "\n"); + } + ++/* ++ * Gather IRQ stack values. ++ */ ++static void ++riscv64_irq_stack_init(void) ++{ ++ int i; ++ struct syment *sp; ++ struct gnu_request request, *req; ++ struct machine_specific *ms = machdep->machspec; ++ ulong p, sz; ++ req = &request; ++ ++ if (symbol_exists("irq_stack_ptr") && ++ (sp = per_cpu_symbol_search("irq_stack_ptr")) && ++ get_symbol_type("irq_stack_ptr", NULL, req)) { ++ if (CRASHDEBUG(1)) { ++ fprintf(fp, "irq_stack_ptr: \n"); ++ fprintf(fp, " type: %x, %s\n", ++ (int)req->typecode, ++ (req->typecode == TYPE_CODE_PTR) ? ++ "TYPE_CODE_PTR" : "other"); ++ fprintf(fp, " target_typecode: %x, %s\n", ++ (int)req->target_typecode, ++ req->target_typecode == TYPE_CODE_INT ? ++ "TYPE_CODE_INT" : "other"); ++ fprintf(fp, " target_length: %ld\n", ++ req->target_length); ++ fprintf(fp, " length: %ld\n", req->length); ++ } ++ ++ if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) ++ error(FATAL, "cannot malloc irq_stack addresses\n"); ++ ++ /* ++ * find IRQ_STACK_SIZE (i.e. THREAD_SIZE) via thread_union.stack ++ * or set STACKSIZE() as default. 
++ */ ++ if (MEMBER_EXISTS("thread_union", "stack")) { ++ if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0) ++ ms->irq_stack_size = sz; ++ } else ++ ms->irq_stack_size = machdep->stacksize; ++ ++ machdep->flags |= IRQ_STACKS; ++ ++ for (i = 0; i < kt->cpus; i++) { ++ p = kt->__per_cpu_offset[i] + sp->value; ++ if (CRASHDEBUG(1)) ++ fprintf(fp, " IRQ stack pointer[%d] is %lx\n", i, p); ++ readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong), ++ "IRQ stack pointer", RETURN_ON_ERROR); ++ } ++ } ++} ++ ++static int ++riscv64_on_irq_stack(int cpu, ulong stkptr) ++{ ++ struct machine_specific *ms = machdep->machspec; ++ ulong * stacks = ms->irq_stacks; ++ ulong stack_size = ms->irq_stack_size; ++ ++ if ((cpu >= kt->cpus) || (stacks == NULL) || !stack_size) ++ return FALSE; ++ ++ if ((stkptr >= stacks[cpu]) && ++ (stkptr < (stacks[cpu] + stack_size))) ++ return TRUE; ++ ++ return FALSE; ++} ++ ++static int ++riscv64_on_process_stack(struct bt_info *bt, ulong stkptr) ++{ ++ ulong stackbase, stacktop; ++ ++ stackbase = GET_STACKBASE(bt->task); ++ stacktop = GET_STACKTOP(bt->task); ++ ++ if ((stkptr >= stackbase) && (stkptr < stacktop)) ++ return TRUE; ++ ++ return FALSE; ++} ++ ++static void ++riscv64_set_irq_stack(struct bt_info *bt) ++{ ++ struct machine_specific *ms = machdep->machspec; ++ ++ bt->stackbase = ms->irq_stacks[bt->tc->processor]; ++ bt->stacktop = bt->stackbase + ms->irq_stack_size; ++ alter_stackbuf(bt); ++} ++ ++static void ++riscv64_set_process_stack(struct bt_info *bt) ++{ ++ bt->stackbase = GET_STACKBASE(bt->task); ++ bt->stacktop = GET_STACKTOP(bt->task); ++ alter_stackbuf(bt); ++} ++ + static void + riscv64_stackframe_init(void) + { +@@ -751,7 +875,7 @@ riscv64_back_trace_cmd(struct bt_info *bt) + { + struct riscv64_unwind_frame current, previous; + struct stackframe curr_frame; +- struct riscv64_register * regs; ++ struct riscv64_register *regs, *irq_regs; + int level = 0; + + if (bt->flags & BT_REGS_NOT_FOUND) +@@ -759,6 +883,11 @@ 
riscv64_back_trace_cmd(struct bt_info *bt) + + regs = (struct riscv64_register *) bt->machdep; + ++ if (riscv64_on_irq_stack(bt->tc->processor, bt->frameptr)) { ++ riscv64_set_irq_stack(bt); ++ bt->flags |= BT_IRQSTACK; ++ } ++ + current.pc = bt->instptr; + current.sp = bt->stkptr; + current.fp = bt->frameptr; +@@ -813,6 +942,35 @@ riscv64_back_trace_cmd(struct bt_info *bt) + current.fp = previous.fp; + current.sp = previous.sp; + ++ /* ++ * When backtracing to do_irq(), find the original FP of do_irq() ++ * and then use the saved pt_regs in process stack to continue ++ */ ++ if ((bt->flags & BT_IRQSTACK) && ++ !riscv64_on_irq_stack(bt->tc->processor, current.fp)){ ++ if (riscv64_on_process_stack(bt, current.fp)){ ++ ++ frameptr = (struct stackframe *)current.fp - 1; ++ ++ if (!readmem((ulong)frameptr, KVADDR, &curr_frame, ++ sizeof(curr_frame), "get do_irq stack frame", RETURN_ON_ERROR)) ++ return; ++ ++ riscv64_set_process_stack(bt); ++ ++ irq_regs = (struct riscv64_register *) ++ &bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(curr_frame.fp))]; ++ ++ current.pc = irq_regs->regs[RISCV64_REGS_EPC]; ++ current.fp = irq_regs->regs[RISCV64_REGS_FP]; ++ current.sp = irq_regs->regs[RISCV64_REGS_SP]; ++ ++ bt->flags &= ~BT_IRQSTACK; ++ riscv64_print_exception_frame(bt, curr_frame.fp, KERNEL_MODE); ++ fprintf(fp, "--- ---\n"); ++ } ++ } ++ + if (CRASHDEBUG(8)) + fprintf(fp, "next %d pc %#lx sp %#lx fp %lx\n", + level, current.pc, current.sp, current.fp); +@@ -1423,6 +1581,8 @@ riscv64_init(int when) + case POST_GDB: + machdep->section_size_bits = _SECTION_SIZE_BITS; + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; ++ ++ riscv64_irq_stack_init(); + riscv64_stackframe_init(); + riscv64_page_type_init(); + +@@ -1518,9 +1678,6 @@ riscv64_display_regs_from_elf_notes(int cpu, FILE *ofp) + riscv64_dump_pt_regs(regs, ofp, 0); + } + +-#define USER_MODE (0) +-#define KERNEL_MODE (1) +- + static void + riscv64_print_exception_frame(struct bt_info *bt, ulong ptr, int mode) + { +@@ -1639,7 
+1796,33 @@ static int + riscv64_eframe_search(struct bt_info *bt) + { + ulong ptr; +- int count; ++ int count, c; ++ struct machine_specific *ms = machdep->machspec; ++ ++ if (bt->flags & BT_EFRAME_SEARCH2) { ++ if (!(machdep->flags & IRQ_STACKS)) ++ error(FATAL, "IRQ stacks do not exist in this kernel\n"); ++ ++ for (c = 0; c < kt->cpus; c++) { ++ if ((bt->flags & BT_CPUMASK) && ++ !(NUM_IN_BITMAP(bt->cpumask, c))) ++ continue; ++ ++ fprintf(fp, "CPU %d IRQ STACK: ", c); ++ bt->stackbase = ms->irq_stacks[c]; ++ bt->stacktop = bt->stackbase + ms->irq_stack_size; ++ alter_stackbuf(bt); ++ ++ count = riscv64_dump_kernel_eframes(bt); ++ ++ if (count) ++ fprintf(fp, "\n"); ++ else ++ fprintf(fp, "(none found)\n\n"); ++ } ++ ++ return 0; ++ } + + count = riscv64_dump_kernel_eframes(bt); + +-- +2.41.0 + diff --git a/0004-RISCV64-Add-per-cpu-overflow-stacks-support.patch b/0004-RISCV64-Add-per-cpu-overflow-stacks-support.patch new file mode 100644 index 0000000..40ec82c --- /dev/null +++ b/0004-RISCV64-Add-per-cpu-overflow-stacks-support.patch @@ -0,0 +1,287 @@ +From a69496279133705f095f790a9b3425266f88b1d4 Mon Sep 17 00:00:00 2001 +From: Song Shuai +Date: Wed, 13 Dec 2023 17:45:08 +0800 +Subject: [PATCH 4/6] RISCV64: Add per-cpu overflow stacks support + +The patch introduces per-cpu overflow stacks for RISCV64 to let +"bt" do backtrace on it and the 'help -m' command displays the +addresses of each per-cpu overflow stack. 
+ +TEST: a lkdtm DIRECT EXHAUST_STACK vmcore + + crash> bt + PID: 1 TASK: ff600000000d8000 CPU: 1 COMMAND: "sh" + #0 [ff6000001fc501c0] riscv_crash_save_regs at ffffffff8000a1dc + #1 [ff6000001fc50320] panic at ffffffff808773ec + #2 [ff6000001fc50380] walk_stackframe at ffffffff800056da + PC: ffffffff80876a34 [memset+96] + RA: ffffffff80563dc0 [recursive_loop+68] + SP: ff2000000000fd50 CAUSE: 000000000000000f + epc : ffffffff80876a34 ra : ffffffff80563dc0 sp : ff2000000000fd50 + gp : ffffffff81515d38 tp : 0000000000000000 t0 : ff2000000000fd58 + t1 : ff600000000d88c8 t2 : 6143203a6d74646b s0 : ff20000000010190 + s1 : 0000000000000012 a0 : ff2000000000fd58 a1 : 1212121212121212 + a2 : 0000000000000400 a3 : ff20000000010158 a4 : 0000000000000000 + a5 : 725bedba92260900 a6 : 000000000130e0f0 a7 : 0000000000000000 + s2 : ff2000000000fd58 s3 : ffffffff815170d8 s4 : ff20000000013e60 + s5 : 000000000000000e s6 : ff20000000013e60 s7 : 0000000000000000 + s8 : ff60000000861000 s9 : 00007fffc3641694 s10: 00007fffc3641690 + s11: 00005555796ed240 t3 : 0000000000010297 t4 : ffffffff80c17810 + t5 : ffffffff8195e7b8 t6 : ff20000000013b18 + status: 0000000200000120 badaddr: ff2000000000fd58 + cause: 000000000000000f orig_a0: 0000000000000000 + --- --- + #3 [ff2000000000fd50] memset at ffffffff80876a34 + #4 [ff20000000010190] recursive_loop at ffffffff80563e16 + #5 [ff200000000105d0] recursive_loop at ffffffff80563e16 + < recursive_loop ...> + #16 [ff20000000013490] recursive_loop at ffffffff80563e16 + #17 [ff200000000138d0] recursive_loop at ffffffff80563e16 + #18 [ff20000000013d10] lkdtm_EXHAUST_STACK at ffffffff8088005e + #19 [ff20000000013d30] lkdtm_do_action at ffffffff80563292 + #20 [ff20000000013d40] direct_entry at ffffffff80563474 + #21 [ff20000000013d70] full_proxy_write at ffffffff8032fb3a + #22 [ff20000000013db0] vfs_write at ffffffff801d6414 + #23 [ff20000000013e60] ksys_write at ffffffff801d67b8 + #24 [ff20000000013eb0] __riscv_sys_write at ffffffff801d6832 + #25 
[ff20000000013ec0] do_trap_ecall_u at ffffffff80884a20 + crash> + + crash> help -m + + irq_stack_size: 16384 + irq_stacks[0]: ff20000000000000 + irq_stacks[1]: ff20000000008000 + overflow_stack_size: 4096 + overflow_stacks[0]: ff6000001fa7a510 + overflow_stacks[1]: ff6000001fc4f510 + crash> + +Signed-off-by: Song Shuai +Signed-off-by: Lianbo Jiang +--- + defs.h | 6 +++ + riscv64.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 118 insertions(+), 1 deletion(-) + +diff --git a/defs.h b/defs.h +index b71cdbd01b8d..2a29c07305f2 100644 +--- a/defs.h ++++ b/defs.h +@@ -3655,6 +3655,9 @@ typedef signed int s32; + #define VM_L5_2M (0x100) + #define VM_L5_1G (0x200) + #define IRQ_STACKS (0x400) ++#define OVERFLOW_STACKS (0x800) ++ ++#define RISCV64_OVERFLOW_STACK_SIZE (1 << 12) + + #define VM_FLAGS (VM_L3_4K | VM_L3_2M | VM_L3_1G | \ + VM_L4_4K | VM_L4_2M | VM_L4_1G | \ +@@ -7061,6 +7064,9 @@ struct machine_specific { + struct riscv64_register *crash_task_regs; + ulong irq_stack_size; + ulong *irq_stacks; ++ ++ ulong overflow_stack_size; ++ ulong *overflow_stacks; + }; + /* from arch/riscv/include/asm/pgtable-bits.h */ + #define _PAGE_PRESENT (machdep->machspec->_page_present) +diff --git a/riscv64.c b/riscv64.c +index a26b8a43cb29..98bf02a59b12 100644 +--- a/riscv64.c ++++ b/riscv64.c +@@ -34,6 +34,7 @@ static int riscv64_kvtop(struct task_context *tc, ulong kvaddr, + physaddr_t *paddr, int verbose); + static void riscv64_cmd_mach(void); + static void riscv64_irq_stack_init(void); ++static void riscv64_overflow_stack_init(void); + static void riscv64_stackframe_init(void); + static void riscv64_back_trace_cmd(struct bt_info *bt); + static int riscv64_eframe_search(struct bt_info *bt); +@@ -59,6 +60,8 @@ static int riscv64_on_irq_stack(int , ulong); + static int riscv64_on_process_stack(struct bt_info *, ulong ); + static void riscv64_set_process_stack(struct bt_info *); + static void riscv64_set_irq_stack(struct bt_info *); ++static int 
riscv64_on_overflow_stack(int, ulong); ++static void riscv64_set_overflow_stack(struct bt_info *); + + #define REG_FMT "%016lx" + #define SZ_2G 0x80000000 +@@ -206,6 +209,8 @@ riscv64_dump_machdep_table(ulong arg) + fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); + if (machdep->flags & IRQ_STACKS) + fprintf(fp, "%sIRQ_STACKS", others++ ? "|" : ""); ++ if (machdep->flags & OVERFLOW_STACKS) ++ fprintf(fp, "%sOVERFLOW_STACKS", others++ ? "|" : ""); + fprintf(fp, ")\n"); + + fprintf(fp, " kvbase: %lx\n", machdep->kvbase); +@@ -270,6 +275,15 @@ riscv64_dump_machdep_table(ulong arg) + fprintf(fp, " irq_stack_size: (unused)\n"); + fprintf(fp, " irq_stacks: (unused)\n"); + } ++ if (machdep->flags & OVERFLOW_STACKS) { ++ fprintf(fp, " overflow_stack_size: %ld\n", ms->overflow_stack_size); ++ for (i = 0; i < kt->cpus; i++) ++ fprintf(fp, " overflow_stacks[%d]: %lx\n", ++ i, ms->overflow_stacks[i]); ++ } else { ++ fprintf(fp, " overflow_stack_size: (unused)\n"); ++ fprintf(fp, " overflow_stacks: (unused)\n"); ++ } + } + + static ulong +@@ -684,6 +698,48 @@ riscv64_display_full_frame(struct bt_info *bt, struct riscv64_unwind_frame *curr + fprintf(fp, "\n"); + } + ++ ++/* ++ * Gather Overflow stack values. ++ */ ++static void ++riscv64_overflow_stack_init(void) ++{ ++ int i; ++ struct syment *sp; ++ struct gnu_request request, *req; ++ struct machine_specific *ms = machdep->machspec; ++ req = &request; ++ ++ if (symbol_exists("overflow_stack") && ++ (sp = per_cpu_symbol_search("overflow_stack")) && ++ get_symbol_type("overflow_stack", NULL, req)) { ++ if (CRASHDEBUG(1)) { ++ fprintf(fp, "overflow_stack: \n"); ++ fprintf(fp, " type: %x, %s\n", ++ (int)req->typecode, ++ (req->typecode == TYPE_CODE_ARRAY) ? ++ "TYPE_CODE_ARRAY" : "other"); ++ fprintf(fp, " target_typecode: %x, %s\n", ++ (int)req->target_typecode, ++ req->target_typecode == TYPE_CODE_INT ? 
++ "TYPE_CODE_INT" : "other"); ++ fprintf(fp, " target_length: %ld\n", ++ req->target_length); ++ fprintf(fp, " length: %ld\n", req->length); ++ } ++ ++ if (!(ms->overflow_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong))))) ++ error(FATAL, "cannot malloc overflow_stack addresses\n"); ++ ++ ms->overflow_stack_size = RISCV64_OVERFLOW_STACK_SIZE; ++ machdep->flags |= OVERFLOW_STACKS; ++ ++ for (i = 0; i < kt->cpus; i++) ++ ms->overflow_stacks[i] = kt->__per_cpu_offset[i] + sp->value; ++ } ++} ++ + /* + * Gather IRQ stack values. + */ +@@ -757,6 +813,23 @@ riscv64_on_irq_stack(int cpu, ulong stkptr) + return FALSE; + } + ++static int ++riscv64_on_overflow_stack(int cpu, ulong stkptr) ++{ ++ struct machine_specific *ms = machdep->machspec; ++ ulong * stacks = ms->overflow_stacks; ++ ulong stack_size = ms->overflow_stack_size; ++ ++ if ((cpu >= kt->cpus) || (stacks == NULL) || !stack_size) ++ return FALSE; ++ ++ if ((stkptr >= stacks[cpu]) && ++ (stkptr < (stacks[cpu] + stack_size))) ++ return TRUE; ++ ++ return FALSE; ++} ++ + static int + riscv64_on_process_stack(struct bt_info *bt, ulong stkptr) + { +@@ -781,6 +854,16 @@ riscv64_set_irq_stack(struct bt_info *bt) + alter_stackbuf(bt); + } + ++static void ++riscv64_set_overflow_stack(struct bt_info *bt) ++{ ++ struct machine_specific *ms = machdep->machspec; ++ ++ bt->stackbase = ms->overflow_stacks[bt->tc->processor]; ++ bt->stacktop = bt->stackbase + ms->overflow_stack_size; ++ alter_stackbuf(bt); ++} ++ + static void + riscv64_set_process_stack(struct bt_info *bt) + { +@@ -875,7 +958,7 @@ riscv64_back_trace_cmd(struct bt_info *bt) + { + struct riscv64_unwind_frame current, previous; + struct stackframe curr_frame; +- struct riscv64_register *regs, *irq_regs; ++ struct riscv64_register *regs, *irq_regs, *overflow_regs; + int level = 0; + + if (bt->flags & BT_REGS_NOT_FOUND) +@@ -888,6 +971,11 @@ riscv64_back_trace_cmd(struct bt_info *bt) + bt->flags |= BT_IRQSTACK; + } + ++ if 
(riscv64_on_overflow_stack(bt->tc->processor, bt->frameptr)) { ++ riscv64_set_overflow_stack(bt); ++ bt->flags |= BT_OVERFLOW_STACK; ++ } ++ + current.pc = bt->instptr; + current.sp = bt->stkptr; + current.fp = bt->frameptr; +@@ -971,6 +1059,28 @@ riscv64_back_trace_cmd(struct bt_info *bt) + } + } + ++ /* ++ * When backtracing to handle_kernel_stack_overflow() ++ * use pt_regs saved in overflow stack to continue ++ */ ++ if ((bt->flags & BT_OVERFLOW_STACK) && ++ !riscv64_on_overflow_stack(bt->tc->processor, current.fp)) { ++ ++ overflow_regs = (struct riscv64_register *) ++ &bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(current.sp))]; ++ ++ riscv64_print_exception_frame(bt, current.sp, KERNEL_MODE); ++ ++ current.pc = overflow_regs->regs[RISCV64_REGS_EPC]; ++ current.fp = overflow_regs->regs[RISCV64_REGS_FP]; ++ current.sp = overflow_regs->regs[RISCV64_REGS_SP]; ++ ++ riscv64_set_process_stack(bt); ++ ++ bt->flags &= ~BT_OVERFLOW_STACK; ++ fprintf(fp, "--- ---\n"); ++ } ++ + if (CRASHDEBUG(8)) + fprintf(fp, "next %d pc %#lx sp %#lx fp %lx\n", + level, current.pc, current.sp, current.fp); +@@ -1583,6 +1693,7 @@ riscv64_init(int when) + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; + + riscv64_irq_stack_init(); ++ riscv64_overflow_stack_init(); + riscv64_stackframe_init(); + riscv64_page_type_init(); + +-- +2.41.0 + diff --git a/0005-x86_64-Fix-bt-command-not-printing-stack-trace-enoug.patch b/0005-x86_64-Fix-bt-command-not-printing-stack-trace-enoug.patch new file mode 100644 index 0000000..f2d25dd --- /dev/null +++ b/0005-x86_64-Fix-bt-command-not-printing-stack-trace-enoug.patch @@ -0,0 +1,63 @@ +From aed1b7d3a064112d5c34eff81fa9ca0c50c5c782 Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Tue, 16 Jan 2024 17:00:48 +0900 +Subject: [PATCH 5/6] x86_64: Fix "bt" command not printing stack trace enough + +On recent x86_64 kernels, the check of caller function (BT_CHECK_CALLER) +does not work correctly due to inappropriate direct_call_targets. 
As a +result, the correct frame is ignored and the remaining frames will be +truncated. + +Skip the caller check if ORC unwinder is available, as the check is not +necessary with it. + +Without the patch: + crash> bt 493113 + PID: 493113 TASK: ff2e34ecbd3ca2c0 CPU: 27 COMMAND: "sriov_fec_daemo" + #0 [ff77abc4e81cfb08] __schedule at ffffffff81b239cb + #1 [ff77abc4e81cfb70] schedule at ffffffff81b23e2d + #2 [ff77abc4e81cfb88] schedule_timeout at ffffffff81b2c9e8 + RIP: 000000000047cdbb RSP: 000000c0000975a8 RFLAGS: 00000216 + ... + +With the patch: + crash> bt 493113 + PID: 493113 TASK: ff2e34ecbd3ca2c0 CPU: 27 COMMAND: "sriov_fec_daemo" + #0 [ff77abc4e81cfb08] __schedule at ffffffff81b239cb + #1 [ff77abc4e81cfb70] schedule at ffffffff81b23e2d + #2 [ff77abc4e81cfb88] schedule_timeout at ffffffff81b2c9e8 + #3 [ff77abc4e81cfbf0] __wait_for_common at ffffffff81b24abb + #4 [ff77abc4e81cfc68] vfio_unregister_group_dev at ffffffffc10e76ae [vfio] + #5 [ff77abc4e81cfca8] vfio_pci_core_unregister_device at ffffffffc11bb599 [vfio_pci_core] + #6 [ff77abc4e81cfcc0] vfio_pci_remove at ffffffffc103e045 [vfio_pci] + #7 [ff77abc4e81cfcd0] pci_device_remove at ffffffff815d7513 + ... + +Reported-by: Crystal Wood +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + x86_64.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/x86_64.c b/x86_64.c +index f59991f8c4c5..502817d3b2bd 100644 +--- a/x86_64.c ++++ b/x86_64.c +@@ -3342,6 +3342,13 @@ x86_64_print_stack_entry(struct bt_info *bt, FILE *ofp, int level, + + bt->call_target = name; + ++ /* ++ * The caller check below does not work correctly for some kernels, ++ * so skip it if ORC unwinder is available. 
++ */ ++ if (machdep->flags & ORC) ++ return result; ++ + if (is_direct_call_target(bt)) { + if (CRASHDEBUG(2)) + fprintf(ofp, "< enable BT_CHECK_CALLER for %s >\n", +-- +2.41.0 + diff --git a/0006-symbols-skip-the-module-if-the-given-address-is-not-.patch b/0006-symbols-skip-the-module-if-the-given-address-is-not-.patch new file mode 100644 index 0000000..bfe8734 --- /dev/null +++ b/0006-symbols-skip-the-module-if-the-given-address-is-not-.patch @@ -0,0 +1,88 @@ +From 28891d1127542dbb2d5ba16c575e14e741ed73ef Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Thu, 4 Jan 2024 09:20:27 +0800 +Subject: [PATCH 6/6] symbols: skip the module if the given address is not + within its address range + +Previously, to find a module symbol and its offset by an arbitrary address, +all symbols within the module were iterated in ascending address order +until the last symbol with a smaller address had been found. + +However, if the address is not within the module address range, e.g. +the address is higher than the module's last symbol's address, then +the module can safely be skipped, because its symbol iteration is +unnecessary. This speeds up kernel module symbol lookup and improves +the overall performance. + +Without the patch: + $ time echo "bt 8993" | ~/crash-dev/crash vmcore vmlinux + crash> bt 8993 + PID: 8993 TASK: ffff927569cc2100 CPU: 2 COMMAND: "WriterPool0" + #0 [ffff927569cd76f0] __schedule at ffffffffb3db78d8 + #1 [ffff927569cd7758] schedule_preempt_disabled at ffffffffb3db8bf9 + #2 [ffff927569cd7768] __mutex_lock_slowpath at ffffffffb3db6ca7 + #3 [ffff927569cd77c0] mutex_lock at ffffffffb3db602f + #4 [ffff927569cd77d8] ucache_retrieve at ffffffffc0cf4409 [secfs2] + ...snip the stacktrace of the same module... + #11 [ffff927569cd7ba0] cskal_path_vfs_getattr_nosec at ffffffffc05cae76 [falcon_kal] + ...snip... + #13 [ffff927569cd7c40] _ZdlPv at ffffffffc086e751 [falcon_lsm_serviceable] + ...snip...
+ #20 [ffff927569cd7ef8] unload_network_ops_symbols at ffffffffc06f11c0 [falcon_lsm_pinned_14713] + #21 [ffff927569cd7f50] system_call_fastpath at ffffffffb3dc539a + RIP: 00007f2b28ed4023 RSP: 00007f2a45fe7f80 RFLAGS: 00000206 + RAX: 0000000000000012 RBX: 00007f2a68302e00 RCX: 00007f2a682546d8 + RDX: 0000000000000826 RSI: 00007eb57ea6a000 RDI: 00000000000000e3 + RBP: 00007eb57ea6a000 R8: 0000000000000826 R9: 00000002670bdfd2 + R10: 00000002670bdfd2 R11: 0000000000000293 R12: 00000002670bdfd2 + R13: 00007f29d501a480 R14: 0000000000000826 R15: 00000002670bdfd2 + ORIG_RAX: 0000000000000012 CS: 0033 SS: 002b + crash> + real 7m14.826s + user 7m12.502s + sys 0m1.091s + +With the patch: + $ time echo "bt 8993" | ~/crash-dev/crash vmcore vmlinux + crash> bt 8993 + PID: 8993 TASK: ffff927569cc2100 CPU: 2 COMMAND: "WriterPool0" + #0 [ffff927569cd76f0] __schedule at ffffffffb3db78d8 + #1 [ffff927569cd7758] schedule_preempt_disabled at ffffffffb3db8bf9 + ...snip the same output... + crash> + real 0m8.827s + user 0m7.896s + sys 0m0.938s + +Signed-off-by: Tao Liu +Signed-off-by: Lianbo Jiang +--- + symbols.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/symbols.c b/symbols.c +index 5d919910164e..88a3fd156cb5 100644 +--- a/symbols.c ++++ b/symbols.c +@@ -5561,7 +5561,7 @@ value_search_module_6_4(ulong value, ulong *offset) + sp = lm->symtable[t]; + sp_end = lm->symend[t]; + +- if (value < sp->value) ++ if (value < sp->value || value > sp_end->value) + continue; + + splast = NULL; +@@ -5646,6 +5646,9 @@ retry: + if (sp->value > value) /* invalid -- between modules */ + break; + ++ if (sp_end->value < value) /* not within the module */ ++ continue; ++ + /* + * splast will contain the last module symbol encountered. 
+ * Note: "__insmod_"-type symbols will be set in splast only +-- +2.41.0 + diff --git a/crash.spec b/crash.spec index 39bf3ff..f655dce 100644 --- a/crash.spec +++ b/crash.spec @@ -33,6 +33,12 @@ Patch12: 0011-RISCV64-Fix-bt-output-when-no-ra-on-the-stack-top.patch Patch13: 0012-arm64-rewrite-the-arm64_get_vmcoreinfo_ul-to-arm64_g.patch Patch14: 0013-help.c-Remove-kmem-l-help-messages.patch Patch15: 0014-x86_64-check-bt-bptr-before-calculate-framesize.patch +Patch16: 0001-arm64-support-HW-Tag-Based-KASAN-MTE-mode.patch +Patch17: 0002-RISCV64-Add-support-for-bt-e-option.patch +Patch18: 0003-RISCV64-Add-per-cpu-IRQ-stacks-support.patch +Patch19: 0004-RISCV64-Add-per-cpu-overflow-stacks-support.patch +Patch20: 0005-x86_64-Fix-bt-command-not-printing-stack-trace-enoug.patch +Patch21: 0006-symbols-skip-the-module-if-the-given-address-is-not-.patch %description The core analysis suite is a self-contained tool that can be used to @@ -68,6 +74,12 @@ offered by Mission Critical Linux, or the LKCD kernel patch. %patch -P 13 -p1 %patch -P 14 -p1 %patch -P 15 -p1 +%patch -P 16 -p1 +%patch -P 17 -p1 +%patch -P 18 -p1 +%patch -P 19 -p1 +%patch -P 20 -p1 +%patch -P 21 -p1 %build