diff --git a/0001-vmware_guestdump-Version-7-support.patch b/0001-vmware_guestdump-Version-7-support.patch new file mode 100644 index 0000000..9a9d0cb --- /dev/null +++ b/0001-vmware_guestdump-Version-7-support.patch @@ -0,0 +1,125 @@ +From a18b8a7fb83ae79f5c0698063f26ec8a289cf90b Mon Sep 17 00:00:00 2001 +From: Alexey Makhalov +Date: Wed, 30 Apr 2025 21:54:27 +0000 +Subject: [PATCH 1/9] vmware_guestdump: Version 7 support + +ESXi 9.0 updated debug.guest format. CPU architecture type was +introduced and several fields of the header not used by the crash +were moved around. It is version 7 now. + +Make corresponding changes in debug.guest parser and keep it +backward compatible with older versions. + +Fix comment and log messages typos as well. + +Signed-off-by: Alexey Makhalov +--- + vmware_guestdump.c | 48 ++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 44 insertions(+), 4 deletions(-) + +diff --git a/vmware_guestdump.c b/vmware_guestdump.c +index 78f37fb..1a6ef9b 100644 +--- a/vmware_guestdump.c ++++ b/vmware_guestdump.c +@@ -30,6 +30,7 @@ + * 2. Number of Virtual CPUs (4 bytes) } - struct guestdumpheader + * 3. Reserved gap + * 4. Main Memory information - struct mainmeminfo{,_old} ++ * 5. Reserved gap #2. Only in v7+ + * (use get_vcpus_offset() to get total size of guestdumpheader) + * vcpus_offset: ---------\ + * 1. struct vcpu_state1 \ +@@ -111,6 +112,22 @@ struct vcpu_state2 { + uint8_t reserved3[65]; + } __attribute__((packed)); + ++typedef enum { ++ CPU_ARCH_AARCH64, ++ CPU_ARCH_X86, ++} cpu_arch; ++ ++/* ++ * Returns the size of reserved gap #2 in the header right after the Main Mem. ++ */ ++static inline long ++get_gap2_size(uint32_t version) ++{ ++ if (version == 7) ++ return 11; ++ return 0; ++} ++ + /* + * Returns the size of the guest dump header. 
+ */ +@@ -128,6 +145,9 @@ get_vcpus_offset(uint32_t version, int mem_holes) + return sizeof(struct guestdumpheader) + 14 + sizeof(struct mainmeminfo); + case 6: /* ESXi 8.0u2 */ + return sizeof(struct guestdumpheader) + 15 + sizeof(struct mainmeminfo); ++ case 7: /* ESXi 9.0 */ ++ return sizeof(struct guestdumpheader) + 8 + sizeof(struct mainmeminfo) + ++ get_gap2_size(version); + + } + return 0; +@@ -155,10 +175,10 @@ get_vcpu_gapsize(uint32_t version) + * + * guestdump (debug.guest) is a simplified version of the *.vmss which does + * not contain a full VM state, but minimal guest state, such as a memory +- * layout and CPUs state, needed for debugger. is_vmware_guestdump() ++ * layout and CPUs state, needed for the debugger. is_vmware_guestdump() + * and vmware_guestdump_init() functions parse guestdump header and + * populate vmss data structure (from vmware_vmss.c). In result, all +- * handlers (except mempry_dump) from vmware_vmss.c can be reused. ++ * handlers (except memory_dump) from vmware_vmss.c can be reused. + * + * debug.guest does not have a dedicated header magic or file format signature + * To probe debug.guest we need to perform series of validations. In addition, +@@ -225,7 +245,8 @@ is_vmware_guestdump(char *filename) + /* vcpu_offset adjustment for mem_holes is required only for version 1. */ + vcpus_offset = get_vcpus_offset(hdr.version, mmi.mem_holes); + } else { +- if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo), SEEK_SET) == -1) { ++ if (fseek(fp, vcpus_offset - sizeof(struct mainmeminfo) - get_gap2_size(hdr.version), ++ SEEK_SET) == -1) { + if (CRASHDEBUG(1)) + error(INFO, LOGPRX"Failed to fseek '%s': [Error %d] %s\n", + filename, errno, strerror(errno)); +@@ -240,6 +261,25 @@ is_vmware_guestdump(char *filename) + fclose(fp); + return FALSE; + } ++ ++ /* Check CPU architecture field. 
Next 4 bytes after the Main Mem */ ++ if (hdr.version >= 7) { ++ cpu_arch arch; ++ if (fread(&arch, sizeof(cpu_arch), 1, fp) != 1) { ++ if (CRASHDEBUG(1)) ++ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n", ++ "CPU arch", filename, errno, strerror(errno)); ++ fclose(fp); ++ return FALSE; ++ } ++ if (arch != CPU_ARCH_X86) { ++ if (CRASHDEBUG(1)) ++ error(INFO, ++ LOGPRX"Invalid or unsupported CPU architecture: %d\n", arch); ++ fclose(fp); ++ return FALSE; ++ } ++ } + } + if (fseek(fp, 0L, SEEK_END) == -1) { + if (CRASHDEBUG(1)) +@@ -300,7 +340,7 @@ vmware_guestdump_init(char *filename, FILE *ofp) + + if (!machine_type("X86") && !machine_type("X86_64")) { + error(INFO, +- LOGPRX"Invalid or unsupported host architecture for .vmss file: %s\n", ++ LOGPRX"Invalid or unsupported host architecture for .guest file: %s\n", + MACHINE_TYPE); + result = FALSE; + goto exit; +-- +2.47.0 + diff --git a/0002-Fix-incorrect-task-state-during-exit.patch b/0002-Fix-incorrect-task-state-during-exit.patch new file mode 100644 index 0000000..ad9795a --- /dev/null +++ b/0002-Fix-incorrect-task-state-during-exit.patch @@ -0,0 +1,83 @@ +From 6eb51d8284aaca9cc882ddb1b9e135c708abbaa4 Mon Sep 17 00:00:00 2001 +From: Stephen Brennan +Date: Fri, 2 May 2025 13:18:17 -0700 +Subject: [PATCH 2/9] Fix incorrect task state during exit + +task_state() assumes that exit_state is a unsigned long, when in +reality, it has been declared as an int since 97dc32cdb1b53 ("reduce +size of task_struct on 64-bit machines"), in Linux 2.6.22. So on 64-bit +machines, task_state() reads 8 bytes rather than 4, and gets the wrong +exit_state value by including the next field. + +This has gone unnoticed because directly after exit_state comes +exit_code, which is generally zero while the task is alive. When the +exit_code is set, exit_state is usually set not long after. 
Since +task_state_string() only checks whether exit_state bits are set, it +never notices the presence of the exit code inside of the state. + +But this leaves open a window during the process exit, when the +exit_code has been set (in do_exit()), but the exit_state has not (in +exit_notify()). In this case, crash reports a state of "??", but in +reality, the task is still running -- it's just running the exit() +system call. This race window can be long enough to be observed in core +dumps, for example if the mmput() takes a long time. + +This should be considered a bug. A task state of "??" or "(unknown)" is +frequently of concern when debugging, as it could indicate that the +state fields had some sort of corruption, and draw the attention of the +debugger. To handle it properly, record the size of exit_state, and read +it conditionally as a UINT or ULONG, just like the state. This ensures +we retain compatibility with kernel before v2.6.22. Whether that is +actually desirable is anybody's guess. 
+ +Reported-by: Jeffery Yoder +Signed-off-by: Stephen Brennan +--- + defs.h | 1 + + task.c | 11 +++++++++-- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/defs.h b/defs.h +index 4cf169c..2fdb4db 100644 +--- a/defs.h ++++ b/defs.h +@@ -2448,6 +2448,7 @@ struct size_table { /* stash of commonly-used sizes */ + long fred_frame; + long vmap_node; + long cpumask_t; ++ long task_struct_exit_state; + }; + + struct array_table { +diff --git a/task.c b/task.c +index 3bafe79..e07b479 100644 +--- a/task.c ++++ b/task.c +@@ -306,6 +306,7 @@ task_init(void) + MEMBER_SIZE_INIT(task_struct_state, "task_struct", "__state"); + } + MEMBER_OFFSET_INIT(task_struct_exit_state, "task_struct", "exit_state"); ++ MEMBER_SIZE_INIT(task_struct_exit_state, "task_struct", "exit_state"); + MEMBER_OFFSET_INIT(task_struct_pid, "task_struct", "pid"); + MEMBER_OFFSET_INIT(task_struct_comm, "task_struct", "comm"); + MEMBER_OFFSET_INIT(task_struct_next_task, "task_struct", "next_task"); +@@ -5965,8 +5966,14 @@ task_state(ulong task) + state = ULONG(tt->task_struct + OFFSET(task_struct_state)); + else + state = UINT(tt->task_struct + OFFSET(task_struct_state)); +- exit_state = VALID_MEMBER(task_struct_exit_state) ? 
+- ULONG(tt->task_struct + OFFSET(task_struct_exit_state)) : 0; ++ ++ if (VALID_MEMBER(task_struct_exit_state) ++ && SIZE(task_struct_exit_state) == sizeof(ulong)) ++ exit_state = ULONG(tt->task_struct + OFFSET(task_struct_exit_state)); ++ else if (VALID_MEMBER(task_struct_exit_state)) ++ exit_state = UINT(tt->task_struct + OFFSET(task_struct_exit_state)); ++ else ++ exit_state = 0; + + return (state | exit_state); + } +-- +2.47.0 + diff --git a/0003-Add-multi-threads-support-in-crash-target.patch b/0003-Add-multi-threads-support-in-crash-target.patch new file mode 100644 index 0000000..5264436 --- /dev/null +++ b/0003-Add-multi-threads-support-in-crash-target.patch @@ -0,0 +1,210 @@ +From 099f74640c965cd9c0e3620b9b5a0367b81a4e33 Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 25 Jun 2025 16:01:59 +1200 +Subject: [PATCH 3/9] Add multi-threads support in crash target + +Previously, only one thread is created in crash target by [1]. And this one +thread will work as the common container for different tasks whenever +"set " to it. Its tid number is 0 and will never be deleted. + +In order to support multi-stacks, we enable multi-threads in crash +target. Each thread will represent one stack, and "info threads" will +list all available stacks, "thread " will switch to it. + +Since multi-stacks is task binded, each task switching will trigger +a thread delete of those tid number other than 0. In addition, we will +pass the tid number to each arch's get_current_task_reg(), in order to +retrive the regs value of the specific stack. 
+ +[1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01085.html + +Co-developed-by: Alexey Makhalov +Co-developed-by: Tao Liu +Signed-off-by: Tao Liu +--- + arm64.c | 2 +- + crash_target.c | 42 +++++++++++++++++++++++++++++++++++++++--- + defs.h | 3 ++- + gdb_interface.c | 6 +++--- + ppc64.c | 4 ++-- + x86_64.c | 4 ++-- + 6 files changed, 49 insertions(+), 12 deletions(-) + +diff --git a/arm64.c b/arm64.c +index ef4a2b8..1cdde5f 100644 +--- a/arm64.c ++++ b/arm64.c +@@ -204,7 +204,7 @@ out: + + static int + arm64_get_current_task_reg(int regno, const char *name, +- int size, void *value) ++ int size, void *value, int sid) + { + struct bt_info bt_info, bt_setup; + struct task_context *tc; +diff --git a/crash_target.c b/crash_target.c +index 5966b7b..71998ef 100644 +--- a/crash_target.c ++++ b/crash_target.c +@@ -27,8 +27,9 @@ void crash_target_init (void); + + extern "C" int gdb_readmem_callback(unsigned long, void *, int, int); + extern "C" int crash_get_current_task_reg (int regno, const char *regname, +- int regsize, void *val); ++ int regsize, void *val, int sid); + extern "C" int gdb_change_thread_context (void); ++extern "C" int gdb_add_substack (int); + extern "C" void crash_get_current_task_info(unsigned long *pid, char **comm); + + /* The crash target. 
*/ +@@ -66,7 +67,12 @@ public: + crash_get_current_task_info(&pid, &comm); + return string_printf ("%ld %s", pid, comm); + } +- ++ const char *extra_thread_info (thread_info *tp) override ++ { ++ static char buf[16] = {0}; ++ snprintf(buf, sizeof(buf), "stack %ld", tp->ptid.tid()); ++ return buf; ++ } + }; + + static void supply_registers(struct regcache *regcache, int regno) +@@ -79,7 +85,7 @@ static void supply_registers(struct regcache *regcache, int regno) + if (regsize > sizeof (regval)) + error (_("fatal error: buffer size is not enough to fit register value")); + +- if (crash_get_current_task_reg (regno, regname, regsize, (void *)&regval)) ++ if (crash_get_current_task_reg (regno, regname, regsize, (void *)&regval, inferior_thread()->ptid.tid())) + regcache->raw_supply (regno, regval); + else + regcache->raw_supply (regno, NULL); +@@ -144,7 +150,37 @@ crash_target_init (void) + extern "C" int + gdb_change_thread_context (void) + { ++ /* 1st, switch to tid 0 if we are not */ ++ if (inferior_thread()->ptid.tid()) { ++ switch_to_thread (&(current_inferior()->thread_list.front())); ++ } ++ /* 2nd, delete threads whose tid is not 0 */ ++ for (thread_info *tp : current_inferior()->threads_safe()) { ++ if (tp->ptid.tid() && tp->deletable()) { ++ delete_thread_silent(tp); ++ current_inferior()->highest_thread_num--; ++ } ++ } ++ /* 3rd, refresh regcache for tid 0 */ + target_fetch_registers(get_thread_regcache(inferior_thread()), -1); + reinit_frame_cache(); + return TRUE; + } ++ ++/* Add a thread for each additional stack. 
Use stack ID as a thread ID */ ++extern "C" int ++gdb_add_substack (int sid) ++{ ++ thread_info *tp; ++ thread_info *current_thread = inferior_thread(); ++ ++ ptid_t ptid = ptid_t(CRASH_INFERIOR_PID, 0, sid + 1); ++ tp = current_inferior()->find_thread(ptid); ++ if (tp == nullptr) { ++ tp = add_thread_silent(current_inferior()->process_target(), ptid); ++ } ++ switch_to_thread (tp); ++ target_fetch_registers(get_thread_regcache(tp), -1); ++ switch_to_thread (current_thread); ++ return TRUE; ++} +\ No newline at end of file +diff --git a/defs.h b/defs.h +index 2fdb4db..bbd6d4b 100644 +--- a/defs.h ++++ b/defs.h +@@ -1081,7 +1081,7 @@ struct machdep_table { + void (*get_irq_affinity)(int); + void (*show_interrupts)(int, ulong *); + int (*is_page_ptr)(ulong, physaddr_t *); +- int (*get_current_task_reg)(int, const char *, int, void *); ++ int (*get_current_task_reg)(int, const char *, int, void *, int); + int (*is_cpu_prstatus_valid)(int cpu); + }; + +@@ -8324,5 +8324,6 @@ enum ppc64_regnum { + + /* crash_target.c */ + extern int gdb_change_thread_context (void); ++extern int gdb_add_substack (int); + + #endif /* !GDB_COMMON */ +diff --git a/gdb_interface.c b/gdb_interface.c +index fa2e85b..9f76f85 100644 +--- a/gdb_interface.c ++++ b/gdb_interface.c +@@ -1076,13 +1076,13 @@ unsigned long crash_get_kaslr_offset(void) + + /* Callbacks for crash_target */ + int crash_get_current_task_reg (int regno, const char *regname, +- int regsize, void *value); ++ int regsize, void *value, int sid); + int crash_get_current_task_reg (int regno, const char *regname, +- int regsize, void *value) ++ int regsize, void *value, int sid) + { + if (!machdep->get_current_task_reg) + return FALSE; +- return machdep->get_current_task_reg(regno, regname, regsize, value); ++ return machdep->get_current_task_reg(regno, regname, regsize, value, sid); + } + + /* arm64 kernel lr maybe has patuh */ +diff --git a/ppc64.c b/ppc64.c +index 782107b..7ac12fe 100644 +--- a/ppc64.c ++++ b/ppc64.c +@@ -56,7 
+56,7 @@ static char * ppc64_check_eframe(struct ppc64_pt_regs *); + static void ppc64_print_eframe(char *, struct ppc64_pt_regs *, + struct bt_info *); + static int ppc64_get_current_task_reg(int regno, const char *name, int size, +- void *value); ++ void *value, int); + static void parse_cmdline_args(void); + static int ppc64_paca_percpu_offset_init(int); + static void ppc64_init_cpu_info(void); +@@ -2512,7 +2512,7 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs, + + static int + ppc64_get_current_task_reg(int regno, const char *name, int size, +- void *value) ++ void *value, int sid) + { + struct bt_info bt_info, bt_setup; + struct task_context *tc; +diff --git a/x86_64.c b/x86_64.c +index d4bbd15..a46fb9d 100644 +--- a/x86_64.c ++++ b/x86_64.c +@@ -126,7 +126,7 @@ static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *); + static void x86_64_framesize_debug(struct bt_info *); + static void x86_64_get_active_set(void); + static int x86_64_get_kvaddr_ranges(struct vaddr_range *); +-static int x86_64_get_current_task_reg(int, const char *, int, void *); ++static int x86_64_get_current_task_reg(int, const char *, int, void *, int); + static int x86_64_verify_paddr(uint64_t); + static void GART_init(void); + static void x86_64_exception_stacks_init(void); +@@ -9233,7 +9233,7 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp) + + static int + x86_64_get_current_task_reg(int regno, const char *name, +- int size, void *value) ++ int size, void *value, int sid) + { + struct bt_info bt_info, bt_setup; + struct task_context *tc; +-- +2.47.0 + diff --git a/0004-Call-cmd_bt-silently-after-set-pid.patch b/0004-Call-cmd_bt-silently-after-set-pid.patch new file mode 100644 index 0000000..91c8197 --- /dev/null +++ b/0004-Call-cmd_bt-silently-after-set-pid.patch @@ -0,0 +1,173 @@ +From d3ef6e456629fc5711708a88872304da5159c1c6 Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 25 Jun 2025 16:02:00 +1200 +Subject: [PATCH 4/9] Call cmd_bt silently 
after "set pid" + +Cmd bt will list multi-stacks of one task. After we "set <pid>" switch +context to one task, we first need a bt call to detect the multi-stacks, +however we don't want any console output from it, so a nullfp is used for +output receive. The silent bt call is only triggered once as part of task +context switch by cmd set. + +A array of user_regs pointers is reserved for each supported arch. If one +extra stack found, a user_regs structure will be allocated for storing regs +value of the stack. + +Co-developed-by: Alexey Makhalov +Co-developed-by: Tao Liu +Signed-off-by: Tao Liu +--- + arm64.c | 4 ++++ + crash_target.c | 7 +++++++ + kernel.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ + ppc64.c | 4 ++++ + task.c | 4 ++-- + x86_64.c | 3 +++ + 6 files changed, 64 insertions(+), 2 deletions(-) + +diff --git a/arm64.c b/arm64.c +index 1cdde5f..8291301 100644 +--- a/arm64.c ++++ b/arm64.c +@@ -126,6 +126,10 @@ struct user_regs_bitmap_struct { + ulong bitmap[32]; + }; + ++#define MAX_EXCEPTION_STACKS 7 ++ulong extra_stacks_idx = 0; ++struct user_regs_bitmap_struct *extra_stacks_regs[MAX_EXCEPTION_STACKS] = {0}; ++ + static inline bool is_mte_kvaddr(ulong addr) + { + /* check for ARM64_MTE enabled */ +diff --git a/crash_target.c b/crash_target.c +index 71998ef..ad1480c 100644 +--- a/crash_target.c ++++ b/crash_target.c +@@ -31,6 +31,9 @@ extern "C" int crash_get_current_task_reg (int regno, const char *regname, + extern "C" int gdb_change_thread_context (void); + extern "C" int gdb_add_substack (int); + extern "C" void crash_get_current_task_info(unsigned long *pid, char **comm); ++#if defined (X86_64) || defined (ARM64) || defined (PPC64) ++extern "C" void silent_call_bt(void); ++#endif + + /* The crash target. 
*/ + +@@ -164,6 +167,10 @@ gdb_change_thread_context (void) + /* 3rd, refresh regcache for tid 0 */ + target_fetch_registers(get_thread_regcache(inferior_thread()), -1); + reinit_frame_cache(); ++#if defined (X86_64) || defined (ARM64) || defined (PPC64) ++ /* 4th, invoke bt silently to refresh the additional stacks */ ++ silent_call_bt(); ++#endif + return TRUE; + } + +diff --git a/kernel.c b/kernel.c +index b8d3b79..e4213d7 100644 +--- a/kernel.c ++++ b/kernel.c +@@ -12002,3 +12002,47 @@ int get_linux_banner_from_vmlinux(char *buf, size_t size) + + return TRUE; + } ++ ++#if defined(X86_64) || defined(ARM64) || defined(PPC64) ++extern ulong extra_stacks_idx; ++extern void *extra_stacks_regs[]; ++void silent_call_bt(void); ++void silent_call_bt(void) ++{ ++ jmp_buf main_loop_env_save; ++ unsigned long long flags_save = pc->flags; ++ FILE *fp_save = fp; ++ FILE *error_fp_save = pc->error_fp; ++ /* Redirect all cmd_bt() outputs into null */ ++ fp = pc->nullfp; ++ pc->error_fp = pc->nullfp; ++ ++ for (int i = 0; i < extra_stacks_idx; i++) { ++ /* Note: GETBUF/FREEBUF is not applicable for extra_stacks_regs, ++ because we are reserving extra_stacks_regs by cmd_bt() ++ for later use. But GETBUF/FREEBUF is designed for use only ++ within one cmd. See process_command_line() -> restore_sanity() ++ -> free_all_bufs(). So we use malloc/free instead. 
*/ ++ free(extra_stacks_regs[i]); ++ extra_stacks_regs[i] = NULL; ++ } ++ /* Prepare args used by cmd_bt() */ ++ sprintf(pc->command_line, "bt\n"); ++ argcnt = parse_line(pc->command_line, args); ++ optind = 1; ++ pc->flags |= RUNTIME; ++ ++ /* Catch error FATAL generated by cmd_bt() if any */ ++ memcpy(&main_loop_env_save, &pc->main_loop_env, sizeof(jmp_buf)); ++ if (setjmp(pc->main_loop_env)) { ++ goto out; ++ } ++ cmd_bt(); ++out: ++ /* Restore all */ ++ memcpy(&pc->main_loop_env, &main_loop_env_save, sizeof(jmp_buf)); ++ pc->flags = flags_save; ++ fp = fp_save; ++ pc->error_fp = error_fp_save; ++} ++#endif +diff --git a/ppc64.c b/ppc64.c +index 7ac12fe..532eb3f 100644 +--- a/ppc64.c ++++ b/ppc64.c +@@ -80,6 +80,10 @@ struct user_regs_bitmap_struct { + ulong bitmap[32]; + }; + ++#define MAX_EXCEPTION_STACKS 7 ++ulong extra_stacks_idx = 0; ++struct user_regs_bitmap_struct *extra_stacks_regs[MAX_EXCEPTION_STACKS] = {0}; ++ + static int is_opal_context(ulong sp, ulong nip) + { + uint64_t opal_start, opal_end; +diff --git a/task.c b/task.c +index e07b479..ec04b55 100644 +--- a/task.c ++++ b/task.c +@@ -3062,7 +3062,7 @@ sort_context_array(void) + curtask = CURRENT_TASK(); + qsort((void *)tt->context_array, (size_t)tt->running_tasks, + sizeof(struct task_context), sort_by_pid); +- set_context(curtask, NO_PID, TRUE); ++ set_context(curtask, NO_PID, FALSE); + + sort_context_by_task(); + } +@@ -3109,7 +3109,7 @@ sort_context_array_by_last_run(void) + curtask = CURRENT_TASK(); + qsort((void *)tt->context_array, (size_t)tt->running_tasks, + sizeof(struct task_context), sort_by_last_run); +- set_context(curtask, NO_PID, TRUE); ++ set_context(curtask, NO_PID, FALSE); + + sort_context_by_task(); + } +diff --git a/x86_64.c b/x86_64.c +index a46fb9d..ee23d8b 100644 +--- a/x86_64.c ++++ b/x86_64.c +@@ -160,6 +160,9 @@ struct user_regs_bitmap_struct { + ulong bitmap[32]; + }; + ++ulong extra_stacks_idx = 0; ++struct user_regs_bitmap_struct 
*extra_stacks_regs[MAX_EXCEPTION_STACKS] = {0}; ++ + /* + * Do all necessary machine-specific setup here. This is called several + * times during initialization. +-- +2.47.0 + diff --git a/0005-x86_64-Add-gdb-multi-stack-unwind-support.patch b/0005-x86_64-Add-gdb-multi-stack-unwind-support.patch new file mode 100644 index 0000000..c7fd932 --- /dev/null +++ b/0005-x86_64-Add-gdb-multi-stack-unwind-support.patch @@ -0,0 +1,236 @@ +From 7b488818107fff9f92e9778749d0046f2024e6af Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 25 Jun 2025 16:02:01 +1200 +Subject: [PATCH 5/9] x86_64: Add gdb multi-stack unwind support + +Whenever extra stack is found, a user_regs structure is allocated and +regs value copied there. Later the values will be retrived by +get_current_task_reg() by given the thread's tid, aka the index of +stack. + +Co-developed-by: Alexey Makhalov +Co-developed-by: Tao Liu +Signed-off-by: Tao Liu +--- + x86_64.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 125 insertions(+), 6 deletions(-) + +diff --git a/x86_64.c b/x86_64.c +index ee23d8b..cfefe3f 100644 +--- a/x86_64.c ++++ b/x86_64.c +@@ -3508,6 +3508,8 @@ x86_64_exception_RIP_message(struct bt_info *bt, ulong rip) + #define STACK_TRANSITION_ERRMSG_I_P \ + "cannot transition from IRQ stack to current process stack:\n IRQ stack pointer: %lx\n process stack pointer: %lx\n current stack base: %lx\n" + ++#define SET_REG_BITMAP(REGMAP, TYPE, MEMBER) \ ++ SET_BIT(REGMAP, REG_SEQ(TYPE, MEMBER)) + /* + * Low-budget back tracer -- dump text return addresses, following call chain + * when possible, along with any verifiable exception frames. +@@ -3528,6 +3530,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in) + ulong last_process_stack_eframe; + ulong user_mode_eframe; + char *rip_symbol; ++ char buf[BUFSIZE]; + + /* + * User may have made a run-time switch. 
+@@ -3551,6 +3554,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in) + irq_eframe = 0; + last_process_stack_eframe = 0; + bt->call_target = NULL; ++ extra_stacks_idx = 0; + rsp = bt->stkptr; + ms = machdep->machspec; + +@@ -3632,6 +3636,90 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in) + level++; + } + ++ if (is_task_active(bt->task) && bt->flags & BT_DUMPFILE_SEARCH) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = ++ (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ extra_stacks_regs[extra_stacks_idx]->ur.ip = bt->instptr; ++ extra_stacks_regs[extra_stacks_idx]->ur.sp = bt->stkptr + 8; ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, ip); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, sp); ++ ++ /* Sometimes bp is needed for stack unwinding, so we try to get ++ it. The bt->instptr usually points to a inst after a call ++ inst, let's check the previous call inst. Note the call inst ++ len is 5 */ ++ open_tmpfile2(); ++ sprintf(buf, "x/1i 0x%lx", bt->instptr - 5); ++ gdb_pass_through(buf, pc->tmpfile2, GNU_RETURN_ON_ERROR); ++ rewind(pc->tmpfile2); ++ fgets(buf, BUFSIZE, pc->tmpfile2); ++ if (strstr(buf, "call")) { ++ if (strstr(buf, "<relocate_kernel>") || ++ strstr(buf, "<relocate_range>")) { ++ /* OK, we are calling relocate_kernel(), which ++ * is written in assembly and hasn't changed for ++ * years, so we get some extra regs out of it. 
*/ ++ readmem(bt->stkptr - sizeof(ulong) * 6, KVADDR, buf, ++ sizeof(ulong) * 6, "relocate_kernel", FAULT_ON_ERROR); ++ extra_stacks_regs[extra_stacks_idx]->ur.r15 = ++ *(ulong *)(buf + sizeof(ulong) * 0); ++ extra_stacks_regs[extra_stacks_idx]->ur.r14 = ++ *(ulong *)(buf + sizeof(ulong) * 1); ++ extra_stacks_regs[extra_stacks_idx]->ur.r13 = ++ *(ulong *)(buf + sizeof(ulong) * 2); ++ extra_stacks_regs[extra_stacks_idx]->ur.r12 = ++ *(ulong *)(buf + sizeof(ulong) * 3); ++ extra_stacks_regs[extra_stacks_idx]->ur.bp = ++ *(ulong *)(buf + sizeof(ulong) * 4); ++ extra_stacks_regs[extra_stacks_idx]->ur.bx = ++ *(ulong *)(buf + sizeof(ulong) * 5); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, r15); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, r14); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, r13); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, r12); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, bp); ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, bx); ++ } else { ++ /* This is a try-best effort. Usually the call ++ inst will result in a next-inst addr pushed ++ in and a rbp push of the calling function. 
++ So we can get rbp here */ ++ readmem(extra_stacks_regs[extra_stacks_idx]->ur.sp ++ - sizeof(ulong) * 2, KVADDR, ++ &extra_stacks_regs[extra_stacks_idx]->ur.bp, ++ sizeof(ulong), "extra_stacks_regs.bp", FAULT_ON_ERROR); ++ if (INSTACK(extra_stacks_regs[extra_stacks_idx]->ur.bp, bt)) { ++ SET_REG_BITMAP(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ x86_64_user_regs_struct, bp); ++ extra_stacks_regs[extra_stacks_idx]->ur.ip -= 5; ++ } ++ } ++ } ++ close_tmpfile2(); ++ /* ++ * bt->machdep is handled at x86_64_get_stack_frame(), so skip it ++ */ ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.sp != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp && ++ extra_stacks_regs[extra_stacks_idx]->ur.ip != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.ip)) { ++ gdb_add_substack(extra_stacks_idx++); ++ } ++ } + + if ((estack = x86_64_in_exception_stack(bt, &estack_index))) { + in_exception_stack: +@@ -4159,6 +4247,7 @@ x86_64_dwarf_back_trace_cmd(struct bt_info *bt_in) + last_process_stack_eframe = 0; + bt->call_target = NULL; + bt->bptr = 0; ++ extra_stacks_idx = 0; + rsp = bt->stkptr; + if (!rsp) { + error(INFO, "cannot determine starting stack pointer\n"); +@@ -4799,6 +4888,31 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local, + } else if (machdep->flags & ORC) + bt->bptr = rbp; + ++ /* ++ * Preserve registers set for each additional in-kernel stack ++ */ ++ if (!(cs & 3) && verified && flags & EFRAME_PRINT && ++ extra_stacks_idx < MAX_EXCEPTION_STACKS && ++ !(bt->flags & BT_EFRAME_SEARCH)) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ memcpy(&extra_stacks_regs[extra_stacks_idx]->ur, ++ pt_regs_buf, SIZE(pt_regs)); ++ for (int i = 0; i < SIZE(pt_regs)/sizeof(long); i++) ++ 
SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, i); ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.sp != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp && ++ extra_stacks_regs[extra_stacks_idx]->ur.ip != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.ip)) { ++ gdb_add_substack(extra_stacks_idx++); ++ } ++ } ++ + if (kvaddr) + FREEBUF(pt_regs_buf); + +@@ -5002,9 +5116,6 @@ get_reg_from_inactive_task_frame(struct bt_info *bt, char *reg_name, + return reg_value; + } + +-#define SET_REG_BITMAP(REGMAP, TYPE, MEMBER) \ +- SET_BIT(REGMAP, REG_SEQ(TYPE, MEMBER)) +- + /* + * Get a stack frame combination of pc and ra from the most relevent spot. + */ +@@ -9221,7 +9332,8 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp) + case R##_REGNUM: \ + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, \ + REG_SEQ(x86_64_user_regs_struct, r))) { \ +- FREEBUF(ur_bitmap); \ ++ if (!sid) \ ++ FREEBUF(ur_bitmap); \ + return FALSE; \ + } \ + break; +@@ -9256,6 +9368,12 @@ x86_64_get_current_task_reg(int regno, const char *name, + if (!tc) + return FALSE; + ++ /* Non zero stack ID, use extra stacks regs */ ++ if (sid && sid <= extra_stacks_idx) { ++ ur_bitmap = extra_stacks_regs[sid - 1]; ++ goto get_sub; ++ } ++ + /* + * Task is active, grab CPU's registers + */ +@@ -9280,6 +9398,7 @@ x86_64_get_current_task_reg(int regno, const char *name, + } + + /* Get subset registers from stack frame*/ ++get_sub: + switch (regno) { + CHECK_REG_CASE(RAX, ax); + CHECK_REG_CASE(RBX, bx); +@@ -9341,7 +9460,7 @@ get_all: + COPY_REG_CASE(ORIG_RAX, orig_ax); + } + +- if (bt_info.need_free) { ++ if (!sid && bt_info.need_free) { + FREEBUF(ur_bitmap); + bt_info.need_free = FALSE; + } +@@ -9805,4 +9924,4 @@ x86_64_swp_offset(ulong entry) + return SWP_OFFSET(entry); + } + +-#endif /* X86_64 */ ++#endif /* X86_64 */ +-- +2.47.0 + diff --git a/0006-arm64-Add-gdb-multi-stack-unwind-support.patch b/0006-arm64-Add-gdb-multi-stack-unwind-support.patch new file mode 100644 index 
0000000..423a52d --- /dev/null +++ b/0006-arm64-Add-gdb-multi-stack-unwind-support.patch @@ -0,0 +1,180 @@ +From 08271e45e4ea6263fc2957d1e876becd6cfc1a0d Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 25 Jun 2025 16:02:02 +1200 +Subject: [PATCH 6/9] arm64: Add gdb multi-stack unwind support + +Co-developed-by: Alexey Makhalov +Co-developed-by: Tao Liu +Signed-off-by: Tao Liu +--- + arm64.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 92 insertions(+), 4 deletions(-) + +diff --git a/arm64.c b/arm64.c +index 8291301..354d17a 100644 +--- a/arm64.c ++++ b/arm64.c +@@ -226,6 +226,12 @@ arm64_get_current_task_reg(int regno, const char *name, + tc = CURRENT_CONTEXT(); + if (!tc) + return FALSE; ++ ++ if (sid && sid <= extra_stacks_idx) { ++ ur_bitmap = extra_stacks_regs[extra_stacks_idx - 1]; ++ goto get_sub; ++ } ++ + BZERO(&bt_setup, sizeof(struct bt_info)); + clone_bt_info(&bt_setup, &bt_info, tc); + fill_stackbuf(&bt_info); +@@ -241,25 +247,29 @@ arm64_get_current_task_reg(int regno, const char *name, + goto get_all; + } + ++get_sub: + switch (regno) { + case X0_REGNUM ... 
X30_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(arm64_pt_regs, regs[0]) + regno - X0_REGNUM)) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; + case SP_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(arm64_pt_regs, sp))) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; + case PC_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(arm64_pt_regs, pc))) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; +@@ -287,7 +297,7 @@ get_all: + break; + } + +- if (bt_info.need_free) { ++ if (!sid && bt_info.need_free) { + FREEBUF(ur_bitmap); + bt_info.need_free = FALSE; + } +@@ -3680,6 +3690,7 @@ arm64_back_trace_cmd(struct bt_info *bt) + int level; + ulong exception_frame; + FILE *ofp; ++ extra_stacks_idx = 0; + + if (bt->flags & BT_OPT_BACK_TRACE) { + if (machdep->flags & UNW_4_14) { +@@ -3733,6 +3744,35 @@ arm64_back_trace_cmd(struct bt_info *bt) + stackframe.fp = bt->frameptr; + } + ++ if (is_task_active(bt->task)) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ if (bt->task != tt->panic_task && stackframe.sp) { ++ readmem(stackframe.sp - 8, KVADDR, &extra_stacks_regs[extra_stacks_idx]->ur.pc, ++ sizeof(ulong), "extra_stacks_regs.pc", RETURN_ON_ERROR); ++ readmem(stackframe.sp - 16, KVADDR, &extra_stacks_regs[extra_stacks_idx]->ur.sp, ++ sizeof(ulong), "extra_stacks_regs.sp", RETURN_ON_ERROR); ++ } else { ++ extra_stacks_regs[extra_stacks_idx]->ur.pc = stackframe.pc; ++ extra_stacks_regs[extra_stacks_idx]->ur.sp = stackframe.sp; ++ } ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ REG_SEQ(arm64_pt_regs, pc)); ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ 
REG_SEQ(arm64_pt_regs, sp)); ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.sp != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp && ++ extra_stacks_regs[extra_stacks_idx]->ur.pc != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc)) { ++ gdb_add_substack (extra_stacks_idx++); ++ } ++ } ++ + if (bt->flags & BT_TEXT_SYMBOLS) { + arm64_print_text_symbols(bt, &stackframe, ofp); + if (BT_REFERENCE_FOUND(bt)) { +@@ -3854,6 +3894,35 @@ arm64_back_trace_cmd_v2(struct bt_info *bt) + stackframe.fp = bt->frameptr; + } + ++ if (is_task_active(bt->task)) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ if (bt->task != tt->panic_task && stackframe.sp) { ++ readmem(stackframe.sp - 8, KVADDR, &extra_stacks_regs[extra_stacks_idx]->ur.pc, ++ sizeof(ulong), "extra_stacks_regs.pc", RETURN_ON_ERROR); ++ readmem(stackframe.sp - 16, KVADDR, &extra_stacks_regs[extra_stacks_idx]->ur.sp, ++ sizeof(ulong), "extra_stacks_regs.sp", RETURN_ON_ERROR); ++ } else { ++ extra_stacks_regs[extra_stacks_idx]->ur.pc = stackframe.pc; ++ extra_stacks_regs[extra_stacks_idx]->ur.sp = stackframe.sp; ++ } ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ REG_SEQ(arm64_pt_regs, pc)); ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ REG_SEQ(arm64_pt_regs, sp)); ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.sp != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp && ++ extra_stacks_regs[extra_stacks_idx]->ur.pc != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc)) { ++ gdb_add_substack (extra_stacks_idx++); ++ } ++ } ++ + if (bt->flags & BT_TEXT_SYMBOLS) { + arm64_print_text_symbols(bt, &stackframe, ofp); + if (BT_REFERENCE_FOUND(bt)) { +@@ -4468,6 +4537,25 @@ 
arm64_print_exception_frame(struct bt_info *bt, ulong pt_regs, int mode, FILE *o + fprintf(ofp, "ORIG_X0: %016lx SYSCALLNO: %lx", + (ulong)regs->orig_x0, (ulong)regs->syscallno); + fprintf(ofp, " PSTATE: %08lx\n", (ulong)regs->pstate); ++ } else if (!(bt->flags & BT_EFRAME_SEARCH)) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = ++ (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ memcpy(&extra_stacks_regs[extra_stacks_idx]->ur, regs, ++ sizeof(struct arm64_pt_regs)); ++ for (int i = 0; i < sizeof(struct arm64_pt_regs)/sizeof(long); i++) ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, i); ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.sp != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.sp && ++ extra_stacks_regs[extra_stacks_idx]->ur.pc != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.pc)) { ++ gdb_add_substack (extra_stacks_idx++); ++ } + } + } + +-- +2.47.0 + diff --git a/0007-ppc64-Add-gdb-multi-stack-unwind-support.patch b/0007-ppc64-Add-gdb-multi-stack-unwind-support.patch new file mode 100644 index 0000000..38eda90 --- /dev/null +++ b/0007-ppc64-Add-gdb-multi-stack-unwind-support.patch @@ -0,0 +1,179 @@ +From 0c14080b7d7bacadba3611568bd87b347677fa12 Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 25 Jun 2025 16:02:03 +1200 +Subject: [PATCH 7/9] ppc64: Add gdb multi-stack unwind support + +Please note, there is a known issue which some ppc cannot stack unwind +successfully as: + + crash> bt + PID: 2 TASK: c000000004797f80 CPU: 0 COMMAND: "kthreadd" + #0 [c00000000484fbc0] _end at c00000000484fd70 (unreliable) + #1 [c00000000484fd70] __switch_to at c00000000001fabc + #2 [c00000000484fdd0] __schedule at c0000000011ca9dc + #3 [c00000000484feb0] schedule at c0000000011caeb0 + #4 [c00000000484ff20] kthreadd at c0000000001af6c4 + #5 
[c00000000484ffe0] start_kernel_thread at c00000000000ded8 + crash> gdb bt + #0 0xc00000000484fd70 in ?? () + gdb: gdb request failed: bt + +This has nothing to do with bt/gdb bt, see root cause in [1][2]. + +[1]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01124.html +[2]: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01139.html + +Co-developed-by: Alexey Makhalov +Co-developed-by: Tao Liu +Signed-off-by: Tao Liu +--- + ppc64.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 64 insertions(+), 6 deletions(-) + +diff --git a/ppc64.c b/ppc64.c +index 532eb3f..d1a5067 100644 +--- a/ppc64.c ++++ b/ppc64.c +@@ -2053,6 +2053,7 @@ ppc64_back_trace_cmd(struct bt_info *bt) + char buf[BUFSIZE]; + struct gnu_request *req; + extern void print_stack_text_syms(struct bt_info *, ulong, ulong); ++ extra_stacks_idx = 0; + + bt->flags |= BT_EXCEPTION_FRAME; + +@@ -2071,6 +2072,29 @@ ppc64_back_trace_cmd(struct bt_info *bt) + req->pc = bt->instptr; + req->sp = bt->stkptr; + ++ if (is_task_active(bt->task)) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = ++ (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ extra_stacks_regs[extra_stacks_idx]->ur.nip = req->pc; ++ extra_stacks_regs[extra_stacks_idx]->ur.gpr[1] = req->sp; ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ REG_SEQ(ppc64_pt_regs, nip)); ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, ++ REG_SEQ(ppc64_pt_regs, gpr[0]) + 1); ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.gpr[1] != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.gpr[1] && ++ extra_stacks_regs[extra_stacks_idx]->ur.nip != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.nip)) { ++ gdb_add_substack (extra_stacks_idx++); ++ } ++ } ++ + if (bt->flags & + 
(BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_PRINT|BT_TEXT_SYMBOLS_NOPRINT)) { + if (!INSTACK(req->sp, bt)) +@@ -2512,6 +2536,28 @@ ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs, + fprintf(fp, " %s [%lx] exception frame:\n", efrm_str, regs->trap); + ppc64_print_regs(regs); + ppc64_print_nip_lr(regs, 1); ++ ++ if (!((regs->msr >> MSR_PR_LG) & 0x1) && ++ !(bt->flags & BT_EFRAME_SEARCH)) { ++ if (!extra_stacks_regs[extra_stacks_idx]) { ++ extra_stacks_regs[extra_stacks_idx] = ++ (struct user_regs_bitmap_struct *) ++ malloc(sizeof(struct user_regs_bitmap_struct)); ++ } ++ memset(extra_stacks_regs[extra_stacks_idx], 0, ++ sizeof(struct user_regs_bitmap_struct)); ++ memcpy(&extra_stacks_regs[extra_stacks_idx]->ur, regs, ++ sizeof(struct ppc64_pt_regs)); ++ for (int i = 0; i < sizeof(struct ppc64_pt_regs)/sizeof(ulong); i++) ++ SET_BIT(extra_stacks_regs[extra_stacks_idx]->bitmap, i); ++ if (!bt->machdep || ++ (extra_stacks_regs[extra_stacks_idx]->ur.gpr[1] != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.gpr[1] && ++ extra_stacks_regs[extra_stacks_idx]->ur.nip != ++ ((struct user_regs_bitmap_struct *)(bt->machdep))->ur.nip)) { ++ gdb_add_substack (extra_stacks_idx++); ++ } ++ } + } + + static int +@@ -2552,6 +2598,12 @@ ppc64_get_current_task_reg(int regno, const char *name, int size, + tc = CURRENT_CONTEXT(); + if (!tc) + return FALSE; ++ ++ if (sid && sid <= extra_stacks_idx) { ++ ur_bitmap = extra_stacks_regs[sid - 1]; ++ goto get_sub; ++ } ++ + BZERO(&bt_setup, sizeof(struct bt_info)); + clone_bt_info(&bt_setup, &bt_info, tc); + fill_stackbuf(&bt_info); +@@ -2570,39 +2622,45 @@ ppc64_get_current_task_reg(int regno, const char *name, int size, + goto get_all; + } + ++get_sub: + switch (regno) { + case PPC64_R0_REGNUM ... 
PPC64_R31_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(ppc64_pt_regs, gpr[0]) + regno - PPC64_R0_REGNUM)) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; + case PPC64_PC_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(ppc64_pt_regs, nip))) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; + case PPC64_MSR_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(ppc64_pt_regs, msr))) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; + case PPC64_LR_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(ppc64_pt_regs, link))) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; + case PPC64_CTR_REGNUM: + if (!NUM_IN_BITMAP(ur_bitmap->bitmap, + REG_SEQ(ppc64_pt_regs, ctr))) { +- FREEBUF(ur_bitmap); ++ if (!sid) ++ FREEBUF(ur_bitmap); + return FALSE; + } + break; +@@ -2645,7 +2703,7 @@ get_all: + ret = TRUE; + break; + } +- if (bt_info.need_free) { ++ if (!sid && bt_info.need_free) { + FREEBUF(ur_bitmap); + bt_info.need_free = FALSE; + } +-- +2.47.0 + diff --git a/0008-Fix-the-issue-of-page-excluded-messages-flooding.patch b/0008-Fix-the-issue-of-page-excluded-messages-flooding.patch new file mode 100644 index 0000000..7e259aa --- /dev/null +++ b/0008-Fix-the-issue-of-page-excluded-messages-flooding.patch @@ -0,0 +1,42 @@ +From e906eaca2b1a77fe9f8ba512484b4e914c303f11 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Wed, 11 Jun 2025 11:19:01 +0800 +Subject: [PATCH 8/9] Fix the issue of "page excluded" messages flooding + +The current issue is only observed on PPC64le machine when loading crash, +E.g: + ... + crash: page excluded: kernel virtual address: c0000000022d6098 type: "gdb_readmem_callback" + crash: page excluded: kernel virtual address: c0000000022d6098 type: "gdb_readmem_callback" + ... 
+ crash> + +And this issue cannot be reproduced on crash 8; it only occurs +after the gdb-16.2 upgrade (see commit dfb2bb55e530). + +So far I haven't found out why it repeatedly reads the same address (an +excluded page). In any case, the crash tool should first avoid flooding +messages, so let's use the same debug level (8) as read_diskdump() (see +diskdump.c). + +Signed-off-by: Lianbo Jiang +--- + memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 0d8d898..58624bb 100644 +--- a/memory.c ++++ b/memory.c +@@ -2504,7 +2504,7 @@ readmem(ulonglong addr, int memtype, void *buffer, long size, + + case PAGE_EXCLUDED: + RETURN_ON_PARTIAL_READ(); +- if (PRINT_ERROR_MESSAGE) ++ if (CRASHDEBUG(8)) + error(INFO, PAGE_EXCLUDED_ERRMSG, memtype_string(memtype, 0), addr, type); + goto readmem_error; + +-- +2.47.0 + diff --git a/0009-Fix-kmem-p-option-on-Linux-6.16-rc1-and-later-kernel.patch b/0009-Fix-kmem-p-option-on-Linux-6.16-rc1-and-later-kernel.patch new file mode 100644 index 0000000..f99b236 --- /dev/null +++ b/0009-Fix-kmem-p-option-on-Linux-6.16-rc1-and-later-kernel.patch @@ -0,0 +1,34 @@ +From 7e8a2796580d992ed19b2e49b5d555e432303e96 Mon Sep 17 00:00:00 2001 +From: "k-hagio-ab@nec.com" +Date: Tue, 17 Jun 2025 06:08:52 +0000 +Subject: [PATCH 9/9] Fix "kmem -p" option on Linux 6.16-rc1 and later kernels + +Kernel commit acc53a0b4c156 ("mm: rename page->index to +page->__folio_index"), which is contained in Linux 6.16-rc1 and later +kernels, renamed the member.
Without the patch, the "kmem -p" option +fails with the following error: + + kmem: invalid structure member offset: page_index + FILE: memory.c LINE: 6016 FUNCTION: dump_mem_map_SPARSEMEM() + +Signed-off-by: Kazuhito Hagio +--- + memory.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/memory.c b/memory.c +index 58624bb..400d31a 100644 +--- a/memory.c ++++ b/memory.c +@@ -531,6 +531,8 @@ vm_init(void) + ASSIGN_OFFSET(page_mapping) = MEMBER_OFFSET("page", "_mapcount") + + STRUCT_SIZE("atomic_t") + sizeof(ulong); + MEMBER_OFFSET_INIT(page_index, "page", "index"); ++ if (INVALID_MEMBER(page_index)) /* 6.16 and later */ ++ MEMBER_OFFSET_INIT(page_index, "page", "__folio_index"); + if (INVALID_MEMBER(page_index)) + ANON_MEMBER_OFFSET_INIT(page_index, "page", "index"); + MEMBER_OFFSET_INIT(page_buffers, "page", "buffers"); +-- +2.47.0 + diff --git a/crash.spec b/crash.spec index 7197a07..ccd9bc0 100644 --- a/crash.spec +++ b/crash.spec @@ -4,7 +4,7 @@ Summary: Kernel analysis utility for live systems, netdump, diskdump, kdump, LKCD or mcore dumpfiles Name: crash Version: 9.0.0 -Release: 1%{?dist}.alma.1 +Release: 2%{?dist}.alma.1 License: GPL-3.0-only Source0: https://github.com/crash-utility/crash/archive/crash-%{version}.tar.gz Source1: http://ftp.gnu.org/gnu/gdb/gdb-16.2.tar.gz @@ -19,6 +19,15 @@ Provides: bundled(libiberty) Provides: bundled(gdb) = 16.2 Patch0: lzo_snappy_zstd.patch Patch1: crash-9.0.0_build.patch +Patch2: 0001-vmware_guestdump-Version-7-support.patch +Patch3: 0002-Fix-incorrect-task-state-during-exit.patch +Patch4: 0003-Add-multi-threads-support-in-crash-target.patch +Patch5: 0004-Call-cmd_bt-silently-after-set-pid.patch +Patch6: 0005-x86_64-Add-gdb-multi-stack-unwind-support.patch +Patch7: 0006-arm64-Add-gdb-multi-stack-unwind-support.patch +Patch8: 0007-ppc64-Add-gdb-multi-stack-unwind-support.patch +Patch9: 0008-Fix-the-issue-of-page-excluded-messages-flooding.patch +Patch10: 
0009-Fix-kmem-p-option-on-Linux-6.16-rc1-and-later-kernel.patch %description The core analysis suite is a self-contained tool that can be used to @@ -40,6 +49,15 @@ offered by Mission Critical Linux, or the LKCD kernel patch. %setup -n %{name}-%{version} -q %patch -P 0 -p1 -b lzo_snappy_zstd.patch %patch -P 1 -p1 +%patch -P 2 -p1 +%patch -P 3 -p1 +%patch -P 4 -p1 +%patch -P 5 -p1 +%patch -P 6 -p1 +%patch -P 7 -p1 +%patch -P 8 -p1 +%patch -P 9 -p1 +%patch -P 10 -p1 %build @@ -65,9 +83,12 @@ cp -p defs.h %{buildroot}%{_includedir}/crash %{_includedir}/* %changelog -* Tue May 06 2025 Eduard Abdullin - 9.0.0-1.alma.1 +* Thu Jul 03 2025 Eduard Abdullin - 9.0.0-2.alma.1 - Debrand for AlmaLinux +* Wed Jul 2 2025 Tao Liu - 9.0.0-2 +- Rebase to upstream crash 7e8a2796580 + * Mon May 5 2025 Tao Liu - 9.0.0-1 - Rebase to upstream crash 9.0.0