301 lines
11 KiB
Diff
301 lines
11 KiB
Diff
From 04a84a7071b34958f80633ea7bf96652810dadba Mon Sep 17 00:00:00 2001
|
|
From: Kazuhito Hagio <k-hagio-ab@nec.com>
|
|
Date: Mon, 20 Feb 2023 10:28:53 +0900
|
|
Subject: [PATCH 77/89] x86_64: Fix "bt" command on kernels with
|
|
random_kstack_offset=on
|
|
|
|
On kernels configured with CONFIG_RANDOMIZE_KSTACK_OFFSET=y and
|
|
random_kstack_offset=on, a random offset is added to task stacks with
|
|
__kstack_alloca() at the beginning of do_syscall_64() and other syscall
|
|
entry functions. This eventually does the following instruction.
|
|
|
|
<do_syscall_64+32>: sub %rax,%rsp
|
|
|
|
On the other hand, crash uses only part of the ORC unwinder data to
|
|
unwind stacks, and if an ip value doesn't have usable ORC data, it
|
|
calculates the frame size by parsing the assembly of the function.
|
|
|
|
However, crash cannot calculate the frame size correctly with the
|
|
instruction above, and prints stale return addresses like this:
|
|
|
|
crash> bt 1
|
|
PID: 1 TASK: ffff9c250023b880 CPU: 0 COMMAND: "systemd"
|
|
#0 [ffffb7e5c001fc80] __schedule at ffffffff91ae2b16
|
|
#1 [ffffb7e5c001fd00] schedule at ffffffff91ae2ed3
|
|
#2 [ffffb7e5c001fd18] schedule_hrtimeout_range_clock at ffffffff91ae7ed8
|
|
#3 [ffffb7e5c001fda8] ep_poll at ffffffff913ef828
|
|
#4 [ffffb7e5c001fe48] do_epoll_wait at ffffffff913ef943
|
|
#5 [ffffb7e5c001fe80] __x64_sys_epoll_wait at ffffffff913f0130
|
|
#6 [ffffb7e5c001fed0] do_syscall_64 at ffffffff91ad7169
|
|
#7 [ffffb7e5c001fef0] do_syscall_64 at ffffffff91ad7179 <<
|
|
#8 [ffffb7e5c001ff10] syscall_exit_to_user_mode at ffffffff91adaab2 << stale entries
|
|
#9 [ffffb7e5c001ff20] do_syscall_64 at ffffffff91ad7179 <<
|
|
#10 [ffffb7e5c001ff50] entry_SYSCALL_64_after_hwframe at ffffffff91c0009b
|
|
RIP: 00007f258d9427ae RSP: 00007fffda631d60 RFLAGS: 00000293
|
|
...
|
|
|
|
To fix this, enhance the use of ORC data. The ORC unwinder often uses
|
|
%rbp value, so keep it from exception frames and inactive task stacks.
|
|
|
|
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
|
|
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
|
|
---
|
|
defs.h | 1 +
|
|
symbols.c | 1 +
|
|
x86_64.c | 118 ++++++++++++++++++++++++++++++++++++++----------------
|
|
3 files changed, 85 insertions(+), 35 deletions(-)
|
|
|
|
diff --git a/defs.h b/defs.h
|
|
index 801781749666..3c6fa3b0d228 100644
|
|
--- a/defs.h
|
|
+++ b/defs.h
|
|
@@ -2207,6 +2207,7 @@ struct offset_table { /* stash of commonly-used offsets */
|
|
long sock_sk_common;
|
|
long sock_common_skc_v6_daddr;
|
|
long sock_common_skc_v6_rcv_saddr;
|
|
+ long inactive_task_frame_bp;
|
|
};
|
|
|
|
struct size_table { /* stash of commonly-used sizes */
|
|
diff --git a/symbols.c b/symbols.c
|
|
index 54115d753601..fc55da678ecd 100644
|
|
--- a/symbols.c
|
|
+++ b/symbols.c
|
|
@@ -8834,6 +8834,7 @@ dump_offset_table(char *spec, ulong makestruct)
|
|
OFFSET(task_struct_tss_ksp));
|
|
fprintf(fp, " task_struct_thread_eip: %ld\n",
|
|
OFFSET(task_struct_thread_eip));
|
|
+ fprintf(fp, " inactive_task_frame_bp: %ld\n", OFFSET(inactive_task_frame_bp));
|
|
fprintf(fp, " inactive_task_frame_ret_addr: %ld\n",
|
|
OFFSET(inactive_task_frame_ret_addr));
|
|
fprintf(fp, " task_struct_thread_esp: %ld\n",
|
|
diff --git a/x86_64.c b/x86_64.c
|
|
index 31c249699066..86abea00c9d6 100644
|
|
--- a/x86_64.c
|
|
+++ b/x86_64.c
|
|
@@ -122,7 +122,7 @@ static int x86_64_do_not_cache_framesize(struct syment *, ulong);
|
|
static int x86_64_framesize_cache_func(int, ulong, int *, int, struct syment *);
|
|
static ulong x86_64_get_framepointer(struct bt_info *, ulong);
|
|
int search_for_eframe_target_caller(struct bt_info *, ulong, int *);
|
|
-static int x86_64_get_framesize(struct bt_info *, ulong, ulong);
|
|
+static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *);
|
|
static void x86_64_framesize_debug(struct bt_info *);
|
|
static void x86_64_get_active_set(void);
|
|
static int x86_64_get_kvaddr_ranges(struct vaddr_range *);
|
|
@@ -3639,7 +3639,7 @@ in_exception_stack:
|
|
bt, ofp);
|
|
rsp += SIZE(pt_regs); /* guaranteed kernel mode */
|
|
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
|
|
- bt->eframe_ip, rsp)) >= 0))
|
|
+ bt->eframe_ip, rsp, NULL)) >= 0))
|
|
rsp += framesize;
|
|
level++;
|
|
irq_eframe = 0;
|
|
@@ -3671,7 +3671,7 @@ in_exception_stack:
|
|
case BACKTRACE_ENTRY_DISPLAYED:
|
|
level++;
|
|
if ((framesize = x86_64_get_framesize(bt,
|
|
- bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
|
|
+ bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
|
|
rsp += framesize;
|
|
i += framesize/sizeof(ulong);
|
|
}
|
|
@@ -3744,7 +3744,7 @@ in_exception_stack:
|
|
}
|
|
|
|
level++;
|
|
- if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp)) >= 0)
|
|
+ if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp, NULL)) >= 0)
|
|
rsp += framesize;
|
|
}
|
|
}
|
|
@@ -3796,7 +3796,7 @@ in_exception_stack:
|
|
case BACKTRACE_ENTRY_DISPLAYED:
|
|
level++;
|
|
if ((framesize = x86_64_get_framesize(bt,
|
|
- bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
|
|
+ bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
|
|
rsp += framesize;
|
|
i += framesize/sizeof(ulong);
|
|
}
|
|
@@ -3906,24 +3906,34 @@ in_exception_stack:
|
|
(STREQ(rip_symbol, "thread_return") ||
|
|
STREQ(rip_symbol, "schedule") ||
|
|
STREQ(rip_symbol, "__schedule"))) {
|
|
- if (STREQ(rip_symbol, "__schedule")) {
|
|
- i = (rsp - bt->stackbase)/sizeof(ulong);
|
|
- x86_64_print_stack_entry(bt, ofp, level,
|
|
- i, bt->instptr);
|
|
- level++;
|
|
- rsp = __schedule_frame_adjust(rsp, bt);
|
|
- if (STREQ(closest_symbol(bt->instptr), "schedule"))
|
|
+ if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_ret_addr)) {
|
|
+ /*
|
|
+ * %rsp should have the address of inactive_task_frame, so
|
|
+ * skip the registers before ret_addr to adjust rsp.
|
|
+ */
|
|
+ if (CRASHDEBUG(1))
|
|
+ fprintf(fp, "rsp: %lx rbp: %lx\n", rsp, bt->bptr);
|
|
+ rsp += OFFSET(inactive_task_frame_ret_addr);
|
|
+ } else {
|
|
+ if (STREQ(rip_symbol, "__schedule")) {
|
|
+ i = (rsp - bt->stackbase)/sizeof(ulong);
|
|
+ x86_64_print_stack_entry(bt, ofp, level,
|
|
+ i, bt->instptr);
|
|
+ level++;
|
|
+ rsp = __schedule_frame_adjust(rsp, bt);
|
|
+ if (STREQ(closest_symbol(bt->instptr), "schedule"))
|
|
+ bt->flags |= BT_SCHEDULE;
|
|
+ } else
|
|
bt->flags |= BT_SCHEDULE;
|
|
- } else
|
|
- bt->flags |= BT_SCHEDULE;
|
|
-
|
|
- if (bt->flags & BT_SCHEDULE) {
|
|
- i = (rsp - bt->stackbase)/sizeof(ulong);
|
|
- x86_64_print_stack_entry(bt, ofp, level,
|
|
- i, bt->instptr);
|
|
- bt->flags &= ~(ulonglong)BT_SCHEDULE;
|
|
- rsp += sizeof(ulong);
|
|
- level++;
|
|
+
|
|
+ if (bt->flags & BT_SCHEDULE) {
|
|
+ i = (rsp - bt->stackbase)/sizeof(ulong);
|
|
+ x86_64_print_stack_entry(bt, ofp, level,
|
|
+ i, bt->instptr);
|
|
+ bt->flags &= ~(ulonglong)BT_SCHEDULE;
|
|
+ rsp += sizeof(ulong);
|
|
+ level++;
|
|
+ }
|
|
}
|
|
}
|
|
|
|
@@ -3954,7 +3964,7 @@ in_exception_stack:
|
|
irq_eframe = 0;
|
|
bt->flags |= BT_EFRAME_TARGET;
|
|
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
|
|
- bt->eframe_ip, rsp)) >= 0))
|
|
+ bt->eframe_ip, rsp, NULL)) >= 0))
|
|
rsp += framesize;
|
|
bt->flags &= ~BT_EFRAME_TARGET;
|
|
}
|
|
@@ -4041,7 +4051,7 @@ in_exception_stack:
|
|
case BACKTRACE_ENTRY_DISPLAYED:
|
|
level++;
|
|
if ((framesize = x86_64_get_framesize(bt,
|
|
- bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
|
|
+ bt->eframe_ip ? bt->eframe_ip : *up, rsp, (char *)up)) >= 0) {
|
|
rsp += framesize;
|
|
i += framesize/sizeof(ulong);
|
|
}
|
|
@@ -4752,7 +4762,8 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local,
|
|
bt->instptr = rip;
|
|
bt->stkptr = rsp;
|
|
bt->bptr = rbp;
|
|
- }
|
|
+ } else if (machdep->flags & ORC)
|
|
+ bt->bptr = rbp;
|
|
|
|
if (kvaddr)
|
|
FREEBUF(pt_regs_buf);
|
|
@@ -5312,6 +5323,10 @@ x86_64_get_sp(struct bt_info *bt)
|
|
OFFSET(thread_struct_rsp), KVADDR,
|
|
&rsp, sizeof(void *),
|
|
"thread_struct rsp", FAULT_ON_ERROR);
|
|
+ if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_bp)) {
|
|
+ readmem(rsp + OFFSET(inactive_task_frame_bp), KVADDR, &bt->bptr,
|
|
+ sizeof(void *), "inactive_task_frame.bp", FAULT_ON_ERROR);
|
|
+ }
|
|
return rsp;
|
|
}
|
|
|
|
@@ -6418,6 +6433,9 @@ x86_64_ORC_init(void)
|
|
orc->__stop_orc_unwind = symbol_value("__stop_orc_unwind");
|
|
orc->orc_lookup = symbol_value("orc_lookup");
|
|
|
|
+ MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
|
|
+ MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");
|
|
+
|
|
machdep->flags |= ORC;
|
|
}
|
|
|
|
@@ -8480,7 +8498,7 @@ search_for_eframe_target_caller(struct bt_info *bt, ulong stkptr, int *framesize
|
|
(BT_OLD_BACK_TRACE|BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_ALL|BT_FRAMESIZE_DISABLE)
|
|
|
|
static int
|
|
-x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
|
|
+x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_ptr)
|
|
{
|
|
int c, framesize, instr, arg, max;
|
|
struct syment *sp;
|
|
@@ -8581,19 +8599,49 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
|
|
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
|
|
if (CRASHDEBUG(1)) {
|
|
fprintf(fp,
|
|
- "rsp: %lx textaddr: %lx framesize: %d -> spo: %d bpo: %d spr: %d bpr: %d type: %d %s",
|
|
- rsp, textaddr, framesize, korc->sp_offset, korc->bp_offset,
|
|
- korc->sp_reg, korc->bp_reg, korc->type,
|
|
- (korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP) ? "" : "(UNUSED)");
|
|
+ "rsp: %lx textaddr: %lx -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
|
|
+ rsp, textaddr, korc->sp_offset, korc->bp_offset,
|
|
+ korc->sp_reg, korc->bp_reg, korc->type);
|
|
if (MEMBER_EXISTS("orc_entry", "end"))
|
|
fprintf(fp, " end: %d", korc->end);
|
|
fprintf(fp, "\n");
|
|
}
|
|
|
|
- if ((korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP)) {
|
|
- framesize = (korc->sp_offset - 8);
|
|
- return (x86_64_framesize_cache_func(FRAMESIZE_ENTER, textaddr,
|
|
- &framesize, exception, NULL));
|
|
+ if (korc->type == ORC_TYPE_CALL) {
|
|
+ ulong prev_sp = 0, prev_bp = 0;
|
|
+ framesize = -1;
|
|
+
|
|
+ if (korc->sp_reg == ORC_REG_SP) {
|
|
+ framesize = (korc->sp_offset - 8);
|
|
+
|
|
+ /* rsp points to a return address, so +8 to use sp_offset */
|
|
+ prev_sp = (rsp + 8) + korc->sp_offset;
|
|
+ if (CRASHDEBUG(1))
|
|
+ fprintf(fp, "rsp: %lx prev_sp: %lx framesize: %d\n",
|
|
+ rsp, prev_sp, framesize);
|
|
+ } else if ((korc->sp_reg == ORC_REG_BP) && bt->bptr) {
|
|
+ prev_sp = bt->bptr + korc->sp_offset;
|
|
+ framesize = (prev_sp - (rsp + 8) - 8);
|
|
+ if (CRASHDEBUG(1))
|
|
+ fprintf(fp, "rsp: %lx rbp: %lx prev_sp: %lx framesize: %d\n",
|
|
+ rsp, bt->bptr, prev_sp, framesize);
|
|
+ }
|
|
+
|
|
+ if ((korc->bp_reg == ORC_REG_PREV_SP) && prev_sp) {
|
|
+ prev_bp = prev_sp + korc->bp_offset;
|
|
+ if (stack_ptr && INSTACK(prev_bp, bt)) {
|
|
+ bt->bptr = ULONG(stack_ptr + (prev_bp - rsp));
|
|
+ if (CRASHDEBUG(1))
|
|
+ fprintf(fp, "rsp: %lx prev_sp: %lx prev_bp: %lx -> %lx\n",
|
|
+ rsp, prev_sp, prev_bp, bt->bptr);
|
|
+ } else
|
|
+ bt->bptr = 0;
|
|
+ } else if ((korc->bp_reg != ORC_REG_UNDEFINED))
|
|
+ bt->bptr = 0;
|
|
+
|
|
+ if (framesize >= 0)
|
|
+ /* Do not cache this, possibly it may be variable. */
|
|
+ return framesize;
|
|
}
|
|
}
|
|
|
|
@@ -8749,7 +8797,7 @@ x86_64_framesize_debug(struct bt_info *bt)
|
|
if (!bt->hp->eip)
|
|
error(INFO, "x86_64_framesize_debug: ignoring command\n");
|
|
else
|
|
- x86_64_get_framesize(bt, bt->hp->eip, 0);
|
|
+ x86_64_get_framesize(bt, bt->hp->eip, 0, NULL);
|
|
break;
|
|
|
|
case -3:
|
|
--
|
|
2.37.1
|
|
|