crash/SOURCES/0077-x86_64-Fix-bt-command-on-kernels-with-random_kstack_.patch
2023-09-27 12:47:33 +00:00

301 lines
11 KiB
Diff

From 04a84a7071b34958f80633ea7bf96652810dadba Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Mon, 20 Feb 2023 10:28:53 +0900
Subject: [PATCH 77/89] x86_64: Fix "bt" command on kernels with
random_kstack_offset=on
On kernels configured with CONFIG_RANDOMIZE_KSTACK_OFFSET=y and
random_kstack_offset=on, a random offset is added to task stacks with
__kstack_alloca() at the beginning of do_syscall_64() and other syscall
entry functions. This eventually does the following instruction.
<do_syscall_64+32>: sub %rax,%rsp
On the other hand, crash uses only a part of data for ORC unwinder to
unwind stacks and if an ip value doesn't have a usable ORC data, it
caluculates the frame size with parsing the assembly of the function.
However, crash cannot calculate the frame size correctly with the
instruction above, and prints stale return addresses like this:
crash> bt 1
PID: 1 TASK: ffff9c250023b880 CPU: 0 COMMAND: "systemd"
#0 [ffffb7e5c001fc80] __schedule at ffffffff91ae2b16
#1 [ffffb7e5c001fd00] schedule at ffffffff91ae2ed3
#2 [ffffb7e5c001fd18] schedule_hrtimeout_range_clock at ffffffff91ae7ed8
#3 [ffffb7e5c001fda8] ep_poll at ffffffff913ef828
#4 [ffffb7e5c001fe48] do_epoll_wait at ffffffff913ef943
#5 [ffffb7e5c001fe80] __x64_sys_epoll_wait at ffffffff913f0130
#6 [ffffb7e5c001fed0] do_syscall_64 at ffffffff91ad7169
#7 [ffffb7e5c001fef0] do_syscall_64 at ffffffff91ad7179 <<
#8 [ffffb7e5c001ff10] syscall_exit_to_user_mode at ffffffff91adaab2 << stale entries
#9 [ffffb7e5c001ff20] do_syscall_64 at ffffffff91ad7179 <<
#10 [ffffb7e5c001ff50] entry_SYSCALL_64_after_hwframe at ffffffff91c0009b
RIP: 00007f258d9427ae RSP: 00007fffda631d60 RFLAGS: 00000293
...
To fix this, enhance the use of ORC data. The ORC unwinder often uses
%rbp value, so keep it from exception frames and inactive task stacks.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
symbols.c | 1 +
x86_64.c | 118 ++++++++++++++++++++++++++++++++++++++----------------
3 files changed, 85 insertions(+), 35 deletions(-)
diff --git a/defs.h b/defs.h
index 801781749666..3c6fa3b0d228 100644
--- a/defs.h
+++ b/defs.h
@@ -2207,6 +2207,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sock_sk_common;
long sock_common_skc_v6_daddr;
long sock_common_skc_v6_rcv_saddr;
+ long inactive_task_frame_bp;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/symbols.c b/symbols.c
index 54115d753601..fc55da678ecd 100644
--- a/symbols.c
+++ b/symbols.c
@@ -8834,6 +8834,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(task_struct_tss_ksp));
fprintf(fp, " task_struct_thread_eip: %ld\n",
OFFSET(task_struct_thread_eip));
+ fprintf(fp, " inactive_task_frame_bp: %ld\n", OFFSET(inactive_task_frame_bp));
fprintf(fp, " inactive_task_frame_ret_addr: %ld\n",
OFFSET(inactive_task_frame_ret_addr));
fprintf(fp, " task_struct_thread_esp: %ld\n",
diff --git a/x86_64.c b/x86_64.c
index 31c249699066..86abea00c9d6 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -122,7 +122,7 @@ static int x86_64_do_not_cache_framesize(struct syment *, ulong);
static int x86_64_framesize_cache_func(int, ulong, int *, int, struct syment *);
static ulong x86_64_get_framepointer(struct bt_info *, ulong);
int search_for_eframe_target_caller(struct bt_info *, ulong, int *);
-static int x86_64_get_framesize(struct bt_info *, ulong, ulong);
+static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *);
static void x86_64_framesize_debug(struct bt_info *);
static void x86_64_get_active_set(void);
static int x86_64_get_kvaddr_ranges(struct vaddr_range *);
@@ -3639,7 +3639,7 @@ in_exception_stack:
bt, ofp);
rsp += SIZE(pt_regs); /* guaranteed kernel mode */
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
- bt->eframe_ip, rsp)) >= 0))
+ bt->eframe_ip, rsp, NULL)) >= 0))
rsp += framesize;
level++;
irq_eframe = 0;
@@ -3671,7 +3671,7 @@ in_exception_stack:
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
- bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
+ bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
@@ -3744,7 +3744,7 @@ in_exception_stack:
}
level++;
- if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp)) >= 0)
+ if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp, NULL)) >= 0)
rsp += framesize;
}
}
@@ -3796,7 +3796,7 @@ in_exception_stack:
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
- bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
+ bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
@@ -3906,24 +3906,34 @@ in_exception_stack:
(STREQ(rip_symbol, "thread_return") ||
STREQ(rip_symbol, "schedule") ||
STREQ(rip_symbol, "__schedule"))) {
- if (STREQ(rip_symbol, "__schedule")) {
- i = (rsp - bt->stackbase)/sizeof(ulong);
- x86_64_print_stack_entry(bt, ofp, level,
- i, bt->instptr);
- level++;
- rsp = __schedule_frame_adjust(rsp, bt);
- if (STREQ(closest_symbol(bt->instptr), "schedule"))
+ if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_ret_addr)) {
+ /*
+ * %rsp should have the address of inactive_task_frame, so
+ * skip the registers before ret_addr to adjust rsp.
+ */
+ if (CRASHDEBUG(1))
+ fprintf(fp, "rsp: %lx rbp: %lx\n", rsp, bt->bptr);
+ rsp += OFFSET(inactive_task_frame_ret_addr);
+ } else {
+ if (STREQ(rip_symbol, "__schedule")) {
+ i = (rsp - bt->stackbase)/sizeof(ulong);
+ x86_64_print_stack_entry(bt, ofp, level,
+ i, bt->instptr);
+ level++;
+ rsp = __schedule_frame_adjust(rsp, bt);
+ if (STREQ(closest_symbol(bt->instptr), "schedule"))
+ bt->flags |= BT_SCHEDULE;
+ } else
bt->flags |= BT_SCHEDULE;
- } else
- bt->flags |= BT_SCHEDULE;
-
- if (bt->flags & BT_SCHEDULE) {
- i = (rsp - bt->stackbase)/sizeof(ulong);
- x86_64_print_stack_entry(bt, ofp, level,
- i, bt->instptr);
- bt->flags &= ~(ulonglong)BT_SCHEDULE;
- rsp += sizeof(ulong);
- level++;
+
+ if (bt->flags & BT_SCHEDULE) {
+ i = (rsp - bt->stackbase)/sizeof(ulong);
+ x86_64_print_stack_entry(bt, ofp, level,
+ i, bt->instptr);
+ bt->flags &= ~(ulonglong)BT_SCHEDULE;
+ rsp += sizeof(ulong);
+ level++;
+ }
}
}
@@ -3954,7 +3964,7 @@ in_exception_stack:
irq_eframe = 0;
bt->flags |= BT_EFRAME_TARGET;
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
- bt->eframe_ip, rsp)) >= 0))
+ bt->eframe_ip, rsp, NULL)) >= 0))
rsp += framesize;
bt->flags &= ~BT_EFRAME_TARGET;
}
@@ -4041,7 +4051,7 @@ in_exception_stack:
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
- bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
+ bt->eframe_ip ? bt->eframe_ip : *up, rsp, (char *)up)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
@@ -4752,7 +4762,8 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local,
bt->instptr = rip;
bt->stkptr = rsp;
bt->bptr = rbp;
- }
+ } else if (machdep->flags & ORC)
+ bt->bptr = rbp;
if (kvaddr)
FREEBUF(pt_regs_buf);
@@ -5312,6 +5323,10 @@ x86_64_get_sp(struct bt_info *bt)
OFFSET(thread_struct_rsp), KVADDR,
&rsp, sizeof(void *),
"thread_struct rsp", FAULT_ON_ERROR);
+ if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_bp)) {
+ readmem(rsp + OFFSET(inactive_task_frame_bp), KVADDR, &bt->bptr,
+ sizeof(void *), "inactive_task_frame.bp", FAULT_ON_ERROR);
+ }
return rsp;
}
@@ -6418,6 +6433,9 @@ x86_64_ORC_init(void)
orc->__stop_orc_unwind = symbol_value("__stop_orc_unwind");
orc->orc_lookup = symbol_value("orc_lookup");
+ MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
+ MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");
+
machdep->flags |= ORC;
}
@@ -8480,7 +8498,7 @@ search_for_eframe_target_caller(struct bt_info *bt, ulong stkptr, int *framesize
(BT_OLD_BACK_TRACE|BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_ALL|BT_FRAMESIZE_DISABLE)
static int
-x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
+x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_ptr)
{
int c, framesize, instr, arg, max;
struct syment *sp;
@@ -8581,19 +8599,49 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
if (CRASHDEBUG(1)) {
fprintf(fp,
- "rsp: %lx textaddr: %lx framesize: %d -> spo: %d bpo: %d spr: %d bpr: %d type: %d %s",
- rsp, textaddr, framesize, korc->sp_offset, korc->bp_offset,
- korc->sp_reg, korc->bp_reg, korc->type,
- (korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP) ? "" : "(UNUSED)");
+ "rsp: %lx textaddr: %lx -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
+ rsp, textaddr, korc->sp_offset, korc->bp_offset,
+ korc->sp_reg, korc->bp_reg, korc->type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
- if ((korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP)) {
- framesize = (korc->sp_offset - 8);
- return (x86_64_framesize_cache_func(FRAMESIZE_ENTER, textaddr,
- &framesize, exception, NULL));
+ if (korc->type == ORC_TYPE_CALL) {
+ ulong prev_sp = 0, prev_bp = 0;
+ framesize = -1;
+
+ if (korc->sp_reg == ORC_REG_SP) {
+ framesize = (korc->sp_offset - 8);
+
+ /* rsp points to a return address, so +8 to use sp_offset */
+ prev_sp = (rsp + 8) + korc->sp_offset;
+ if (CRASHDEBUG(1))
+ fprintf(fp, "rsp: %lx prev_sp: %lx framesize: %d\n",
+ rsp, prev_sp, framesize);
+ } else if ((korc->sp_reg == ORC_REG_BP) && bt->bptr) {
+ prev_sp = bt->bptr + korc->sp_offset;
+ framesize = (prev_sp - (rsp + 8) - 8);
+ if (CRASHDEBUG(1))
+ fprintf(fp, "rsp: %lx rbp: %lx prev_sp: %lx framesize: %d\n",
+ rsp, bt->bptr, prev_sp, framesize);
+ }
+
+ if ((korc->bp_reg == ORC_REG_PREV_SP) && prev_sp) {
+ prev_bp = prev_sp + korc->bp_offset;
+ if (stack_ptr && INSTACK(prev_bp, bt)) {
+ bt->bptr = ULONG(stack_ptr + (prev_bp - rsp));
+ if (CRASHDEBUG(1))
+ fprintf(fp, "rsp: %lx prev_sp: %lx prev_bp: %lx -> %lx\n",
+ rsp, prev_sp, prev_bp, bt->bptr);
+ } else
+ bt->bptr = 0;
+ } else if ((korc->bp_reg != ORC_REG_UNDEFINED))
+ bt->bptr = 0;
+
+ if (framesize >= 0)
+ /* Do not cache this, possibly it may be variable. */
+ return framesize;
}
}
@@ -8749,7 +8797,7 @@ x86_64_framesize_debug(struct bt_info *bt)
if (!bt->hp->eip)
error(INFO, "x86_64_framesize_debug: ignoring command\n");
else
- x86_64_get_framesize(bt, bt->hp->eip, 0);
+ x86_64_get_framesize(bt, bt->hp->eip, 0, NULL);
break;
case -3:
--
2.37.1