Add upstream fix for stack tracing/livepatch
Joe Lawrence reports that a fix is needed for livepatch/stack tracing behavior in v5.9, this commit adds the needed fix. This is already in 5.10 tree, so when we sync again from source git for 5.10 we will not loose it. Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
This commit is contained in:
parent
3cb004666e
commit
deb0607107
140
0002-x86-unwind-orc-Fix-inactive-tasks-with-stack-pointer.patch
Normal file
140
0002-x86-unwind-orc-Fix-inactive-tasks-with-stack-pointer.patch
Normal file
@ -0,0 +1,140 @@
|
||||
From f2ac57a4c49d40409c21c82d23b5706df9b438af Mon Sep 17 00:00:00 2001
|
||||
From: Jiri Slaby <jslaby@suse.cz>
|
||||
Date: Wed, 14 Oct 2020 07:30:51 +0200
|
||||
Subject: [PATCH] x86/unwind/orc: Fix inactive tasks with stack pointer in %sp
|
||||
on GCC 10 compiled kernels
|
||||
|
||||
GCC 10 optimizes the scheduler code differently than its predecessors.
|
||||
|
||||
When CONFIG_DEBUG_SECTION_MISMATCH=y, the Makefile forces GCC not
|
||||
to inline some functions (-fno-inline-functions-called-once). Before GCC
|
||||
10, "no-inlined" __schedule() starts with the usual prologue:
|
||||
|
||||
push %bp
|
||||
mov %sp, %bp
|
||||
|
||||
So the ORC unwinder simply picks stack pointer from %bp and
|
||||
unwinds from __schedule() just perfectly:
|
||||
|
||||
$ cat /proc/1/stack
|
||||
[<0>] ep_poll+0x3e9/0x450
|
||||
[<0>] do_epoll_wait+0xaa/0xc0
|
||||
[<0>] __x64_sys_epoll_wait+0x1a/0x20
|
||||
[<0>] do_syscall_64+0x33/0x40
|
||||
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
|
||||
|
||||
But now, with GCC 10, there is no %bp prologue in __schedule():
|
||||
|
||||
$ cat /proc/1/stack
|
||||
<nothing>
|
||||
|
||||
The ORC entry of the point in __schedule() is:
|
||||
|
||||
sp:sp+88 bp:last_sp-48 type:call end:0
|
||||
|
||||
In this case, nobody subtracts sizeof "struct inactive_task_frame" in
|
||||
__unwind_start(). The struct is put on the stack by __switch_to_asm() and
|
||||
only then __switch_to_asm() stores %sp to task->thread.sp. But we start
|
||||
unwinding from a point in __schedule() (stored in frame->ret_addr by
|
||||
'call') and not in __switch_to_asm().
|
||||
|
||||
So for these example values in __unwind_start():
|
||||
|
||||
sp=ffff94b50001fdc8 bp=ffff8e1f41d29340 ip=__schedule+0x1f0
|
||||
|
||||
The stack is:
|
||||
|
||||
ffff94b50001fdc8: ffff8e1f41578000 # struct inactive_task_frame
|
||||
ffff94b50001fdd0: 0000000000000000
|
||||
ffff94b50001fdd8: ffff8e1f41d29340
|
||||
ffff94b50001fde0: ffff8e1f41611d40 # ...
|
||||
ffff94b50001fde8: ffffffff93c41920 # bx
|
||||
ffff94b50001fdf0: ffff8e1f41d29340 # bp
|
||||
ffff94b50001fdf8: ffffffff9376cad0 # ret_addr (and end of the struct)
|
||||
|
||||
0xffffffff9376cad0 is __schedule+0x1f0 (after the call to
|
||||
__switch_to_asm). Now follow those 88 bytes from the ORC entry (sp+88).
|
||||
The entry is correct, __schedule() really pushes 48 bytes (8*7) + 32 bytes
|
||||
via subq to store some local values (like 4U below). So to unwind, look
|
||||
at the offset 88-sizeof(long) = 0x50 from here:
|
||||
|
||||
ffff94b50001fe00: ffff8e1f41578618
|
||||
ffff94b50001fe08: 00000cc000000255
|
||||
ffff94b50001fe10: 0000000500000004
|
||||
ffff94b50001fe18: 7793fab6956b2d00 # NOTE (see below)
|
||||
ffff94b50001fe20: ffff8e1f41578000
|
||||
ffff94b50001fe28: ffff8e1f41578000
|
||||
ffff94b50001fe30: ffff8e1f41578000
|
||||
ffff94b50001fe38: ffff8e1f41578000
|
||||
ffff94b50001fe40: ffff94b50001fed8
|
||||
ffff94b50001fe48: ffff8e1f41577ff0
|
||||
ffff94b50001fe50: ffffffff9376cf12
|
||||
|
||||
Here ^^^^^^^^^^^^^^^^ is the correct ret addr from
|
||||
__schedule(). It translates to schedule+0x42 (insn after a call to
|
||||
__schedule()).
|
||||
|
||||
BUT, unwind_next_frame() tries to take the address starting from
|
||||
0xffff94b50001fdc8. That is exactly from thread.sp+88-sizeof(long) =
|
||||
0xffff94b50001fdc8+88-8 = 0xffff94b50001fe18, which is garbage marked as
|
||||
NOTE above. So this quits the unwinding as 7793fab6956b2d00 is obviously
|
||||
not a kernel address.
|
||||
|
||||
There was a fix to skip 'struct inactive_task_frame' in
|
||||
unwind_get_return_address_ptr in the following commit:
|
||||
|
||||
187b96db5ca7 ("x86/unwind/orc: Fix unwind_get_return_address_ptr() for inactive tasks")
|
||||
|
||||
But we need to skip the struct already in the unwinder proper. So
|
||||
subtract the size (increase the stack pointer) of the structure in
|
||||
__unwind_start() directly. This allows for removal of the code added by
|
||||
commit 187b96db5ca7 completely, as the address is now at
|
||||
'(unsigned long *)state->sp - 1', the same as in the generic case.
|
||||
|
||||
[ mingo: Cleaned up the changelog a bit, for better readability. ]
|
||||
|
||||
Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
|
||||
Bug: https://bugzilla.suse.com/show_bug.cgi?id=1176907
|
||||
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
|
||||
Signed-off-by: Ingo Molnar <mingo@kernel.org>
|
||||
Link: https://lore.kernel.org/r/20201014053051.24199-1-jslaby@suse.cz
|
||||
---
|
||||
arch/x86/kernel/unwind_orc.c | 9 +--------
|
||||
1 file changed, 1 insertion(+), 8 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
|
||||
index ec88bbe08a32..4a96aa3de7d8 100644
|
||||
--- a/arch/x86/kernel/unwind_orc.c
|
||||
+++ b/arch/x86/kernel/unwind_orc.c
|
||||
@@ -320,19 +320,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address);
|
||||
|
||||
unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
|
||||
{
|
||||
- struct task_struct *task = state->task;
|
||||
-
|
||||
if (unwind_done(state))
|
||||
return NULL;
|
||||
|
||||
if (state->regs)
|
||||
return &state->regs->ip;
|
||||
|
||||
- if (task != current && state->sp == task->thread.sp) {
|
||||
- struct inactive_task_frame *frame = (void *)task->thread.sp;
|
||||
- return &frame->ret_addr;
|
||||
- }
|
||||
-
|
||||
if (state->sp)
|
||||
return (unsigned long *)state->sp - 1;
|
||||
|
||||
@@ -662,7 +655,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
} else {
|
||||
struct inactive_task_frame *frame = (void *)task->thread.sp;
|
||||
|
||||
- state->sp = task->thread.sp;
|
||||
+ state->sp = task->thread.sp + sizeof(*frame);
|
||||
state->bp = READ_ONCE_NOCHECK(frame->bp);
|
||||
state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
|
||||
state->signal = (void *)state->ip == ret_from_fork;
|
||||
--
|
||||
2.27.0
|
||||
|
12
kernel.spec
12
kernel.spec
@ -31,7 +31,7 @@ Summary: The Linux kernel
|
||||
# For a stable, released kernel, released_kernel should be 1.
|
||||
%global released_kernel 0
|
||||
|
||||
%global distro_build 37
|
||||
%global distro_build 38
|
||||
|
||||
%if 0%{?fedora}
|
||||
%define secure_boot_arch x86_64
|
||||
@ -70,13 +70,13 @@ Summary: The Linux kernel
|
||||
%endif
|
||||
|
||||
%define rpmversion 5.9.0
|
||||
%define pkgrelease 37
|
||||
%define pkgrelease 38
|
||||
|
||||
# This is needed to do merge window version magic
|
||||
%define patchlevel 9
|
||||
|
||||
# allow pkg_release to have configurable %%{?dist} tag
|
||||
%define specrelease 37%{?buildid}%{?dist}
|
||||
%define specrelease 38%{?buildid}%{?dist}
|
||||
|
||||
%define pkg_release %{specrelease}
|
||||
|
||||
@ -713,6 +713,7 @@ Source4000: README.rst
|
||||
%if !%{nopatches}
|
||||
|
||||
Patch1: patch-%{rpmversion}-redhat.patch
|
||||
Patch2: 0002-x86-unwind-orc-Fix-inactive-tasks-with-stack-pointer.patch
|
||||
%endif
|
||||
|
||||
# empty final patch to facilitate testing of kernel patches
|
||||
@ -1216,6 +1217,7 @@ cp -a %{SOURCE1} .
|
||||
%if !%{nopatches}
|
||||
|
||||
ApplyOptionalPatch patch-%{rpmversion}-redhat.patch
|
||||
ApplyPatch 0002-x86-unwind-orc-Fix-inactive-tasks-with-stack-pointer.patch
|
||||
%endif
|
||||
|
||||
ApplyOptionalPatch linux-kernel-test.patch
|
||||
@ -2643,6 +2645,10 @@ fi
|
||||
#
|
||||
#
|
||||
%changelog
|
||||
* Mon Nov 09 2020 Herton R. Krzesinski <herton@redhat.com> [5.9.0-38]
|
||||
- Apply upstream fix: commit f2ac57a4c49d - "x86/unwind/orc: Fix inactive tasks
|
||||
with stack pointer in %sp on GCC 10 compiled kernels" ("Herton R. Krzesinski")
|
||||
|
||||
* Fri Oct 30 2020 Don Zickus <dzickus@redhat.com> [5.9.0-37]
|
||||
- Enable kernel-tools. (Don Zickus)
|
||||
- Disable LTO on kernel and kernel-tools. (Don Zickus)
|
||||
|
File diff suppressed because it is too large
Load Diff
4
sources
4
sources
@ -1,3 +1,3 @@
|
||||
SHA512 (linux-5.9.tar.xz) = cafd463ca0c9b256479b7dd774f908cedef6d265c44f99de075558ab923808ddfacda1377ae000ce02730c6611527bddffbfc4421bbc4f44fd8e59d97cbc4363
|
||||
SHA512 (kernel-abi-whitelists-5.9.0-37.tar.bz2) = 03a9111423353a35e3cbe4473e1317f5dcc9b0834b9413b49ebe878f0cc94a42d6b78688132790ae6ba081cc50d6725f1a49b313b8f899943d30d7d095c64f40
|
||||
SHA512 (kernel-kabi-dw-5.9.0-37.tar.bz2) = 3cd53709d27d6d75b4e45b44933d39fbf7eafb1933c750cbaf48dfb3424f7e4ffeadcf136800f388040549bbe8e4b5a47608381ec85247c1474f1958859594d9
|
||||
SHA512 (kernel-abi-whitelists-5.9.0-38.tar.bz2) = 03a9111423353a35e3cbe4473e1317f5dcc9b0834b9413b49ebe878f0cc94a42d6b78688132790ae6ba081cc50d6725f1a49b313b8f899943d30d7d095c64f40
|
||||
SHA512 (kernel-kabi-dw-5.9.0-38.tar.bz2) = 3cd53709d27d6d75b4e45b44933d39fbf7eafb1933c750cbaf48dfb3424f7e4ffeadcf136800f388040549bbe8e4b5a47608381ec85247c1474f1958859594d9
|
||||
|
Loading…
Reference in New Issue
Block a user