commit 7eed8d1fef36997b9e4c1d9cdb67643483a51e56 Author: William Cohen Date: Fri Nov 4 11:12:05 2022 -0400 Ensure that SystemTap runtime uses smp_processor_id() in proper context There were cases on Fedora 36 and Rawhide running kernels with CONFIG_DEBUG_PREEMPT=y where systemtap scripts would trigger kernel log messages like the following: [ 257.544406] check_preemption_disabled: 4 callbacks suppressed [ 257.544409] BUG: using smp_processor_id() in preemptible [00000000] code: staprun/2106 [ 257.544465] caller is _stp_runtime_context_trylock+0x12/0x70 [stap_e36600406768aeefd49daf9fc7a3d23c_2106] [ 257.544507] CPU: 0 PID: 2106 Comm: staprun Tainted: G OE ------- --- 6.1.0-0.rc2.20221028git23758867219c.24.fc38.x86_64 #1 [ 257.544544] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-1.fc36 04/01/2014 [ 257.544571] Call Trace: [ 257.544583] <TASK> [ 257.544593] dump_stack_lvl+0x5b/0x77 [ 257.544620] check_preemption_disabled+0xe1/0xf0 [ 257.544641] _stp_runtime_context_trylock+0x12/0x70 [stap_e36600406768aeefd49daf9fc7a3d23c_2106] [ 257.544673] _stp_runtime_entryfn_get_context+0xb/0x70 [stap_e36600406768aeefd49daf9fc7a3d23c_2106] [ 257.544705] _stp_ctl_send+0x76/0x1e0 [stap_e36600406768aeefd49daf9fc7a3d23c_2106] [ 257.544735] _stp_transport_init+0x71a/0x860 [stap_e36600406768aeefd49daf9fc7a3d23c_2106] [ 257.544771] ? kallsyms_on_each_symbol+0x30/0x30 [stap_e36600406768aeefd49daf9fc7a3d23c_2106] [ 257.544803] do_one_initcall+0x6b/0x320 [ 257.544827] do_init_module+0x4a/0x200 [ 257.544844] __do_sys_init_module+0x16a/0x1a0 [ 257.544870] do_syscall_64+0x58/0x80 [ 257.544885] ? up_read+0x17/0x20 [ 257.544902] ? lock_is_held_type+0xe8/0x140 [ 257.544921] ? asm_exc_page_fault+0x22/0x30 [ 257.544939] ? 
lockdep_hardirqs_on+0x7d/0x100 [ 257.544956] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 257.544975] RIP: 0033:0x7f3cde12f5de [ 257.544992] Code: 48 8b 0d 35 68 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 02 68 0c 00 f7 d8 64 89 01 48 [ 257.545010] RSP: 002b:00007ffc5170c418 EFLAGS: 00000246 ORIG_RAX: 00000000000000af [ 257.545010] RAX: ffffffffffffffda RBX: 0000563620bd4020 RCX: 00007f3cde12f5de [ 257.545010] RDX: 0000563620bd4020 RSI: 0000000000040ea0 RDI: 00007f3cde44a010 [ 257.545010] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 [ 257.545010] R10: 0000000000000053 R11: 0000000000000246 R12: 00007ffc5170c510 [ 257.545010] R13: 00007f3cde44a010 R14: 00007ffc5170c440 R15: 00007f3cde4631e8 [ 257.545010] </TASK> This issue was introduced by git commit 1641b6e7ea which added a fast path check that used smp_processor_id() without first calling preempt_disable(). The code now ensures that preemption is disabled before calling smp_processor_id(). diff --git a/runtime/linux/runtime_context.h b/runtime/linux/runtime_context.h index 3ed3cbd22..ee3870f32 100644 --- a/runtime/linux/runtime_context.h +++ b/runtime/linux/runtime_context.h @@ -49,13 +49,18 @@ static bool _stp_runtime_context_trylock(void) { bool locked; + /* Need to disable preemption because of the smp_processor_id() call + in _stp_runtime_get_context(). */ + preempt_disable(); + /* fast path to ignore new online CPUs without percpu context memory * allocations. this also serves as an extra safe guard for NULL context * pointers. */ - if (unlikely(_stp_runtime_get_context() == NULL)) + if (unlikely(_stp_runtime_get_context() == NULL)) { + preempt_enable_no_resched(); return false; + } - preempt_disable(); locked = atomic_add_unless(&_stp_contexts_busy_ctr, 1, INT_MAX); if (!locked) preempt_enable_no_resched();