1404 lines
55 KiB
Diff
1404 lines
55 KiB
Diff
commit 11c39a7375bd2759b53b89236e755c91a4f5aad8
|
|
Author: Frank Ch. Eigler <fche@redhat.com>
|
|
Date: Tue Jun 16 20:35:53 2020 -0400
|
|
|
|
RHBZ1847676: uprobes-inode tweaks redux
|
|
|
|
Added (back) a spinlock to manage the stapiu_consumer -> process_list
|
|
structure, since it is occasionally travered from uprobe pre-handlers,
|
|
which are sometimes entered in atomic context (e.g. on rhel7). There,
|
|
the normal mutex_t is unsafe. So restoring a spinlock_t just for
|
|
those shortlived traversals, rhel7 and rawhide are both happy.
|
|
|
|
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
|
|
index 156360e..922c9f1 100644
|
|
--- a/runtime/linux/uprobes-inode.c
|
|
+++ b/runtime/linux/uprobes-inode.c
|
|
@@ -143,7 +143,8 @@ struct stapiu_consumer {
|
|
struct list_head instance_list_head; // the resulting uprobe instances for this consumer
|
|
|
|
struct list_head process_list_head; // the processes for this consumer
|
|
-
|
|
+ spinlock_t process_list_lock; // protect list; used briefly from even atomic contexts
|
|
+
|
|
// List of perf counters used by each probe
|
|
// This list is an index into struct stap_perf_probe,
|
|
long perf_counters_dim;
|
|
@@ -174,16 +175,19 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
|
|
|
|
// First find the related process, set by stapiu_change_plus.
|
|
// NB: This is a linear search performed for every probe hit!
|
|
- // This could be an algorithmic problem if the list gets large, but
|
|
- // we'll wait until this is demonstratedly a hotspot before optimizing.
|
|
- mutex_lock(&c->consumer_lock);
|
|
+ // This could be an algorithmic problem if the list gets large,
|
|
+ // but we'll wait until this is demonstratedly a hotspot before
|
|
+ // optimizing. NB: on rhel7 sometimes we're invoked from atomic
|
|
+ // context, so must be careful to use the spinlock, not the
|
|
+ // mutex.
|
|
+ spin_lock(&c->process_list_lock);
|
|
list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
if (p->tgid == current->tgid) {
|
|
process = p;
|
|
break;
|
|
}
|
|
}
|
|
- mutex_unlock(&c->consumer_lock);
|
|
+ spin_unlock(&c->process_list_lock);
|
|
if (!process) {
|
|
#ifdef UPROBE_HANDLER_REMOVE
|
|
/* Once we're past the starting phase, we can be sure that any
|
|
@@ -344,7 +348,7 @@ static void
|
|
stapiu_decrement_semaphores(struct stapiu_consumer *consumers, size_t nconsumers)
|
|
{
|
|
size_t i;
|
|
- /* NB: no stapiu_process_slots_lock needed, as the task_finder engine is
|
|
+ /* NB: no process_list_lock use needed as the task_finder engine is
|
|
* already stopped by now, so no one else will mess with us. We need
|
|
* to be sleepable for access_process_vm. */
|
|
for (i = 0; i < nconsumers; ++i) {
|
|
@@ -433,7 +437,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers)
|
|
INIT_LIST_HEAD(&c->instance_list_head);
|
|
INIT_LIST_HEAD(&c->process_list_head);
|
|
mutex_init(&c->consumer_lock);
|
|
-
|
|
+ spin_lock_init(&c->process_list_lock);
|
|
+
|
|
dbug_uprobes("registering task-finder for procname:%s buildid:%s\n",
|
|
((char*)c->finder.procname ?: (char*)""),
|
|
((char*)c->finder.build_id ?: (char*)""));
|
|
@@ -560,7 +565,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
* calls us in this case with relocation=offset=0, so
|
|
* we don't have to worry about it. */
|
|
p->base = relocation - offset;
|
|
+ spin_lock (&c->process_list_lock);
|
|
list_add(&p->process_list, &c->process_list_head);
|
|
+ spin_unlock (&c->process_list_lock);
|
|
|
|
rc = 0;
|
|
mutex_unlock(&c->consumer_lock);
|
|
@@ -587,28 +594,40 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
{
|
|
int rc = 0;
|
|
struct stapiu_process *p;
|
|
+ int any_found;
|
|
|
|
if (! c->sdt_sem_offset) // nothing to do
|
|
return 0;
|
|
|
|
- /* NB: no lock after this point, as we need to be sleepable for
|
|
- * get/put_user semaphore action. The given process should be frozen
|
|
- * while we're busy, so it's not an issue.
|
|
- */
|
|
-
|
|
- mutex_lock(&c->consumer_lock);
|
|
-
|
|
+ // NB: we mustn't hold a lock while changing the task memory,
|
|
+ // but we need a lock to protect the process_list from concurrent
|
|
+ // add/delete. So hold a spinlock during iteration until the first
|
|
+ // hit, then unlock & process. NB: We could in principle have multiple
|
|
+ // instances of the same process in the list (e.g., if the process
|
|
+ // somehow maps in the same solib multiple times). We can't easily
|
|
+ // both iterate this list (in a spinlock-protected safe way), and
|
|
+ // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
|
|
+ // call within the loop. So we will hit only the copy in our list.
|
|
+ any_found = 0;
|
|
+ spin_lock(&c->process_list_lock);
|
|
/* Look through all the consumer's processes and increment semaphores. */
|
|
list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
unsigned long addr = p->base + c->sdt_sem_offset;
|
|
if (addr >= relocation && addr < relocation + length) {
|
|
- int rc2 = stapiu_write_task_semaphore(task, addr, +1);
|
|
+ int rc2;
|
|
+ // unlock list and process write for this entry
|
|
+ spin_unlock(&c->process_list_lock);
|
|
+ any_found=1;
|
|
+ rc2 = stapiu_write_task_semaphore(task, addr, +1);
|
|
if (!rc)
|
|
- rc = rc2;
|
|
+ rc = rc2;
|
|
+ break; // exit list_for_each loop
|
|
}
|
|
}
|
|
-
|
|
- mutex_unlock(&c->consumer_lock);
|
|
+ if (! any_found)
|
|
+ spin_unlock(&c->process_list_lock);
|
|
+ else
|
|
+ ; // already unlocked
|
|
|
|
return rc;
|
|
}
|
|
@@ -635,8 +654,9 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
|
|
// process is dying anyway
|
|
// - the stapiu_consumer's process_list linked list will have a record
|
|
// of the dead process: well, not great, it'll be cleaned up eventually,
|
|
- // and cleaning it up NOW is tricky - need some spin lock to protect the list,
|
|
- // but not out sleepy mutex:
|
|
+ // and cleaning it up NOW is tricky - we could use the process_list_lock
|
|
+ // to protect the list (as done in stapiu_change_semaphore_plus),
|
|
+ // but not our sleepy mutex:
|
|
//
|
|
// [ 1955.410237] ? stapiu_change_minus+0x38/0xf0 [stap_54a723c01c50d972590a5c901516849_15522]
|
|
// [ 1955.411583] __mutex_lock+0x35/0x820
|
|
|
|
commit 4ccdfe4536d702612912e96d7b6278b169917eaa
|
|
Author: Frank Ch. Eigler <fche@redhat.com>
|
|
Date: Mon Jul 6 13:27:46 2020 -0400
|
|
|
|
RHBZ1847676 cont'd: more uprobes-inode/onthefly concurrency controls
|
|
|
|
The systemtap.onthefly/*.exp tests had recently become hang-prone on
|
|
some kernels, for reasons still not completely understood. This set
|
|
of patches adds:
|
|
|
|
- irq*-block spinlocks into uprobes-invoked paths, in case there is
|
|
peculiar reentrancy (from irq-related tracepoints)
|
|
|
|
- a mutex lock/unlock into the stapiu_exit() path, in case there is
|
|
a concurrent stapiu_refresh() invoked by onthefly machinery around
|
|
exit time
|
|
|
|
- restrictions into the onthefly module_refresh() translator code to
|
|
preclude STAP_SESSION_STOPPING as a time to do any sort of refresh
|
|
operation. Now probes that were disarmed will stay disarmed during
|
|
probe-end/error/etc. processing, which is always valid with the
|
|
spec, and avoids a class of late module-refresh ops
|
|
|
|
Testing on rhel7 and rawhide indicates the reproducible hang is gone.
|
|
Our testsuite already tortures this code; invoke by hand via:
|
|
|
|
% sudo make installcheck RUNTESTFLAGS="-v affection.exp hrtimer_onthefly.exp kprobes_onthefly.exp tracepoint_onthefly.exp uprobes_onthefly.exp"
|
|
|
|
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
|
|
index 922c9f1..3de7281 100644
|
|
--- a/runtime/linux/uprobes-inode.c
|
|
+++ b/runtime/linux/uprobes-inode.c
|
|
@@ -172,6 +172,7 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
|
|
if (_stp_target) // need we filter by pid at all?
|
|
{
|
|
struct stapiu_process *p, *process = NULL;
|
|
+ unsigned long flags;
|
|
|
|
// First find the related process, set by stapiu_change_plus.
|
|
// NB: This is a linear search performed for every probe hit!
|
|
@@ -180,14 +181,14 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
|
|
// optimizing. NB: on rhel7 sometimes we're invoked from atomic
|
|
// context, so must be careful to use the spinlock, not the
|
|
// mutex.
|
|
- spin_lock(&c->process_list_lock);
|
|
+ spin_lock_irqsave(&c->process_list_lock, flags);
|
|
list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
if (p->tgid == current->tgid) {
|
|
process = p;
|
|
break;
|
|
}
|
|
}
|
|
- spin_unlock(&c->process_list_lock);
|
|
+ spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
if (!process) {
|
|
#ifdef UPROBE_HANDLER_REMOVE
|
|
/* Once we're past the starting phase, we can be sure that any
|
|
@@ -398,7 +399,7 @@ static void
|
|
stapiu_consumer_refresh(struct stapiu_consumer *c)
|
|
{
|
|
struct stapiu_instance *inst;
|
|
-
|
|
+
|
|
mutex_lock(& c->consumer_lock);
|
|
|
|
list_for_each_entry(inst, &c->instance_list_head, instance_list) {
|
|
@@ -420,7 +421,10 @@ stapiu_exit(struct stapiu_consumer *consumers, size_t nconsumers)
|
|
stapiu_decrement_semaphores(consumers, nconsumers);
|
|
for (i = 0; i < nconsumers; ++i) {
|
|
struct stapiu_consumer *c = &consumers[i];
|
|
+ // protect against conceivable stapiu_refresh() at same time
|
|
+ mutex_lock(& c->consumer_lock);
|
|
stapiu_consumer_unreg(c);
|
|
+ mutex_unlock(& c->consumer_lock);
|
|
/* NB: task_finder needs no unregister. */
|
|
}
|
|
}
|
|
@@ -480,6 +484,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
struct stapiu_instance *inst = NULL;
|
|
struct stapiu_process *p;
|
|
int j;
|
|
+ unsigned long flags;
|
|
|
|
if (! inode) {
|
|
rc = -EINVAL;
|
|
@@ -565,9 +570,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
* calls us in this case with relocation=offset=0, so
|
|
* we don't have to worry about it. */
|
|
p->base = relocation - offset;
|
|
- spin_lock (&c->process_list_lock);
|
|
+ spin_lock_irqsave (&c->process_list_lock, flags);
|
|
list_add(&p->process_list, &c->process_list_head);
|
|
- spin_unlock (&c->process_list_lock);
|
|
+ spin_unlock_irqrestore (&c->process_list_lock, flags);
|
|
|
|
rc = 0;
|
|
mutex_unlock(&c->consumer_lock);
|
|
@@ -595,6 +600,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
int rc = 0;
|
|
struct stapiu_process *p;
|
|
int any_found;
|
|
+ unsigned long flags;
|
|
|
|
if (! c->sdt_sem_offset) // nothing to do
|
|
return 0;
|
|
@@ -609,14 +615,14 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
// relax the spinlock enough to do a safe stapiu_write_task_semaphore()
|
|
// call within the loop. So we will hit only the copy in our list.
|
|
any_found = 0;
|
|
- spin_lock(&c->process_list_lock);
|
|
+ spin_lock_irqsave(&c->process_list_lock, flags);
|
|
/* Look through all the consumer's processes and increment semaphores. */
|
|
list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
unsigned long addr = p->base + c->sdt_sem_offset;
|
|
if (addr >= relocation && addr < relocation + length) {
|
|
int rc2;
|
|
// unlock list and process write for this entry
|
|
- spin_unlock(&c->process_list_lock);
|
|
+ spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
any_found=1;
|
|
rc2 = stapiu_write_task_semaphore(task, addr, +1);
|
|
if (!rc)
|
|
@@ -625,7 +631,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
}
|
|
}
|
|
if (! any_found)
|
|
- spin_unlock(&c->process_list_lock);
|
|
+ spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
else
|
|
; // already unlocked
|
|
|
|
diff --git a/translate.cxx b/translate.cxx
|
|
index 10b3d32..b10af5a 100644
|
|
--- a/translate.cxx
|
|
+++ b/translate.cxx
|
|
@@ -2144,19 +2144,13 @@ c_unparser::emit_module_refresh ()
|
|
o->newline() << "mutex_lock(&module_refresh_mutex);";
|
|
|
|
/* If we're not in STARTING/RUNNING state, don't try doing any work.
|
|
- PR16766 */
|
|
+ PR16766. We don't want to run refresh ops during e.g. STOPPING,
|
|
+ so as to possibly activate uprobes near shutdown. */
|
|
o->newline() << "state = atomic_read (session_state());";
|
|
- o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING && state != STAP_SESSION_ERROR) {";
|
|
- // cannot _stp_warn etc. since we're not in probe context
|
|
- o->newline(1) << "#if defined(__KERNEL__)";
|
|
- o->newline() << "if (state != STAP_SESSION_STOPPING)";
|
|
- o->newline(1) << "printk (KERN_ERR \"stap module notifier triggered in unexpected state %d\\n\", state);";
|
|
- o->indent(-1);
|
|
- o->newline() << "#endif";
|
|
-
|
|
+ o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING) {";
|
|
+ o->newline(1);
|
|
if (!session->runtime_usermode_p())
|
|
o->newline() << "mutex_unlock(&module_refresh_mutex);";
|
|
-
|
|
o->newline() << "return;";
|
|
o->newline(-1) << "}";
|
|
|
|
|
|
commit 046fa017d2ab7fea1a4ba2295c31f768c072855e
|
|
Author: Frank Ch. Eigler <fche@redhat.com>
|
|
Date: Sun Jul 12 09:57:15 2020 -0400
|
|
|
|
RHBZ1847676 cont'd: one more uprobes-inode/onthefly concurrency control
|
|
|
|
In uprobes-inode.c (stapiu_change_plus), the runtime can react to
|
|
arrivals of new mappings of a solib or executable by registering new
|
|
uprobes. Due to an assumption that this could not happen at
|
|
inconvenient times (such as a stapiu_refresh or near shutdown times),
|
|
the actual uprobes registration operation was done outside the
|
|
consumer_lock mutex being held. But it appears this can happen at bad
|
|
times, so the mutex needs to be held, just like within
|
|
stapiu_consumer_refresh().
|
|
|
|
The onthefly tests now survive iterating testing on rawhide+lockdep
|
|
and rhel7+lockdep.
|
|
|
|
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
|
|
index 3de7281..01c8a07 100644
|
|
--- a/runtime/linux/uprobes-inode.c
|
|
+++ b/runtime/linux/uprobes-inode.c
|
|
@@ -575,12 +575,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
spin_unlock_irqrestore (&c->process_list_lock, flags);
|
|
|
|
rc = 0;
|
|
- mutex_unlock(&c->consumer_lock);
|
|
-
|
|
// Register actual uprobe if cond_enabled right now
|
|
if (c->probe->cond_enabled)
|
|
(void) stapiu_register(inst, c);
|
|
- goto out;
|
|
+ goto out1;
|
|
|
|
out2:
|
|
_stp_kfree(inst);
|
|
|
|
commit a9a0131eb59e8abc197d3d2a553a86bcdec3dd70
|
|
Author: Frank Ch. Eigler <fche@redhat.com>
|
|
Date: Fri Jul 17 22:33:04 2020 -0400
|
|
|
|
rhbz1857749: uprobes-inode regression in sdt semaphore setting
|
|
|
|
Previous code neglected to set sdt.h semaphores for more than the
|
|
first process systemtap happened to encounter. This was from a
|
|
mistaken understanding of what it meant for stapiu_change_plus() to be
|
|
called with the same inode/consumer combination. Even though uprobes
|
|
are automatically shared, each new process still needs its perfctr and
|
|
sdt-semaphores individually set, so we do that now (as before the
|
|
rework of this code). Mechanized testing incoming shortly.
|
|
|
|
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
|
|
index 01c8a07..de81839 100644
|
|
--- a/runtime/linux/uprobes-inode.c
|
|
+++ b/runtime/linux/uprobes-inode.c
|
|
@@ -190,6 +190,10 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
|
|
}
|
|
spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
if (!process) {
|
|
+ /* We know that we're in -c/-x mode, but this process is not
|
|
+ in the process hierarchy, so the uprobe should be ignored
|
|
+ and future hits prevented. PR15278
|
|
+ */
|
|
#ifdef UPROBE_HANDLER_REMOVE
|
|
/* Once we're past the starting phase, we can be sure that any
|
|
* processes which are executing code in a mapping have already
|
|
@@ -242,8 +246,8 @@ stapiu_register (struct stapiu_instance* inst, struct stapiu_consumer* c)
|
|
(unsigned long) inst->inode->i_ino,
|
|
(void*) (uintptr_t) c->offset,
|
|
c->probe->index,
|
|
- ((char*)c->finder.procname ?: (char*)""),
|
|
- ((char*)c->finder.build_id ?: (char*)""));
|
|
+ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
|
|
+ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
|
|
|
|
if (!c->return_p) {
|
|
inst->kconsumer.handler = stapiu_probe_prehandler;
|
|
@@ -444,8 +448,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers)
|
|
spin_lock_init(&c->process_list_lock);
|
|
|
|
dbug_uprobes("registering task-finder for procname:%s buildid:%s\n",
|
|
- ((char*)c->finder.procname ?: (char*)""),
|
|
- ((char*)c->finder.build_id ?: (char*)""));
|
|
+ ((char*)c->finder.procname ?: ""),
|
|
+ ((char*)c->finder.build_id ?: ""));
|
|
|
|
ret = stap_register_task_finder_target(&c->finder);
|
|
if (ret != 0) {
|
|
@@ -499,22 +503,22 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
if (rc)
|
|
goto out;
|
|
|
|
- dbug_uprobes("notified for inode-offset u%sprobe "
|
|
+ dbug_uprobes("notified for inode-offset arrival u%sprobe "
|
|
"%lu:%p pidx %zu target procname:%s buildid:%s\n",
|
|
c->return_p ? "ret" : "",
|
|
(unsigned long) inode->i_ino,
|
|
(void*) (uintptr_t) c->offset,
|
|
c->probe->index,
|
|
- ((char*)c->finder.procname ?: (char*)""),
|
|
- ((char*)c->finder.build_id ?: (char*)""));
|
|
+ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
|
|
+ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
|
|
|
|
/* Check the buildid of the target (if we haven't already). We
|
|
* lock the target so we don't have concurrency issues. */
|
|
mutex_lock(&c->consumer_lock);
|
|
|
|
- // Check if we already have an instance for this inode, as though we
|
|
- // were called twice by task-finder mishap, or (hypothetically) the
|
|
- // shlib was mmapped twice.
|
|
+ // Check if we already have an instance for this inode. This is normal:
|
|
+ // if a different process maps the same solib, or forks into the same
|
|
+ // executable. In this case, we must not re-register the same uprobe.
|
|
list_for_each_entry(i, &c->instance_list_head, instance_list) {
|
|
if (i->inode == inode) {
|
|
inst = i;
|
|
@@ -522,28 +526,33 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
}
|
|
}
|
|
|
|
- if (inst) { // wouldn't expect a re-notification
|
|
- if (inst->registered_p != c->probe->cond_enabled)
|
|
- // ... this should not happen
|
|
- ;
|
|
- goto out1;
|
|
- }
|
|
-
|
|
- // Normal case: need a new one.
|
|
- inst = _stp_kzalloc(sizeof(struct stapiu_instance));
|
|
- if (! inst) {
|
|
- rc = -ENOMEM;
|
|
- goto out1;
|
|
- }
|
|
+ if (!inst) { // new instance; need new uprobe etc.
|
|
+ // Normal case: need a new one.
|
|
+ inst = _stp_kzalloc(sizeof(struct stapiu_instance));
|
|
+ if (! inst) {
|
|
+ rc = -ENOMEM;
|
|
+ goto out1;
|
|
+ }
|
|
|
|
- inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback
|
|
+ inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback
|
|
+
|
|
+ /* Grab the inode first (to prevent TOCTTOU problems). */
|
|
+ inst->inode = igrab(inode);
|
|
+ if (!inst->inode) {
|
|
+ rc = -EINVAL;
|
|
+ goto out2;
|
|
+ }
|
|
+
|
|
+ // Add the inode/instance to the list
|
|
+ list_add(&inst->instance_list, &c->instance_list_head);
|
|
|
|
- /* Grab the inode first (to prevent TOCTTOU problems). */
|
|
- inst->inode = igrab(inode);
|
|
- if (!inst->inode) {
|
|
- rc = -EINVAL;
|
|
- goto out2;
|
|
+ // Register the actual uprobe if cond_enabled already
|
|
+ if (c->probe->cond_enabled)
|
|
+ (void) stapiu_register(inst, c);
|
|
}
|
|
+
|
|
+ // ... but we may have to do per-process work anyway: perfctr
|
|
+ // initialization and sdt.h semaphore manipulation!
|
|
|
|
// Perform perfctr registration if required
|
|
for (j=0; j < c->perf_counters_dim; j++) {
|
|
@@ -551,12 +560,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
(void) _stp_perf_read_init ((c->perf_counters)[j], task);
|
|
}
|
|
|
|
- // Add the inode/instance to the list
|
|
- list_add(&inst->instance_list, &c->instance_list_head);
|
|
-
|
|
// Associate this consumer with this process. If we encounter
|
|
// resource problems here, we don't really have to undo the uprobe
|
|
- // registrations etc. already in effect.
|
|
+ // registrations etc. already in effect. It may break correct
|
|
+ // tracking of process hierarchy in -c/-x operation, but too bad.
|
|
p = _stp_kzalloc(sizeof(struct stapiu_process));
|
|
if (! p) {
|
|
rc = -ENOMEM;
|
|
@@ -573,11 +580,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
spin_lock_irqsave (&c->process_list_lock, flags);
|
|
list_add(&p->process_list, &c->process_list_head);
|
|
spin_unlock_irqrestore (&c->process_list_lock, flags);
|
|
-
|
|
+ // NB: actual semaphore value bumping is done later
|
|
+
|
|
rc = 0;
|
|
// Register actual uprobe if cond_enabled right now
|
|
- if (c->probe->cond_enabled)
|
|
- (void) stapiu_register(inst, c);
|
|
goto out1;
|
|
|
|
out2:
|
|
@@ -617,11 +623,21 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
/* Look through all the consumer's processes and increment semaphores. */
|
|
list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
unsigned long addr = p->base + c->sdt_sem_offset;
|
|
+ if (p->tgid != task->tgid) // skip other processes in the list
|
|
+ continue;
|
|
if (addr >= relocation && addr < relocation + length) {
|
|
int rc2;
|
|
// unlock list and process write for this entry
|
|
spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
any_found=1;
|
|
+
|
|
+ dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
|
|
+ "pidx %zu address %lx\n",
|
|
+ c->return_p ? "ret" : "",
|
|
+ (long) task->tgid,
|
|
+ c->probe->index,
|
|
+ (unsigned long) addr);
|
|
+
|
|
rc2 = stapiu_write_task_semaphore(task, addr, +1);
|
|
if (!rc)
|
|
rc = rc2;
|
|
@@ -641,15 +657,8 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
* about the semaphores, so we can just release the process slot. */
|
|
static int
|
|
stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
|
|
- unsigned long relocation, unsigned long length)
|
|
+ unsigned long addr, unsigned long length)
|
|
{
|
|
- dbug_uprobes("notified for inode-offset departure u%sprobe "
|
|
- "pidx %zu target procname:%s buildid:%s\n",
|
|
- c->return_p ? "ret" : "",
|
|
- c->probe->index,
|
|
- ((char*)c->finder.procname ?: (char*)""),
|
|
- ((char*)c->finder.build_id ?: (char*)""));
|
|
-
|
|
// We don't need do anything really.
|
|
// A process going away means:
|
|
// - its uprobes will no longer fire: no problem, the uprobe inode
|
|
@@ -674,6 +683,36 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
|
|
// [ 1955.436334] ? __x64_sys_execve+0x27/0x30
|
|
// [ 1955.437700] ? do_syscall_64+0x5c/0xa0
|
|
|
|
+ // But as an optimization - to avoid having them build up indefinitely,
|
|
+ // and make semaphore operations go slowly, we will nuke matching entries anyway.
|
|
+ unsigned long flags;
|
|
+ struct stapiu_process *p, *tmp;
|
|
+ unsigned nmatch=0;
|
|
+
|
|
+ spin_lock_irqsave(&c->process_list_lock, flags);
|
|
+ list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) {
|
|
+ // we nuke by matching semaphore address (where ..._semaphore_plus wrote)
|
|
+ // against the address range being unmapped
|
|
+ unsigned long semaddr = p->base + c->sdt_sem_offset;
|
|
+ if (p->tgid != task->tgid) // skip other processes in the list
|
|
+ continue;
|
|
+ if (semaddr >= addr && semaddr < addr + length) {
|
|
+ list_del(&p->process_list);
|
|
+ _stp_kfree (p);
|
|
+ nmatch ++;
|
|
+ }
|
|
+ }
|
|
+ spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
+
|
|
+ if (nmatch > 0)
|
|
+ dbug_uprobes("notified for inode-offset departure u%sprobe "
|
|
+ "pidx %zu matches:%u procname:%s buildid:%s\n",
|
|
+ c->return_p ? "ret" : "",
|
|
+ c->probe->index,
|
|
+ nmatch,
|
|
+ ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
|
|
+ ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
|
|
commit e90530877ee21cffa2a9d53567ba5b5de1dd9b32
|
|
Author: Frank Ch. Eigler <fche@redhat.com>
|
|
Date: Mon Jul 27 07:58:30 2020 -0400
|
|
|
|
PR25568 / RHBZ1857749: buildid/uprobes/inode rework, task_finder etc. side
|
|
|
|
During work on a new stress tests for build-id based probes (coming in
|
|
next commit), it was found that the task_finder2 logic for buildid
|
|
verification didn't, well, work, because it was never run (due to an
|
|
erroneous pathlen conditional), and couldn't be safely run where it
|
|
was (because it was under spinlock but would have done
|
|
access_process_vm). Reworked the relevant bits of task_finder2 to
|
|
perform build-id verification for processes later - during the quiesce
|
|
callback periods. (Buildid verification for solibs is already done
|
|
in the task_finder2 consumer uprobes-inode.c.)
|
|
|
|
Testing with sdt_misc indicated a case where a preexisting process
|
|
(with solib sdt.h semaphores) was being attached to by a new stap
|
|
binary. task_finder2's enumeration of the preexising processes'
|
|
memory map segments violated assumptions by recent code related to
|
|
tracking in stapiu_process[] lists. (It did not mirror the temporal
|
|
ld.so mmap sequence.) Changed this tracking to use the inode* as the
|
|
key, and stop trying to track mapping lengths, to make positive
|
|
matches and eliminate duplicate stapiu_process[] entries for the same
|
|
(process,solib) permutation. Reworked stapiu_process[] accumulation
|
|
generally to move to the two immediate task_finder callbacks, out of
|
|
stapiu_change_plus().
|
|
|
|
Added lots of commentary and diagnostics throughout. stap
|
|
-DDEBUG_UPROBES give meaningful info about uprobes & sdt semaphores;
|
|
with -DDEBUG_TASK_FINDER, more but not overwhelming relevant info
|
|
appears.
|
|
|
|
diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
|
|
index 9777efb..8b8057a 100644
|
|
--- a/runtime/linux/task_finder2.c
|
|
+++ b/runtime/linux/task_finder2.c
|
|
@@ -652,8 +652,8 @@ __verify_build_id(struct task_struct *tsk, unsigned long addr,
|
|
tsk_build_id[build_id_len] = '\0';
|
|
|
|
if (strcmp(build_id, tsk_build_id)) {
|
|
- dbug_task(2, "target build-id not matched: [%s] != [%s]\n",
|
|
- build_id, tsk_build_id);
|
|
+ dbug_task(2, "target build-id not matched: [%s] @ 0x%lx != [%s]\n",
|
|
+ build_id, addr, tsk_build_id);
|
|
return false;
|
|
}
|
|
|
|
@@ -884,16 +884,9 @@ __stp_utrace_attach_match_filename(struct task_struct *tsk,
|
|
// procname/build-id and match an "all thread" probe.
|
|
if (tgt == NULL)
|
|
continue;
|
|
- /* buildid-based target */
|
|
- else if (tgt->build_id_len > 0 && tgt->procname > 0
|
|
- && !__verify_build_id(tsk,
|
|
- tgt->build_id_vaddr,
|
|
- tgt->build_id,
|
|
- tgt->build_id_len))
|
|
- {
|
|
- continue;
|
|
- }
|
|
- else if (tgt->build_id_len == 0 && tgt->pathlen > 0
|
|
+ /* buildid-based target ... gets checked in __stp_tf_quiesce_worker */
|
|
+ /* procname-based target */
|
|
+ else if (tgt->pathlen > 0
|
|
&& (tgt->pathlen != filelen
|
|
|| strcmp(tgt->procname, filename) != 0))
|
|
{
|
|
@@ -1341,6 +1334,34 @@ __stp_tf_quiesce_worker(struct task_work *work)
|
|
return;
|
|
}
|
|
|
|
+ /* If we had a build-id based executable probe (so we have a
|
|
+ * tgt->build_id) set, we could not check it back in
|
|
+ * __stp_utrace_attach_* because we can't do sleepy
|
|
+ * access_process_vm() calls from there. BUt now that we're
|
|
+ * in process context, quiesced, finally we can check. If we
|
|
+ * were build-id based, and the build-id does not match, then
|
|
+ * we UTRACE_DETACH from this process and skip the callbacks.
|
|
+ *
|
|
+ * XXX: For processes that do match, we redo this check every
|
|
+ * time this callbacks is encountered somehow. That's
|
|
+ * probably unnecessary.
|
|
+ */
|
|
+ if (tgt->build_id_len > 0) {
|
|
+ int ok = __verify_build_id(current,
|
|
+ tgt->build_id_vaddr,
|
|
+ tgt->build_id,
|
|
+ tgt->build_id_len);
|
|
+
|
|
+ dbug_task(2, "verified buildid-target process pid=%ld ok=%d\n",
|
|
+ (long) current->tgid, ok);
|
|
+ if (!ok) {
|
|
+ // stap_utrace_detach (current, & tgt->ops);
|
|
+ /* Remember that this task_work_func is finished. */
|
|
+ stp_task_work_func_done();
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
__stp_tf_handler_start();
|
|
|
|
/* NB make sure we run mmap callbacks before other callbacks
|
|
@@ -1434,6 +1455,21 @@ __stp_utrace_task_finder_target_quiesce(u32 action,
|
|
}
|
|
}
|
|
else {
|
|
+ /* Like in __stp_tf_quiesce_worker(), verify build-id now if belated. */
|
|
+ if (tgt->build_id_len > 0) {
|
|
+ int ok = __verify_build_id(current,
|
|
+ tgt->build_id_vaddr,
|
|
+ tgt->build_id,
|
|
+ tgt->build_id_len);
|
|
+
|
|
+ dbug_task(2, "verified2 buildid-target process pid=%ld ok=%d\n",
|
|
+ (long) current->tgid, ok);
|
|
+ if (!ok) {
|
|
+ __stp_tf_handler_end();
|
|
+ return UTRACE_RESUME; // NB: not _DETACH; that interferes with other engines
|
|
+ }
|
|
+ }
|
|
+
|
|
/* NB make sure we run mmap callbacks before other callbacks
|
|
* like 'probe process.begin' handlers so that the vma tracker
|
|
* is already initialized in the latter contexts */
|
|
@@ -1797,15 +1833,7 @@ stap_start_task_finder(void)
|
|
struct stap_task_finder_target, list);
|
|
if (tgt == NULL)
|
|
continue;
|
|
- /* buildid-based target */
|
|
- else if (tgt->build_id_len > 0 && tgt->procname > 0
|
|
- && !__verify_build_id(tsk,
|
|
- tgt->build_id_vaddr,
|
|
- tgt->build_id,
|
|
- tgt->build_id_len))
|
|
- {
|
|
- continue;
|
|
- }
|
|
+ /* buildid-based target ... gets checked in __stp_tf_quiesce_worker */
|
|
/* procname-based target */
|
|
else if (tgt->build_id == 0 && tgt->pathlen > 0
|
|
&& (tgt->pathlen != mmpathlen
|
|
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
|
|
index de81839..757da30 100644
|
|
--- a/runtime/linux/uprobes-inode.c
|
|
+++ b/runtime/linux/uprobes-inode.c
|
|
@@ -76,7 +76,7 @@ struct stapiu_instance {
|
|
struct list_head instance_list; // to find other instances e.g. during shutdown
|
|
|
|
struct uprobe_consumer kconsumer; // the kernel-side struct for uprobe callbacks etc.
|
|
- struct inode *inode; // XXX: refcount?
|
|
+ struct inode *inode; // refcounted
|
|
unsigned registered_p:1; // whether the this kconsumer is registered (= armed, live)
|
|
|
|
struct stapiu_consumer *sconsumer; // whose instance are we
|
|
@@ -86,10 +86,14 @@ struct stapiu_instance {
|
|
/* A snippet to record the per-process vm where a particular
|
|
executable/solib was mapped. Used for sdt semaphore setting, and
|
|
for identifying processes of our interest (vs. disinterest) for
|
|
- uprobe hits. This object is owned by a stapiu_consumer. */
|
|
+ uprobe hits. This object is owned by a stapiu_consumer. We use
|
|
+ the same inode* as the stapiu_instance, and have the same lifespan,
|
|
+ so don't bother separately refcount it.
|
|
+*/
|
|
struct stapiu_process {
|
|
struct list_head process_list; // to find other processes
|
|
|
|
+ struct inode *inode; // the inode* for solib or executable
|
|
unsigned long relocation; // the mmap'ed .text address
|
|
unsigned long base; // the address to apply sdt offsets against
|
|
pid_t tgid; // pid
|
|
@@ -392,6 +396,7 @@ stapiu_consumer_unreg(struct stapiu_consumer *c)
|
|
// multiple times in the list. Don't break after the first.
|
|
list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) {
|
|
list_del(&p->process_list);
|
|
+ // no refcount used for the inode field
|
|
_stp_kfree (p);
|
|
}
|
|
}
|
|
@@ -498,6 +503,8 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
/* Do the buildid check. NB: on F29+, offset may not equal
|
|
0 for LOADable "R E" segments, because the read-only .note.*
|
|
stuff may have been loaded earlier, separately. PR23890. */
|
|
+ // NB: this is not really necessary for buildid-based probes,
|
|
+ // which had this verified already.
|
|
rc = _stp_usermodule_check(task, c->module_name,
|
|
relocation - offset);
|
|
if (rc)
|
|
@@ -527,7 +534,6 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
}
|
|
|
|
if (!inst) { // new instance; need new uprobe etc.
|
|
- // Normal case: need a new one.
|
|
inst = _stp_kzalloc(sizeof(struct stapiu_instance));
|
|
if (! inst) {
|
|
rc = -ENOMEM;
|
|
@@ -560,30 +566,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
(void) _stp_perf_read_init ((c->perf_counters)[j], task);
|
|
}
|
|
|
|
- // Associate this consumer with this process. If we encounter
|
|
- // resource problems here, we don't really have to undo the uprobe
|
|
- // registrations etc. already in effect. It may break correct
|
|
- // tracking of process hierarchy in -c/-x operation, but too bad.
|
|
- p = _stp_kzalloc(sizeof(struct stapiu_process));
|
|
- if (! p) {
|
|
- rc = -ENOMEM;
|
|
- goto out1;
|
|
- }
|
|
- p->tgid = task->tgid;
|
|
- p->relocation = relocation;
|
|
- /* The base is used for relocating semaphores. If the
|
|
- * probe is in an ET_EXEC binary, then that offset
|
|
- * already is a real address. But stapiu_process_found
|
|
- * calls us in this case with relocation=offset=0, so
|
|
- * we don't have to worry about it. */
|
|
- p->base = relocation - offset;
|
|
- spin_lock_irqsave (&c->process_list_lock, flags);
|
|
- list_add(&p->process_list, &c->process_list_head);
|
|
- spin_unlock_irqrestore (&c->process_list_lock, flags);
|
|
- // NB: actual semaphore value bumping is done later
|
|
+ // NB: process_list[] already extended up in stapiu_mmap_found().
|
|
|
|
rc = 0;
|
|
- // Register actual uprobe if cond_enabled right now
|
|
goto out1;
|
|
|
|
out2:
|
|
@@ -599,7 +584,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
* Increment the semaphore now. */
|
|
static int
|
|
stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task,
|
|
- unsigned long relocation, unsigned long length)
|
|
+ unsigned long relocation, struct inode* inode)
|
|
{
|
|
int rc = 0;
|
|
struct stapiu_process *p;
|
|
@@ -609,6 +594,13 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
if (! c->sdt_sem_offset) // nothing to do
|
|
return 0;
|
|
|
|
+ dbug_uprobes("considering semaphore (u%sprobe) pid %ld inode 0x%lx"
|
|
+ "pidx %zu\n",
|
|
+ c->return_p ? "ret" : "",
|
|
+ (long) task->tgid,
|
|
+ (unsigned long) inode,
|
|
+ c->probe->index);
|
|
+
|
|
// NB: we mustn't hold a lock while changing the task memory,
|
|
// but we need a lock to protect the process_list from concurrent
|
|
// add/delete. So hold a spinlock during iteration until the first
|
|
@@ -617,32 +609,31 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
|
|
// somehow maps in the same solib multiple times). We can't easily
|
|
// both iterate this list (in a spinlock-protected safe way), and
|
|
// relax the spinlock enough to do a safe stapiu_write_task_semaphore()
|
|
- // call within the loop. So we will hit only the copy in our list.
|
|
+ // call within the loop. So we will hit only the first copy in our list.
|
|
any_found = 0;
|
|
spin_lock_irqsave(&c->process_list_lock, flags);
|
|
/* Look through all the consumer's processes and increment semaphores. */
|
|
list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
unsigned long addr = p->base + c->sdt_sem_offset;
|
|
- if (p->tgid != task->tgid) // skip other processes in the list
|
|
- continue;
|
|
- if (addr >= relocation && addr < relocation + length) {
|
|
- int rc2;
|
|
- // unlock list and process write for this entry
|
|
- spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
- any_found=1;
|
|
-
|
|
- dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
|
|
- "pidx %zu address %lx\n",
|
|
- c->return_p ? "ret" : "",
|
|
- (long) task->tgid,
|
|
- c->probe->index,
|
|
- (unsigned long) addr);
|
|
-
|
|
- rc2 = stapiu_write_task_semaphore(task, addr, +1);
|
|
- if (!rc)
|
|
- rc = rc2;
|
|
- break; // exit list_for_each loop
|
|
- }
|
|
+ int rc2;
|
|
+ if (p->tgid != task->tgid) continue; // skip other processes in the list
|
|
+ if (p->inode != inode) continue; // skip other inodes
|
|
+
|
|
+ // unlock list and process write for this entry
|
|
+ spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
+ any_found=1;
|
|
+
|
|
+ dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
|
|
+ "pidx %zu address 0x%lx\n",
|
|
+ c->return_p ? "ret" : "",
|
|
+ (long) task->tgid,
|
|
+ c->probe->index,
|
|
+ (unsigned long) addr);
|
|
+
|
|
+ rc2 = stapiu_write_task_semaphore(task, addr, +1);
|
|
+ if (!rc)
|
|
+ rc = rc2;
|
|
+ break; // exit list_for_each loop
|
|
}
|
|
if (! any_found)
|
|
spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
@@ -755,17 +746,41 @@ stapiu_process_found(struct stap_task_finder_target *tf_target,
|
|
|
|
if (!process_p)
|
|
return 0; /* ignore threads */
|
|
-
|
|
+
|
|
+ dbug_uprobes("process_found pid=%ld f.p=%s f.b=%s c.p=%s c.b=%s\n",
|
|
+ (long)task->tgid,
|
|
+ ((char*)c->finder.procname ?: ""),
|
|
+ ((char*)c->finder.build_id ?: ""),
|
|
+ ((char*)c->solib_pathname ?: ""),
|
|
+ ((char*)c->solib_build_id ?: ""));
|
|
+
|
|
/* ET_EXEC events are like shlib events, but with 0 relocation bases */
|
|
if (register_p) {
|
|
int rc = -EINVAL;
|
|
struct inode *inode = stapiu_get_task_inode(task);
|
|
|
|
if (inode) {
|
|
- rc = stapiu_change_plus(c, task, 0, TASK_SIZE,
|
|
- 0, 0, inode);
|
|
- stapiu_change_semaphore_plus(c, task, 0,
|
|
- TASK_SIZE);
|
|
+ // Add a stapiu_process record to the consumer, so that
|
|
+ // the semaphore increment logic will accept this task.
|
|
+ struct stapiu_process* p;
|
|
+ unsigned long flags;
|
|
+ p = _stp_kzalloc(sizeof(struct stapiu_process));
|
|
+ if (p) {
|
|
+ p->tgid = task->tgid;
|
|
+ p->relocation = 0;
|
|
+ p->inode = inode;
|
|
+ p->base = 0;
|
|
+ spin_lock_irqsave (&c->process_list_lock, flags);
|
|
+ list_add(&p->process_list, &c->process_list_head);
|
|
+ spin_unlock_irqrestore (&c->process_list_lock, flags);
|
|
+ } else {
|
|
+ _stp_warn("out of memory tracking executable in process %ld\n",
|
|
+ (long) task->tgid);
|
|
+ }
|
|
+
|
|
+ rc = stapiu_change_plus(c, task, 0, TASK_SIZE, 0, 0, inode);
|
|
+
|
|
+ stapiu_change_semaphore_plus(c, task, 0, inode);
|
|
}
|
|
return rc;
|
|
} else
|
|
@@ -776,6 +791,8 @@ stapiu_process_found(struct stap_task_finder_target *tf_target,
|
|
bool
|
|
__verify_build_id (struct task_struct *tsk, unsigned long addr,
|
|
unsigned const char *build_id, int build_id_len);
|
|
+// defined in task_finder2.c
|
|
+
|
|
|
|
|
|
/* The task_finder_mmap_callback. These callbacks are NOT
|
|
@@ -791,28 +808,119 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target,
|
|
struct stapiu_consumer *c =
|
|
container_of(tf_target, struct stapiu_consumer, finder);
|
|
int rc = 0;
|
|
+ struct stapiu_process* p;
|
|
+ int known_mapping_p;
|
|
+ unsigned long flags;
|
|
|
|
- /* The file path or build-id must match. The build-id address
|
|
- * is calculated using start address of this vma, the file
|
|
- * offset of the vma start address and the file offset of
|
|
- * the build-id. */
|
|
- if (c->solib_pathname && path && strcmp (path, c->solib_pathname))
|
|
- return 0;
|
|
- if (c->solib_build_id_len > 0 && !__verify_build_id(task,
|
|
- addr - offset + c->solib_build_id_vaddr,
|
|
- c->solib_build_id,
|
|
- c->solib_build_id_len))
|
|
- return 0;
|
|
+ /*
|
|
+ We need to verify that this file/mmap corresponds to the given stapiu_consumer.
|
|
+ One could compare (inode) file name, but that won't work with buildid-based
|
|
+ uprobes. For those, one cannot just
|
|
+
|
|
+ __verify_build_id(... addr - offset + c->solib_build_id_vaddr ...)
|
|
+
|
|
+ because dlopen()ing a shared library involves multiple mmaps, including
|
|
+ some at repeating/offset addresses. See glibc _dl_map_segments() in various
|
|
+ versions. So by the fourth call (!) on modern glibc's, we get a VM_WRITE-able
|
|
+ data segment mapped, but that's at a load/mapping address that is offset by a
|
|
+ page from the base (file offset=0) mapping.
|
|
+
|
|
+ e.g. on Fedora 32 / glibc 2.31, with testsuite/libsdt_buildid.so:
|
|
+
|
|
+ Program Headers:
|
|
+ Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
|
|
+ LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0004b8 0x0004b8 R 0x1000
|
|
+ LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000161 0x000161 R E 0x1000
|
|
+ LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x0000cc 0x0000cc R 0x1000
|
|
+ LOAD 0x002df8 0x0000000000003df8 0x0000000000003df8 0x000232 0x000238 RW 0x1000
|
|
+ DYNAMIC 0x002e10 0x0000000000003e10 0x0000000000003e10 0x0001d0 0x0001d0 RW 0x8
|
|
+
|
|
+ strace:
|
|
+ openat(AT_FDCWD, ".../libsdt_buildid.so", O_RDONLY|O_CLOEXEC) = 3
|
|
+ mmap(NULL, 16432, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x148c764ac000
|
|
+ mmap(0x148c764ad000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x148c764ad000
|
|
+ mmap(0x148c764ae000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764ae000
|
|
+ mmap(0x148c764af000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764af000
|
|
+
|
|
+ Note how the virtual mapping for the fourth mmap (also) maps file-offset 0x2000 at
|
|
+ vm offset 0x3000.
|
|
+
|
|
+ So what we do is rely on the name/buildid validation tests being run
|
|
+ -earlier- in the dlopen/mmap sequence to validate near-future
|
|
+ mmap()s. We search the c->process_list[] for a mapping that already
|
|
+ overlaps the new range, and if so, consider it validated ... whether
|
|
+ for the solib_pathname or the solib_build_id case.
|
|
+
|
|
+ This is complicated for startup-time traversal of processes/mmaps,
|
|
+ where it seems sometimes we get notifications out of temporal sequence.
|
|
+ */
|
|
|
|
- /* 1 - shared libraries' executable segments load from offset 0
|
|
- * - ld.so convention offset != 0 is now allowed
|
|
- * so stap_uprobe_change_plus can set a semaphore,
|
|
- * i.e. a static extern, in a shared object
|
|
- * 2 - the shared library we're interested in
|
|
- * 3 - mapping should be executable or writeable (for
|
|
- * semaphore in .so)
|
|
- * NB: or both, on kernels that lack noexec mapping
|
|
- */
|
|
+ known_mapping_p = 0;
|
|
+ spin_lock_irqsave(&c->process_list_lock, flags);
|
|
+ list_for_each_entry(p, &c->process_list_head, process_list) {
|
|
+ if (p->tgid != task->tgid) continue;
|
|
+ if (p->inode != dentry->d_inode) continue;
|
|
+ known_mapping_p = 1;
|
|
+ break;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&c->process_list_lock, flags);
|
|
+
|
|
+
|
|
+ // Check if this mapping (solib) is of interest: whether we expect
|
|
+ // it by buildid or name.
|
|
+
|
|
+ if (! known_mapping_p) {
|
|
+ /* The file path or build-id must match. The build-id address
|
|
+ * is calculated using start address of this vma, the file
|
|
+ * offset of the vma start address and the file offset of
|
|
+ * the build-id. */
|
|
+ if (c->solib_pathname && path && strcmp (path, c->solib_pathname))
|
|
+ return 0;
|
|
+ if (c->solib_build_id_len > 0 && !__verify_build_id(task,
|
|
+ addr - offset + c->solib_build_id_vaddr,
|
|
+ c->solib_build_id,
|
|
+ c->solib_build_id_len))
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ // If we made it this far, we have an interesting solib.
|
|
+
|
|
+ dbug_uprobes("mmap_found pid=%ld path=%s addr=0x%lx length=%lu offset=%lu flags=0x%lx known=%d\n",
|
|
+ (long) task->tgid, path, addr, length, offset, vm_flags, known_mapping_p);
|
|
+
|
|
+ if (! known_mapping_p) {
|
|
+ // OK, let's add it. The first mapping should be a VM_READ mapping
|
|
+ // of the entire solib file, which will also serve as the apprx.
|
|
+ // outer bounds of the repeatedly-mapped segments.
|
|
+
|
|
+#if 0
|
|
+ // Consider an assumption about the dlopen/mmap sequence
|
|
+ // If it comes out of sequence, we could get length/base wrong in the stored
|
|
+ // stapiu_process, which could lead us to miscalculate semaphore addresses.
|
|
+ //
|
|
+ // However, this has been observed on task-finder initial-enumeration case,
|
|
+ // (sdt_misc.exp, where a solib test is already running when stap starts).
|
|
+ if (offset != 0)
|
|
+ return 0;
|
|
+#endif
|
|
+
|
|
+ // Associate this consumer with this process. If we encounter
|
|
+ // resource problems here, we don't really have to undo the uprobe
|
|
+ // registrations etc. already in effect. It may break correct
|
|
+ // tracking of process hierarchy in -c/-x operation, but too bad.
|
|
+ p = _stp_kzalloc(sizeof(struct stapiu_process));
|
|
+ if (p) {
|
|
+ p->tgid = task->tgid;
|
|
+ p->relocation = addr;
|
|
+ p->inode = dentry->d_inode;
|
|
+ p->base = addr-offset; // ... in case caught this during the second mmap
|
|
+ spin_lock_irqsave (&c->process_list_lock, flags);
|
|
+ list_add(&p->process_list, &c->process_list_head);
|
|
+ spin_unlock_irqrestore (&c->process_list_lock, flags);
|
|
+ } else
|
|
+ _stp_warn("out of memory tracking solib %s in process %ld\n",
|
|
+ path, (long) task->tgid);
|
|
+ }
|
|
|
|
/* Check non-writable, executable sections for probes. */
|
|
if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE))
|
|
@@ -827,7 +935,7 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target,
|
|
*/
|
|
|
|
if ((rc == 0) && (vm_flags & VM_WRITE))
|
|
- rc = stapiu_change_semaphore_plus(c, task, addr, length);
|
|
+ rc = stapiu_change_semaphore_plus(c, task, addr, dentry->d_inode);
|
|
|
|
return rc;
|
|
}
|
|
diff --git a/runtime/sym.c b/runtime/sym.c
|
|
index be09ec8..21d820a 100644
|
|
--- a/runtime/sym.c
|
|
+++ b/runtime/sym.c
|
|
@@ -713,9 +713,10 @@ static int _stp_build_id_check (struct _stp_module *m,
|
|
// NB: It is normal for different binaries with the same file path
|
|
// coexist in the same system via chroot or namespaces, therefore
|
|
// we make sure below is really a warning.
|
|
- _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" address "
|
|
+ _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" pid %ld address "
|
|
"%#lx, expected %s actual %s\n",
|
|
- m->path, notes_addr, hexstring_theory, hexstring_practice);
|
|
+ m->path, (long) tsk->tgid,
|
|
+ notes_addr, hexstring_theory, hexstring_practice);
|
|
return 1;
|
|
}
|
|
|
|
|
|
commit 5e1ef9d7f2a5ea6e5511ef5228cf05dda1c570b3
|
|
Author: Frank Ch. Eigler <fche@redhat.com>
|
|
Date: Mon Jul 27 07:58:30 2020 -0400
|
|
|
|
PR25568 / RHBZ1857749: sdt_buildid.exp test case
|
|
|
|
Add new test that checks for combinations of buildid and pathname
|
|
based uprobes for executables and shared libraries.
|
|
|
|
diff --git a/testsuite/systemtap.base/sdt_buildid.c b/testsuite/systemtap.base/sdt_buildid.c
|
|
new file mode 100644
|
|
index 0000000..ccbb2f2
|
|
--- /dev/null
|
|
+++ b/testsuite/systemtap.base/sdt_buildid.c
|
|
@@ -0,0 +1,26 @@
|
|
+#include <unistd.h>
|
|
+#include <stdlib.h>
|
|
+#include <stdio.h>
|
|
+
|
|
+void bar ();
|
|
+
|
|
+#ifndef ONLY_MAIN
|
|
+#include "sdt_buildid_.h"
|
|
+
|
|
+void
|
|
+bar ()
|
|
+{
|
|
+ printf("%s=%ld\n", "test_probe_0_semaphore", SDT_BUILDID_TEST_PROBE_0_ENABLED());
|
|
+ if (SDT_BUILDID_TEST_PROBE_0_ENABLED())
|
|
+ SDT_BUILDID_TEST_PROBE_0();
|
|
+}
|
|
+#endif
|
|
+
|
|
+#ifndef NO_MAIN
|
|
+int
|
|
+main ()
|
|
+{
|
|
+ bar();
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
diff --git a/testsuite/systemtap.base/sdt_buildid.exp b/testsuite/systemtap.base/sdt_buildid.exp
|
|
new file mode 100644
|
|
index 0000000..3141fd6
|
|
--- /dev/null
|
|
+++ b/testsuite/systemtap.base/sdt_buildid.exp
|
|
@@ -0,0 +1,214 @@
|
|
+set test "sdt_buildid"
|
|
+
|
|
+set pbtype_flags {{additional_flags=-g} {} {}}
|
|
+set fail_count 0
|
|
+
|
|
+# Compile a C program to use as the user-space probing target
|
|
+set stap_path $env(SYSTEMTAP_PATH)/stap
|
|
+set sup_dpath "[pwd]/sdt_buildid_.d"
|
|
+set sup_hpath "[pwd]/sdt_buildid_.h"
|
|
+set sup_opath "[pwd]/sdt_buildid_.o"
|
|
+
|
|
+# Run dtrace
|
|
+if {[installtest_p]} {
|
|
+ set dtrace $env(SYSTEMTAP_PATH)/dtrace
|
|
+} else {
|
|
+ set dtrace ../dtrace
|
|
+}
|
|
+
|
|
+verbose -log "$dtrace --types -h -s $srcdir/$subdir/sdt_buildid_.d"
|
|
+if {[catch {exec $dtrace --types -h -s \
|
|
+ $srcdir/$subdir/sdt_buildid_.d} res]} {
|
|
+ verbose -log "unable to run $dtrace: $res"
|
|
+}
|
|
+verbose -log "$dtrace --types -G -s $srcdir/$subdir/sdt_buildid_.d"
|
|
+if {[catch {exec $dtrace --types -G -s \
|
|
+ $srcdir/$subdir/sdt_buildid_.d} res]} {
|
|
+ verbose -log "unable to run $dtrace: $res"
|
|
+}
|
|
+if {[file exists $sup_hpath] && [file exists $sup_opath]} then {
|
|
+ pass "$test dtrace"
|
|
+} else {
|
|
+ incr fail_count
|
|
+ fail "$test dtrace"
|
|
+ return
|
|
+}
|
|
+
|
|
+set sup_flags [sdt_includes]
|
|
+set sup_flags "$sup_flags additional_flags=-Wall"
|
|
+set sup_flags "$sup_flags additional_flags=-Werror"
|
|
+set sup_flags "$sup_flags additional_flags=$sup_opath"
|
|
+set sup_flags "$sup_flags additional_flags=-I."
|
|
+set sup_exepath "[pwd]/sdt_buildid.x"
|
|
+
|
|
+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exepath \
|
|
+ executable $sup_flags]
|
|
+if { $res != "" } {
|
|
+ incr fail_count
|
|
+ verbose "target_compile failed: $res" 2
|
|
+ fail "$test compiling"
|
|
+ return
|
|
+} else {
|
|
+ pass "$test compiling"
|
|
+}
|
|
+
|
|
+
|
|
+set sup41_flags "$sup_flags additional_flags=-shared"
|
|
+set sup41_flags "$sup41_flags additional_flags=-fPIC"
|
|
+set sup41_flags "$sup41_flags additional_flags=-DNO_MAIN"
|
|
+set sup_sopath "[pwd]/libsdt_buildid.so"
|
|
+set sup_exe2path "[pwd]/sdt_buildid_shared.x"
|
|
+set res0 [target_compile $srcdir/$subdir/sdt_buildid.c $sup_sopath \
|
|
+ executable $sup41_flags ]
|
|
+set sup42_flags "additional_flags=-Wl,-rpath,[pwd]"
|
|
+set sup42_flags "$sup42_flags additional_flags=-L[pwd] additional_flags=-lsdt_buildid"
|
|
+set sup42_flags "$sup42_flags additional_flags=-DONLY_MAIN"
|
|
+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exe2path \
|
|
+ executable $sup42_flags ]
|
|
+if { $res0 != "" || $res != "" } {
|
|
+ incr fail_count
|
|
+ verbose "target_compile failed: $res0 $res" 2
|
|
+ fail "$test compiling -shared"
|
|
+ return
|
|
+} else {
|
|
+ pass "$test compiling -shared"
|
|
+}
|
|
+
|
|
+catch { exec eu-readelf -n $sup_exepath | grep Build.ID | awk "{print \$NF}" } bid1
|
|
+catch { exec eu-readelf -n $sup_sopath | grep Build.ID | awk "{print \$NF}" } bidso
|
|
+catch { exec eu-readelf -n $sup_exe2path | grep Build.ID | awk "{print \$NF}" } bid2
|
|
+verbose -log "buildid: $sup_exepath $bid1"
|
|
+verbose -log "buildid: $sup_sopath $bidso"
|
|
+verbose -log "buildid: $sup_exe2path $bid2"
|
|
+# though we won't use the $bid2
|
|
+
|
|
+if {![installtest_p]} {
|
|
+ untested $test
|
|
+ return
|
|
+}
|
|
+
|
|
+# To test via build-id, we need a debuginfod server to scan the testsuite build
|
|
+# directory.
|
|
+
|
|
+
|
|
+if [catch {exec /usr/bin/which debuginfod} debuginfod] then {
|
|
+ untested "$test debuginfod"
|
|
+} else {
|
|
+ set port [expr {10000 + int(rand()*10000)}]
|
|
+ spawn $debuginfod -p $port -d :memory: -F .
|
|
+ set debuginfod_pid [exp_pid $spawn_id]
|
|
+ # give it time to scan the build directory
|
|
+ sleep 10
|
|
+ # XXX: we could expect some verbose traffic
|
|
+ set env(DEBUGINFOD_URLS) "http://localhost:$port $env(DEBUGINFOD_URLS)"
|
|
+ verbose -log "started debuginfod on port $port"
|
|
+
|
|
+ set subtest "$test debuginfod buildid-exe buildid-solib"
|
|
+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $bidso
|
|
+ set ok 0
|
|
+ expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+ }
|
|
+ catch {close}; catch {wait}
|
|
+ if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+ set subtest "$test debuginfod buildid-exe path-solib"
|
|
+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $sup_sopath
|
|
+ set ok 0
|
|
+ expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+ }
|
|
+ catch {close}; catch {wait}
|
|
+ if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+ set subtest "$test debuginfod path-exe buildid-solib"
|
|
+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $bidso
|
|
+ set ok 0
|
|
+ expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+ }
|
|
+ catch {close}; catch {wait}
|
|
+ if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+ set subtest "$test debuginfod buildid-solib"
|
|
+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bidso
|
|
+ set ok 0
|
|
+ expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+ }
|
|
+ catch {close}; catch {wait}
|
|
+ if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+ set subtest "$test debuginfod buildid-exe"
|
|
+ spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1
|
|
+ set ok 0
|
|
+ expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+ }
|
|
+ catch {close}; catch {wait}
|
|
+ if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+ kill -INT $debuginfod_pid
|
|
+}
|
|
+
|
|
+
|
|
+set subtest "$test non-buildid both"
|
|
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $sup_sopath
|
|
+set ok 0
|
|
+expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+}
|
|
+catch {close}; catch {wait}
|
|
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+set subtest "$test non-buildid exe"
|
|
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath
|
|
+set ok 0
|
|
+expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+}
|
|
+catch {close}; catch {wait}
|
|
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+set subtest "$test non-buildid solib"
|
|
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_sopath
|
|
+set ok 0
|
|
+expect {
|
|
+ -timeout 240
|
|
+ -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
|
|
+ eof { }
|
|
+ timeout { }
|
|
+}
|
|
+catch {close}; catch {wait}
|
|
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
|
|
+
|
|
+return
|
|
diff --git a/testsuite/systemtap.base/sdt_buildid.stp b/testsuite/systemtap.base/sdt_buildid.stp
|
|
new file mode 100644
|
|
index 0000000..a26d183
|
|
--- /dev/null
|
|
+++ b/testsuite/systemtap.base/sdt_buildid.stp
|
|
@@ -0,0 +1,19 @@
|
|
+global count
|
|
+
|
|
+function trace () {
|
|
+ printf ("Count %d [%d] %s %s\n", count++, pid(), $$name, pp())
|
|
+}
|
|
+
|
|
+probe process(@1).mark("test_probe_0") { trace() }
|
|
+%( $# > 1 %? probe process(@2).mark("test_probe_0") { trace() } %)
|
|
+
|
|
+probe begin
|
|
+{
|
|
+ printf ("Count %d\n", count++)
|
|
+}
|
|
+
|
|
+probe timer.s(1) // exit quickly after enough marks fire
|
|
+{
|
|
+ if (count > 10) exit()
|
|
+}
|
|
+
|
|
diff --git a/testsuite/systemtap.base/sdt_buildid_.d b/testsuite/systemtap.base/sdt_buildid_.d
|
|
new file mode 100644
|
|
index 0000000..ebfca55
|
|
--- /dev/null
|
|
+++ b/testsuite/systemtap.base/sdt_buildid_.d
|
|
@@ -0,0 +1,4 @@
|
|
+provider sdt_buildid {
|
|
+ probe test_probe_0 ();
|
|
+};
|
|
+
|