systemtap/SOURCES/rhbz1847676,1857749.patch

commit 11c39a7375bd2759b53b89236e755c91a4f5aad8
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Tue Jun 16 20:35:53 2020 -0400

    RHBZ1847676: uprobes-inode tweaks redux

    Added (back) a spinlock to manage the stapiu_consumer -> process_list
    structure, since it is occasionally travered from uprobe pre-handlers,
    which are sometimes entered in atomic context (e.g. on rhel7).  There,
    the normal mutex_t is unsafe.  So restoring a spinlock_t just for
    those shortlived traversals, rhel7 and rawhide are both happy.

diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index 156360e..922c9f1 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -143,7 +143,8 @@ struct stapiu_consumer {
   struct list_head instance_list_head; // the resulting uprobe instances for this consumer

   struct list_head process_list_head; // the processes for this consumer
-
+  spinlock_t process_list_lock; // protect list; used briefly from even atomic contexts
+
   // List of perf counters used by each probe
   // This list is an index into struct stap_perf_probe,
   long perf_counters_dim;
@@ -174,16 +175,19 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)

       // First find the related process, set by stapiu_change_plus.
       // NB: This is a linear search performed for every probe hit!
-      // This could be an algorithmic problem if the list gets large, but
-      // we'll wait until this is demonstratedly a hotspot before optimizing.
-      mutex_lock(&c->consumer_lock);
+      // This could be an algorithmic problem if the list gets large,
+      // but we'll wait until this is demonstratedly a hotspot before
+      // optimizing.  NB: on rhel7 sometimes we're invoked from atomic
+      // context, so must be careful to use the spinlock, not the
+      // mutex.
+      spin_lock(&c->process_list_lock);
       list_for_each_entry(p, &c->process_list_head, process_list) {
 	if (p->tgid == current->tgid) {
 	  process = p;
 	  break;
 	}
       }
-      mutex_unlock(&c->consumer_lock);
+      spin_unlock(&c->process_list_lock);
       if (!process) {
 #ifdef UPROBE_HANDLER_REMOVE
 	/* Once we're past the starting phase, we can be sure that any
@@ -344,7 +348,7 @@ static void
 stapiu_decrement_semaphores(struct stapiu_consumer *consumers, size_t nconsumers)
 {
   size_t i;
-  /* NB: no stapiu_process_slots_lock needed, as the task_finder engine is
+  /* NB: no process_list_lock use needed as the task_finder engine is
    * already stopped by now, so no one else will mess with us.  We need
    * to be sleepable for access_process_vm.  */
   for (i = 0; i < nconsumers; ++i) {
@@ -433,7 +437,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers)
     INIT_LIST_HEAD(&c->instance_list_head);
     INIT_LIST_HEAD(&c->process_list_head);
     mutex_init(&c->consumer_lock);
-
+    spin_lock_init(&c->process_list_lock);
+
     dbug_uprobes("registering task-finder for procname:%s buildid:%s\n",
 		 ((char*)c->finder.procname ?: (char*)""),
 		 ((char*)c->finder.build_id ?: (char*)""));
@@ -560,7 +565,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
    * calls us in this case with relocation=offset=0, so
    * we don't have to worry about it.  */
   p->base = relocation - offset;
+  spin_lock (&c->process_list_lock);
   list_add(&p->process_list, &c->process_list_head);
+  spin_unlock (&c->process_list_lock);

   rc = 0;
   mutex_unlock(&c->consumer_lock);
@@ -587,28 +594,40 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
 {
   int rc = 0;
   struct stapiu_process *p;
+  int any_found;

   if (! c->sdt_sem_offset) // nothing to do
     return 0;

-  /* NB: no lock after this point, as we need to be sleepable for
-   * get/put_user semaphore action.  The given process should be frozen
-   * while we're busy, so it's not an issue.
-   */
-
-  mutex_lock(&c->consumer_lock);
-
+  // NB: we mustn't hold a lock while changing the task memory,
+  // but we need a lock to protect the process_list from concurrent
+  // add/delete.  So hold a spinlock during iteration until the first
+  // hit, then unlock & process.  NB: We could in principle have multiple
+  // instances of the same process in the list (e.g., if the process
+  // somehow maps in the same solib multiple times).  We can't easily
+  // both iterate this list (in a spinlock-protected safe way), and
+  // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
+  // call within the loop.  So we will hit only the copy in our list.
+  any_found = 0;
+  spin_lock(&c->process_list_lock);
   /* Look through all the consumer's processes and increment semaphores.  */
   list_for_each_entry(p, &c->process_list_head, process_list) {
     unsigned long addr = p->base + c->sdt_sem_offset;
     if (addr >= relocation && addr < relocation + length) {
-      int rc2 = stapiu_write_task_semaphore(task, addr, +1);
+      int rc2;
+      // unlock list and process write for this entry
+      spin_unlock(&c->process_list_lock);
+      any_found=1;
+      rc2 = stapiu_write_task_semaphore(task, addr, +1);
       if (!rc)
-	rc = rc2;
+        rc = rc2;
+      break; // exit list_for_each loop
     }
   }
-
-  mutex_unlock(&c->consumer_lock);
+  if (! any_found)
+    spin_unlock(&c->process_list_lock);
+  else
+    ; // already unlocked

   return rc;
 }
@@ -635,8 +654,9 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
   //   process is dying anyway
   // - the stapiu_consumer's process_list linked list will have a record
   //   of the dead process: well, not great, it'll be cleaned up eventually,
-  //   and cleaning it up NOW is tricky - need some spin lock to protect the list,
-  //   but not out sleepy mutex:
+  //   and cleaning it up NOW is tricky - we could use the process_list_lock
+  //   to protect the list (as done in stapiu_change_semaphore_plus),
+  //   but not our sleepy mutex:
   //
   // [ 1955.410237]  ? stapiu_change_minus+0x38/0xf0 [stap_54a723c01c50d972590a5c901516849_15522]
   // [ 1955.411583]  __mutex_lock+0x35/0x820

commit 4ccdfe4536d702612912e96d7b6278b169917eaa
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Mon Jul 6 13:27:46 2020 -0400

    RHBZ1847676 cont'd: more uprobes-inode/onthefly concurrency controls

    The systemtap.onthefly/*.exp tests had recently become hang-prone on
    some kernels, for reasons still not completely understood.  This set
    of patches adds:

    - irq*-block spinlocks into uprobes-invoked paths, in case there is
      peculiar reentrancy (from irq-related tracepoints)

    - a mutex lock/unlock into the stapiu_exit() path, in case there is
      a concurrent stapiu_refresh() invoked by onthefly machinery around
      exit time

    - restrictions into the onthefly module_refresh() translator code to
      preclude STAP_SESSION_STOPPING as a time to do any sort of refresh
      operation.  Now probes that were disarmed will stay disarmed during
      probe-end/error/etc. processing, which is always valid with the
      spec, and avoids a class of late module-refresh ops

    Testing on rhel7 and rawhide indicates the reproducible hang is gone.
    Our testsuite already tortures this code; invoke by hand via:

    % sudo make installcheck RUNTESTFLAGS="-v affection.exp hrtimer_onthefly.exp kprobes_onthefly.exp tracepoint_onthefly.exp uprobes_onthefly.exp"

diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index 922c9f1..3de7281 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -172,6 +172,7 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
   if (_stp_target) // need we filter by pid at all?
     {
       struct stapiu_process *p, *process = NULL;
+      unsigned long flags;

       // First find the related process, set by stapiu_change_plus.
       // NB: This is a linear search performed for every probe hit!
@@ -180,14 +181,14 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
       // optimizing.  NB: on rhel7 sometimes we're invoked from atomic
       // context, so must be careful to use the spinlock, not the
       // mutex.
-      spin_lock(&c->process_list_lock);
+      spin_lock_irqsave(&c->process_list_lock, flags);
       list_for_each_entry(p, &c->process_list_head, process_list) {
 	if (p->tgid == current->tgid) {
 	  process = p;
 	  break;
 	}
       }
-      spin_unlock(&c->process_list_lock);
+      spin_unlock_irqrestore(&c->process_list_lock, flags);
       if (!process) {
 #ifdef UPROBE_HANDLER_REMOVE
 	/* Once we're past the starting phase, we can be sure that any
@@ -398,7 +399,7 @@ static void
 stapiu_consumer_refresh(struct stapiu_consumer *c)
 {
   struct stapiu_instance *inst;
-
+
   mutex_lock(& c->consumer_lock);

   list_for_each_entry(inst, &c->instance_list_head, instance_list) {
@@ -420,7 +421,10 @@ stapiu_exit(struct stapiu_consumer *consumers, size_t nconsumers)
   stapiu_decrement_semaphores(consumers, nconsumers);
   for (i = 0; i < nconsumers; ++i) {
     struct stapiu_consumer *c = &consumers[i];
+    // protect against conceivable stapiu_refresh() at same time
+    mutex_lock(& c->consumer_lock);
     stapiu_consumer_unreg(c);
+    mutex_unlock(& c->consumer_lock);
     /* NB: task_finder needs no unregister. */
   }
 }
@@ -480,6 +484,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
   struct stapiu_instance *inst = NULL;
   struct stapiu_process *p;
   int j;
+  unsigned long flags;

   if (! inode) {
       rc = -EINVAL;
@@ -565,9 +570,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
    * calls us in this case with relocation=offset=0, so
    * we don't have to worry about it.  */
   p->base = relocation - offset;
-  spin_lock (&c->process_list_lock);
+  spin_lock_irqsave (&c->process_list_lock, flags);
   list_add(&p->process_list, &c->process_list_head);
-  spin_unlock (&c->process_list_lock);
+  spin_unlock_irqrestore (&c->process_list_lock, flags);

   rc = 0;
   mutex_unlock(&c->consumer_lock);
@@ -595,6 +600,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
   int rc = 0;
   struct stapiu_process *p;
   int any_found;
+  unsigned long flags;

   if (! c->sdt_sem_offset) // nothing to do
     return 0;
@@ -609,14 +615,14 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
   // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
   // call within the loop.  So we will hit only the copy in our list.
   any_found = 0;
-  spin_lock(&c->process_list_lock);
+  spin_lock_irqsave(&c->process_list_lock, flags);
   /* Look through all the consumer's processes and increment semaphores.  */
   list_for_each_entry(p, &c->process_list_head, process_list) {
     unsigned long addr = p->base + c->sdt_sem_offset;
     if (addr >= relocation && addr < relocation + length) {
       int rc2;
       // unlock list and process write for this entry
-      spin_unlock(&c->process_list_lock);
+      spin_unlock_irqrestore(&c->process_list_lock, flags);
       any_found=1;
       rc2 = stapiu_write_task_semaphore(task, addr, +1);
       if (!rc)
@@ -625,7 +631,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
     }
   }
   if (! any_found)
-    spin_unlock(&c->process_list_lock);
+    spin_unlock_irqrestore(&c->process_list_lock, flags);
   else
     ; // already unlocked

diff --git a/translate.cxx b/translate.cxx
index 10b3d32..b10af5a 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -2144,19 +2144,13 @@ c_unparser::emit_module_refresh ()
     o->newline() << "mutex_lock(&module_refresh_mutex);";

   /* If we're not in STARTING/RUNNING state, don't try doing any work.
-     PR16766 */
+     PR16766.  We don't want to run refresh ops during e.g. STOPPING,
+     so as to possibly activate uprobes near shutdown. */
   o->newline() << "state = atomic_read (session_state());";
-  o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING && state != STAP_SESSION_ERROR) {";
-  // cannot _stp_warn etc. since we're not in probe context
-  o->newline(1) << "#if defined(__KERNEL__)";
-  o->newline() << "if (state != STAP_SESSION_STOPPING)";
-  o->newline(1) << "printk (KERN_ERR \"stap module notifier triggered in unexpected state %d\\n\", state);";
-  o->indent(-1);
-  o->newline() << "#endif";
-
+  o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING) {";
+  o->newline(1);
   if (!session->runtime_usermode_p())
     o->newline() << "mutex_unlock(&module_refresh_mutex);";
-
   o->newline() << "return;";
   o->newline(-1) << "}";


commit 046fa017d2ab7fea1a4ba2295c31f768c072855e
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Sun Jul 12 09:57:15 2020 -0400

    RHBZ1847676 cont'd: one more uprobes-inode/onthefly concurrency control

    In uprobes-inode.c (stapiu_change_plus), the runtime can react to
    arrivals of new mappings of a solib or executable by registering new
    uprobes.  Due to an assumption that this could not happen at
    inconvenient times (such as a stapiu_refresh or near shutdown times),
    the actual uprobes registration operation was done outside the
    consumer_lock mutex being held.  But it appears this can happen at bad
    times, so the mutex needs to be held, just like within
    stapiu_consumer_refresh().

    The onthefly tests now survive iterating testing on rawhide+lockdep
    and rhel7+lockdep.

diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index 3de7281..01c8a07 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -575,12 +575,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
   spin_unlock_irqrestore (&c->process_list_lock, flags);

   rc = 0;
-  mutex_unlock(&c->consumer_lock);
-
   // Register actual uprobe if cond_enabled right now
   if (c->probe->cond_enabled)
     (void) stapiu_register(inst, c);
-  goto out;
+  goto out1;

  out2:
   _stp_kfree(inst);

commit a9a0131eb59e8abc197d3d2a553a86bcdec3dd70
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Fri Jul 17 22:33:04 2020 -0400

    rhbz1857749: uprobes-inode regression in sdt semaphore setting

    Previous code neglected to set sdt.h semaphores for more than the
    first process systemtap happened to encounter.  This was from a
    mistaken understanding of what it meant for stapiu_change_plus() to be
    called with the same inode/consumer combination.  Even though uprobes
    are automatically shared, each new process still needs its perfctr and
    sdt-semaphores individually set, so we do that now (as before the
    rework of this code).  Mechanized testing incoming shortly.

diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index 01c8a07..de81839 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -190,6 +190,10 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
       }
       spin_unlock_irqrestore(&c->process_list_lock, flags);
       if (!process) {
+        /* We know that we're in -c/-x mode, but this process is not
+           in the process hierarchy, so the uprobe should be ignored
+           and future hits prevented.  PR15278
+        */
 #ifdef UPROBE_HANDLER_REMOVE
 	/* Once we're past the starting phase, we can be sure that any
 	 * processes which are executing code in a mapping have already
@@ -242,8 +246,8 @@ stapiu_register (struct stapiu_instance* inst, struct stapiu_consumer* c)
 	       (unsigned long) inst->inode->i_ino,
 	       (void*) (uintptr_t) c->offset,
 	       c->probe->index,
-	       ((char*)c->finder.procname ?: (char*)""),
-	       ((char*)c->finder.build_id ?: (char*)""));
+	       ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
+               ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));

   if (!c->return_p) {
     inst->kconsumer.handler = stapiu_probe_prehandler;
@@ -444,8 +448,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers)
     spin_lock_init(&c->process_list_lock);

     dbug_uprobes("registering task-finder for procname:%s buildid:%s\n",
-		 ((char*)c->finder.procname ?: (char*)""),
-		 ((char*)c->finder.build_id ?: (char*)""));
+                 ((char*)c->finder.procname ?: ""),
+                 ((char*)c->finder.build_id ?: ""));

     ret = stap_register_task_finder_target(&c->finder);
     if (ret != 0) {
@@ -499,22 +503,22 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
   if (rc)
     goto out;

-  dbug_uprobes("notified for inode-offset u%sprobe "
+  dbug_uprobes("notified for inode-offset arrival u%sprobe "
 	       "%lu:%p pidx %zu target procname:%s buildid:%s\n",
 	       c->return_p ? "ret" : "",
 	       (unsigned long) inode->i_ino,
 	       (void*) (uintptr_t) c->offset,
 	       c->probe->index,
-	       ((char*)c->finder.procname ?: (char*)""),
-	       ((char*)c->finder.build_id ?: (char*)""));
+	       ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
+               ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));

   /* Check the buildid of the target (if we haven't already). We
    * lock the target so we don't have concurrency issues. */
   mutex_lock(&c->consumer_lock);

-  // Check if we already have an instance for this inode, as though we
-  // were called twice by task-finder mishap, or (hypothetically) the
-  // shlib was mmapped twice.
+  // Check if we already have an instance for this inode.  This is normal:
+  // if a different process maps the same solib, or forks into the same
+  // executable.  In this case, we must not re-register the same uprobe.
   list_for_each_entry(i, &c->instance_list_head, instance_list) {
     if (i->inode == inode) {
       inst = i;
@@ -522,28 +526,33 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
     }
   }

-  if (inst) { // wouldn't expect a re-notification
-    if (inst->registered_p != c->probe->cond_enabled)
-      // ... this should not happen
-      ;
-    goto out1;
-  }
-
-  // Normal case: need a new one.
-  inst = _stp_kzalloc(sizeof(struct stapiu_instance));
-  if (! inst) {
-    rc = -ENOMEM;
-    goto out1;
-  }
+  if (!inst) { // new instance; need new uprobe etc.
+    // Normal case: need a new one.
+    inst = _stp_kzalloc(sizeof(struct stapiu_instance));
+    if (! inst) {
+      rc = -ENOMEM;
+      goto out1;
+    }

-  inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback
+    inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback
+
+    /* Grab the inode first (to prevent TOCTTOU problems). */
+    inst->inode = igrab(inode);
+    if (!inst->inode) {
+      rc = -EINVAL;
+      goto out2;
+    }
+
+    // Add the inode/instance to the list
+    list_add(&inst->instance_list, &c->instance_list_head);

-  /* Grab the inode first (to prevent TOCTTOU problems). */
-  inst->inode = igrab(inode);
-  if (!inst->inode) {
-    rc = -EINVAL;
-    goto out2;
+    // Register the actual uprobe if cond_enabled already
+    if (c->probe->cond_enabled)
+      (void) stapiu_register(inst, c);
   }
+
+  // ... but we may have to do per-process work anyway: perfctr
+  // initialization and sdt.h semaphore manipulation!

   // Perform perfctr registration if required
   for (j=0; j < c->perf_counters_dim; j++) {
@@ -551,12 +560,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
       (void) _stp_perf_read_init ((c->perf_counters)[j], task);
   }

-  // Add the inode/instance to the list
-  list_add(&inst->instance_list, &c->instance_list_head);
-
   // Associate this consumer with this process.  If we encounter
   // resource problems here, we don't really have to undo the uprobe
-  // registrations etc. already in effect.
+  // registrations etc. already in effect.  It may break correct
+  // tracking of process hierarchy in -c/-x operation, but too bad.
   p = _stp_kzalloc(sizeof(struct stapiu_process));
   if (! p) {
     rc = -ENOMEM;
@@ -573,11 +580,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
   spin_lock_irqsave (&c->process_list_lock, flags);
   list_add(&p->process_list, &c->process_list_head);
   spin_unlock_irqrestore (&c->process_list_lock, flags);
-
+  // NB: actual semaphore value bumping is done later
+
   rc = 0;
   // Register actual uprobe if cond_enabled right now
-  if (c->probe->cond_enabled)
-    (void) stapiu_register(inst, c);
   goto out1;

  out2:
@@ -617,11 +623,21 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
   /* Look through all the consumer's processes and increment semaphores.  */
   list_for_each_entry(p, &c->process_list_head, process_list) {
     unsigned long addr = p->base + c->sdt_sem_offset;
+    if (p->tgid != task->tgid) // skip other processes in the list
+      continue;
     if (addr >= relocation && addr < relocation + length) {
       int rc2;
       // unlock list and process write for this entry
       spin_unlock_irqrestore(&c->process_list_lock, flags);
       any_found=1;
+
+      dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
+                   "pidx %zu address %lx\n",
+                   c->return_p ? "ret" : "",
+                   (long) task->tgid,
+                   c->probe->index,
+                   (unsigned long) addr);
+
       rc2 = stapiu_write_task_semaphore(task, addr, +1);
       if (!rc)
         rc = rc2;
@@ -641,15 +657,8 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
  * about the semaphores, so we can just release the process slot.  */
 static int
 stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
-		    unsigned long relocation, unsigned long length)
+		    unsigned long addr, unsigned long length)
 {
-  dbug_uprobes("notified for inode-offset departure u%sprobe "
-	       "pidx %zu target procname:%s buildid:%s\n",
-	       c->return_p ? "ret" : "",
-	       c->probe->index,
-	       ((char*)c->finder.procname ?: (char*)""),
-	       ((char*)c->finder.build_id ?: (char*)""));
-
   // We don't need do anything really.
   // A process going away means:
   // - its uprobes will no longer fire: no problem, the uprobe inode
@@ -674,6 +683,36 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
   // [ 1955.436334]  ? __x64_sys_execve+0x27/0x30
   // [ 1955.437700]  ? do_syscall_64+0x5c/0xa0

+  // But as an optimization - to avoid having them build up indefinitely,
+  // and make semaphore operations go slowly, we will nuke matching entries anyway.
+  unsigned long flags;
+  struct stapiu_process *p, *tmp;
+  unsigned nmatch=0;
+
+  spin_lock_irqsave(&c->process_list_lock, flags);
+  list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) {
+    // we nuke by matching semaphore address (where ..._semaphore_plus wrote)
+    // against the address range being unmapped
+    unsigned long semaddr = p->base + c->sdt_sem_offset;
+    if (p->tgid != task->tgid) // skip other processes in the list
+      continue;
+    if (semaddr >= addr && semaddr < addr + length) {
+      list_del(&p->process_list);
+      _stp_kfree (p);
+      nmatch ++;
+    }
+  }
+  spin_unlock_irqrestore(&c->process_list_lock, flags);
+
+  if (nmatch > 0)
+    dbug_uprobes("notified for inode-offset departure u%sprobe "
+                 "pidx %zu matches:%u procname:%s buildid:%s\n",
+                 c->return_p ? "ret" : "",
+                 c->probe->index,
+                 nmatch,
+                 ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
+                 ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
+
   return 0;
 }


commit e90530877ee21cffa2a9d53567ba5b5de1dd9b32
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Mon Jul 27 07:58:30 2020 -0400

    PR25568 / RHBZ1857749: buildid/uprobes/inode rework, task_finder etc. side

    During work on a new stress tests for build-id based probes (coming in
    next commit), it was found that the task_finder2 logic for buildid
    verification didn't, well, work, because it was never run (due to an
    erroneous pathlen conditional), and couldn't be safely run where it
    was (because it was under spinlock but would have done
    access_process_vm).  Reworked the relevant bits of task_finder2 to
    perform build-id verification for processes later - during the quiesce
    callback periods.  (Buildid verification for solibs is already done
    in the task_finder2 consumer uprobes-inode.c.)

    Testing with sdt_misc indicated a case where a preexisting process
    (with solib sdt.h semaphores) was being attached to by a new stap
    binary.  task_finder2's enumeration of the preexising processes'
    memory map segments violated assumptions by recent code related to
    tracking in stapiu_process[] lists.  (It did not mirror the temporal
    ld.so mmap sequence.)  Changed this tracking to use the inode* as the
    key, and stop trying to track mapping lengths, to make positive
    matches and eliminate duplicate stapiu_process[] entries for the same
    (process,solib) permutation.  Reworked stapiu_process[] accumulation
    generally to move to the two immediate task_finder callbacks, out of
    stapiu_change_plus().

    Added lots of commentary and diagnostics throughout.  stap
    -DDEBUG_UPROBES give meaningful info about uprobes & sdt semaphores;
    with -DDEBUG_TASK_FINDER, more but not overwhelming relevant info
    appears.

diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
index 9777efb..8b8057a 100644
--- a/runtime/linux/task_finder2.c
+++ b/runtime/linux/task_finder2.c
@@ -652,8 +652,8 @@ __verify_build_id(struct task_struct *tsk, unsigned long addr,
 	tsk_build_id[build_id_len] = '\0';

 	if (strcmp(build_id, tsk_build_id)) {
-		dbug_task(2, "target build-id not matched: [%s] != [%s]\n",
-			  build_id, tsk_build_id);
+		dbug_task(2, "target build-id not matched: [%s] @ 0x%lx != [%s]\n",
+			  build_id, addr, tsk_build_id);
 		return false;
 	}

@@ -884,16 +884,9 @@ __stp_utrace_attach_match_filename(struct task_struct *tsk,
 		// procname/build-id and match an "all thread" probe.
 		if (tgt == NULL)
 			continue;
-		/* buildid-based target */
-		else if (tgt->build_id_len > 0 && tgt->procname > 0
-			 && !__verify_build_id(tsk,
-					       tgt->build_id_vaddr,
-					       tgt->build_id,
-					       tgt->build_id_len))
-		{
-			continue;
-		}
-		else if (tgt->build_id_len == 0 && tgt->pathlen > 0
+                /* buildid-based target ... gets checked in __stp_tf_quiesce_worker */
+                /* procname-based target */
+		else if (tgt->pathlen > 0
 			 && (tgt->pathlen != filelen
 			     || strcmp(tgt->procname, filename) != 0))
 		{
@@ -1341,6 +1334,34 @@ __stp_tf_quiesce_worker(struct task_work *work)
 		return;
 	}

+        /* If we had a build-id based executable probe (so we have a
+         * tgt->build_id) set, we could not check it back in
+         * __stp_utrace_attach_* because we can't do sleepy
+         * access_process_vm() calls from there.  BUt now that we're
+         * in process context, quiesced, finally we can check.  If we
+         * were build-id based, and the build-id does not match, then
+         * we UTRACE_DETACH from this process and skip the callbacks.
+         *
+         * XXX: For processes that do match, we redo this check every
+         * time this callbacks is encountered somehow.  That's
+         * probably unnecessary.
+         */
+        if (tgt->build_id_len > 0) {
+                int ok = __verify_build_id(current,
+                                           tgt->build_id_vaddr,
+                                           tgt->build_id,
+                                           tgt->build_id_len);
+
+                dbug_task(2, "verified buildid-target process pid=%ld ok=%d\n",
+                          (long) current->tgid, ok);
+                if (!ok) {
+                        // stap_utrace_detach (current, & tgt->ops);
+                        /* Remember that this task_work_func is finished. */
+                        stp_task_work_func_done();
+                        return;
+                }
+        }
+
 	__stp_tf_handler_start();

 	/* NB make sure we run mmap callbacks before other callbacks
@@ -1434,6 +1455,21 @@ __stp_utrace_task_finder_target_quiesce(u32 action,
 		}
 	}
 	else {
+                /* Like in __stp_tf_quiesce_worker(), verify build-id now if belated. */
+                if (tgt->build_id_len > 0) {
+                        int ok = __verify_build_id(current,
+                                                   tgt->build_id_vaddr,
+                                                   tgt->build_id,
+                                                   tgt->build_id_len);
+
+                        dbug_task(2, "verified2 buildid-target process pid=%ld ok=%d\n",
+                                  (long) current->tgid, ok);
+                        if (!ok) {
+                                __stp_tf_handler_end();
+                                return UTRACE_RESUME; // NB: not _DETACH; that interferes with other engines
+                        }
+                }
+
 		/* NB make sure we run mmap callbacks before other callbacks
 		 * like 'probe process.begin' handlers so that the vma tracker
 		 * is already initialized in the latter contexts */
@@ -1797,15 +1833,7 @@ stap_start_task_finder(void)
 					 struct stap_task_finder_target, list);
 			if (tgt == NULL)
 				continue;
-			/* buildid-based target */
-			else if (tgt->build_id_len > 0 && tgt->procname > 0
-				 && !__verify_build_id(tsk,
-						       tgt->build_id_vaddr,
-						       tgt->build_id,
-						       tgt->build_id_len))
-			{
-				continue;
-			}
+			/* buildid-based target ... gets checked in __stp_tf_quiesce_worker */
 			/* procname-based target */
 			else if (tgt->build_id == 0 && tgt->pathlen > 0
 				 && (tgt->pathlen != mmpathlen
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
index de81839..757da30 100644
--- a/runtime/linux/uprobes-inode.c
+++ b/runtime/linux/uprobes-inode.c
@@ -76,7 +76,7 @@ struct stapiu_instance {
   struct list_head instance_list;   // to find other instances e.g. during shutdown

   struct uprobe_consumer kconsumer; // the kernel-side struct for uprobe callbacks etc.
-  struct inode *inode;              // XXX: refcount?
+  struct inode *inode;              // refcounted
   unsigned registered_p:1;          // whether the this kconsumer is registered (= armed, live)

   struct stapiu_consumer *sconsumer; // whose instance are we
@@ -86,10 +86,14 @@ struct stapiu_instance {
 /* A snippet to record the per-process vm where a particular
    executable/solib was mapped.  Used for sdt semaphore setting, and
    for identifying processes of our interest (vs. disinterest) for
-   uprobe hits.  This object is owned by a stapiu_consumer. */
+   uprobe hits.  This object is owned by a stapiu_consumer.  We use
+   the same inode* as the stapiu_instance, and have the same lifespan,
+   so don't bother separately refcount it.
+*/
 struct stapiu_process {
   struct list_head process_list;    // to find other processes

+  struct inode *inode;              // the inode* for solib or executable
   unsigned long relocation;         // the mmap'ed .text address
   unsigned long base;               // the address to apply sdt offsets against
   pid_t tgid;                       // pid
@@ -392,6 +396,7 @@ stapiu_consumer_unreg(struct stapiu_consumer *c)
   // multiple times in the list.  Don't break after the first.
   list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) {
     list_del(&p->process_list);
+    // no refcount used for the inode field
     _stp_kfree (p);
   }
 }
@@ -498,6 +503,8 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
   /* Do the buildid check.  NB: on F29+, offset may not equal
      0 for LOADable "R E" segments, because the read-only .note.*
      stuff may have been loaded earlier, separately.  PR23890. */
+  // NB: this is not really necessary for buildid-based probes,
+  // which had this verified already.
   rc = _stp_usermodule_check(task, c->module_name,
 			     relocation - offset);
   if (rc)
@@ -527,7 +534,6 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
   }

   if (!inst) { // new instance; need new uprobe etc.
-    // Normal case: need a new one.
     inst = _stp_kzalloc(sizeof(struct stapiu_instance));
     if (! inst) {
       rc = -ENOMEM;
@@ -560,30 +566,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
       (void) _stp_perf_read_init ((c->perf_counters)[j], task);
   }

-  // Associate this consumer with this process.  If we encounter
-  // resource problems here, we don't really have to undo the uprobe
-  // registrations etc. already in effect.  It may break correct
-  // tracking of process hierarchy in -c/-x operation, but too bad.
-  p = _stp_kzalloc(sizeof(struct stapiu_process));
-  if (! p) {
-    rc = -ENOMEM;
-    goto out1;
-  }
-  p->tgid = task->tgid;
-  p->relocation = relocation;
-  /* The base is used for relocating semaphores.  If the
-   * probe is in an ET_EXEC binary, then that offset
-   * already is a real address.  But stapiu_process_found
-   * calls us in this case with relocation=offset=0, so
-   * we don't have to worry about it.  */
-  p->base = relocation - offset;
-  spin_lock_irqsave (&c->process_list_lock, flags);
-  list_add(&p->process_list, &c->process_list_head);
-  spin_unlock_irqrestore (&c->process_list_lock, flags);
-  // NB: actual semaphore value bumping is done later
+  // NB: process_list[] already extended up in stapiu_mmap_found().

   rc = 0;
-  // Register actual uprobe if cond_enabled right now
   goto out1;

  out2:
@@ -599,7 +584,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
  * Increment the semaphore now.  */
 static int
 stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task,
-			     unsigned long relocation, unsigned long length)
+			     unsigned long relocation, struct inode* inode)
 {
   int rc = 0;
   struct stapiu_process *p;
@@ -609,6 +594,13 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
   if (! c->sdt_sem_offset) // nothing to do
     return 0;

+  dbug_uprobes("considering semaphore (u%sprobe) pid %ld inode 0x%lx"
+               "pidx %zu\n",
+               c->return_p ? "ret" : "",
+               (long) task->tgid,
+               (unsigned long) inode,
+               c->probe->index);
+
   // NB: we mustn't hold a lock while changing the task memory,
   // but we need a lock to protect the process_list from concurrent
   // add/delete.  So hold a spinlock during iteration until the first
@@ -617,32 +609,31 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
   // somehow maps in the same solib multiple times).  We can't easily
   // both iterate this list (in a spinlock-protected safe way), and
   // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
-  // call within the loop.  So we will hit only the copy in our list.
+  // call within the loop.  So we will hit only the first copy in our list.
   any_found = 0;
   spin_lock_irqsave(&c->process_list_lock, flags);
   /* Look through all the consumer's processes and increment semaphores.  */
   list_for_each_entry(p, &c->process_list_head, process_list) {
     unsigned long addr = p->base + c->sdt_sem_offset;
-    if (p->tgid != task->tgid) // skip other processes in the list
-      continue;
-    if (addr >= relocation && addr < relocation + length) {
-      int rc2;
-      // unlock list and process write for this entry
-      spin_unlock_irqrestore(&c->process_list_lock, flags);
-      any_found=1;
-
-      dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
-                   "pidx %zu address %lx\n",
-                   c->return_p ? "ret" : "",
-                   (long) task->tgid,
-                   c->probe->index,
-                   (unsigned long) addr);
-
-      rc2 = stapiu_write_task_semaphore(task, addr, +1);
-      if (!rc)
-        rc = rc2;
-      break; // exit list_for_each loop
-    }
+    int rc2;
+    if (p->tgid != task->tgid) continue; // skip other processes in the list
+    if (p->inode != inode) continue; // skip other inodes
+
+    // unlock list and process write for this entry
+    spin_unlock_irqrestore(&c->process_list_lock, flags);
+    any_found=1;
+
+    dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
+                 "pidx %zu address 0x%lx\n",
+                 c->return_p ? "ret" : "",
+                 (long) task->tgid,
+                 c->probe->index,
+                 (unsigned long) addr);
+
+    rc2 = stapiu_write_task_semaphore(task, addr, +1);
+    if (!rc)
+            rc = rc2;
+    break; // exit list_for_each loop
   }
   if (! any_found)
     spin_unlock_irqrestore(&c->process_list_lock, flags);
@@ -755,17 +746,41 @@ stapiu_process_found(struct stap_task_finder_target *tf_target,

   if (!process_p)
     return 0; /* ignore threads */
-
+
+  dbug_uprobes("process_found pid=%ld f.p=%s f.b=%s c.p=%s c.b=%s\n",
+               (long)task->tgid,
+	       ((char*)c->finder.procname ?: ""),
+               ((char*)c->finder.build_id ?: ""),
+	       ((char*)c->solib_pathname ?: ""),
+               ((char*)c->solib_build_id ?: ""));
+
   /* ET_EXEC events are like shlib events, but with 0 relocation bases */
   if (register_p) {
     int rc = -EINVAL;
     struct inode *inode = stapiu_get_task_inode(task);

     if (inode) {
-      rc = stapiu_change_plus(c, task, 0, TASK_SIZE,
-			      0, 0, inode);
-      stapiu_change_semaphore_plus(c, task, 0,
-				   TASK_SIZE);
+      // Add a stapiu_process record to the consumer, so that
+      // the semaphore increment logic will accept this task.
+      struct stapiu_process* p;
+      unsigned long flags;
+      p = _stp_kzalloc(sizeof(struct stapiu_process));
+      if (p) {
+        p->tgid = task->tgid;
+        p->relocation = 0;
+        p->inode = inode;
+        p->base = 0;
+        spin_lock_irqsave (&c->process_list_lock, flags);
+        list_add(&p->process_list, &c->process_list_head);
+        spin_unlock_irqrestore (&c->process_list_lock, flags);
+      } else {
+         _stp_warn("out of memory tracking executable in process %ld\n",
+                   (long) task->tgid);
+      }
+
+      rc = stapiu_change_plus(c, task, 0, TASK_SIZE, 0, 0, inode);
+
+      stapiu_change_semaphore_plus(c, task, 0, inode);
     }
     return rc;
   } else
@@ -776,6 +791,8 @@ stapiu_process_found(struct stap_task_finder_target *tf_target,
 bool
 __verify_build_id (struct task_struct *tsk, unsigned long addr,
 		   unsigned const char *build_id, int build_id_len);
+// defined in task_finder2.c
+


 /* The task_finder_mmap_callback.  These callbacks are NOT
@@ -791,28 +808,119 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target,
   struct stapiu_consumer *c =
     container_of(tf_target, struct stapiu_consumer, finder);
   int rc = 0;
+  struct stapiu_process* p;
+  int known_mapping_p;
+  unsigned long flags;

-  /* The file path or build-id must match. The build-id address
-   * is calculated using start address of this vma, the file
-   * offset of the vma start address and the file offset of
-   * the build-id. */
-  if (c->solib_pathname && path && strcmp (path, c->solib_pathname))
-    return 0;
-  if (c->solib_build_id_len > 0 && !__verify_build_id(task,
-						      addr - offset + c->solib_build_id_vaddr,
-						      c->solib_build_id,
-						      c->solib_build_id_len))
-    return 0;
+  /*
+  We need to verify that this file/mmap corresponds to the given stapiu_consumer.
+  One could compare (inode) file name, but that won't work with buildid-based
+  uprobes.  For those, one cannot just
+
+  __verify_build_id(... addr - offset + c->solib_build_id_vaddr ...)
+
+  because dlopen()ing a shared library involves multiple mmaps, including
+  some at repeating/offset addresses.  See glibc _dl_map_segments() in various
+  versions.  So by the fourth call (!) on modern glibc's, we get a VM_WRITE-able
+  data segment mapped, but that's at a load/mapping address that is offset by a
+  page from the base (file offset=0) mapping.
+
+  e.g. on Fedora 32 / glibc 2.31, with testsuite/libsdt_buildid.so:
+
+  Program Headers:
+  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+  LOAD           0x000000 0x0000000000000000 0x0000000000000000 0x0004b8 0x0004b8 R   0x1000
+  LOAD           0x001000 0x0000000000001000 0x0000000000001000 0x000161 0x000161 R E 0x1000
+  LOAD           0x002000 0x0000000000002000 0x0000000000002000 0x0000cc 0x0000cc R   0x1000
+  LOAD           0x002df8 0x0000000000003df8 0x0000000000003df8 0x000232 0x000238 RW  0x1000
+  DYNAMIC        0x002e10 0x0000000000003e10 0x0000000000003e10 0x0001d0 0x0001d0 RW  0x8
+
+  strace:
+  openat(AT_FDCWD, ".../libsdt_buildid.so", O_RDONLY|O_CLOEXEC) = 3
+  mmap(NULL, 16432, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x148c764ac000
+  mmap(0x148c764ad000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x148c764ad000
+  mmap(0x148c764ae000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764ae000
+  mmap(0x148c764af000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764af000
+
+  Note how the virtual mapping for the fourth mmap (also) maps file-offset 0x2000 at
+  vm offset 0x3000.
+
+  So what we do is rely on the name/buildid validation tests being run
+  -earlier- in the dlopen/mmap sequence to validate near-future
+  mmap()s.  We search the c->process_list[] for a mapping that already
+  overlaps the new range, and if so, consider it validated ... whether
+  for the solib_pathname or the solib_build_id case.
+
+  This is complicated for startup-time traversal of processes/mmaps,
+  where it seems sometimes we get notifications out of temporal sequence.
+  */

-  /* 1 - shared libraries' executable segments load from offset 0
-   *   - ld.so convention offset != 0 is now allowed
-   *     so stap_uprobe_change_plus can set a semaphore,
-   *     i.e. a static extern, in a shared object
-   * 2 - the shared library we're interested in
-   * 3 - mapping should be executable or writeable (for
-   *     semaphore in .so)
-   *     NB: or both, on kernels that lack noexec mapping
-   */
+  known_mapping_p = 0;
+  spin_lock_irqsave(&c->process_list_lock, flags);
+  list_for_each_entry(p, &c->process_list_head, process_list) {
+    if (p->tgid != task->tgid) continue;
+    if (p->inode != dentry->d_inode) continue;
+    known_mapping_p = 1;
+    break;
+  }
+  spin_unlock_irqrestore(&c->process_list_lock, flags);
+
+
+  // Check if this mapping (solib) is of interest: whether we expect
+  // it by buildid or name.
+
+  if (! known_mapping_p) {
+    /* The file path or build-id must match. The build-id address
+     * is calculated using start address of this vma, the file
+     * offset of the vma start address and the file offset of
+     * the build-id. */
+    if (c->solib_pathname && path && strcmp (path, c->solib_pathname))
+      return 0;
+    if (c->solib_build_id_len > 0 && !__verify_build_id(task,
+  						        addr - offset + c->solib_build_id_vaddr,
+  						        c->solib_build_id,
+						        c->solib_build_id_len))
+      return 0;
+  }
+
+  // If we made it this far, we have an interesting solib.
+
+  dbug_uprobes("mmap_found pid=%ld path=%s addr=0x%lx length=%lu offset=%lu flags=0x%lx known=%d\n",
+               (long) task->tgid, path, addr, length, offset, vm_flags, known_mapping_p);
+
+  if (! known_mapping_p) {
+    // OK, let's add it.  The first mapping should be a VM_READ mapping
+    // of the entire solib file, which will also serve as the apprx.
+    // outer bounds of the repeatedly-mapped segments.
+
+#if 0
+    // Consider an assumption about the dlopen/mmap sequence
+    // If it comes out of sequence, we could get length/base wrong in the stored
+    // stapiu_process, which could lead us to miscalculate semaphore addresses.
+    //
+    // However, this has been observed on task-finder initial-enumeration case,
+    // (sdt_misc.exp, where a solib test is already running when stap starts).
+    if (offset != 0)
+      return 0;
+#endif
+
+    // Associate this consumer with this process.  If we encounter
+    // resource problems here, we don't really have to undo the uprobe
+    // registrations etc. already in effect.  It may break correct
+    // tracking of process hierarchy in -c/-x operation, but too bad.
+    p = _stp_kzalloc(sizeof(struct stapiu_process));
+    if (p) {
+      p->tgid = task->tgid;
+      p->relocation = addr;
+      p->inode = dentry->d_inode;
+      p->base = addr-offset; // ... in case caught this during the second mmap
+      spin_lock_irqsave (&c->process_list_lock, flags);
+      list_add(&p->process_list, &c->process_list_head);
+      spin_unlock_irqrestore (&c->process_list_lock, flags);
+    } else
+      _stp_warn("out of memory tracking solib %s in process %ld\n",
+                path, (long) task->tgid);
+  }

   /* Check non-writable, executable sections for probes. */
   if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE))
@@ -827,7 +935,7 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target,
    */

   if ((rc == 0) && (vm_flags & VM_WRITE))
-    rc = stapiu_change_semaphore_plus(c, task, addr, length);
+    rc = stapiu_change_semaphore_plus(c, task, addr, dentry->d_inode);

   return rc;
 }
diff --git a/runtime/sym.c b/runtime/sym.c
index be09ec8..21d820a 100644
--- a/runtime/sym.c
+++ b/runtime/sym.c
@@ -713,9 +713,10 @@ static int _stp_build_id_check (struct _stp_module *m,
 	  // NB: It is normal for different binaries with the same file path
 	  // coexist in the same system via chroot or namespaces, therefore
 	  // we make sure below is really a warning.
-          _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" address "
+          _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" pid %ld address "
 		     "%#lx, expected %s actual %s\n",
-                     m->path, notes_addr, hexstring_theory, hexstring_practice);
+                     m->path, (long) tsk->tgid,
+                     notes_addr, hexstring_theory, hexstring_practice);
       return 1;
   }


commit 5e1ef9d7f2a5ea6e5511ef5228cf05dda1c570b3
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Mon Jul 27 07:58:30 2020 -0400

    PR25568 / RHBZ1857749: sdt_buildid.exp test case

    Add new test that checks for combinations of buildid and pathname
    based uprobes for executables and shared libraries.

diff --git a/testsuite/systemtap.base/sdt_buildid.c b/testsuite/systemtap.base/sdt_buildid.c
new file mode 100644
index 0000000..ccbb2f2
--- /dev/null
+++ b/testsuite/systemtap.base/sdt_buildid.c
@@ -0,0 +1,26 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+void bar ();
+
+#ifndef ONLY_MAIN
+#include "sdt_buildid_.h"
+
+void
+bar ()
+{
+  printf("%s=%ld\n", "test_probe_0_semaphore", SDT_BUILDID_TEST_PROBE_0_ENABLED());
+  if (SDT_BUILDID_TEST_PROBE_0_ENABLED())
+    SDT_BUILDID_TEST_PROBE_0();
+}
+#endif
+
+#ifndef NO_MAIN
+int
+main ()
+{
+  bar();
+  return 0;
+}
+#endif
diff --git a/testsuite/systemtap.base/sdt_buildid.exp b/testsuite/systemtap.base/sdt_buildid.exp
new file mode 100644
index 0000000..3141fd6
--- /dev/null
+++ b/testsuite/systemtap.base/sdt_buildid.exp
@@ -0,0 +1,214 @@
+set test "sdt_buildid"
+
+set pbtype_flags {{additional_flags=-g} {} {}}
+set fail_count 0
+
+# Compile a C program to use as the user-space probing target
+set stap_path $env(SYSTEMTAP_PATH)/stap
+set sup_dpath "[pwd]/sdt_buildid_.d"
+set sup_hpath "[pwd]/sdt_buildid_.h"
+set sup_opath "[pwd]/sdt_buildid_.o"
+
+# Run dtrace
+if {[installtest_p]} {
+    set dtrace $env(SYSTEMTAP_PATH)/dtrace
+} else {
+    set dtrace ../dtrace
+}
+
+verbose -log "$dtrace --types -h -s $srcdir/$subdir/sdt_buildid_.d"
+if {[catch {exec $dtrace --types -h -s \
+                $srcdir/$subdir/sdt_buildid_.d} res]} {
+    verbose -log "unable to run $dtrace: $res"
+}
+verbose -log "$dtrace --types -G -s $srcdir/$subdir/sdt_buildid_.d"
+if {[catch {exec $dtrace --types -G -s \
+                $srcdir/$subdir/sdt_buildid_.d} res]} {
+    verbose -log "unable to run $dtrace: $res"
+}
+if {[file exists $sup_hpath] && [file exists $sup_opath]} then {
+    pass "$test dtrace"
+} else {
+    incr fail_count
+    fail "$test dtrace"
+    return
+}
+
+set sup_flags [sdt_includes]
+set sup_flags "$sup_flags additional_flags=-Wall"
+set sup_flags "$sup_flags additional_flags=-Werror"
+set sup_flags "$sup_flags additional_flags=$sup_opath"
+set sup_flags "$sup_flags additional_flags=-I."
+set sup_exepath "[pwd]/sdt_buildid.x"
+
+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exepath \
+             executable $sup_flags]
+if { $res != "" } {
+    incr fail_count
+    verbose "target_compile failed: $res" 2
+    fail "$test compiling"
+    return
+} else {
+    pass "$test compiling"
+}
+
+
+set sup41_flags "$sup_flags additional_flags=-shared"
+set sup41_flags "$sup41_flags additional_flags=-fPIC"
+set sup41_flags "$sup41_flags additional_flags=-DNO_MAIN"
+set sup_sopath "[pwd]/libsdt_buildid.so"
+set sup_exe2path "[pwd]/sdt_buildid_shared.x"
+set res0 [target_compile $srcdir/$subdir/sdt_buildid.c $sup_sopath \
+		  executable $sup41_flags ]
+set sup42_flags "additional_flags=-Wl,-rpath,[pwd]"
+set sup42_flags "$sup42_flags additional_flags=-L[pwd] additional_flags=-lsdt_buildid"
+set sup42_flags "$sup42_flags additional_flags=-DONLY_MAIN"
+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exe2path \
+             executable $sup42_flags ]
+if { $res0 != "" || $res != "" } {
+    incr fail_count
+    verbose "target_compile failed: $res0 $res" 2
+    fail "$test compiling -shared"
+    return
+} else {
+    pass "$test compiling -shared"
+}
+
+catch { exec eu-readelf -n $sup_exepath | grep Build.ID | awk "{print \$NF}" } bid1
+catch { exec eu-readelf -n $sup_sopath | grep Build.ID | awk "{print \$NF}" } bidso
+catch { exec eu-readelf -n $sup_exe2path | grep Build.ID | awk "{print \$NF}" } bid2
+verbose -log "buildid: $sup_exepath $bid1"
+verbose -log "buildid: $sup_sopath $bidso"
+verbose -log "buildid: $sup_exe2path $bid2"
+# though we won't use the $bid2
+
+if {![installtest_p]} {
+    untested $test
+    return
+}
+
+# To test via build-id, we need a debuginfod server to scan the testsuite build
+# directory.
+
+
+if [catch {exec /usr/bin/which debuginfod} debuginfod] then {
+    untested "$test debuginfod"
+} else {
+    set port [expr {10000 + int(rand()*10000)}]
+    spawn $debuginfod -p $port -d :memory: -F .
+    set debuginfod_pid [exp_pid $spawn_id]
+    # give it time to scan the build directory
+    sleep 10
+    # XXX: we could expect some verbose traffic
+    set env(DEBUGINFOD_URLS) "http://localhost:$port $env(DEBUGINFOD_URLS)"
+    verbose -log "started debuginfod on port $port"
+
+    set subtest "$test debuginfod buildid-exe buildid-solib"
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $bidso
+    set ok 0
+    expect {
+        -timeout 240
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+        eof { }
+        timeout { }
+    }
+    catch {close}; catch {wait}
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+    set subtest "$test debuginfod buildid-exe path-solib"
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $sup_sopath
+    set ok 0
+    expect {
+        -timeout 240
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+        eof { }
+        timeout { }
+    }
+    catch {close}; catch {wait}
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+    set subtest "$test debuginfod path-exe buildid-solib"
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $bidso
+    set ok 0
+    expect {
+        -timeout 240
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+        eof { }
+        timeout { }
+    }
+    catch {close}; catch {wait}
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+    set subtest "$test debuginfod buildid-solib"
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bidso
+    set ok 0
+    expect {
+        -timeout 240
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+        eof { }
+        timeout { }
+    }
+    catch {close}; catch {wait}
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+    set subtest "$test debuginfod buildid-exe"
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1
+    set ok 0
+    expect {
+        -timeout 240
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+        eof { }
+        timeout { }
+    }
+    catch {close}; catch {wait}
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+    kill -INT $debuginfod_pid
+}
+
+
+set subtest "$test non-buildid both"
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $sup_sopath
+set ok 0
+expect {
+    -timeout 240
+    -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+    -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+    eof { }
+    timeout { }
+}
+catch {close}; catch {wait}
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+set subtest "$test non-buildid exe"
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath
+set ok 0
+expect {
+    -timeout 240
+    -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+    -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
+    eof { }
+    timeout { }
+}
+catch {close}; catch {wait}
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+set subtest "$test non-buildid solib"
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_sopath
+set ok 0
+expect {
+    -timeout 240
+    -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+    -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
+    eof { }
+    timeout { }
+}
+catch {close}; catch {wait}
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
+
+return
diff --git a/testsuite/systemtap.base/sdt_buildid.stp b/testsuite/systemtap.base/sdt_buildid.stp
new file mode 100644
index 0000000..a26d183
--- /dev/null
+++ b/testsuite/systemtap.base/sdt_buildid.stp
@@ -0,0 +1,19 @@
+global count
+
+function trace () {
+  printf ("Count %d [%d] %s %s\n", count++, pid(), $$name, pp())
+}
+
+probe process(@1).mark("test_probe_0") { trace() }
+%( $# > 1 %? probe process(@2).mark("test_probe_0") { trace() } %)
+
+probe begin
+{
+  printf ("Count %d\n", count++)
+}
+
+probe timer.s(1) // exit quickly after enough marks fire
+{
+  if (count > 10) exit()
+}
+
diff --git a/testsuite/systemtap.base/sdt_buildid_.d b/testsuite/systemtap.base/sdt_buildid_.d
new file mode 100644
index 0000000..ebfca55
--- /dev/null
+++ b/testsuite/systemtap.base/sdt_buildid_.d
@@ -0,0 +1,4 @@
+provider sdt_buildid {
+        probe test_probe_0 ();
+};
+