diff --git a/.gitignore b/.gitignore index a5889b5..d99c51e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/systemtap-4.9.tar.gz +SOURCES/systemtap-5.0.tar.gz diff --git a/.systemtap.metadata b/.systemtap.metadata index 4877693..78736e1 100644 --- a/.systemtap.metadata +++ b/.systemtap.metadata @@ -1 +1 @@ -7ba2ad579a5ba66ccfd36ad6df0896c9e136f9e9 SOURCES/systemtap-4.9.tar.gz +f44f1853ddd462ac97b2c7c4b3a9434440d9d9c2 SOURCES/systemtap-5.0.tar.gz diff --git a/SOURCES/RHEL-16549.patch b/SOURCES/RHEL-16549.patch new file mode 100644 index 0000000..4469759 --- /dev/null +++ b/SOURCES/RHEL-16549.patch @@ -0,0 +1,59 @@ +commit 0fef0bd60ff4b359a32da52262855dfe82fe51ae +gpg: Signature made Tue 14 Nov 2023 03:20:12 PM EST +gpg: using RSA key 4B35DCD2EA45C4E0783135BC8094BE9C9F4696A1 +gpg: Can't check signature: No public key +Author: Yichun Zhang (agentzh) +Date: Fri Nov 10 21:51:56 2023 -0800 + + PR31051: memory and uprobe leaks in early uprobe registraton code when errors happen + +diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c +index 997f4528d..289cce00b 100644 +--- a/runtime/linux/uprobes-inode.c ++++ b/runtime/linux/uprobes-inode.c +@@ -529,6 +529,16 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers) + } + + if (unlikely(ret != 0)) { ++ for ( ;; ) { ++ struct stapiu_consumer *c = &consumers[i]; ++ // protect against conceivable stapiu_refresh() at same time ++ mutex_lock(& c->consumer_lock); ++ stapiu_consumer_unreg(c); ++ mutex_unlock(& c->consumer_lock); ++ if (i == 0) ++ break; ++ i--; ++ } + return ret; + } + +@@ -545,7 +555,27 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers) + break; + } + } +- return ret; ++ ++ if (unlikely(ret != 0)) { ++ int j; ++ for (j = 0; j < nconsumers; ++j) { ++ struct stapiu_consumer *c = &consumers[j]; ++ // protect against conceivable stapiu_refresh() at same time ++ mutex_lock(& c->consumer_lock); ++ stapiu_consumer_unreg(c); ++ mutex_unlock(& c->consumer_lock); ++ } ++ for ( ;; ) { ++ struct stapiu_consumer *c = &consumers[i]; ++ stap_cleanup_task_finder_target(&c->finder); ++ if (i == 0) ++ break; ++ i--; ++ } ++ return ret; ++ } ++ ++ return 0; + } + + diff --git a/SOURCES/RHEL-18334.patch b/SOURCES/RHEL-18334.patch new file mode 100644 index 0000000..4b1c536 --- /dev/null +++ b/SOURCES/RHEL-18334.patch @@ -0,0 +1,147 @@ +commit b84a5e8c2c5a857c0790a71df7824259a95131cf +Author: William Cohen +Date: Mon Dec 4 11:28:10 2023 -0500 + + PR31074: Ensure that the set_kernel_string* functions limit their writes + + Both the set_kernel_string and set_kernel_string_n function use the + underlying _stp_store_deref_string_ function to write strings. There + were two issues with the this function: + + 1) wrote MAXSTRINGLEN bytes even if string was shorter + 2) null write at end could spill past end of buffer + + The first issue was addressed by stopping to write once a null + character is encountered. The second issue is a side effect of C + implicit promotion of character constants to ints and was addressed by + explicitlying casting the character constants as a char. + + The pr31074.exp test was added to verify that the write length are + limited to string length and the null write does not go beyond the end + of the buffer. + +diff --git a/runtime/linux/loc2c-runtime.h b/runtime/linux/loc2c-runtime.h +index 68fbe2ab6..663360293 100644 +--- a/runtime/linux/loc2c-runtime.h ++++ b/runtime/linux/loc2c-runtime.h +@@ -1007,11 +1007,14 @@ static inline int _stp_store_deref_string_(char *src, void *addr, size_t len, + { + for (i = 0; i < len - 1; ++i) + { ++ if (*src == '\0') ++ break; + err = __stp_put_either(*src++, (u8 *)addr + i, seg); + if (err) + goto out; + } +- err = __stp_put_either('\0', (u8 *)addr + i, seg); ++ /* PR31074: cast (char) '\0' to make sure right size */ ++ err = __stp_put_either((char) '\0', (u8 *)addr + i, seg); + } + + out: +diff --git a/testsuite/systemtap.base/pr31074.exp b/testsuite/systemtap.base/pr31074.exp +new file mode 100644 +index 000000000..5b382b789 +--- /dev/null ++++ b/testsuite/systemtap.base/pr31074.exp +@@ -0,0 +1,5 @@ ++# Check that the set_kernel_* functions work correctly. ++ ++set test "pr31074" ++ ++stap_run $test no_load $all_pass_string -g $srcdir/$subdir/$test.stp +diff --git a/testsuite/systemtap.base/pr31074.stp b/testsuite/systemtap.base/pr31074.stp +new file mode 100644 +index 000000000..930c276b5 +--- /dev/null ++++ b/testsuite/systemtap.base/pr31074.stp +@@ -0,0 +1,88 @@ ++/* ++ * pr31074.stp ++ * ++ * Check that the set_kernel_string function work correctly. ++ */ ++ ++probe begin { println("systemtap starting probe") } ++probe end { println("systemtap ending probe") } ++ ++global errors = 0 ++ ++function assert_string(test, expected, value) ++{ ++ if (value == expected) ++ return 1 ++ printf("systemtap test failure - %s: expected \"%s\", got \"%s\"\n", ++ test, expected, value) ++ errors++ ++ return 0 ++} ++ ++function assert_not_reached(test) ++{ ++ printf("systemtap test failure - %s: missing exception\n", test) ++ errors++ ++} ++ ++function assert_buffer_untouched(test, addr) ++{ ++ if (!buffer_42(addr)) { ++ printf("systemtap test failure - %s: buffer overwritten\n", test) ++ errors++ ++ } ++} ++ ++ ++probe end(1) ++{ ++ test = "set_kernel_string" ++ addr3 = get_buffer3() ++ addr2 = get_buffer2() ++ if (assert_string(test, "", kernel_string(addr2))) { ++ set_kernel_string(addr2, "bar") ++ assert_string(test, "bar", kernel_string(addr2)) ++ } ++ addr1 = get_buffer1() ++ if (assert_string(test, "", kernel_string(addr1))) { ++ set_kernel_string(addr1, "foo") ++ assert_string(test, "foo", kernel_string(addr1)) ++ } ++ /* now check to make sure that "bar" has not been overwritten */ ++ assert_string("no null overrun", "bar", kernel_string(addr2)) ++ assert_buffer_untouched("no overrun", addr3) ++ if (!errors) ++ println("systemtap test success") ++} ++ ++%{ ++ static char buffer_x[4+4+MAXSTRINGLEN]; ++%} ++ ++function get_buffer1:long () %{ ++ static char *buffer1 = &(buffer_x[0]); ++ memset(buffer1, 0, 4); ++ STAP_RETVALUE = (long)buffer1; ++%} ++ ++function get_buffer2:long () %{ ++ static char *buffer2 = &(buffer_x[4]); ++ memset(buffer2, 0, 4); ++ STAP_RETVALUE = (long)buffer2; ++%} ++ ++function get_buffer3:long () %{ ++ static char *buffer3 = &(buffer_x[8]); ++ memset(buffer3, 42, MAXSTRINGLEN); ++ STAP_RETVALUE = (long)buffer3; ++%} ++ ++function buffer_42:long (addr:long) %{ ++ int i; ++ char *buffer3 = (char *)STAP_ARG_addr; ++ STAP_RETVALUE = 1; ++ for(i=0; i< MAXSTRINGLEN; ++i){ ++ if (buffer3[i] != 42) ++ STAP_RETVALUE = 0; ++ } ++%} diff --git a/SOURCES/pr29108.patch b/SOURCES/pr29108.patch deleted file mode 100644 index 43f5170..0000000 --- a/SOURCES/pr29108.patch +++ /dev/null @@ -1,1845 +0,0 @@ -commit bf95ad72c984c9e68d12707c4d34dbe6bc1f89f2 -gpg: Signature made Sat 12 Aug 2023 02:49:06 PM EDT -gpg: using RSA key 5D38116FA4D3A7CC77E378D37E83610126DCC2E8 -gpg: Good signature from "Frank Ch. Eigler " [full] -Author: Aliaksandr Valialkin -Date: Thu Jul 27 18:52:37 2023 -0400 - - runtime/staprun: import gheap routines - - BSD-2-Clause gift from the Aliaksandr Valialkin: - https://github.com/valyala/gheap - -diff --git a/staprun/gheap.h b/staprun/gheap.h -new file mode 100644 -index 000000000..4af4b29ed ---- /dev/null -+++ b/staprun/gheap.h -@@ -0,0 +1,561 @@ -+#ifndef GHEAP_H -+#define GHEAP_H -+ -+/* -+ * Generalized heap implementation for C99. -+ * -+ * Don't forget passing -DNDEBUG option to the compiler when creating optimized -+ * builds. This significantly speeds up gheap code by removing debug assertions. -+ * -+ * Author: Aliaksandr Valialkin . -+ */ -+/* -+Copyright (c) 2011 Aliaksandr Valialkin -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions -+are met: -+1. Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+2. Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE -+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+SUCH DAMAGE. -+*/ -+ -+ -+ -+/******************************************************************************* -+ * Interface. -+ ******************************************************************************/ -+ -+#include /* for size_t */ -+#include /* for SIZE_MAX */ -+ -+/* -+ * Less comparer must return non-zero value if a < b. -+ * ctx is the gheap_ctx->less_comparer_ctx. -+ * Otherwise it must return 0. -+ */ -+typedef int (*gheap_less_comparer_t)(const void *ctx, const void *a, -+ const void *b); -+ -+/* -+ * Moves the item from src to dst. -+ */ -+typedef void (*gheap_item_mover_t)(void *dst, const void *src); -+ -+/* -+ * Gheap context. -+ * This context must be passed to every gheap function. -+ */ -+struct gheap_ctx -+{ -+ /* -+ * How much children each heap item can have. -+ */ -+ size_t fanout; -+ -+ /* -+ * A chunk is a tuple containing fanout items arranged sequentially in memory. -+ * A page is a subheap containing page_chunks chunks arranged sequentially -+ * in memory. -+ * The number of chunks in a page is an arbitrary integer greater than 0. -+ */ -+ size_t page_chunks; -+ -+ /* -+ * The size of each item in bytes. -+ */ -+ size_t item_size; -+ -+ gheap_less_comparer_t less_comparer; -+ const void *less_comparer_ctx; -+ -+ gheap_item_mover_t item_mover; -+}; -+ -+/* -+ * Returns parent index for the given child index. -+ * Child index must be greater than 0. -+ * Returns 0 if the parent is root. -+ */ -+static inline size_t gheap_get_parent_index(const struct gheap_ctx *ctx, -+ size_t u); -+ -+/* -+ * Returns the index of the first child for the given parent index. -+ * Parent index must be less than SIZE_MAX. -+ * Returns SIZE_MAX if the index of the first child for the given parent -+ * cannot fit size_t. -+ */ -+static inline size_t gheap_get_child_index(const struct gheap_ctx *ctx, -+ size_t u); -+ -+/* -+ * Returns a pointer to the first non-heap item using less_comparer -+ * for items' comparison. -+ * Returns the index of the first non-heap item. -+ * Returns heap_size if base points to valid max heap with the given size. -+ */ -+static inline size_t gheap_is_heap_until(const struct gheap_ctx *ctx, -+ const void *base, size_t heap_size); -+ -+/* -+ * Returns non-zero if base points to valid max heap. Returns zero otherwise. -+ * Uses less_comparer for items' comparison. -+ */ -+static inline int gheap_is_heap(const struct gheap_ctx *ctx, -+ const void *base, size_t heap_size); -+ -+/* -+ * Makes max heap from items base[0] ... base[heap_size-1]. -+ * Uses less_comparer for items' comparison. -+ */ -+static inline void gheap_make_heap(const struct gheap_ctx *ctx, -+ void *base, size_t heap_size); -+ -+/* -+ * Pushes the item base[heap_size-1] into max heap base[0] ... base[heap_size-2] -+ * Uses less_comparer for items' comparison. -+ */ -+static inline void gheap_push_heap(const struct gheap_ctx *ctx, -+ void *base, size_t heap_size); -+ -+/* -+ * Pops the maximum item from max heap base[0] ... base[heap_size-1] into -+ * base[heap_size-1]. -+ * Uses less_comparer for items' comparison. -+ */ -+static inline void gheap_pop_heap(const struct gheap_ctx *ctx, -+ void *base, size_t heap_size); -+ -+/* -+ * Sorts items in place of max heap in ascending order. -+ * Uses less_comparer for items' comparison. -+ */ -+static inline void gheap_sort_heap(const struct gheap_ctx *ctx, -+ void *base, size_t heap_size); -+ -+/* -+ * Swaps the item outside the heap with the maximum item inside -+ * the heap and restores heap invariant. -+ */ -+static inline void gheap_swap_max_item(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size, void *item); -+ -+/* -+ * Restores max heap invariant after item's value has been increased, -+ * i.e. less_comparer(old_item, new_item) != 0. -+ */ -+static inline void gheap_restore_heap_after_item_increase( -+ const struct gheap_ctx *ctx, -+ void *base, size_t heap_size, size_t modified_item_index); -+ -+/* -+ * Restores max heap invariant after item's value has been decreased, -+ * i.e. less_comparer(new_item, old_item) != 0. -+ */ -+static inline void gheap_restore_heap_after_item_decrease( -+ const struct gheap_ctx *ctx, -+ void *base, size_t heap_size, size_t modified_item_index); -+ -+/* -+ * Removes the given item from the heap and puts it into base[heap_size-1]. -+ * Uses less_comparer for items' comparison. -+ */ -+static inline void gheap_remove_from_heap(const struct gheap_ctx *ctx, -+ void *base, size_t heap_size, size_t item_index); -+ -+/******************************************************************************* -+ * Implementation. -+ * -+ * Define all functions inline, so compiler will be able optimizing out common -+ * args (fanout, page_chunks, item_size, less_comparer and item_mover), -+ * which are usually constants, using contant folding optimization -+ * ( http://en.wikipedia.org/wiki/Constant_folding ). -+ *****************************************************************************/ -+ -+#include /* for assert */ -+#include /* for size_t */ -+#include /* for uintptr_t, SIZE_MAX and UINTPTR_MAX */ -+ -+static inline size_t gheap_get_parent_index(const struct gheap_ctx *const ctx, -+ size_t u) -+{ -+ assert(u > 0); -+ -+ const size_t fanout = ctx->fanout; -+ const size_t page_chunks = ctx->page_chunks; -+ -+ --u; -+ if (page_chunks == 1) { -+ return u / fanout; -+ } -+ -+ if (u < fanout) { -+ /* Parent is root. */ -+ return 0; -+ } -+ -+ assert(page_chunks <= SIZE_MAX / fanout); -+ const size_t page_size = fanout * page_chunks; -+ size_t v = u % page_size; -+ if (v >= fanout) { -+ /* Fast path. Parent is on the same page as the child. */ -+ return u - v + v / fanout; -+ } -+ -+ /* Slow path. Parent is on another page. */ -+ v = u / page_size - 1; -+ const size_t page_leaves = (fanout - 1) * page_chunks + 1; -+ u = v / page_leaves + 1; -+ return u * page_size + v % page_leaves - page_leaves + 1; -+} -+ -+static inline size_t gheap_get_child_index(const struct gheap_ctx *const ctx, -+ size_t u) -+{ -+ assert(u < SIZE_MAX); -+ -+ const size_t fanout = ctx->fanout; -+ const size_t page_chunks = ctx->page_chunks; -+ -+ if (page_chunks == 1) { -+ if (u > (SIZE_MAX - 1) / fanout) { -+ /* Child overflow. */ -+ return SIZE_MAX; -+ } -+ return u * fanout + 1; -+ } -+ -+ if (u == 0) { -+ /* Root's child is always 1. */ -+ return 1; -+ } -+ -+ assert(page_chunks <= SIZE_MAX / fanout); -+ const size_t page_size = fanout * page_chunks; -+ --u; -+ size_t v = u % page_size + 1; -+ if (v < page_size / fanout) { -+ /* Fast path. Child is on the same page as the parent. */ -+ v *= fanout - 1; -+ if (u > SIZE_MAX - 2 - v) { -+ /* Child overflow. */ -+ return SIZE_MAX; -+ } -+ return u + v + 2; -+ } -+ -+ /* Slow path. Child is on another page. */ -+ const size_t page_leaves = (fanout - 1) * page_chunks + 1; -+ v += (u / page_size + 1) * page_leaves - page_size; -+ if (v > (SIZE_MAX - 1) / page_size) { -+ /* Child overflow. */ -+ return SIZE_MAX; -+ } -+ return v * page_size + 1; -+} -+ -+/* Returns a pointer to base[index]. */ -+static inline void *_gheap_get_item_ptr(const struct gheap_ctx *const ctx, -+ const void *const base, const size_t index) -+{ -+ const size_t item_size = ctx->item_size; -+ -+ assert(index <= SIZE_MAX / item_size); -+ -+ const size_t offset = item_size * index; -+ assert((uintptr_t)base <= UINTPTR_MAX - offset); -+ -+ return ((char *)base) + offset; -+} -+ -+/* -+ * Sifts the item up in the given sub-heap with the given root_index -+ * starting from the hole_index. -+ */ -+static inline void _gheap_sift_up(const struct gheap_ctx *const ctx, -+ void *const base, const size_t root_index, size_t hole_index, -+ const void *const item) -+{ -+ assert(hole_index >= root_index); -+ -+ const gheap_less_comparer_t less_comparer = ctx->less_comparer; -+ const void *const less_comparer_ctx = ctx->less_comparer_ctx; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ while (hole_index > root_index) { -+ const size_t parent_index = gheap_get_parent_index(ctx, hole_index); -+ assert(parent_index >= root_index); -+ const void *const parent = _gheap_get_item_ptr(ctx, base, parent_index); -+ if (!less_comparer(less_comparer_ctx, parent, item)) { -+ break; -+ } -+ item_mover(_gheap_get_item_ptr(ctx, base, hole_index), -+ parent); -+ hole_index = parent_index; -+ } -+ -+ item_mover(_gheap_get_item_ptr(ctx, base, hole_index), item); -+} -+ -+/* -+ * Moves the max child into the given hole and returns index -+ * of the new hole. -+ */ -+static inline size_t _gheap_move_up_max_child(const struct gheap_ctx *const ctx, -+ void *const base, const size_t children_count, -+ const size_t hole_index, const size_t child_index) -+{ -+ assert(children_count > 0); -+ assert(children_count <= ctx->fanout); -+ assert(child_index == gheap_get_child_index(ctx, hole_index)); -+ -+ const gheap_less_comparer_t less_comparer = ctx->less_comparer; -+ const void *const less_comparer_ctx = ctx->less_comparer_ctx; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ size_t max_child_index = child_index; -+ for (size_t i = 1; i < children_count; ++i) { -+ if (!less_comparer(less_comparer_ctx, -+ _gheap_get_item_ptr(ctx, base, child_index + i), -+ _gheap_get_item_ptr(ctx, base, max_child_index))) { -+ max_child_index = child_index + i; -+ } -+ } -+ item_mover(_gheap_get_item_ptr(ctx, base, hole_index), -+ _gheap_get_item_ptr(ctx, base, max_child_index)); -+ return max_child_index; -+} -+ -+/* -+ * Sifts the given item down in the heap of the given size starting -+ * from the hole_index. -+ */ -+static inline void _gheap_sift_down(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size, size_t hole_index, -+ const void *const item) -+{ -+ assert(heap_size > 0); -+ assert(hole_index < heap_size); -+ -+ const size_t fanout = ctx->fanout; -+ -+ const size_t root_index = hole_index; -+ const size_t last_full_index = heap_size - (heap_size - 1) % fanout; -+ while (1) { -+ const size_t child_index = gheap_get_child_index(ctx, hole_index); -+ if (child_index >= last_full_index) { -+ if (child_index < heap_size) { -+ assert(child_index == last_full_index); -+ hole_index = _gheap_move_up_max_child(ctx, base, -+ heap_size - child_index, hole_index, child_index); -+ } -+ break; -+ } -+ assert(heap_size - child_index >= fanout); -+ hole_index = _gheap_move_up_max_child(ctx, base, fanout, hole_index, -+ child_index); -+ } -+ _gheap_sift_up(ctx, base, root_index, hole_index, item); -+} -+ -+/* -+ * Pops the maximum item from the heap [base[0] ... base[heap_size-1]] -+ * into base[heap_size]. -+ */ -+static inline void _gheap_pop_max_item(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size) -+{ -+ void *const hole = _gheap_get_item_ptr(ctx, base, heap_size); -+ gheap_swap_max_item(ctx, base, heap_size, hole); -+} -+ -+static inline size_t gheap_is_heap_until(const struct gheap_ctx *const ctx, -+ const void *const base, const size_t heap_size) -+{ -+ const gheap_less_comparer_t less_comparer = ctx->less_comparer; -+ const void *const less_comparer_ctx = ctx->less_comparer_ctx; -+ -+ for (size_t u = 1; u < heap_size; ++u) { -+ const size_t v = gheap_get_parent_index(ctx, u); -+ const void *const a = _gheap_get_item_ptr(ctx, base, v); -+ const void *const b = _gheap_get_item_ptr(ctx, base, u); -+ if (less_comparer(less_comparer_ctx, a, b)) { -+ return u; -+ } -+ } -+ return heap_size; -+} -+ -+static inline int gheap_is_heap(const struct gheap_ctx *const ctx, -+ const void *const base, const size_t heap_size) -+{ -+ return (gheap_is_heap_until(ctx, base, heap_size) == heap_size); -+} -+ -+static inline void gheap_make_heap(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size) -+{ -+ const size_t fanout = ctx->fanout; -+ const size_t page_chunks = ctx->page_chunks; -+ const size_t item_size = ctx->item_size; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ if (heap_size > 1) { -+ /* Skip leaf nodes without children. This is easy to do for non-paged heap, -+ * i.e. when page_chunks = 1, but it is difficult for paged heaps. -+ * So leaf nodes in paged heaps are visited anyway. -+ */ -+ size_t i = (page_chunks == 1) ? ((heap_size - 2) / fanout) : -+ (heap_size - 2); -+ do { -+ char tmp[item_size]; -+ item_mover(tmp, _gheap_get_item_ptr(ctx, base, i)); -+ _gheap_sift_down(ctx, base, heap_size, i, tmp); -+ } while (i-- > 0); -+ } -+ -+ assert(gheap_is_heap(ctx, base, heap_size)); -+} -+ -+static inline void gheap_push_heap(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size) -+{ -+ assert(heap_size > 0); -+ assert(gheap_is_heap(ctx, base, heap_size - 1)); -+ -+ const size_t item_size = ctx->item_size; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ if (heap_size > 1) { -+ const size_t u = heap_size - 1; -+ char tmp[item_size]; -+ item_mover(tmp, _gheap_get_item_ptr(ctx, base, u)); -+ _gheap_sift_up(ctx, base, 0, u, tmp); -+ } -+ -+ assert(gheap_is_heap(ctx, base, heap_size)); -+} -+ -+static inline void gheap_pop_heap(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size) -+{ -+ assert(heap_size > 0); -+ assert(gheap_is_heap(ctx, base, heap_size)); -+ -+ if (heap_size > 1) { -+ _gheap_pop_max_item(ctx, base, heap_size - 1); -+ } -+ -+ assert(gheap_is_heap(ctx, base, heap_size - 1)); -+} -+ -+static inline void gheap_sort_heap(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size) -+{ -+ for (size_t i = heap_size; i > 1; --i) { -+ _gheap_pop_max_item(ctx, base, i - 1); -+ } -+} -+ -+static inline void gheap_swap_max_item(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size, void *item) -+{ -+ assert(heap_size > 0); -+ assert(gheap_is_heap(ctx, base, heap_size)); -+ -+ const size_t item_size = ctx->item_size; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ char tmp[item_size]; -+ item_mover(tmp, item); -+ item_mover(item, base); -+ _gheap_sift_down(ctx, base, heap_size, 0, tmp); -+ -+ assert(gheap_is_heap(ctx, base, heap_size)); -+} -+ -+static inline void gheap_restore_heap_after_item_increase( -+ const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size, size_t modified_item_index) -+{ -+ assert(heap_size > 0); -+ assert(modified_item_index < heap_size); -+ assert(gheap_is_heap(ctx, base, modified_item_index)); -+ -+ const size_t item_size = ctx->item_size; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ if (modified_item_index > 0) { -+ char tmp[item_size]; -+ item_mover(tmp, _gheap_get_item_ptr(ctx, base, modified_item_index)); -+ _gheap_sift_up(ctx, base, 0, modified_item_index, tmp); -+ } -+ -+ assert(gheap_is_heap(ctx, base, heap_size)); -+ (void)heap_size; -+} -+ -+static inline void gheap_restore_heap_after_item_decrease( -+ const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size, size_t modified_item_index) -+{ -+ assert(heap_size > 0); -+ assert(modified_item_index < heap_size); -+ assert(gheap_is_heap(ctx, base, modified_item_index)); -+ -+ const size_t item_size = ctx->item_size; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ char tmp[item_size]; -+ item_mover(tmp, _gheap_get_item_ptr(ctx, base, modified_item_index)); -+ _gheap_sift_down(ctx, base, heap_size, modified_item_index, tmp); -+ -+ assert(gheap_is_heap(ctx, base, heap_size)); -+} -+ -+static inline void gheap_remove_from_heap(const struct gheap_ctx *const ctx, -+ void *const base, const size_t heap_size, size_t item_index) -+{ -+ assert(heap_size > 0); -+ assert(item_index < heap_size); -+ assert(gheap_is_heap(ctx, base, heap_size)); -+ -+ const size_t item_size = ctx->item_size; -+ const gheap_less_comparer_t less_comparer = ctx->less_comparer; -+ const void *const less_comparer_ctx = ctx->less_comparer_ctx; -+ const gheap_item_mover_t item_mover = ctx->item_mover; -+ -+ const size_t new_heap_size = heap_size - 1; -+ if (item_index < new_heap_size) { -+ char tmp[item_size]; -+ void *const hole = _gheap_get_item_ptr(ctx, base, new_heap_size); -+ item_mover(tmp, hole); -+ item_mover(hole, _gheap_get_item_ptr(ctx, base, item_index)); -+ if (less_comparer(less_comparer_ctx, tmp, hole)) { -+ _gheap_sift_down(ctx, base, new_heap_size, item_index, tmp); -+ } -+ else { -+ _gheap_sift_up(ctx, base, 0, item_index, tmp); -+ } -+ } -+ -+ assert(gheap_is_heap(ctx, base, new_heap_size)); -+} -+ -+#endif - -commit 5b39471380a238469c8fc18136f12600e5e9aec7 -gpg: Signature made Sat 12 Aug 2023 02:49:21 PM EDT -gpg: using RSA key 5D38116FA4D3A7CC77E378D37E83610126DCC2E8 -gpg: Good signature from "Frank Ch. Eigler " [full] -Author: Frank Ch. Eigler -Date: Mon Jul 31 14:06:57 2023 -0400 - - PR29108 / BZ2095359: rewrite staprun serializer logic - - Logic in commit cd48874296e00 (2021, PR28449) fixed broken cross-cpu - message ordering that followed previous transport concurrency fixes, - but imposed a lot of userspace synchronization delays upon the threads - who were supposed to drain messages from the kernel relayfs streams as - fast as possible. This has led to unnecessarily lossy output overall. - - New code uses a new many-writers single-reader data structure, a mutex - protected heap. All the per-cpu readers copy & pump messages into - that heap as rapidly as possible, sorted by the generally monotonic - sequence number. The reader is signalled via a condition variable and - time to print & release messages in sequence number order. It also - handles lost messages (jumps in the sequence numbers) by waiting a while - to let the stragglers come in. - - The kernel-user messages now also include a framing sequence to allow - the per-cpu readers to resynchronize to the message boundaries, in - case some sort of buffer overflow or something else occurs. It - reports how many bytes and/or messages were skipped in order to - resynchronize. It does so in a lot less lossy way than previous code, - which just tried to flush everything then-currently available, hoping - that it'd match message boundaries. - - Unfortunately, this means that the user-kernel message ABI has - changed! Previous-version staprun instances won't work with the new - modules, nor will current-version staprun with old modules. This flag - day is enforced by changing the numbers of the various ctl message - numbers, so old/new kernel/user combinations will generate errors - rather than quasi-successful staprun startup. - - New code also dramatically simplifies the use of signals in staprun - (or rather stapio). Gone is the signal thread, a lot of the - masking/blocking/waiting. Instead a single basic signal handler just - increments globals when signals of various kinds arrive, and all the - per-cpu etc. threads poll those globals periodically. This includes - logic needed for -S (output file rotation on SIGUSR2) as well as - flight recorder (-L / -A) modes. - - The reader_timeout_ms value (-T) in both bulk/serialized mode for all - ppoll timeouts, to prevent those threads from sleeping indefinitely, - now that they won't be bothered by signals. - -diff --git a/configure b/configure -index 974cc2c81..1ff5580b4 100755 ---- a/configure -+++ b/configure -@@ -12694,6 +12694,14 @@ printf "%s\n" "$as_me: WARNING: cannot find librpmio" >&2;} - fi - fi - -+ac_fn_c_check_header_compile "$LINENO" "stdatomic.h" "ac_cv_header_stdatomic_h" "$ac_includes_default" -+if test "x$ac_cv_header_stdatomic_h" = xyes -+then : -+ printf "%s\n" "#define HAVE_STDATOMIC_H 1" >>confdefs.h -+ -+fi -+ -+ - for ac_header in rpm/rpmcrypto.h - do : - ac_fn_c_check_header_compile "$LINENO" "rpm/rpmcrypto.h" "ac_cv_header_rpm_rpmcrypto_h" "$ac_includes_default" -diff --git a/configure.ac b/configure.ac -index 3f184f862..e9176b725 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -490,6 +490,8 @@ if test "$with_rpm" != "no"; then - fi - fi - -+AC_CHECK_HEADERS([stdatomic.h]) -+ - dnl Look for rpmcrypto.h - AC_CHECK_HEADERS([rpm/rpmcrypto.h], [ - AC_DEFINE([HAVE_RPMCRYPTO_H],[1],[have rpmcrypto_h]) -diff --git a/man/stap.1.in b/man/stap.1.in -index 4e1f0a537..c1a81fef3 100644 ---- a/man/stap.1.in -+++ b/man/stap.1.in -@@ -388,7 +388,7 @@ With \-o option, run staprun in background as a daemon and show its pid. - Sets the maximum size of output file and the maximum number of output files. - If the size of output file will exceed - .B size --, systemtap switches output file to the next file. And if the number of -+megabytes, systemtap switches output file to the next file. And if the number of - output files exceed - .B N - , systemtap removes the oldest output file. You can omit the second argument. -diff --git a/runtime/print_flush.c b/runtime/print_flush.c -index 35677b225..4141f95b9 100644 ---- a/runtime/print_flush.c -+++ b/runtime/print_flush.c -@@ -43,6 +43,7 @@ static void __stp_print_flush(struct _stp_log *log) - if (likely(entry && bytes_reserved > hlen)) { - /* copy new _stp_trace_ header */ - struct _stp_trace t = { -+ .magic = STAP_TRACE_MAGIC, - .sequence = _stp_seq_inc(), - .pdu_len = len - }; -diff --git a/runtime/transport/control.c b/runtime/transport/control.c -index 3d7333403..d0a8bdf53 100644 ---- a/runtime/transport/control.c -+++ b/runtime/transport/control.c -@@ -57,7 +57,7 @@ static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, siz - - #if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2) - if (type < STP_MAX_CMD) -- dbug_trans2("Got %s. euid=%ld, len=%d\n", _stp_command_name[type], -+ dbug_trans2("Got %s. euid=%ld, len=%d\n", _stp_command_name[min(type,STP_MAX_CMD)] ?: "?", - (long)euid, (int)count); - #endif - -@@ -211,7 +211,9 @@ out: - - #if defined(DEBUG_TRANS) && (DEBUG_TRANS >= 2) - if (type < STP_MAX_CMD) -- dbug_trans2("Completed %s (rc=%d)\n", _stp_command_name[type], rc); -+ dbug_trans2("Completed %s (rc=%d)\n", -+ _stp_command_name[min(type,STP_MAX_CMD)] ?: "?", -+ rc); - #endif - return rc; - } -diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h -index 9e0081c80..e3aa995b1 100644 ---- a/runtime/transport/transport_msgs.h -+++ b/runtime/transport/transport_msgs.h -@@ -1,7 +1,7 @@ - /* -*- linux-c -*- - * transport_msgs.h - messages exchanged between module and userspace - * -- * Copyright (C) Red Hat Inc, 2006-2011 -+ * Copyright (C) Red Hat Inc, 2006-2023 - * - * This file is part of systemtap, and is free software. You can - * redistribute it and/or modify it under the terms of the GNU General -@@ -19,7 +19,9 @@ - #define STP_TZ_NAME_LEN 64 - #define STP_REMOTE_URI_LEN 128 - -+#define STAP_TRACE_MAGIC "\xF0\x9F\xA9\xBA" /* unicode stethoscope 🩺 in UTF-8 */ - struct _stp_trace { -+ char magic[4]; /* framing helper */ - uint32_t sequence; /* event number */ - uint32_t pdu_len; /* length of data after this trace */ - }; -@@ -30,7 +32,7 @@ enum - /** stapio sends a STP_START after recieving a STP_TRANSPORT from - the module. The module sends STP_START back with result of call - systemtap_module_init() which will install all initial probes. */ -- STP_START, -+ STP_START = 0x50, // renumbered in version 5.0 to force incompatibility - /** stapio sends STP_EXIT to signal it wants to stop the module - itself or in response to receiving a STP_REQUEST_EXIT. - The module sends STP_EXIT once _stp_clean_and_exit has been -@@ -87,16 +89,21 @@ enum - /** Send by staprun to notify module of remote identity, if any. - Only send once at startup. */ - STP_REMOTE_ID, -+ /** Placeholder, it was mistakenly labeled STP_MAX_CMD */ -+ STP_MAX_CMD_PLACEHOLDER, -+ /** Sent by stapio after having recevied STP_TRANSPORT. Notifies -+ the module of the target namespaces pid.*/ -+ STP_NAMESPACES_PID, -+ -+ /** INSERT NEW MESSAGE TYPES HERE */ -+ - /** Max number of message types, sanity check only. */ - STP_MAX_CMD, -- /** Sent by stapio after having recevied STP_TRANSPORT. Notifies -- the module of the target namespaces pid.*/ -- STP_NAMESPACES_PID - }; - - #ifdef DEBUG_TRANS --static const char *_stp_command_name[] = { -- "STP_START", -+static const char *_stp_command_name[STP_MAX_CMD] = { -+ [STP_START]="STP_START", - "STP_EXIT", - "STP_OOB_DATA", - "STP_SYSTEM", -@@ -113,7 +120,9 @@ static const char *_stp_command_name[] = { - "STP_TZINFO", - "STP_PRIVILEGE_CREDENTIALS", - "STP_REMOTE_ID", -- "STP_NAMESPACES_PID", -+ "STP_MAX_CMD_PLACEHOLDER", -+ "STP_NAMESPACE_PID", -+ [STP_MAX_CMD]="?" /* in control.c, STP_MAX_CMD represents unknown message numbers/names */ - }; - #endif /* DEBUG_TRANS */ - -diff --git a/staprun/common.c b/staprun/common.c -index 3d23d7319..f8d618e24 100644 ---- a/staprun/common.c -+++ b/staprun/common.c -@@ -115,7 +115,7 @@ void parse_args(int argc, char **argv) - target_pid = 0; - target_namespaces_pid = 0; - buffer_size = 0; -- reader_timeout_ms = 0; -+ reader_timeout_ms = 200; - target_cmd = NULL; - outfile_name = NULL; - rename_mod = 0; -diff --git a/staprun/mainloop.c b/staprun/mainloop.c -index 4af21e950..c507fc069 100644 ---- a/staprun/mainloop.c -+++ b/staprun/mainloop.c -@@ -7,7 +7,7 @@ - * Public License (GPL); either version 2, or (at your option) any - * later version. - * -- * Copyright (C) 2005-2021 Red Hat Inc. -+ * Copyright (C) 2005-2023 Red Hat Inc. - */ - - #include "staprun.h" -@@ -23,31 +23,9 @@ - - /* globals */ - int ncpus; --static int pending_interrupts = 0; -+static volatile sig_atomic_t pending_interrupts = 0; // tells stp_main_loop to trigger STP_EXIT message to kernel - static int target_pid_failed_p = 0; - --/* Setup by setup_main_signals, used by signal_thread to notify the -- main thread of interruptable events. */ --static pthread_t main_thread; -- --static void set_nonblocking_std_fds(void) --{ -- int fd; -- for (fd = 1; fd < 3; fd++) { -- /* NB: writing to stderr/stdout blockingly in signal handler is -- * dangerous since it may prevent the stap process from quitting -- * gracefully on receiving SIGTERM/etc signals when the stderr/stdout -- * write buffer is full. PR23891 */ -- int flags = fcntl(fd, F_GETFL); -- if (flags == -1) -- continue; -- -- if (flags & O_NONBLOCK) -- continue; -- -- (void) fcntl(fd, F_SETFL, flags | O_NONBLOCK); -- } --} - - static void set_blocking_std_fds(void) - { -@@ -77,43 +55,16 @@ static void my_exit(int rc) - _exit(rc); - } - --static void *signal_thread(void *arg) --{ -- sigset_t *s = (sigset_t *) arg; -- int signum = 0; - -- while (1) { -- if (sigwait(s, &signum) < 0) { -- _perr("sigwait"); -- continue; -- } -+ -+static void interrupt_handler(int signum) -+{ - if (signum == SIGQUIT) { - load_only = 1; /* flag for stp_main_loop */ -- pending_interrupts ++; -- } else if (signum == SIGINT || signum == SIGHUP || signum == SIGTERM -- || signum == SIGPIPE) -- { -- pending_interrupts ++; - } -- if (pending_interrupts > 2) { -- set_nonblocking_std_fds(); -- pthread_kill (main_thread, SIGURG); -- } -- dbug(2, "sigproc %d (%s)\n", signum, strsignal(signum)); -- } -- /* Notify main thread (interrupts select). */ -- pthread_kill (main_thread, SIGURG); -- return NULL; -+ pending_interrupts ++; - } - --static void urg_proc(int signum) --{ -- /* This handler is just notified from the signal_thread -- whenever an interruptable condition is detected. The -- handler itself doesn't do anything. But this will -- result select to detect an EINTR event. */ -- dbug(2, "urg_proc %d (%s)\n", signum, strsignal(signum)); --} - - static void chld_proc(int signum) - { -@@ -143,9 +94,9 @@ static void chld_proc(int signum) - (void) rc; /* XXX: notused */ - } - -+ - static void setup_main_signals(void) - { -- pthread_t tid; - struct sigaction sa; - sigset_t *s = malloc(sizeof(*s)); - if (!s) { -@@ -153,25 +104,11 @@ static void setup_main_signals(void) - exit(1); - } - -- /* The main thread will only handle SIGCHLD and SIGURG. -- SIGURG is send from the signal thread in case the interrupt -- flag is set. This will then interrupt any select call. */ -- main_thread = pthread_self(); -- sigfillset(s); -- pthread_sigmask(SIG_SETMASK, s, NULL); -- - memset(&sa, 0, sizeof(sa)); - /* select will report EINTR even when SA_RESTART is set. */ - sa.sa_flags = SA_RESTART; - sigfillset(&sa.sa_mask); - -- /* Ignore all these events on the main thread. */ -- sa.sa_handler = SIG_IGN; -- sigaction(SIGINT, &sa, NULL); -- sigaction(SIGTERM, &sa, NULL); -- sigaction(SIGHUP, &sa, NULL); -- sigaction(SIGQUIT, &sa, NULL); -- - /* This is to notify when our child process (-c) ends. */ - sa.sa_handler = chld_proc; - sigaction(SIGCHLD, &sa, NULL); -@@ -182,26 +119,21 @@ static void setup_main_signals(void) - sigaction(SIGWINCH, &sa, NULL); - } - -- /* This signal handler is notified from the signal_thread -- whenever a interruptable event is detected. It will -- result in an EINTR event for select or sleep. */ -- sa.sa_handler = urg_proc; -- sigaction(SIGURG, &sa, NULL); -- -- /* Everything else is handled on a special signal_thread. */ -- sigemptyset(s); -- sigaddset(s, SIGINT); -- sigaddset(s, SIGTERM); -- sigaddset(s, SIGHUP); -- sigaddset(s, SIGQUIT); -- sigaddset(s, SIGPIPE); -- pthread_sigmask(SIG_SETMASK, s, NULL); -- if (pthread_create(&tid, NULL, signal_thread, s) < 0) { -- _perr(_("failed to create thread")); -- exit(1); -- } -+ // listen to these signals via general interrupt handler in whatever thread -+ memset(&sa, 0, sizeof(sa)); -+ sa.sa_flags = SA_RESTART; -+ sigfillset(&sa.sa_mask); -+ -+ sa.sa_handler = interrupt_handler; -+ sigaction(SIGINT, &sa, NULL); -+ sigaction(SIGTERM, &sa, NULL); -+ sigaction(SIGHUP, &sa, NULL); -+ sigaction(SIGQUIT, &sa, NULL); -+ -+ // Formerly, we had a signal catching thread. - } - -+ - /** - * system_cmd() executes system commands in response - * to an STP_SYSTEM message from the module. These -diff --git a/staprun/relay.c b/staprun/relay.c -index dea1d5ae9..08850b246 100644 ---- a/staprun/relay.c -+++ b/staprun/relay.c -@@ -7,30 +7,32 @@ - * Public License (GPL); either version 2, or (at your option) any - * later version. - * -- * Copyright (C) 2007-2013 Red Hat Inc. -+ * Copyright (C) 2007-2023 Red Hat Inc. - */ - - #include "staprun.h" -+#include -+#ifdef HAVE_STDATOMIC_H -+#include -+#endif -+#define NDEBUG -+#include "gheap.h" -+ - - int out_fd[MAX_NR_CPUS]; - int monitor_end = 0; - static pthread_t reader[MAX_NR_CPUS]; --static int relay_fd[MAX_NR_CPUS]; -+static int relay_fd[MAX_NR_CPUS]; // fd to kernel per-cpu relayfs - static int avail_cpus[MAX_NR_CPUS]; --static int switch_file[MAX_NR_CPUS]; --static pthread_mutex_t mutex[MAX_NR_CPUS]; -+static volatile sig_atomic_t sigusr2_count; // number of SIGUSR2's received by process -+static int sigusr2_processed[MAX_NR_CPUS]; // each thread's count of processed SIGUSR2's - static int bulkmode = 0; --static volatile int stop_threads = 0; -+static volatile int stop_threads = 0; // set during relayfs_close to signal threads to die - static time_t *time_backlog[MAX_NR_CPUS]; - static int backlog_order=0; - #define BACKLOG_MASK ((1 << backlog_order) - 1) - #define MONITORLINELENGTH 4096 - --/* tracking message sequence #s for cross-cpu merging */ --static uint32_t last_sequence_number; --static pthread_mutex_t last_sequence_number_mutex = PTHREAD_MUTEX_INITIALIZER; --static pthread_cond_t last_sequence_number_changed = PTHREAD_COND_INITIALIZER; -- - #ifdef NEED_PPOLL - int ppoll(struct pollfd *fds, nfds_t nfds, - const struct timespec *timeout, const sigset_t *sigmask) -@@ -123,18 +125,375 @@ static int switch_outfile(int cpu, int *fnum) - return 0; - } - -+ -+ -+/* In serialized (non-bulk) output mode, ndividual messages that have -+ been received from the kernel per-cpu relays are stored in an central -+ serializing data structure - in this case, a heap. They are ordered -+ by message sequence number. An additional thread (serializer_thread) -+ scans & sequences the output. */ -+struct serialized_message { -+ union { -+ struct _stp_trace bufhdr; -+ char bufhdr_raw[sizeof(struct _stp_trace)]; -+ }; -+ time_t received; // timestamp when this message was enqueued -+ char *buf; // malloc()'d size >= rounded(bufhdr.pdu_len) -+}; -+static struct serialized_message* buffer_heap = NULL; // the heap -+ -+// NB: we control memory via realloc(), gheap just manipulates entries in place -+static unsigned buffer_heap_size = 0; // used number of entries -+static unsigned buffer_heap_alloc = 0; // allocation length, always >= buffer_heap_size -+static unsigned last_sequence_number = 0; // last processed sequential message number -+ -+#ifdef HAVE_STDATOMIC_H -+static atomic_ulong lost_message_count = 0; // how many sequence numbers we know we missed -+static atomic_ulong lost_byte_count = 0; // how many bytes were skipped during resync -+#else -+static unsigned long lost_message_count = 0; // how many sequence numbers we know we missed -+static unsigned long lost_byte_count = 0; // how many bytes were skipped during resync -+#endif -+ -+// concurrency control for the buffer_heap -+static pthread_cond_t buffer_heap_cond = PTHREAD_COND_INITIALIZER; -+static pthread_mutex_t buffer_heap_mutex = PTHREAD_MUTEX_INITIALIZER; -+static pthread_t serializer_thread; // ! bulkmode only -+ -+ -+static void buffer_heap_mover (void *const dest, const void *const src) -+{ -+ memmove (dest, src, sizeof(struct serialized_message)); -+} -+ -+// NB: since we want to sort messages into an INCREASING heap sequence, -+// we reverse the normal comparison operator. gheap_pop_heap() should -+// therefore return the SMALLEST element. -+static int buffer_heap_comparer (const void *const ctx, const void *a, const void *b) -+{ -+ (void) ctx; -+ uint32_t aa = ((struct serialized_message *)a)->bufhdr.sequence; -+ uint32_t bb = ((struct serialized_message *)b)->bufhdr.sequence; -+ return (aa > bb); -+} -+ -+static const struct gheap_ctx buffer_heap_ctx = { -+ .item_size = sizeof(struct serialized_message), -+ .less_comparer = buffer_heap_comparer, -+ .item_mover = buffer_heap_mover, -+ .page_chunks = 16, // arbitrary -+ .fanout = 2 // standard binary heap -+}; -+ -+ -+#define MAX_MESSAGE_LENGTH (128*1024) /* maximum likely length of a single pdu */ -+ -+ -+ -+/* Thread that reads per-cpu messages, and stuffs complete ones into -+ dynamically allocated serialized_message nodes in a binary tree. */ -+static void* reader_thread_serialmode (void *data) -+{ -+ int rc, cpu = (int)(long)data; -+ struct pollfd pollfd; -+ sigset_t sigs; -+ cpu_set_t cpu_mask; -+ -+ sigemptyset(&sigs); -+ sigaddset(&sigs,SIGUSR2); -+ pthread_sigmask(SIG_BLOCK, &sigs, NULL); -+ -+ sigfillset(&sigs); -+ sigdelset(&sigs,SIGUSR2); -+ -+ CPU_ZERO(&cpu_mask); -+ CPU_SET(cpu, &cpu_mask); -+ if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) -+ _perr("sched_setaffinity"); -+ -+ pollfd.fd = relay_fd[cpu]; -+ pollfd.events = POLLIN; -+ -+ while (! stop_threads) { -+ // read a message header -+ struct serialized_message message; -+ -+ /* 200ms, close to human level of "instant" */ -+ struct timespec tim, *timeout = &tim; -+ timeout->tv_sec = reader_timeout_ms / 1000; -+ timeout->tv_nsec = (reader_timeout_ms - timeout->tv_sec * 1000) * 1000000; -+ -+ rc = ppoll(&pollfd, 1, timeout, &sigs); -+ if (rc < 0) { -+ dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno); -+ if (errno == EINTR) { -+ if (stop_threads) -+ break; -+ } else { -+ _perr("poll error"); -+ goto error_out; -+ } -+ } -+ -+ // set the timestamp -+ message.received = time(NULL); -+ -+ /* Read the header. */ -+ rc = read(relay_fd[cpu], &message.bufhdr, sizeof(message.bufhdr)); -+ if (rc <= 0) /* seen during normal shutdown or error */ -+ continue; -+ if (rc != sizeof(message.bufhdr)) { -+ lost_byte_count += rc; -+ continue; -+ } -+ -+ /* Validate the magic value. In case of mismatch, -+ keep on reading & shifting the header, one byte at -+ a time, until we get a match. */ -+ while (! stop_threads && memcmp(message.bufhdr.magic, STAP_TRACE_MAGIC, 4)) { -+ lost_byte_count ++; -+ memmove(& message.bufhdr_raw[0], -+ & message.bufhdr_raw[1], -+ sizeof(message.bufhdr_raw)-1); -+ rc = read(relay_fd[cpu], -+ &message.bufhdr_raw[sizeof(message.bufhdr_raw)-1], -+ 1); -+ if (rc <= 0) /* seen during normal shutdown or error */ -+ break; -+ } -+ -+ /* Validate it slightly. Because of lost messages, we might be getting -+ not a proper _stp_trace struct but the interior of some piece of -+ trace text message. XXX: validate bufhdr.sequence a little bit too? */ -+ if (message.bufhdr.pdu_len == 0 || -+ message.bufhdr.pdu_len > MAX_MESSAGE_LENGTH) { -+ lost_byte_count += sizeof(message.bufhdr); -+ continue; -+ } -+ -+ // Allocate the pdu body -+ message.buf = malloc(message.bufhdr.pdu_len); -+ if (message.buf == NULL) -+ { -+ lost_byte_count += message.bufhdr.pdu_len; -+ continue; -+ } -+ -+ /* Read the message, perhaps in pieces (such as if crossing -+ * relayfs subbuf boundaries). */ -+ size_t bufread = 0; -+ while (bufread < message.bufhdr.pdu_len) { -+ rc = read(relay_fd[cpu], message.buf+bufread, message.bufhdr.pdu_len-bufread); -+ if (rc <= 0) { -+ lost_byte_count += message.bufhdr.pdu_len-bufread; -+ break; /* still process it; hope we can resync at next packet. */ -+ } -+ bufread += rc; -+ } -+ -+ // plop the message into the buffer_heap -+ pthread_mutex_lock(& buffer_heap_mutex); -+ if (message.bufhdr.sequence < last_sequence_number) { -+ // whoa! is this some old message that we've assumed lost? -+ // or are we wrapping around the uint_32 sequence numbers? -+ _perr("unexpected sequence=%u", message.bufhdr.sequence); -+ } -+ -+ // is it large enough? if not, realloc -+ if (buffer_heap_alloc - buffer_heap_size == 0) { // full -+ unsigned new_buffer_heap_alloc = (buffer_heap_alloc + 1) * 1.5; -+ struct serialized_message *new_buffer_heap = -+ realloc(buffer_heap, -+ new_buffer_heap_alloc * sizeof(struct serialized_message)); -+ if (new_buffer_heap == NULL) { -+ _perr("out of memory while enlarging buffer heap"); -+ free (message.buf); -+ lost_message_count ++; -+ pthread_mutex_unlock(& buffer_heap_mutex); -+ continue; -+ } -+ buffer_heap = new_buffer_heap; -+ buffer_heap_alloc = new_buffer_heap_alloc; -+ } -+ // plop copy of message struct into slot at end of heap -+ buffer_heap[buffer_heap_size++] = message; -+ // push it into heap -+ gheap_push_heap(&buffer_heap_ctx, -+ buffer_heap, -+ buffer_heap_size); -+ // and c'est tout -+ pthread_mutex_unlock(& buffer_heap_mutex); -+ pthread_cond_broadcast (& buffer_heap_cond); -+ dbug(3, "thread %d received seq=%u\n", cpu, message.bufhdr.sequence); -+ } -+ -+ dbug(3, "exiting thread for cpu %d\n", cpu); -+ return NULL; -+ -+error_out: -+ /* Signal the main thread that we need to quit */ -+ kill(getpid(), SIGTERM); -+ dbug(2, "exiting thread for cpu %d after error\n", cpu); -+ -+ return NULL; -+} -+ -+ -+// Print and free buffer of given serialized message. -+static void print_serialized_message (struct serialized_message *msg) -+{ -+ // check if file switching is necessary, as per staprun -S -+ -+ // NB: unlike reader_thread_bulkmode(), we don't need to use -+ // mutexes to protect switch_file[] or such, because we're the -+ // ONLY thread doing output. -+ unsigned cpu = 0; // arbitrary -+ static ssize_t wsize = 0; // how many bytes we've written into the serialized file so far -+ static int fnum = 0; // which file number we're using -+ -+ if ((fsize_max && (wsize > fsize_max)) || -+ (sigusr2_count > sigusr2_processed[cpu])) { -+ dbug(2, "switching output file wsize=%ld fsize_max=%ld sigusr2 %d > %d\n", -+ wsize, fsize_max, sigusr2_count, sigusr2_processed[cpu]); -+ sigusr2_processed[cpu] = sigusr2_count; -+ if (switch_outfile(cpu, &fnum) < 0) { -+ perr("unable to switch output file"); -+ // but continue -+ } -+ wsize = 0; -+ } -+ -+ -+ // write loop ... could block if e.g. the output disk is slow -+ // or the user hits a ^S (XOFF) on the tty -+ ssize_t sent = 0; -+ do { -+ ssize_t ret = write (out_fd[avail_cpus[0]], -+ msg->buf+sent, msg->bufhdr.pdu_len-sent); -+ if (ret <= 0) { -+ perr("error writing output"); -+ break; -+ } -+ sent += ret; -+ } while ((unsigned)sent < msg->bufhdr.pdu_len); -+ wsize += sent; -+ -+ // free the associated buffer -+ free (msg->buf); -+ msg->buf = NULL; -+} -+ -+ -+/* Thread that checks on the heap of messages, and pumps them out to -+ the designated output fd in sequence. It waits, but only a little -+ while, if it has only fresher messages than it's expecting. It -+ exits upon a global stop_threads indication. -+*/ -+static void* reader_thread_serializer (void *data) { -+ (void) data; -+ while (! stop_threads) { -+ /* timeout 0-1 seconds; this is the maximum extra time that -+ stapio will be waiting after a ^C */ -+ struct timespec ts = {.tv_sec=time(NULL)+1, .tv_nsec=0}; -+ int rc; -+ pthread_mutex_lock(& buffer_heap_mutex); -+ rc = pthread_cond_timedwait (& buffer_heap_cond, -+ & buffer_heap_mutex, -+ & ts); -+ -+ dbug(3, "serializer cond wait rc=%d heapsize=%u\n", rc, buffer_heap_size); -+ time_t now = time(NULL); -+ unsigned processed = 0; -+ while (buffer_heap_size > 0) { // consume as much as possible -+ // check out the sequence# of the first element -+ uint32_t buf_min_seq = buffer_heap[0].bufhdr.sequence; -+ -+ dbug(3, "serializer last=%u seq=%u\n", last_sequence_number, buf_min_seq); -+ -+ if ((buf_min_seq == last_sequence_number + 1) || // expected seq# -+ (buffer_heap[0].received + 2 <= now)) { // message too old -+ // "we've got one!" -- or waited too long for one -+ // get it off the head of the heap -+ gheap_pop_heap(&buffer_heap_ctx, -+ buffer_heap, -+ buffer_heap_size); -+ buffer_heap_size --; // becomes index where the head was moved -+ processed ++; -+ -+ // take a copy of the whole message -+ struct serialized_message msg = buffer_heap[buffer_heap_size]; -+ -+ // paranoid clear this field of the now-unused slot -+ buffer_heap[buffer_heap_size].buf = NULL; -+ // update statistics -+ if (attach_mod == 1 && last_sequence_number == 0) // first message after staprun -A -+ ; // do not penalize it with lost messages -+ else -+ lost_message_count += (buf_min_seq - last_sequence_number - 1); -+ last_sequence_number = buf_min_seq; -+ -+ // unlock the mutex, permitting -+ // reader_thread_serialmode threads to -+ // resume piling messages into the -+ // heap while we print stuff -+ pthread_mutex_unlock(& buffer_heap_mutex); -+ -+ print_serialized_message (& msg); -+ -+ // must re-take lock for next iteration of the while loop -+ pthread_mutex_lock(& buffer_heap_mutex); -+ } else { -+ // processed as much of the heap as we -+ // could this time; wait for the -+ // condition again -+ break; -+ } -+ } -+ pthread_mutex_unlock(& buffer_heap_mutex); -+ if (processed > 0) -+ dbug(2, "serializer processed n=%u\n", processed); -+ } -+ return NULL; -+} -+ -+ -+ -+// At the end of the program main loop, flush out any the remaining -+// messages and free up all that heapy data. -+static void reader_serialized_flush() -+{ -+ dbug(3, "serializer flushing messages=%u\n", buffer_heap_size); -+ while (buffer_heap_size > 0) { // consume it all -+ // check out the sequence# of the first element -+ uint32_t buf_min_seq = buffer_heap[0].bufhdr.sequence; -+ dbug(3, "serializer seq=%u\n", buf_min_seq); -+ gheap_pop_heap(&buffer_heap_ctx, -+ buffer_heap, -+ buffer_heap_size); -+ buffer_heap_size --; // also index where the head was moved -+ -+ // NB: no need for mutex manipulations, this is super single threaded -+ print_serialized_message (& buffer_heap[buffer_heap_size]); -+ -+ lost_message_count += (buf_min_seq - last_sequence_number - 1); -+ last_sequence_number = buf_min_seq; -+ } -+ free (buffer_heap); -+ buffer_heap = NULL; -+} -+ -+ -+ - /** -- * reader_thread - per-cpu channel buffer reader -+ * reader_thread - per-cpu channel buffer reader, bulkmode (one output file per cpu input file) - */ --static void *reader_thread(void *data) -+static void *reader_thread_bulkmode (void *data) - { -- char buf[128*1024]; // NB: maximum possible output amount from a single probe hit's print_flush -+ char buf[MAX_MESSAGE_LENGTH]; - struct _stp_trace bufhdr; - - int rc, cpu = (int)(long)data; - struct pollfd pollfd; -- /* 200ms, close to human level of "instant" */ -- struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim; - sigset_t sigs; - off_t wsize = 0; - int fnum = 0; -@@ -151,44 +510,30 @@ static void *reader_thread(void *data) - CPU_SET(cpu, &cpu_mask); - if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) - _perr("sched_setaffinity"); --#ifdef NEED_PPOLL -- /* Without a real ppoll, there is a small race condition that could */ -- /* block ppoll(). So use a timeout to prevent that. */ -- timeout->tv_sec = 10; -- timeout->tv_nsec = 0; --#else -- timeout = NULL; --#endif -- -- if (reader_timeout_ms && timeout) { -- timeout->tv_sec = reader_timeout_ms / 1000; -- timeout->tv_nsec = (reader_timeout_ms - timeout->tv_sec * 1000) * 1000000; -- } - - pollfd.fd = relay_fd[cpu]; - pollfd.events = POLLIN; - - do { -- dbug(3, "thread %d start ppoll\n", cpu); -+ /* 200ms, close to human level of "instant" */ -+ struct timespec tim, *timeout = &tim; -+ timeout->tv_sec = reader_timeout_ms / 1000; -+ timeout->tv_nsec = (reader_timeout_ms - timeout->tv_sec * 1000) * 1000000; -+ - rc = ppoll(&pollfd, 1, timeout, &sigs); -- dbug(3, "thread %d end ppoll:%d\n", cpu, rc); - if (rc < 0) { - dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno); - if (errno == EINTR) { - if (stop_threads) - break; - -- pthread_mutex_lock(&mutex[cpu]); -- if (switch_file[cpu]) { -- if (switch_outfile(cpu, &fnum) < 0) { -- switch_file[cpu] = 0; -- pthread_mutex_unlock(&mutex[cpu]); -+ if (sigusr2_count > sigusr2_processed[cpu]) { -+ sigusr2_processed[cpu] = sigusr2_count; -+ if (switch_outfile(cpu, &fnum) < 0) { - goto error_out; -- } -- switch_file[cpu] = 0; -- wsize = 0; -+ } -+ wsize = 0; - } -- pthread_mutex_unlock(&mutex[cpu]); - } else { - _perr("poll error"); - goto error_out; -@@ -197,7 +542,7 @@ static void *reader_thread(void *data) - - /* Read the header. */ - rc = read(relay_fd[cpu], &bufhdr, sizeof(bufhdr)); -- if (rc == 0) /* seen during normal shutdown */ -+ if (rc <= 0) /* seen during normal shutdown */ - continue; - if (rc != sizeof(bufhdr)) { - _perr("bufhdr read error, attempting resync"); -@@ -228,41 +573,20 @@ static void *reader_thread(void *data) - bufread += rc; - } - -- if (! bulkmode) { -- /* Wait until the bufhdr.sequence number indicates it's OUR TURN to go ahead. */ -- struct timespec ts = {.tv_sec=time(NULL)+2, .tv_nsec=0}; /* wait 1-2 seconds */ -- pthread_mutex_lock(& last_sequence_number_mutex); -- while ((last_sequence_number+1 != bufhdr.sequence) && /* normal case */ -- (last_sequence_number < bufhdr.sequence)) { /* we're late!!! */ -- int rc = pthread_cond_timedwait (& last_sequence_number_changed, -- & last_sequence_number_mutex, -- & ts); -- if (rc == ETIMEDOUT) { -- /* _perr("message sequencing timeout"); */ -- break; -- } -- } -- pthread_mutex_unlock(& last_sequence_number_mutex); -- } -- - int wbytes = rc; - char *wbuf = buf; - - dbug(3, "cpu %d: read %d bytes of data\n", cpu, rc); - - /* Switching file */ -- pthread_mutex_lock(&mutex[cpu]); - if ((fsize_max && ((wsize + rc) > fsize_max)) || -- switch_file[cpu]) { -+ (sigusr2_count > sigusr2_processed[cpu])) { -+ sigusr2_processed[cpu] = sigusr2_count; - if (switch_outfile(cpu, &fnum) < 0) { -- switch_file[cpu] = 0; -- pthread_mutex_unlock(&mutex[cpu]); - goto error_out; - } -- switch_file[cpu] = 0; - wsize = 0; - } -- pthread_mutex_unlock(&mutex[cpu]); - - /* Copy loop. Must repeat write(2) in case of a pipe overflow - or other transient fullness. */ -@@ -291,13 +615,8 @@ static void *reader_thread(void *data) - int fd; - /* Only bulkmode and fsize_max use per-cpu output files. Otherwise, - there's just a single output fd stored at out_fd[avail_cpus[0]]. */ -- if (bulkmode || fsize_max) -- fd = out_fd[cpu]; -- else -- fd = out_fd[avail_cpus[0]]; -- rc = 0; -- if (bulkmode) -- rc = write(fd, &bufhdr, sizeof(bufhdr)); // write header -+ fd = out_fd[cpu]; -+ rc = write(fd, &bufhdr, sizeof(bufhdr)); // write header - rc |= write(fd, wbuf, wbytes); // write payload - if (rc <= 0) { - perr("Couldn't write to output %d for cpu %d, exiting.", -@@ -310,14 +629,6 @@ static void *reader_thread(void *data) - } - } - -- /* update the sequence number & let other cpus go ahead */ -- pthread_mutex_lock(& last_sequence_number_mutex); -- if (last_sequence_number < bufhdr.sequence) { /* not if someone leapfrogged us */ -- last_sequence_number = bufhdr.sequence; -- pthread_cond_broadcast (& last_sequence_number_changed); -- } -- pthread_mutex_unlock(& last_sequence_number_mutex); -- - } while (!stop_threads); - dbug(3, "exiting thread for cpu %d\n", cpu); - return(NULL); -@@ -329,41 +640,16 @@ error_out: - return(NULL); - } - -+ - static void switchfile_handler(int sig) - { -- int i; -+ (void) sig; - if (stop_threads || !outfile_name) - return; -- -- for (i = 0; i < ncpus; i++) { -- pthread_mutex_lock(&mutex[avail_cpus[i]]); -- if (reader[avail_cpus[i]] && switch_file[avail_cpus[i]]) { -- pthread_mutex_unlock(&mutex[avail_cpus[i]]); -- dbug(2, "file switching is progressing, signal ignored.\n", sig); -- return; -- } -- pthread_mutex_unlock(&mutex[avail_cpus[i]]); -- } -- for (i = 0; i < ncpus; i++) { -- pthread_mutex_lock(&mutex[avail_cpus[i]]); -- if (reader[avail_cpus[i]]) { -- switch_file[avail_cpus[i]] = 1; -- pthread_mutex_unlock(&mutex[avail_cpus[i]]); -- -- // Make sure we don't send the USR2 signal to -- // ourselves. -- if (pthread_equal(pthread_self(), -- reader[avail_cpus[i]])) -- break; -- pthread_kill(reader[avail_cpus[i]], SIGUSR2); -- } -- else { -- pthread_mutex_unlock(&mutex[avail_cpus[i]]); -- break; -- } -- } -+ sigusr2_count ++; - } - -+ - /** - * init_relayfs - create files and threads for relayfs processing - * -@@ -507,19 +793,20 @@ int init_relayfs(void) - sigaction(SIGUSR2, &sa, NULL); - - dbug(2, "starting threads\n"); -- for (i = 0; i < ncpus; i++) { -- if (pthread_mutex_init(&mutex[avail_cpus[i]], NULL) < 0) { -- _perr("failed to create mutex"); -- return -1; -- } -- } - for (i = 0; i < ncpus; i++) { -- if (pthread_create(&reader[avail_cpus[i]], NULL, reader_thread, -+ if (pthread_create(&reader[avail_cpus[i]], NULL, -+ bulkmode ? reader_thread_bulkmode : reader_thread_serialmode, - (void *)(long)avail_cpus[i]) < 0) { - _perr("failed to create thread"); - return -1; - } - } -+ if (! bulkmode) -+ if (pthread_create(&serializer_thread, NULL, -+ reader_thread_serializer, NULL) < 0) { -+ _perr("failed to create thread"); -+ return -1; -+ } - - return 0; - } -@@ -529,27 +816,31 @@ void close_relayfs(void) - int i; - stop_threads = 1; - dbug(2, "closing\n"); -- for (i = 0; i < ncpus; i++) { -- if (reader[avail_cpus[i]]) -- pthread_kill(reader[avail_cpus[i]], SIGUSR2); -- else -- break; -- } -+ - for (i = 0; i < ncpus; i++) { - if (reader[avail_cpus[i]]) - pthread_join(reader[avail_cpus[i]], NULL); - else - break; - } -+ if (! bulkmode) { -+ if (serializer_thread) // =0 on load_only! -+ pthread_join(serializer_thread, NULL); -+ // at this point, we know all reader and writer -+ // threads for the buffer_heap are dead. -+ reader_serialized_flush(); -+ -+ if (lost_message_count > 0 || lost_byte_count > 0) -+ eprintf("WARNING: There were %u lost messages and %u lost bytes.\n", -+ lost_message_count, lost_byte_count); -+ } -+ - for (i = 0; i < ncpus; i++) { - if (relay_fd[avail_cpus[i]] >= 0) - close(relay_fd[avail_cpus[i]]); - else - break; - } -- for (i = 0; i < ncpus; i++) { -- pthread_mutex_destroy(&mutex[avail_cpus[i]]); -- } - dbug(2, "done\n"); - } - -@@ -558,12 +849,6 @@ void kill_relayfs(void) - int i; - stop_threads = 1; - dbug(2, "killing\n"); -- for (i = 0; i < ncpus; i++) { -- if (reader[avail_cpus[i]]) -- pthread_kill(reader[avail_cpus[i]], SIGUSR2); -- else -- break; -- } - for (i = 0; i < ncpus; i++) { - if (reader[avail_cpus[i]]) - pthread_cancel(reader[avail_cpus[i]]); /* no wait */ -@@ -576,8 +861,5 @@ void kill_relayfs(void) - else - break; - } -- for (i = 0; i < ncpus; i++) { -- pthread_mutex_destroy(&mutex[avail_cpus[i]]); -- } - dbug(2, "done\n"); - } -diff --git a/staprun/stap_merge.c b/staprun/stap_merge.c -index 87de7d465..b210db663 100644 ---- a/staprun/stap_merge.c -+++ b/staprun/stap_merge.c -@@ -76,6 +76,7 @@ int main (int argc, char *argv[]) - fprintf(stderr, "error opening file %s.\n", argv[optind - 1]); - return -1; - } -+ (void) fread(buf, 4, 1, fp[i]); // read & ignore magic word - if (fread (buf, TIMESTAMP_SIZE, 1, fp[i])) - num[i] = *((int *)buf); - else -@@ -133,6 +134,7 @@ int main (int argc, char *argv[]) - count = min; - } - -+ (void) fread(buf, 4, 1, fp[i]); // read & ignore magic word - if (fread (buf, TIMESTAMP_SIZE, 1, fp[j])) - num[j] = *((int *)buf); - else -diff --git a/staprun/stap_merge.tcl b/staprun/stap_merge.tcl -deleted file mode 100755 -index 0c7d7b694..000000000 ---- a/staprun/stap_merge.tcl -+++ /dev/null -@@ -1,101 +0,0 @@ --#!/usr/bin/env tclsh --# --# stap_merge.tcl - systemtap merge program --# --# This program is free software; you can redistribute it and/or modify --# it under the terms of the GNU General Public License as published by --# the Free Software Foundation; either version 2 of the License, or --# (at your option) any later version. --# --# This program is distributed in the hope that it will be useful, --# but WITHOUT ANY WARRANTY; without even the implied warranty of --# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the --# GNU General Public License for more details. --# --# You should have received a copy of the GNU General Public License --# along with this program. If not, see . --# --# Copyright (C) Red Hat Inc, 2007 --# --# -- --proc usage {} { -- puts stderr "$::argv0 \[-v\] \[-o output_filename\] input_files ...\n" -- exit 1 --} -- --set outfile "stdout" --set verbose 0 --set index 0 --while {[string match -* [lindex $argv $index]]} { -- switch -glob -- [lindex $argv $index] { -- -v {set verbose 1} -- -o {incr index; set outfile [lindex $argv $index]} -- default {usage} -- } -- incr index --} -- --if {$tcl_platform(byteOrder) == "littleEndian"} { -- set int_format i --} else { -- set int_format I --} -- --set files [lrange $argv $index end] -- --set n 0 --foreach file $files { -- if {[catch {open $file} fd($n)]} { -- puts stderr $fd($n) -- exit 1 -- } -- fconfigure $fd($n) -translation binary -- if {![binary scan [read $fd($n) 4] $int_format timestamp($n)]} { -- continue -- } -- set timestamp($n) [expr $timestamp($n) & 0xFFFFFFFF] -- incr n --} --set ncpus $n -- --if {$outfile != "stdout"} { -- if {[catch {open $outfile w} outfile]} { -- puts stderr $outfile -- exit 1 -- } --} --fconfigure $outfile -translation binary -- --while {1} { -- set mincpu -1 -- for {set n 0} {$n < $ncpus} {incr n} { -- if {[info exists fd($n)] && (![info exists min] || $timestamp($n) <= $min)} { -- set min $timestamp($n) -- set mincpu $n -- } -- } -- -- if {![info exists min]} {break} -- -- if {![binary scan [read $fd($mincpu) 4] $int_format len]} { -- puts stderr "Error reading length from channel $mincpu" -- exit 1 -- } -- -- if {$verbose == 1} { -- puts stderr "\[CPU:$mincpu, seq=$min, length=$len\]" -- } -- -- set data [read $fd($mincpu) $len] -- puts -nonewline $outfile $data -- -- set data [read $fd($mincpu) 4] -- if {$data == ""} { -- unset fd($mincpu) -- } else { -- binary scan $data $int_format timestamp($mincpu) -- set timestamp($mincpu) [expr $timestamp($mincpu) & 0xFFFFFFFF] -- } -- unset min --} -diff --git a/staprun/staprun.8 b/staprun/staprun.8 -index 3bc16ab95..4e1ca9af6 100644 ---- a/staprun/staprun.8 -+++ b/staprun/staprun.8 -@@ -120,7 +120,7 @@ remote_id() and remote_uri(). - Sets the maximum size of output file and the maximum number of output files. - If the size of output file will exceed - .B size --, systemtap switches output file to the next file. And if the number of -+megabytes, systemtap switches output file to the next file. And if the number of - output files exceed - .B N - , systemtap removes the oldest output file. You can omit the second argument. -commit 2442beb99eeab3144c2622cae1fc98b999f72108 -gpg: Signature made Mon 14 Aug 2023 01:55:27 PM EDT -gpg: using RSA key 5D38116FA4D3A7CC77E378D37E83610126DCC2E8 -gpg: Good signature from "Frank Ch. Eigler " [full] -Author: Frank Ch. Eigler -Date: Mon Aug 14 13:54:50 2023 -0400 - - PR29108 / BZ2095359 tweak: stap_merge magic handling - - We don't bother do much error checking in this infrequently used - tool, but gcc warnings require us to do some. - -diff --git a/staprun/stap_merge.c b/staprun/stap_merge.c -index b210db663..388b14938 100644 ---- a/staprun/stap_merge.c -+++ b/staprun/stap_merge.c -@@ -76,7 +76,8 @@ int main (int argc, char *argv[]) - fprintf(stderr, "error opening file %s.\n", argv[optind - 1]); - return -1; - } -- (void) fread(buf, 4, 1, fp[i]); // read & ignore magic word -+ if (fread(buf, 4, 1, fp[i]) != 1) // read magic word -+ fprintf(stderr, "warning: erro reading magic word\n"); - if (fread (buf, TIMESTAMP_SIZE, 1, fp[i])) - num[i] = *((int *)buf); - else -@@ -134,7 +135,8 @@ int main (int argc, char *argv[]) - count = min; - } - -- (void) fread(buf, 4, 1, fp[i]); // read & ignore magic word -+ if (fread(buf, 4, 1, fp[i]) != 1) // read magic word -+ fprintf(stderr, "warning: erro reading magic word\n"); - if (fread (buf, TIMESTAMP_SIZE, 1, fp[j])) - num[j] = *((int *)buf); - else diff --git a/SOURCES/pr30749.patch b/SOURCES/pr30749.patch deleted file mode 100644 index 108a642..0000000 --- a/SOURCES/pr30749.patch +++ /dev/null @@ -1,99 +0,0 @@ -commit 9839db5514a29cf4f58b3de8cc6155088be6d061 -gpg: Signature made Sat 12 Aug 2023 02:49:26 PM EDT -gpg: using RSA key 5D38116FA4D3A7CC77E378D37E83610126DCC2E8 -gpg: Good signature from "Frank Ch. Eigler " [full] -Author: Frank Ch. Eigler -Date: Sat Aug 12 14:28:44 2023 -0400 - - PR30749: correct stap --sign-module timing - - Previous code signed the temp directory copy, after it had already - been copied into the cache -- so the signature never made it to a - permanent artifact. - - If the module was being fetched from the cache from a previous build - run, a sign (re)attempt will still be done. This may not be - necessary, but shouldn't be harmful. - - Reported-By: Renaud Métrich - -diff --git a/main.cxx b/main.cxx -index 06adb66ad..9f695cbd8 100644 ---- a/main.cxx -+++ b/main.cxx -@@ -1190,8 +1190,10 @@ passes_0_4 (systemtap_session &s) - s.mok_fingerprints.clear(); - s.mok_fingerprints.push_back(mok_fingerprint); - } -- rc = -- sign_module (s.tmpdir, s.module_filename(), s.mok_fingerprints, mok_path, s.kernel_build_tree); -+ if (s.verbose) -+ clog << _F("Signing %s with mok key %s", s.module_filename().c_str(), mok_path.c_str()) -+ << endl; -+ rc = sign_module (s.tmpdir, s.module_filename(), s.mok_fingerprints, mok_path, s.kernel_build_tree); - } - #endif - -@@ -1310,8 +1312,30 @@ passes_0_4 (systemtap_session &s) - if (! s.use_script_cache && s.last_pass <= 4) - s.save_module = true; - -+#if HAVE_NSS -+ // PR30749 -+ if (!rc && s.module_sign_given) -+ { -+ // when run on client as --sign-module, mok fingerprints are result of mokutil -l -+ // when run from server as --sign-module=PATH, mok fingerprint is given by PATH -+ string mok_path; -+ if (!s.module_sign_mok_path.empty()) -+ { -+ string mok_fingerprint; -+ split_path (s.module_sign_mok_path, mok_path, mok_fingerprint); -+ s.mok_fingerprints.clear(); -+ s.mok_fingerprints.push_back(mok_fingerprint); -+ } -+ -+ if (s.verbose) -+ clog << _F("Signing %s with mok key %s", s.module_filename().c_str(), mok_path.c_str()) -+ << endl; -+ rc = sign_module (s.tmpdir, s.module_filename(), s.mok_fingerprints, mok_path, s.kernel_build_tree); -+ } -+#endif -+ - // Copy module to the current directory. -- if (s.save_module && !pending_interrupts) -+ if (!rc && s.save_module && !pending_interrupts) - { - string module_src_path = s.tmpdir + "/" + s.module_filename(); - string module_dest_path = s.module_filename(); -@@ -1327,29 +1351,11 @@ passes_0_4 (systemtap_session &s) - } - } - --#if HAVE_NSS -- if (s.module_sign_given) -- { -- // when run on client as --sign-module, mok fingerprints are result of mokutil -l -- // when run from server as --sign-module=PATH, mok fingerprint is given by PATH -- string mok_path; -- if (!s.module_sign_mok_path.empty()) -- { -- string mok_fingerprint; -- split_path (s.module_sign_mok_path, mok_path, mok_fingerprint); -- s.mok_fingerprints.clear(); -- s.mok_fingerprints.push_back(mok_fingerprint); -- } -- -- rc = sign_module (s.tmpdir, s.module_filename(), s.mok_fingerprints, mok_path, s.kernel_build_tree); -- } --#endif -- - PROBE1(stap, pass4__end, &s); - - return rc; - } -- -+ - int - pass_5 (systemtap_session &s, vector targets) - { diff --git a/SOURCES/rhbz2223733.patch b/SOURCES/rhbz2223733.patch deleted file mode 100644 index 81255ed..0000000 --- a/SOURCES/rhbz2223733.patch +++ /dev/null @@ -1,24 +0,0 @@ -commit ead30c04c7157fec194c0f6d81e5c51c99bf25cf -gpg: Signature made Wed 24 May 2023 10:23:54 AM EDT -gpg: using RSA key 5D38116FA4D3A7CC77E378D37E83610126DCC2E8 -gpg: Good signature from "Frank Ch. Eigler " [full] -Author: Frank Ch. Eigler -Date: Wed May 24 10:22:08 2023 -0400 - - PR30484: stap-report: scrape less of /sys /proc - - Mainly: avoid process/busy parts like /proc/$pid. - -diff --git a/stap-report b/stap-report -index 217ddf840..3b3a1a258 100755 ---- a/stap-report -+++ b/stap-report -@@ -105,7 +105,7 @@ elif [ -f /var/log/packages ]; then - run "cat /var/log/packages | egrep 'systemtap|elfutils|kernel|gcc|dyninst|java|byteman|avahi|nss|nspr|dejagnu' | sort -k9" - fi - run "egrep 'PROBE|RANDOMIZE|RELOC|TRACE|MARKER|KALLSYM|_DEBUG_|LOCKDEP|LOCKING|MODULE|FENTRY|_SIG|BPF' /lib/modules/`uname -r`/build/.config | grep -v not.set | sort | fmt -w 80" --run "find /debugfs /proc /sys /dev /sys/kernel/debug -type f -path '*kprobe*' -o -path '*yama*' 2>/dev/null | xargs grep -H ." -+run "find /debugfs /proc/sys /sys/kernel /dev -type f -path '*kprobe*' -o -path '*yama*' 2>/dev/null | xargs grep -H ." - run "lsmod" - run "avahi-browse -r -t _stap._tcp" - run "ifconfig -a" diff --git a/SOURCES/rhbz2223735.patch b/SOURCES/rhbz2223735.patch deleted file mode 100644 index 2f21386..0000000 --- a/SOURCES/rhbz2223735.patch +++ /dev/null @@ -1,64 +0,0 @@ -commit ab0c5c25509600b7c9cecc9e10baebc984082b50 -gpg: Signature made Fri 12 May 2023 11:18:18 AM EDT -gpg: using RSA key 5D38116FA4D3A7CC77E378D37E83610126DCC2E8 -gpg: Good signature from "Frank Ch. Eigler " [full] -Author: Frank Ch. Eigler -Date: Fri May 12 11:13:45 2023 -0400 - - PR30442: failing optional statement probes should not trigger pass2 exceptions - - In tapsets.cxx, query_cu() and query_module() aggressively caught & - sess-print_error'd semantic_errors from subsidiary call sites. They - are unaware of whether the probe in question is being resolved within - an optional (? or !) context. Instead of this, they now simply let - the exceptions propagate out to derive_probes() or similar, which does - know whether exceptions are errors in that context. That means - exceptions can propagate through elfutils iteration machinery too, - perhaps risking C level memory leaks, but so be it. - - This fix goes well beyond statement probes per se, but hand-testing - and the testsuite appear not to show regressions related to this. - - Added semok/badstmt.exp to test. - -diff --git a/tapsets.cxx b/tapsets.cxx -index 859160bc5..7b7107371 100644 ---- a/tapsets.cxx -+++ b/tapsets.cxx -@@ -2453,8 +2453,9 @@ query_cu (Dwarf_Die * cudie, dwarf_query * q) - } - catch (const semantic_error& e) - { -- q->sess.print_error (e); -- return DWARF_CB_ABORT; -+ // q->sess.print_error (e); -+ throw; -+ // return DWARF_CB_ABORT; - } - } - -@@ -2696,8 +2697,9 @@ query_module (Dwfl_Module *mod, - } - catch (const semantic_error& e) - { -- q->sess.print_error (e); -- return DWARF_CB_ABORT; -+ // q->sess.print_error (e); -+ // return DWARF_CB_ABORT; -+ throw; - } - } - -diff --git a/testsuite/semok/stmtbad.stp b/testsuite/semok/stmtbad.stp -new file mode 100755 -index 000000000..06780790a ---- /dev/null -+++ b/testsuite/semok/stmtbad.stp -@@ -0,0 +1,7 @@ -+#! /bin/sh -+ -+exec stap -v -p2 -e 'probe oneshot {log("nothing") } -+ probe process.statement("main@*:1")? { log("yo") }' -c stap -+ -+# The optional misaddressed statement probe should let stap still -+# succeed with the oneshot probe. diff --git a/SPECS/systemtap.spec b/SPECS/systemtap.spec index 9857a0c..0d5b080 100644 --- a/SPECS/systemtap.spec +++ b/SPECS/systemtap.spec @@ -45,7 +45,7 @@ %{!?with_sysusers: %global with_sysusers 0%{?fedora} >= 32 || 0%{?rhel} >= 9} # Virt is supported on these arches, even on el7, but it's not in core EL7 -%if 0%{?rhel} <= 7 +%if 0%{?rhel} && 0%{?rhel} <= 7 %ifarch ppc64le aarch64 %global with_virthost 0 %endif @@ -64,9 +64,6 @@ %else %if 0%{?rhel} >= 6 %define udevrulesdir /lib/udev/rules.d - %else - # RHEL5 - %define udevrulesdir /etc/udev/rules.d %endif %endif %endif @@ -83,11 +80,7 @@ %define dracutbindir %{_bindir} %endif -%if 0%{?rhel} == 6 - %{!?_rpmmacrodir: %define _rpmmacrodir /etc/rpm/} -%else - %{!?_rpmmacrodir: %define _rpmmacrodir %{_rpmconfigdir}/macros.d} -%endif +%{!?_rpmmacrodir: %define _rpmmacrodir %{_rpmconfigdir}/macros.d} # To avoid testsuite/*/*.stp has shebang which doesn't start with '/' %define __brp_mangle_shebangs_exclude_from .stp$ @@ -122,8 +115,8 @@ m stapdev stapdev Name: systemtap # PRERELEASE -Version: 4.9 -Release: 3%{?release_override}%{?dist} +Version: 5.0 +Release: 4%{?release_override}%{?dist} # for version, see also configure.ac @@ -156,15 +149,12 @@ Release: 3%{?release_override}%{?dist} # intermediary stap-server for --use-server: systemtap-server (-devel unused) Summary: Programmable system-wide instrumentation system -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Source: ftp://sourceware.org/pub/systemtap/releases/systemtap-%{version}.tar.gz -Patch1: rhbz2223733.patch -Patch2: rhbz2223735.patch -Patch3: pr29108.patch -Patch4: pr30749.patch - +Patch1: RHEL-16549.patch +Patch2: RHEL-18334.patch # Build* BuildRequires: make @@ -175,6 +165,7 @@ BuildRequires: pkgconfig(nss) BuildRequires: pkgconfig(avahi-client) %if %{with_debuginfod} BuildRequires: pkgconfig(libdebuginfod) +BuildRequires: pkgconfig(json-c) %endif %if %{with_dyninst} BuildRequires: dyninst-devel >= 10.0 @@ -226,9 +217,6 @@ BuildRequires: pkgconfig(libvirt) BuildRequires: pkgconfig(libxml-2.0) %endif BuildRequires: readline-devel -%if 0%{?rhel} <= 5 -BuildRequires: pkgconfig(ncurses) -%endif %if %{with_python2_probes} BuildRequires: python2-devel %if 0%{?fedora} >= 1 @@ -268,7 +256,7 @@ the components needed to locally develop and execute systemtap scripts. %package server Summary: Instrumentation System Server -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap-devel = %{version}-%{release} Conflicts: systemtap-devel < %{version}-%{release} @@ -298,7 +286,7 @@ compiles systemtap scripts to kernel objects on their demand. %package devel Summary: Programmable system-wide instrumentation system - development headers, tools -License: GPLv2+ +License: GPL-2.0-or-later AND GPL-2.0-only AND BSD-3-Clause AND LGPL-2.1-only AND BSD-2-Clause URL: http://sourceware.org/systemtap/ %if 0%{?rhel} >= 8 || 0%{?fedora} >= 20 @@ -328,7 +316,7 @@ a copy of the standard tapset library and the runtime library C files. %package runtime Summary: Programmable system-wide instrumentation system - runtime -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires(pre): shadow-utils Conflicts: systemtap-devel < %{version}-%{release} @@ -343,7 +331,7 @@ using a local or remote systemtap-devel installation. %package client Summary: Programmable system-wide instrumentation system - client -License: GPLv2+ +License: GPL-2.0-or-later AND GPL-2.0-only AND BSD-3-Clause AND LGPL-2.1-only AND GFDL-1.2-or-later AND BSD-2-Clause URL: http://sourceware.org/systemtap/ Requires: zip unzip Requires: systemtap-runtime = %{version}-%{release} @@ -366,7 +354,7 @@ documentation, and a copy of the tapset library for reference. %package initscript Summary: Systemtap Initscripts -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap = %{version}-%{release} %if %{with_systemd} @@ -386,7 +374,7 @@ boot-time probing if supported. %package sdt-devel Summary: Static probe support tools -License: GPLv2+ and Public Domain +License: GPL-2.0-or-later AND CC0-1.0 URL: http://sourceware.org/systemtap/ %if %{with_pyparsing} %if %{with_python3} @@ -409,12 +397,12 @@ with the optional dtrace-compatibility preprocessor to process related %package testsuite Summary: Instrumentation System Testsuite -License: GPLv2+ +License: GPL-2.0-or-later AND GPL-2.0-only AND GPL-3.0-or-later AND MIT URL: http://sourceware.org/systemtap/ Requires: systemtap = %{version}-%{release} Requires: systemtap-sdt-devel = %{version}-%{release} Requires: systemtap-server = %{version}-%{release} -Requires: dejagnu which elfutils grep nc +Requires: dejagnu which elfutils grep nc wget %if %{with_debuginfod} Requires: elfutils-debuginfod %endif @@ -481,7 +469,7 @@ systemtap on the current system. %if %{with_java} %package runtime-java Summary: Systemtap Java Runtime Support -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap-runtime = %{version}-%{release} # work around fedora ci gating kvetching about i686<->x86-64 conflicts @@ -503,7 +491,7 @@ that probe Java processes running on the OpenJDK runtimes using Byteman. %if %{with_python2_probes} %package runtime-python2 Summary: Systemtap Python 2 Runtime Support -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap-runtime = %{version}-%{release} @@ -515,7 +503,7 @@ that probe python 2 processes. %if %{with_python3_probes} %package runtime-python3 Summary: Systemtap Python 3 Runtime Support -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap-runtime = %{version}-%{release} @@ -532,7 +520,7 @@ that probe python 3 processes. %if %{with_python3_probes} %package exporter Summary: Systemtap-prometheus interoperation mechanism -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap-runtime = %{version}-%{release} @@ -545,7 +533,7 @@ to remote requesters on demand. %if %{with_virthost} %package runtime-virthost Summary: Systemtap Cross-VM Instrumentation - host -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ # only require libvirt-libs really #Requires: libvirt >= 1.0.2 @@ -560,7 +548,7 @@ connection. %if %{with_virtguest} %package runtime-virtguest Summary: Systemtap Cross-VM Instrumentation - guest -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap-runtime = %{version}-%{release} %if %{with_systemd} @@ -581,7 +569,7 @@ systemtap-runtime-virthost machine to execute systemtap scripts. %if %{with_python3} && %{with_monitor} %package jupyter Summary: ISystemtap jupyter kernel and examples -License: GPLv2+ +License: GPL-2.0-or-later URL: http://sourceware.org/systemtap/ Requires: systemtap = %{version}-%{release} @@ -596,8 +584,6 @@ or within a container. %setup -q %patch -P1 -p1 %patch -P2 -p1 -%patch -P3 -p1 -%patch -P4 -p1 %build @@ -1317,6 +1303,19 @@ exit 0 # PRERELEASE %changelog +* Wed Dec 6 2023 William Cohen - 5.0-4 +- RHEL-18334 + +* Tue Nov 14 2023 Frank Ch. Eigler - 5.0-3 +- RHEL-16549 + +* Mon Nov 06 2023 Frank Ch. Eigler - 5.0-2 +- License header tweak + +* Fri Nov 03 2023 Frank Ch. Eigler - 5.0-1 +- Upstream release, see wiki page below for detailed notes. + https://sourceware.org/systemtap/wiki/SystemTapReleases + * Mon Aug 14 2023 Frank Ch. Eigler - 4.9-3 - rhbz2231632 - rhbz2231635