diff --git a/SOURCES/redhat-bugzilla-1973833.patch b/SOURCES/redhat-bugzilla-1973833.patch new file mode 100644 index 0000000..ef1b249 --- /dev/null +++ b/SOURCES/redhat-bugzilla-1973833.patch @@ -0,0 +1,193 @@ +commit 6b907e90c74fce82d6b712493d8b362bdd1a1ec1 +Author: Nathan Scott +Date: Tue Dec 14 08:54:14 2021 +1100 + + pmlogconf: switch to the bulk pmLookupDescs(3) interface + + No functional change, all existing regression tests pass. + + Related to Red Hat BZ #1973833. + +diff --git a/src/pmlogconf/pmlogconf.c b/src/pmlogconf/pmlogconf.c +index fa1156859d..ef4fc08bbd 100644 +--- a/src/pmlogconf/pmlogconf.c ++++ b/src/pmlogconf/pmlogconf.c +@@ -473,13 +473,19 @@ fetch_groups(void) + { + static pmResult *result; + const char **names; ++ pmDesc *descs; + pmID *pmids; +- int i, n, sts; ++ int i, n, sts, count; + +- /* prepare arrays of names and identifiers for PMAPI metric lookup */ ++ /* prepare arrays of names, descriptors and IDs for PMAPI metric lookup */ + if ((names = calloc(ngroups, sizeof(char *))) == NULL) + return -ENOMEM; ++ if ((descs = calloc(ngroups, sizeof(pmDesc))) == NULL) { ++ free(names); ++ return -ENOMEM; ++ } + if ((pmids = calloc(ngroups, sizeof(pmID))) == NULL) { ++ free(descs); + free(names); + return -ENOMEM; + } +@@ -490,15 +496,16 @@ fetch_groups(void) + continue; + names[n++] = (const char *)groups[i].metric; + } ++ count = n; + +- if ((sts = pmLookupName(n, names, pmids)) < 0) { +- if (n == 1) ++ if ((sts = pmLookupName(count, names, pmids)) < 0) { ++ if (count == 1) + groups[0].pmid = PM_ID_NULL; + else + fprintf(stderr, "%s: cannot lookup metric names: %s\n", + pmGetProgname(), pmErrStr(sts)); + } +- else if ((sts = pmFetch(n, pmids, &result)) < 0) { ++ else if ((sts = pmFetch(count, pmids, &result)) < 0) { + fprintf(stderr, "%s: cannot fetch metric values: %s\n", + pmGetProgname(), pmErrStr(sts)); + } +@@ -510,6 +517,13 @@ fetch_groups(void) + else + groups[i].pmid = pmids[n++]; + } ++ /* descriptor lookup, descs_hash handles 
failure here */ ++ (void) pmLookupDescs(count, pmids, descs); ++ ++ /* create a hash over the descs for quick PMID lookup */ ++ if ((sts = descs_hash(count, descs)) < 0) ++ fprintf(stderr, "%s: cannot hash metric descs: %s\n", ++ pmGetProgname(), pmErrStr(sts)); + /* create a hash over the result for quick PMID lookup */ + if ((sts = values_hash(result)) < 0) + fprintf(stderr, "%s: cannot hash metric values: %s\n", +@@ -806,14 +820,16 @@ evaluate_string_regexp(group_t *group, regex_cmp_t compare) + int i, found; + pmValueSet *vsp; + pmValue *vp; ++ pmDesc *dp; + pmAtomValue atom; + regex_t regex; + int sts, type; + +- if ((vsp = metric_values(group->pmid)) == NULL) ++ if ((vsp = metric_values(group->pmid)) == NULL || ++ (dp = metric_desc(group->pmid)) == NULL) + return 0; + +- type = metric_type(group->pmid); ++ type = dp->type; + if (type < 0 || type > PM_TYPE_STRING) { + fprintf(stderr, "%s: %s uses regular expression on non-scalar metric\n", + pmGetProgname(), group->tag); +@@ -849,11 +865,14 @@ evaluate_string_regexp(group_t *group, regex_cmp_t compare) + static int + evaluate_values(group_t *group, numeric_cmp_t ncmp, string_cmp_t scmp) + { +- int type = metric_type(group->pmid); ++ pmDesc *dp; ++ ++ if ((dp = metric_desc(group->pmid)) == NULL) ++ return 0; + +- if (type == PM_TYPE_STRING) ++ if (dp->type == PM_TYPE_STRING) + return evaluate_string_values(group, scmp); +- return evaluate_number_values(group, type, ncmp); ++ return evaluate_number_values(group, dp->type, ncmp); + } + + int +diff --git a/src/pmlogconf/util.c b/src/pmlogconf/util.c +index d44c2e529a..293eb2eca3 100644 +--- a/src/pmlogconf/util.c ++++ b/src/pmlogconf/util.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2020 Red Hat. All Rights Reserved. ++ * Copyright (c) 2020-2021 Red Hat. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the +@@ -14,7 +14,7 @@ + #include "util.h" + + static __pmHashCtl valuesctl; /* pointers to values in pmResult */ +-static __pmHashCtl typesctl; /* metric types from pmLookupDesc */ ++static __pmHashCtl descsctl; /* metric descs from pmLookupDesc */ + + int + values_hash(pmResult *result) +@@ -47,27 +47,33 @@ metric_values(pmID pmid) + } + + int +-metric_type(pmID pmid) ++descs_hash(int numpmid, pmDesc *descs) + { +- __pmHashNode *node; +- pmDesc desc; +- int sts, *data; ++ unsigned int i; ++ pmDesc *dp; ++ int sts; + +- if (pmid == PM_IN_NULL) +- return PM_TYPE_UNKNOWN; +- if ((node = __pmHashSearch(pmid, &typesctl)) == NULL) { +- if ((sts = pmLookupDesc(pmid, &desc)) < 0) +- return sts; +- if ((data = malloc(sizeof(int))) == NULL) +- return sts; +- *data = desc.type; +- if ((sts = __pmHashAdd(pmid, data, &typesctl)) < 0) { +- free(data); ++ if ((sts = __pmHashPreAlloc(numpmid, &descsctl)) < 0) ++ return sts; ++ ++ for (i = 0; i < numpmid; i++) { ++ dp = &descs[i]; ++ if ((sts = __pmHashAdd(dp->pmid, dp, &descsctl)) < 0) + return sts; +- } +- return *data; + } +- return *(int *)node->data; ++ return numpmid; ++} ++ ++pmDesc * ++metric_desc(pmID pmid) ++{ ++ __pmHashNode *node; ++ ++ if (pmid == PM_IN_NULL) ++ return NULL; ++ if ((node = __pmHashSearch(pmid, &descsctl)) == NULL) ++ return NULL; ++ return (pmDesc *)node->data; + } + + int +diff --git a/src/pmlogconf/util.h b/src/pmlogconf/util.h +index 17d856a0d7..a11350d899 100644 +--- a/src/pmlogconf/util.h ++++ b/src/pmlogconf/util.h +@@ -34,7 +34,9 @@ extern void fmt(const char *, char *, size_t, int, int, fmt_t, void *); + + extern int values_hash(pmResult *); + extern pmValueSet *metric_values(pmID); +-extern int metric_type(pmID); ++ ++extern int descs_hash(int, pmDesc *); ++extern pmDesc *metric_desc(pmID); + + typedef int (*numeric_cmp_t)(double, double); + 
extern int number_equal(double, double); diff --git a/SOURCES/redhat-bugzilla-2003956-pmdabcc-update-kernel-version-check-due-to-backporting.patch b/SOURCES/redhat-bugzilla-2003956-pmdabcc-update-kernel-version-check-due-to-backporting.patch new file mode 100644 index 0000000..3c7eaec --- /dev/null +++ b/SOURCES/redhat-bugzilla-2003956-pmdabcc-update-kernel-version-check-due-to-backporting.patch @@ -0,0 +1,20 @@ +The bcc included in RHEL 8.6 doesn't support the kernel_struct_has_field function. +The 4.18.x kernel in RHEL 8.6 backported the `state` to `__state` rename (upstream, +this change landed in kernel v5.14), so we can neither test for +the existence of this kernel struct member nor rely on the upstream kernel version check. + +Therefore, lower the version threshold to 4.18.0 here, for RHEL 8.x only: + +diff --git a/src/pmdas/bcc/modules/runqlat.python b/src/pmdas/bcc/modules/runqlat.python +index 1c6c6b4b0..efc30e958 100644 +--- a/src/pmdas/bcc/modules/runqlat.python ++++ b/src/pmdas/bcc/modules/runqlat.python +@@ -100,7 +100,7 @@ class PCPBCCModule(PCPBCCBase): + if ( + hasattr(BPF, "kernel_struct_has_field") + and BPF.kernel_struct_has_field(b"task_struct", b"__state") == 1 +- ) or self.kernel_version() >= (5, 14, 0): ++ ) or self.kernel_version() >= (4, 18, 0): + self.bpf_text = self.bpf_text.replace('STATE_FIELD', '__state') + else: + self.bpf_text = self.bpf_text.replace('STATE_FIELD', 'state') diff --git a/SOURCES/redhat-bugzilla-2003956.patch b/SOURCES/redhat-bugzilla-2003956.patch new file mode 100644 index 0000000..a027740 --- /dev/null +++ b/SOURCES/redhat-bugzilla-2003956.patch @@ -0,0 +1,1433 @@ +commit 14ffcd934e1c5099b471f4e73da32d1b32bac7e6 +Author: Andreas Gerstmayr +Date: Mon Dec 13 20:10:40 2021 +0100 + + pmdabcc: sync bcc PMDA modules with upstream bcc tools + +diff --git a/src/pmdas/bcc/modules/execsnoop.bpf b/src/pmdas/bcc/modules/execsnoop.bpf +index f69200773..aa755b3a1 100644 +--- a/src/pmdas/bcc/modules/execsnoop.bpf ++++ 
b/src/pmdas/bcc/modules/execsnoop.bpf +@@ -4,40 +4,57 @@ + #include + #include + #include ++ + #define ARGSIZE 128 ++ + enum event_type { + EVENT_ARG, + EVENT_RET, + }; ++ + struct data_t { + u32 pid; // PID as in the userspace term (i.e. task->tgid in kernel) + u32 ppid; // Parent PID as in the userspace term (i.e task->real_parent->tgid in kernel) ++ u32 uid; + char comm[TASK_COMM_LEN]; + enum event_type type; + char argv[ARGSIZE]; + int retval; + }; ++ + BPF_PERF_OUTPUT(events); ++ + static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data) + { +- bpf_probe_read(data->argv, sizeof(data->argv), ptr); ++ bpf_probe_read_user(data->argv, sizeof(data->argv), ptr); + events.perf_submit(ctx, data, sizeof(struct data_t)); + return 1; + } ++ + static int submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data) + { + const char *argp = NULL; +- bpf_probe_read(&argp, sizeof(argp), ptr); ++ bpf_probe_read_user(&argp, sizeof(argp), ptr); + if (argp) { + return __submit_arg(ctx, (void *)(argp), data); + } + return 0; + } ++ + int syscall__execve(struct pt_regs *ctx, + const char __user *filename, + const char __user *const __user *__argv, + const char __user *const __user *__envp) + { ++ ++ u32 uid = bpf_get_current_uid_gid() & 0xffffffff; ++ ++ UID_FILTER ++ ++ if (container_should_be_filtered()) { ++ return 0; ++ } ++ + // create data here and pass to submit_arg to save stack space (#555) + struct data_t data = {}; + struct task_struct *task; +@@ -52,25 +69,37 @@ int syscall__execve(struct pt_regs *ctx, + + bpf_get_current_comm(&data.comm, sizeof(data.comm)); + data.type = EVENT_ARG; ++ + __submit_arg(ctx, (void *)filename, &data); ++ + // skip first arg, as we submitted filename + #pragma unroll + for (int i = 1; i < MAXARG; i++) { + if (submit_arg(ctx, (void *)&__argv[i], &data) == 0) + goto out; + } ++ + // handle truncated argument list + char ellipsis[] = "..."; + __submit_arg(ctx, (void *)ellipsis, &data); + out: + return 0; + } ++ + int 
do_ret_sys_execve(struct pt_regs *ctx) + { ++ if (container_should_be_filtered()) { ++ return 0; ++ } ++ + struct data_t data = {}; + struct task_struct *task; + ++ u32 uid = bpf_get_current_uid_gid() & 0xffffffff; ++ UID_FILTER ++ + data.pid = bpf_get_current_pid_tgid() >> 32; ++ data.uid = uid; + + task = (struct task_struct *)bpf_get_current_task(); + // Some kernels, like Ubuntu 4.13.0-generic, return 0 +@@ -82,5 +111,6 @@ int do_ret_sys_execve(struct pt_regs *ctx) + data.type = EVENT_RET; + data.retval = PT_REGS_RC(ctx); + events.perf_submit(ctx, &data, sizeof(data)); ++ + return 0; + } +diff --git a/src/pmdas/bcc/modules/execsnoop.python b/src/pmdas/bcc/modules/execsnoop.python +index 54382fa9b..1127cc471 100644 +--- a/src/pmdas/bcc/modules/execsnoop.python ++++ b/src/pmdas/bcc/modules/execsnoop.python +@@ -44,20 +44,6 @@ MODULE = 'execsnoop' + BASENS = 'proc.exec.' + units_none = pmUnits(0, 0, 0, 0, 0, 0) + +-TASK_COMM_LEN = 16 # linux/sched.h +-ARGSIZE = 128 # should match #define in execsnoop.bpf +- +-class Data(ct.Structure): +- """ execsnoop data struct """ +- _fields_ = [ +- ("pid", ct.c_uint), +- ("ppid", ct.c_uint), +- ("comm", ct.c_char * TASK_COMM_LEN), +- ("type", ct.c_int), +- ("argv", ct.c_char * ARGSIZE), +- ("retval", ct.c_int), +- ] +- + class EventType(object): + """ Event type """ + EVENT_ARG = 0 +@@ -137,7 +123,7 @@ class PCPBCCModule(PCPBCCBase): + + def handle_event(self, _cpu, data, _size): + """ Event handler """ +- event = ct.cast(data, ct.POINTER(Data)).contents ++ event = self.bpf["events"].event(data) + skip = False + + if event.type == EventType.EVENT_ARG: +@@ -145,9 +131,9 @@ class PCPBCCModule(PCPBCCBase): + elif event.type == EventType.EVENT_RET: + if event.retval != 0 and not self.include_failed: + skip = True +- if self.command and not re.search(self.command, event.comm): ++ if self.command and not re.search(bytes(self.command), event.comm): + skip = True +- if self.args and not re.search(self.args, b" 
".join(self.argv_cache[event.pid])): ++ if self.args and not re.search(bytes(self.args), b" ".join(self.argv_cache[event.pid])): + skip = True + + if not skip: +@@ -177,10 +163,14 @@ class PCPBCCModule(PCPBCCBase): + + self.bpf_text = self.bpf_text.replace("MAXARG", str(self.max_args)) + ++ bpf_text = self.bpf_text ++ bpf_text = bpf_text.replace('UID_FILTER', '') ++ bpf_text = bpf_text.replace('container_should_be_filtered()', '0') ++ + if self.debug: +- self.log("BPF to be compiled:\n" + self.bpf_text.strip()) ++ self.log("BPF to be compiled:\n" + bpf_text.strip()) + +- self.bpf = BPF(text=self.bpf_text) ++ self.bpf = BPF(text=bpf_text) + execve_fnname = self.get_syscall_fnname("execve") + self.bpf.attach_kprobe(event=execve_fnname, fn_name="syscall__execve") + self.bpf.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve") +diff --git a/src/pmdas/bcc/modules/pcpbcc.python b/src/pmdas/bcc/modules/pcpbcc.python +index 0555dc33f..62783b7fc 100644 +--- a/src/pmdas/bcc/modules/pcpbcc.python ++++ b/src/pmdas/bcc/modules/pcpbcc.python +@@ -14,6 +14,7 @@ + """ PCP BCC PMDA module base class """ + + import re ++import platform + import ctypes as ct + from os import kill, listdir, path + from collections import OrderedDict +@@ -348,6 +349,16 @@ class PCPBCCBase(object): + """ Returns BCC version as an int tuple (for comparisons) """ + return tuple(map(int, PCPBCCBase.bcc_version().split('.'))) + ++ @staticmethod ++ def kernel_version(): ++ """Returns the kernel version""" ++ version_str = platform.release() ++ m = re.match(r'^(\d+)\.(\d+)\.(\d+)', version_str) ++ if m: ++ return tuple(map(int, m.groups())) ++ else: ++ return (0, 0, 0) ++ + def perf_buffer_poller(self): + """ BPF poller """ + try: +diff --git a/src/pmdas/bcc/modules/runqlat.python b/src/pmdas/bcc/modules/runqlat.python +index 27007c7e5..1c6c6b4b0 100644 +--- a/src/pmdas/bcc/modules/runqlat.python ++++ b/src/pmdas/bcc/modules/runqlat.python +@@ -30,7 +30,11 @@ from modules.pcpbcc import 
PCPBCCBase + # + # BPF program + # +-bpf_src = "modules/runqlat.bpf" ++is_support_raw_tp = BPF.support_raw_tracepoint() ++if is_support_raw_tp: ++ bpf_src = "modules/runqlat_tp.bpf" ++else: ++ bpf_src = "modules/runqlat_kp.bpf" + + # + # PCP BCC PMDA constants +@@ -59,6 +63,7 @@ class PCPBCCModule(PCPBCCBase): + self.proc_filter = self.config.get(MODULE, opt) + self.update_pids(self.get_proc_info(self.proc_filter)) + ++ self.log("Using BPF source file %s." % bpf_src) + self.log("Initialized.") + + def metrics(self): +@@ -89,7 +94,23 @@ class PCPBCCModule(PCPBCCBase): + with open(path.dirname(__file__) + '/../' + bpf_src) as src: + self.bpf_text = src.read() + ++ # BPF.kernel_struct_has_field requires BCC v0.23.0 ++ # use kernel version check as alternative ++ # pylint: disable=no-member ++ if ( ++ hasattr(BPF, "kernel_struct_has_field") ++ and BPF.kernel_struct_has_field(b"task_struct", b"__state") == 1 ++ ) or self.kernel_version() >= (5, 14, 0): ++ self.bpf_text = self.bpf_text.replace('STATE_FIELD', '__state') ++ else: ++ self.bpf_text = self.bpf_text.replace('STATE_FIELD', 'state') ++ + self.bpf_text = self.bpf_text.replace("FILTER", "PID_CHECK") ++ self.bpf_text = self.bpf_text.replace('FACTOR', 'delta /= 1000;') ++ ++ self.bpf_text = self.bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);') ++ self.bpf_text = self.bpf_text.replace('STORE', ++ 'dist.increment(bpf_log2l(delta));') + + if not self.pids and self.proc_filter and self.proc_refresh: + self.log("No process to attach found, activation postponed.") +@@ -102,9 +123,11 @@ class PCPBCCModule(PCPBCCBase): + + self.reset_cache() + self.bpf = BPF(text=bpf_text) +- self.bpf.attach_kprobe(event="ttwu_do_wakeup", fn_name="trace_ttwu_do_wakeup") +- self.bpf.attach_kprobe(event="wake_up_new_task", fn_name="trace_wake_up_new_task") +- self.bpf.attach_kprobe(event_re=r"^finish_task_switch$|^finish_task_switch\.isra\.\d$", fn_name="trace_run") ++ if not is_support_raw_tp: ++ 
self.bpf.attach_kprobe(event="ttwu_do_wakeup", fn_name="trace_ttwu_do_wakeup") ++ self.bpf.attach_kprobe(event="wake_up_new_task", fn_name="trace_wake_up_new_task") ++ self.bpf.attach_kprobe(event_re=r"^finish_task_switch$|^finish_task_switch\.isra\.\d$", ++ fn_name="trace_run") + self.log("Compiled.") + except Exception as error: # pylint: disable=broad-except + self.bpf = None +diff --git a/src/pmdas/bcc/modules/runqlat.bpf b/src/pmdas/bcc/modules/runqlat_kp.bpf +similarity index 54% +rename from src/pmdas/bcc/modules/runqlat.bpf +rename to src/pmdas/bcc/modules/runqlat_kp.bpf +index a3664a035..dd643d600 100644 +--- a/src/pmdas/bcc/modules/runqlat.bpf ++++ b/src/pmdas/bcc/modules/runqlat_kp.bpf +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + typedef struct pid_key { + u64 id; // work around +@@ -17,7 +18,7 @@ typedef struct pidns_key { + } pidns_key_t; + + BPF_HASH(start, u32); +-BPF_HISTOGRAM(dist); ++STORAGE + + struct rq; + +@@ -31,6 +32,45 @@ static int trace_enqueue(u32 tgid, u32 pid) + return 0; + } + ++static __always_inline unsigned int pid_namespace(struct task_struct *task) ++{ ++ ++/* pids[] was removed from task_struct since commit 2c4704756cab7cfa031ada4dab361562f0e357c0 ++ * Using the macro INIT_PID_LINK as a conditional judgment. 
++ */ ++#ifdef INIT_PID_LINK ++ struct pid_link pids; ++ unsigned int level; ++ struct upid upid; ++ struct ns_common ns; ++ ++ /* get the pid namespace by following task_active_pid_ns(), ++ * pid->numbers[pid->level].ns ++ */ ++ bpf_probe_read_kernel(&pids, sizeof(pids), &task->pids[PIDTYPE_PID]); ++ bpf_probe_read_kernel(&level, sizeof(level), &pids.pid->level); ++ bpf_probe_read_kernel(&upid, sizeof(upid), &pids.pid->numbers[level]); ++ bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns); ++ ++ return ns.inum; ++#else ++ struct pid *pid; ++ unsigned int level; ++ struct upid upid; ++ struct ns_common ns; ++ ++ /* get the pid namespace by following task_active_pid_ns(), ++ * pid->numbers[pid->level].ns ++ */ ++ bpf_probe_read_kernel(&pid, sizeof(pid), &task->thread_pid); ++ bpf_probe_read_kernel(&level, sizeof(level), &pid->level); ++ bpf_probe_read_kernel(&upid, sizeof(upid), &pid->numbers[level]); ++ bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns); ++ ++ return ns.inum; ++#endif ++} ++ + int trace_wake_up_new_task(struct pt_regs *ctx, struct task_struct *p) + { + return trace_enqueue(p->tgid, p->pid); +@@ -48,7 +88,7 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev) + u32 pid, tgid; + + // ivcsw: treat like an enqueue event and store timestamp +- if (prev->state == TASK_RUNNING) { ++ if (prev->STATE_FIELD == TASK_RUNNING) { + tgid = prev->tgid; + pid = prev->pid; + if (!(FILTER || pid == 0)) { +@@ -69,10 +109,10 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev) + return 0; // missed enqueue + } + delta = bpf_ktime_get_ns() - *tsp; +- delta /= 1000; ++ FACTOR + + // store as histogram +- dist.increment(bpf_log2l(delta)); ++ STORE + + start.delete(&pid); + return 0; +diff --git a/src/pmdas/bcc/modules/runqlat_tp.bpf b/src/pmdas/bcc/modules/runqlat_tp.bpf +new file mode 100644 +index 000000000..f0e9ce69b +--- /dev/null ++++ b/src/pmdas/bcc/modules/runqlat_tp.bpf +@@ -0,0 +1,124 @@ ++// Copyright 2016 Netflix, Inc. 
++// Licensed under the Apache License, Version 2.0 (the "License") ++ ++#include ++#include ++#include ++#include ++#include ++ ++typedef struct pid_key { ++ u64 id; // work around ++ u64 slot; ++} pid_key_t; ++ ++typedef struct pidns_key { ++ u64 id; // work around ++ u64 slot; ++} pidns_key_t; ++ ++BPF_HASH(start, u32); ++STORAGE ++ ++struct rq; ++ ++// record enqueue timestamp ++static int trace_enqueue(u32 tgid, u32 pid) ++{ ++ if (FILTER || pid == 0) ++ return 0; ++ u64 ts = bpf_ktime_get_ns(); ++ start.update(&pid, &ts); ++ return 0; ++} ++ ++static __always_inline unsigned int pid_namespace(struct task_struct *task) ++{ ++ ++/* pids[] was removed from task_struct since commit 2c4704756cab7cfa031ada4dab361562f0e357c0 ++ * Using the macro INIT_PID_LINK as a conditional judgment. ++ */ ++#ifdef INIT_PID_LINK ++ struct pid_link pids; ++ unsigned int level; ++ struct upid upid; ++ struct ns_common ns; ++ ++ /* get the pid namespace by following task_active_pid_ns(), ++ * pid->numbers[pid->level].ns ++ */ ++ bpf_probe_read_kernel(&pids, sizeof(pids), &task->pids[PIDTYPE_PID]); ++ bpf_probe_read_kernel(&level, sizeof(level), &pids.pid->level); ++ bpf_probe_read_kernel(&upid, sizeof(upid), &pids.pid->numbers[level]); ++ bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns); ++ ++ return ns.inum; ++#else ++ struct pid *pid; ++ unsigned int level; ++ struct upid upid; ++ struct ns_common ns; ++ ++ /* get the pid namespace by following task_active_pid_ns(), ++ * pid->numbers[pid->level].ns ++ */ ++ bpf_probe_read_kernel(&pid, sizeof(pid), &task->thread_pid); ++ bpf_probe_read_kernel(&level, sizeof(level), &pid->level); ++ bpf_probe_read_kernel(&upid, sizeof(upid), &pid->numbers[level]); ++ bpf_probe_read_kernel(&ns, sizeof(ns), &upid.ns->ns); ++ ++ return ns.inum; ++#endif ++} ++ ++RAW_TRACEPOINT_PROBE(sched_wakeup) ++{ ++ // TP_PROTO(struct task_struct *p) ++ struct task_struct *p = (struct task_struct *)ctx->args[0]; ++ return trace_enqueue(p->tgid, p->pid); ++} ++ 
++RAW_TRACEPOINT_PROBE(sched_wakeup_new) ++{ ++ // TP_PROTO(struct task_struct *p) ++ struct task_struct *p = (struct task_struct *)ctx->args[0]; ++ return trace_enqueue(p->tgid, p->pid); ++} ++ ++RAW_TRACEPOINT_PROBE(sched_switch) ++{ ++ // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next) ++ struct task_struct *prev = (struct task_struct *)ctx->args[1]; ++ struct task_struct *next = (struct task_struct *)ctx->args[2]; ++ u32 pid, tgid; ++ ++ // ivcsw: treat like an enqueue event and store timestamp ++ if (prev->STATE_FIELD == TASK_RUNNING) { ++ tgid = prev->tgid; ++ pid = prev->pid; ++ if (!(FILTER || pid == 0)) { ++ u64 ts = bpf_ktime_get_ns(); ++ start.update(&pid, &ts); ++ } ++ } ++ ++ tgid = next->tgid; ++ pid = next->pid; ++ if (FILTER || pid == 0) ++ return 0; ++ u64 *tsp, delta; ++ ++ // fetch timestamp and calculate delta ++ tsp = start.lookup(&pid); ++ if (tsp == 0) { ++ return 0; // missed enqueue ++ } ++ delta = bpf_ktime_get_ns() - *tsp; ++ FACTOR ++ ++ // store as histogram ++ STORE ++ ++ start.delete(&pid); ++ return 0; ++} +diff --git a/src/pmdas/bcc/modules/tcplife.python b/src/pmdas/bcc/modules/tcplife.python +index 0c6f17c36..02c693a6a 100644 +--- a/src/pmdas/bcc/modules/tcplife.python ++++ b/src/pmdas/bcc/modules/tcplife.python +@@ -37,16 +37,11 @@ from modules.pcpbcc import PCPBCCBase + # + # BPF program + # +-bpf_src = "modules/tcplife.bpf" +-# Compat with kernel < 4.16, bcc < 0.6 +-TRACEFS = "/sys/kernel/debug/tracing" +-bpf_src_old_tp = "modules/tcplife_old_tp.bpf" +-bpf_src_old_kb = "modules/tcplife_old_kb.bpf" +-if not path.exists(TRACEFS + "/events/sock/inet_sock_set_state"): +- if path.exists(TRACEFS + "/events/tcp/tcp_set_state"): +- bpf_src = bpf_src_old_tp +- else: +- bpf_src = bpf_src_old_kb ++if BPF.tracepoint_exists("sock", "inet_sock_set_state"): ++ bpf_src = "modules/tcplife_tp.bpf" ++else: ++ bpf_src = "modules/tcplife_kp.bpf" ++ + + # + # PCP BCC PMDA constants +@@ -57,35 +52,6 @@ units_bytes = 
pmUnits(1, 0, 0, PM_SPACE_BYTE, 0, 0) + units_usecs = pmUnits(0, 1, 0, 0, PM_TIME_USEC, 0) + units_none = pmUnits(0, 0, 0, 0, 0, 0) + +-TASK_COMM_LEN = 16 # linux/sched.h +- +-class Data_ipv4(ct.Structure): +- """ IPv4 data struct """ +- _fields_ = [ +- ("ts_us", ct.c_ulonglong), +- ("pid", ct.c_ulonglong), +- ("saddr", ct.c_ulonglong), +- ("daddr", ct.c_ulonglong), +- ("ports", ct.c_ulonglong), +- ("rx_b", ct.c_ulonglong), +- ("tx_b", ct.c_ulonglong), +- ("span_us", ct.c_ulonglong), +- ("task", ct.c_char * TASK_COMM_LEN) +- ] +- +-class Data_ipv6(ct.Structure): +- """ IPv6 data struct """ +- _fields_ = [ +- ("ts_us", ct.c_ulonglong), +- ("pid", ct.c_ulonglong), +- ("saddr", (ct.c_ulonglong * 2)), +- ("daddr", (ct.c_ulonglong * 2)), +- ("ports", ct.c_ulonglong), +- ("rx_b", ct.c_ulonglong), +- ("tx_b", ct.c_ulonglong), +- ("span_us", ct.c_ulonglong), +- ("task", ct.c_char * TASK_COMM_LEN) +- ] + + # + # PCP BCC Module +@@ -129,24 +95,22 @@ class PCPBCCModule(PCPBCCBase): + self.lock = Lock() + self.thread = None + +- # Compat with kernel < 4.16 + self.log("Using BPF source file %s." 
% bpf_src) + + # Exit hard if impossible to continue +- if self.bcc_version() == "0.6.1" and bpf_src == bpf_src_old_kb: +- raise RuntimeError("BCC 0.6.1 bug makes it incompatible with this module " +- "on kernel < 4.15.") ++ if self.bcc_version_tuple() < (0, 6, 1): ++ raise RuntimeError("BCC 0.6.1+ is required for this module.") + + self.log("Initialized.") + + def handle_ip_event(self, data, version): + """ IP event handler """ + if version == 4: +- event = ct.cast(data, ct.POINTER(Data_ipv4)).contents ++ event = self.bpf["ipv4_events"].event(data) + laddr = inet_ntop(AF_INET, pack("I", event.saddr)) + daddr = inet_ntop(AF_INET, pack("I", event.daddr)) + else: +- event = ct.cast(data, ct.POINTER(Data_ipv6)).contents ++ event = self.bpf["ipv6_events"].event(data) + laddr = inet_ntop(AF_INET6, event.saddr) + daddr = inet_ntop(AF_INET6, event.daddr) + +@@ -205,31 +169,25 @@ class PCPBCCModule(PCPBCCBase): + if not self.bpf_text: + with open(path.dirname(__file__) + '/../' + bpf_src) as src: + self.bpf_text = src.read() +- # Compat with bcc < 0.6 +- self.log("Testing BCC compatilibility, possible errors below are safe to ignore.") +- try: +- test_txt = self.bpf_text.replace("// NEW: ", "").replace("FILTER_PID", "") +- test_bpf = BPF(text=test_txt) +- test_bpf.cleanup() +- self.bpf_text = self.bpf_text.replace("// NEW: ", "") +- except Exception: # pylint: disable=broad-except +- self.bpf_text = self.bpf_text.replace("// OLD: ", "") +- self.log("Tested BCC compatilibility, possible errors above are safe to ignore.") + + if self.dports: + filterp = " && ".join(["dport != %d" % port for port in self.dports]) + filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp +- self.bpf_text = self.bpf_text.replace("//FILTER_DPORT", filter_txt) ++ self.bpf_text = self.bpf_text.replace("FILTER_DPORT", filter_txt) + if self.lports: + filterp = " && ".join(["lport != %d" % port for port in self.lports]) + filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp +- 
self.bpf_text = self.bpf_text.replace("//FILTER_LPORT", filter_txt) ++ self.bpf_text = self.bpf_text.replace("FILTER_LPORT", filter_txt) + + if not self.pids and self.proc_filter and self.proc_refresh: + self.log("No process to attach found, activation postponed.") + return + + bpf_text = self.apply_pid_filter(self.bpf_text, self.pids, False) ++ bpf_text = bpf_text.replace('FILTER_PID', '') ++ bpf_text = bpf_text.replace('FILTER_DPORT', '') ++ bpf_text = bpf_text.replace('FILTER_LPORT', '') ++ bpf_text = bpf_text.replace('FILTER_FAMILY', '') + + if self.debug: + self.log("BPF to be compiled:\n" + bpf_text.strip()) +diff --git a/src/pmdas/bcc/modules/tcplife_old_kb.bpf b/src/pmdas/bcc/modules/tcplife_kp.bpf +similarity index 81% +rename from src/pmdas/bcc/modules/tcplife_old_kb.bpf +rename to src/pmdas/bcc/modules/tcplife_kp.bpf +index eed01941a..5486c6a37 100644 +--- a/src/pmdas/bcc/modules/tcplife_old_kb.bpf ++++ b/src/pmdas/bcc/modules/tcplife_kp.bpf +@@ -2,7 +2,6 @@ + // Licensed under the Apache License, Version 2.0 (the "License") + + #include +-#define KBUILD_MODNAME "pcpbcctcplife" + #include + #include + #include +@@ -11,11 +10,10 @@ BPF_HASH(birth, struct sock *, u64); + + // separate data structs for ipv4 and ipv6 + struct ipv4_data_t { +- // XXX: switch some to u32's when supported + u64 ts_us; +- u64 pid; +- u64 saddr; +- u64 daddr; ++ u32 pid; ++ u32 saddr; ++ u32 daddr; + u64 ports; + u64 rx_b; + u64 tx_b; +@@ -26,7 +24,7 @@ BPF_PERF_OUTPUT(ipv4_events); + + struct ipv6_data_t { + u64 ts_us; +- u64 pid; ++ u32 pid; + unsigned __int128 saddr; + unsigned __int128 daddr; + u64 ports; +@@ -49,12 +47,12 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) + + // lport is either used in a filter here, or later + u16 lport = sk->__sk_common.skc_num; +- //FILTER_LPORT ++ FILTER_LPORT + + // dport is either used in a filter here, or later + u16 dport = sk->__sk_common.skc_dport; + dport = ntohs(dport); +- //FILTER_DPORT ++ FILTER_DPORT 
+ + /* + * This tool includes PID and comm context. It's best effort, and may +@@ -74,6 +72,9 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) + * sets ESTABLISHED without a tcp_set_state() call. Until we know + * that for sure, match all early states to increase chances a + * timestamp is set. ++ * Note that this needs to be set before the PID filter later on, ++ * since the PID isn't reliable for these early stages, so we must ++ * save all timestamps and do the PID filter later when we can. + */ + u64 ts = bpf_ktime_get_ns(); + birth.update(&sk, &ts); +@@ -101,7 +102,7 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + birth.delete(&sk); + +- // fetch possible cached data ++ // fetch possible cached data, and filter + struct id_t *mep; + mep = whoami.lookup(&sk); + if (mep != 0) +@@ -116,9 +117,13 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) + + u16 family = sk->__sk_common.skc_family; + ++ FILTER_FAMILY ++ + if (family == AF_INET) { +- struct ipv4_data_t data4 = {.span_us = delta_us, +- .rx_b = rx_b, .tx_b = tx_b}; ++ struct ipv4_data_t data4 = {}; ++ data4.span_us = delta_us; ++ data4.rx_b = rx_b; ++ data4.tx_b = tx_b; + data4.ts_us = bpf_ktime_get_ns() / 1000; + data4.saddr = sk->__sk_common.skc_rcv_saddr; + data4.daddr = sk->__sk_common.skc_daddr; +@@ -128,17 +133,19 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) + if (mep == 0) { + bpf_get_current_comm(&data4.task, sizeof(data4.task)); + } else { +- bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task); ++ bpf_probe_read_kernel(&data4.task, sizeof(data4.task), (void *)mep->task); + } + ipv4_events.perf_submit(ctx, &data4, sizeof(data4)); + + } else /* 6 */ { +- struct ipv6_data_t data6 = {.span_us = delta_us, +- .rx_b = rx_b, .tx_b = tx_b}; ++ struct ipv6_data_t data6 = {}; ++ data6.span_us = delta_us; ++ data6.rx_b = rx_b; ++ 
data6.tx_b = tx_b; + data6.ts_us = bpf_ktime_get_ns() / 1000; +- bpf_probe_read(&data6.saddr, sizeof(data6.saddr), ++ bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr), + sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); +- bpf_probe_read(&data6.daddr, sizeof(data6.daddr), ++ bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr), + sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + // a workaround until data6 compiles with separate lport/dport + data6.ports = dport + ((0ULL + lport) << 32); +@@ -146,7 +153,7 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) + if (mep == 0) { + bpf_get_current_comm(&data6.task, sizeof(data6.task)); + } else { +- bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task); ++ bpf_probe_read_kernel(&data6.task, sizeof(data6.task), (void *)mep->task); + } + ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); + } +diff --git a/src/pmdas/bcc/modules/tcplife_old_tp.bpf b/src/pmdas/bcc/modules/tcplife_old_tp.bpf +deleted file mode 100644 +index a7c9c625c..000000000 +--- a/src/pmdas/bcc/modules/tcplife_old_tp.bpf ++++ /dev/null +@@ -1,166 +0,0 @@ +-// Copyright 2016 Netflix, Inc. 
+-// Licensed under the Apache License, Version 2.0 (the "License") +- +-#include +-#define KBUILD_MODNAME "pcpbcctcplife" +-#include +-#include +-#include +- +-BPF_HASH(birth, struct sock *, u64); +- +-// separate data structs for ipv4 and ipv6 +-struct ipv4_data_t { +- // XXX: switch some to u32's when supported +- u64 ts_us; +- u64 pid; +- u64 saddr; +- u64 daddr; +- u64 ports; +- u64 rx_b; +- u64 tx_b; +- u64 span_us; +- char task[TASK_COMM_LEN]; +-}; +-BPF_PERF_OUTPUT(ipv4_events); +- +-struct ipv6_data_t { +- u64 ts_us; +- u64 pid; +- unsigned __int128 saddr; +- unsigned __int128 daddr; +- u64 ports; +- u64 rx_b; +- u64 tx_b; +- u64 span_us; +- char task[TASK_COMM_LEN]; +-}; +-BPF_PERF_OUTPUT(ipv6_events); +- +-struct id_t { +- u32 pid; +- char task[TASK_COMM_LEN]; +-}; +-BPF_HASH(whoami, struct sock *, struct id_t); +- +-TRACEPOINT_PROBE(tcp, tcp_set_state) +-{ +- u32 pid = bpf_get_current_pid_tgid() >> 32; +- // sk is mostly used as a UUID, once for skc_family, and two tcp stats: +- struct sock *sk = (struct sock *)args->skaddr; +- +- // lport is either used in a filter here, or later +- u16 lport = args->sport; +- //FILTER_LPORT +- +- // dport is either used in a filter here, or later +- u16 dport = args->dport; +- //FILTER_DPORT +- +- /* +- * This tool includes PID and comm context. It's best effort, and may +- * be wrong in some situations. It currently works like this: +- * - record timestamp on any state < TCP_FIN_WAIT1 +- * - cache task context on: +- * TCP_SYN_SENT: tracing from client +- * TCP_LAST_ACK: client-closed from server +- * - do output on TCP_CLOSE: +- * fetch task context if cached, or use current task +- */ +- +- // capture birth time +- if (args->newstate < TCP_FIN_WAIT1) { +- /* +- * Matching just ESTABLISHED may be sufficient, provided no code-path +- * sets ESTABLISHED without a tcp_set_state() call. Until we know +- * that for sure, match all early states to increase chances a +- * timestamp is set. 
+- * Note that this needs to be set before the PID filter later on, +- * since the PID isn't reliable for these early stages, so we must +- * save all timestamps and do the PID filter later when we can. +- */ +- u64 ts = bpf_ktime_get_ns(); +- birth.update(&sk, &ts); +- } +- +- // record PID & comm on SYN_SENT +- if (args->newstate == TCP_SYN_SENT || args->newstate == TCP_LAST_ACK) { +- // now we can PID filter, both here and a little later on for CLOSE +- FILTER_PID +- struct id_t me = {.pid = pid}; +- bpf_get_current_comm(&me.task, sizeof(me.task)); +- whoami.update(&sk, &me); +- } +- +- if (args->newstate != TCP_CLOSE) +- return 0; +- +- // calculate lifespan +- u64 *tsp, delta_us; +- tsp = birth.lookup(&sk); +- if (tsp == 0) { +- whoami.delete(&sk); // may not exist +- return 0; // missed create +- } +- delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; +- birth.delete(&sk); +- +- // fetch possible cached data, and filter +- struct id_t *mep; +- mep = whoami.lookup(&sk); +- if (mep != 0) +- pid = mep->pid; +- FILTER_PID +- +- // get throughput stats. see tcp_get_info(). 
+- u64 rx_b = 0, tx_b = 0, sport = 0; +- struct tcp_sock *tp = (struct tcp_sock *)sk; +- // OLD: bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received); +- // OLD: bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked); +- // NEW: rx_b = tp->bytes_received; +- // NEW: tx_b = tp->bytes_acked; +- +- u16 family = 0; +- // OLD: bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family); +- // NEW: family = sk->__sk_common.skc_family; +- +- if (family == AF_INET) { +- +- struct ipv4_data_t data4 = {.span_us = delta_us, +- .rx_b = rx_b, .tx_b = tx_b}; +- data4.ts_us = bpf_ktime_get_ns() / 1000; +- bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr); +- bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr); +- // a workaround until data4 compiles with separate lport/dport +- data4.ports = dport + ((0ULL + lport) << 32); +- data4.pid = pid; +- +- if (mep == 0) { +- bpf_get_current_comm(&data4.task, sizeof(data4.task)); +- } else { +- bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task); +- } +- ipv4_events.perf_submit(args, &data4, sizeof(data4)); +- +- } else /* 6 */ { +- struct ipv6_data_t data6 = {.span_us = delta_us, +- .rx_b = rx_b, .tx_b = tx_b}; +- data6.ts_us = bpf_ktime_get_ns() / 1000; +- bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6); +- bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->saddr_v6); +- // a workaround until data6 compiles with separate lport/dport +- data6.ports = dport + ((0ULL + lport) << 32); +- data6.pid = pid; +- if (mep == 0) { +- bpf_get_current_comm(&data6.task, sizeof(data6.task)); +- } else { +- bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task); +- } +- ipv6_events.perf_submit(args, &data6, sizeof(data6)); +- } +- +- if (mep != 0) +- whoami.delete(&sk); +- +- return 0; +-} +diff --git a/src/pmdas/bcc/modules/tcplife.bpf b/src/pmdas/bcc/modules/tcplife_tp.bpf +similarity index 80% +rename from src/pmdas/bcc/modules/tcplife.bpf +rename to 
src/pmdas/bcc/modules/tcplife_tp.bpf +index 19ca8d740..2b16b98e7 100644 +--- a/src/pmdas/bcc/modules/tcplife.bpf ++++ b/src/pmdas/bcc/modules/tcplife_tp.bpf +@@ -2,7 +2,6 @@ + // Licensed under the Apache License, Version 2.0 (the "License") + + #include +-#define KBUILD_MODNAME "pcpbcctcplife" + #include + #include + #include +@@ -11,11 +10,10 @@ BPF_HASH(birth, struct sock *, u64); + + // separate data structs for ipv4 and ipv6 + struct ipv4_data_t { +- // XXX: switch some to u32's when supported + u64 ts_us; +- u64 pid; +- u64 saddr; +- u64 daddr; ++ u32 pid; ++ u32 saddr; ++ u32 daddr; + u64 ports; + u64 rx_b; + u64 tx_b; +@@ -26,7 +24,7 @@ BPF_PERF_OUTPUT(ipv4_events); + + struct ipv6_data_t { + u64 ts_us; +- u64 pid; ++ u32 pid; + unsigned __int128 saddr; + unsigned __int128 daddr; + u64 ports; +@@ -54,11 +52,11 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state) + + // lport is either used in a filter here, or later + u16 lport = args->sport; +- //FILTER_LPORT ++ FILTER_LPORT + + // dport is either used in a filter here, or later + u16 dport = args->dport; +- //FILTER_DPORT ++ FILTER_DPORT + + /* + * This tool includes PID and comm context. It's best effort, and may +@@ -115,20 +113,23 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state) + pid = mep->pid; + FILTER_PID + ++ u16 family = args->family; ++ FILTER_FAMILY ++ + // get throughput stats. see tcp_get_info(). 
+ u64 rx_b = 0, tx_b = 0, sport = 0; + struct tcp_sock *tp = (struct tcp_sock *)sk; +- // OLD: bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received); +- // OLD: bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked); +- // NEW: rx_b = tp->bytes_received; +- // NEW: tx_b = tp->bytes_acked; ++ rx_b = tp->bytes_received; ++ tx_b = tp->bytes_acked; + + if (args->family == AF_INET) { +- struct ipv4_data_t data4 = {.span_us = delta_us, +- .rx_b = rx_b, .tx_b = tx_b}; ++ struct ipv4_data_t data4 = {}; ++ data4.span_us = delta_us; ++ data4.rx_b = rx_b; ++ data4.tx_b = tx_b; + data4.ts_us = bpf_ktime_get_ns() / 1000; +- bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr); +- bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr); ++ __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr)); ++ __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr)); + // a workaround until data4 compiles with separate lport/dport + data4.ports = dport + ((0ULL + lport) << 32); + data4.pid = pid; +@@ -136,23 +137,25 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state) + if (mep == 0) { + bpf_get_current_comm(&data4.task, sizeof(data4.task)); + } else { +- bpf_probe_read(&data4.task, sizeof(data4.task), (void *)mep->task); ++ bpf_probe_read_kernel(&data4.task, sizeof(data4.task), (void *)mep->task); + } + ipv4_events.perf_submit(args, &data4, sizeof(data4)); + + } else /* 6 */ { +- struct ipv6_data_t data6 = {.span_us = delta_us, +- .rx_b = rx_b, .tx_b = tx_b}; ++ struct ipv6_data_t data6 = {}; ++ data6.span_us = delta_us; ++ data6.rx_b = rx_b; ++ data6.tx_b = tx_b; + data6.ts_us = bpf_ktime_get_ns() / 1000; +- bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6); +- bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->saddr_v6); ++ __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr)); ++ __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr)); + // a workaround until data6 compiles with separate lport/dport + data6.ports = dport + 
((0ULL + lport) << 32); + data6.pid = pid; + if (mep == 0) { + bpf_get_current_comm(&data6.task, sizeof(data6.task)); + } else { +- bpf_probe_read(&data6.task, sizeof(data6.task), (void *)mep->task); ++ bpf_probe_read_kernel(&data6.task, sizeof(data6.task), (void *)mep->task); + } + ipv6_events.perf_submit(args, &data6, sizeof(data6)); + } +diff --git a/src/pmdas/bcc/modules/tcpperpid.python b/src/pmdas/bcc/modules/tcpperpid.python +index 3cb2cfcfd..0096929a6 100644 +--- a/src/pmdas/bcc/modules/tcpperpid.python ++++ b/src/pmdas/bcc/modules/tcpperpid.python +@@ -32,16 +32,10 @@ from modules.pcpbcc import PCPBCCBase + # + # BPF program + # +-bpf_src = "modules/tcplife.bpf" +-# Compat with kernel < 4.16, bcc < 0.6 +-TRACEFS = "/sys/kernel/debug/tracing" +-bpf_src_old_tp = "modules/tcplife_old_tp.bpf" +-bpf_src_old_kb = "modules/tcplife_old_kb.bpf" +-if not path.exists(TRACEFS + "/events/sock/inet_sock_set_state"): +- if path.exists(TRACEFS + "/events/tcp/tcp_set_state"): +- bpf_src = bpf_src_old_tp +- else: +- bpf_src = bpf_src_old_kb ++if BPF.tracepoint_exists("sock", "inet_sock_set_state"): ++ bpf_src = "modules/tcplife_tp.bpf" ++else: ++ bpf_src = "modules/tcplife_kp.bpf" + + # Alternative, "high resolution" BPF + bpf_highres = "modules/tcptop.bpf" +@@ -53,36 +47,6 @@ MODULE = 'tcpperpid' + BASENS = 'proc.io.net.total.' 
+ units_bytes = pmUnits(1, 0, 0, PM_SPACE_BYTE, 0, 0) + +-TASK_COMM_LEN = 16 # linux/sched.h +- +-class Data_ipv4(ct.Structure): +- """ IPv4 data struct """ +- _fields_ = [ +- ("ts_us", ct.c_ulonglong), +- ("pid", ct.c_ulonglong), +- ("saddr", ct.c_ulonglong), +- ("daddr", ct.c_ulonglong), +- ("ports", ct.c_ulonglong), +- ("rx_b", ct.c_ulonglong), +- ("tx_b", ct.c_ulonglong), +- ("span_us", ct.c_ulonglong), +- ("task", ct.c_char * TASK_COMM_LEN) +- ] +- +-class Data_ipv6(ct.Structure): +- """ IPv6 data struct """ +- _fields_ = [ +- ("ts_us", ct.c_ulonglong), +- ("pid", ct.c_ulonglong), +- ("saddr", (ct.c_ulonglong * 2)), +- ("daddr", (ct.c_ulonglong * 2)), +- ("ports", ct.c_ulonglong), +- ("rx_b", ct.c_ulonglong), +- ("tx_b", ct.c_ulonglong), +- ("span_us", ct.c_ulonglong), +- ("task", ct.c_char * TASK_COMM_LEN) +- ] +- + # + # PCP BCC Module + # +@@ -133,15 +97,14 @@ class PCPBCCModule(PCPBCCBase): + self.log("Using BPF source file %s." % src) + + # Exit hard if impossible to continue +- if self.bcc_version() == "0.6.1" and src == bpf_src_old_kb and not self.highres: +- raise RuntimeError("BCC 0.6.1 bug makes it incompatible with this module " +- "on kernel < 4.15 in non-highres mode.") ++ if self.bcc_version_tuple() < (0, 6, 1) and not self.highres: ++ raise RuntimeError("BCC 0.6.1+ is required for this module in non-highres mode.") + + self.log("Initialized.") + + def handle_ipv4_event(self, _cpu, data, _size): + """ IPv4 event handler """ +- event = ct.cast(data, ct.POINTER(Data_ipv4)).contents ++ event = self.bpf["ipv4_events"].event(data) + pid = str(event.pid).zfill(6) + self.lock.acquire() + if pid not in self.ipv4_stats: +@@ -153,7 +116,7 @@ class PCPBCCModule(PCPBCCBase): + + def handle_ipv6_event(self, _cpu, data, _size): + """ IPv6 event handler """ +- event = ct.cast(data, ct.POINTER(Data_ipv6)).contents ++ event = self.bpf["ipv6_events"].event(data) + pid = str(event.pid).zfill(6) + self.lock.acquire() + if pid not in self.ipv6_stats: +@@ -199,31 
+162,25 @@ class PCPBCCModule(PCPBCCBase): + self.bpf_text = src.read() + if self.highres: + self.bpf_text = self.bpf_text.replace("FILTER", "FILTER_PID") +- # Compat with bcc < 0.6 +- self.log("Testing BCC compatilibility, possible errors below are safe to ignore.") +- try: +- test_txt = self.bpf_text.replace("// NEW: ", "").replace("FILTER_PID", "") +- test_bpf = BPF(text=test_txt) +- test_bpf.cleanup() +- self.bpf_text = self.bpf_text.replace("// NEW: ", "") +- except Exception: # pylint: disable=broad-except +- self.bpf_text = self.bpf_text.replace("// OLD: ", "") +- self.log("Tested BCC compatilibility, possible errors above are safe to ignore.") + + if self.dports: + filterp = " && ".join(["dport != %d" % port for port in self.dports]) + filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp +- self.bpf_text = self.bpf_text.replace("//FILTER_DPORT", filter_txt) ++ self.bpf_text = self.bpf_text.replace("FILTER_DPORT", filter_txt) + if self.lports: + filterp = " && ".join(["lport != %d" % port for port in self.lports]) + filter_txt = "if (%s) { birth.delete(&sk); return 0; }" % filterp +- self.bpf_text = self.bpf_text.replace("//FILTER_LPORT", filter_txt) ++ self.bpf_text = self.bpf_text.replace("FILTER_LPORT", filter_txt) + + if not self.pids and self.proc_filter and self.proc_refresh: + self.log("No process to attach found, activation postponed.") + return + + bpf_text = self.apply_pid_filter(self.bpf_text, self.pids, False) ++ bpf_text = bpf_text.replace('FILTER_PID', '') ++ bpf_text = bpf_text.replace('FILTER_DPORT', '') ++ bpf_text = bpf_text.replace('FILTER_LPORT', '') ++ bpf_text = bpf_text.replace('FILTER_FAMILY', '') + + if self.debug: + self.log("BPF to be compiled:\n" + bpf_text.strip()) +diff --git a/src/pmdas/bcc/modules/tcptop.bpf b/src/pmdas/bcc/modules/tcptop.bpf +index 349ee1529..c1fed7aef 100644 +--- a/src/pmdas/bcc/modules/tcptop.bpf ++++ b/src/pmdas/bcc/modules/tcptop.bpf +@@ -4,6 +4,7 @@ + #include + #include + #include ++ + 
struct ipv4_key_t { + u32 pid; + u32 saddr; +@@ -13,25 +14,32 @@ struct ipv4_key_t { + }; + BPF_HASH(ipv4_send_bytes, struct ipv4_key_t); + BPF_HASH(ipv4_recv_bytes, struct ipv4_key_t); ++ + struct ipv6_key_t { ++ unsigned __int128 saddr; ++ unsigned __int128 daddr; + u32 pid; +- // workaround until unsigned __int128 support: +- u64 saddr0; +- u64 saddr1; +- u64 daddr0; +- u64 daddr1; + u16 lport; + u16 dport; ++ u64 __pad__; + }; + BPF_HASH(ipv6_send_bytes, struct ipv6_key_t); + BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t); ++ + int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, + struct msghdr *msg, size_t size) + { +- u32 pid = bpf_get_current_pid_tgid(); +- FILTER ++ if (container_should_be_filtered()) { ++ return 0; ++ } ++ ++ u32 pid = bpf_get_current_pid_tgid() >> 32; ++ FILTER_PID ++ + u16 dport = 0, family = sk->__sk_common.skc_family; +- u64 *val, zero = 0; ++ ++ FILTER_FAMILY ++ + if (family == AF_INET) { + struct ipv4_key_t ipv4_key = {.pid = pid}; + ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr; +@@ -39,31 +47,24 @@ int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, + ipv4_key.lport = sk->__sk_common.skc_num; + dport = sk->__sk_common.skc_dport; + ipv4_key.dport = ntohs(dport); +- val = ipv4_send_bytes.lookup_or_init(&ipv4_key, &zero); +- if (val) { +- (*val) += size; +- } ++ ipv4_send_bytes.increment(ipv4_key, size); ++ + } else if (family == AF_INET6) { + struct ipv6_key_t ipv6_key = {.pid = pid}; +- bpf_probe_read(&ipv6_key.saddr0, sizeof(ipv6_key.saddr0), +- &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]); +- bpf_probe_read(&ipv6_key.saddr1, sizeof(ipv6_key.saddr1), +- &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]); +- bpf_probe_read(&ipv6_key.daddr0, sizeof(ipv6_key.daddr0), +- &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]); +- bpf_probe_read(&ipv6_key.daddr1, sizeof(ipv6_key.daddr1), +- &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]); ++ bpf_probe_read_kernel(&ipv6_key.saddr, sizeof(ipv6_key.saddr), ++ 
&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); ++ bpf_probe_read_kernel(&ipv6_key.daddr, sizeof(ipv6_key.daddr), ++ &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + ipv6_key.lport = sk->__sk_common.skc_num; + dport = sk->__sk_common.skc_dport; + ipv6_key.dport = ntohs(dport); +- val = ipv6_send_bytes.lookup_or_init(&ipv6_key, &zero); +- if (val) { +- (*val) += size; +- } ++ ipv6_send_bytes.increment(ipv6_key, size); + } + // else drop ++ + return 0; + } ++ + /* + * tcp_recvmsg() would be obvious to trace, but is less suitable because: + * - we'd need to trace both entry and return, to have both sock and size +@@ -72,12 +73,21 @@ int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, + */ + int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied) + { +- u32 pid = bpf_get_current_pid_tgid(); +- FILTER ++ if (container_should_be_filtered()) { ++ return 0; ++ } ++ ++ u32 pid = bpf_get_current_pid_tgid() >> 32; ++ FILTER_PID ++ + u16 dport = 0, family = sk->__sk_common.skc_family; + u64 *val, zero = 0; ++ + if (copied <= 0) + return 0; ++ ++ FILTER_FAMILY ++ + if (family == AF_INET) { + struct ipv4_key_t ipv4_key = {.pid = pid}; + ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr; +@@ -85,28 +95,20 @@ int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied) + ipv4_key.lport = sk->__sk_common.skc_num; + dport = sk->__sk_common.skc_dport; + ipv4_key.dport = ntohs(dport); +- val = ipv4_recv_bytes.lookup_or_init(&ipv4_key, &zero); +- if (val) { +- (*val) += copied; +- } ++ ipv4_recv_bytes.increment(ipv4_key, copied); ++ + } else if (family == AF_INET6) { + struct ipv6_key_t ipv6_key = {.pid = pid}; +- bpf_probe_read(&ipv6_key.saddr0, sizeof(ipv6_key.saddr0), +- &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]); +- bpf_probe_read(&ipv6_key.saddr1, sizeof(ipv6_key.saddr1), +- &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]); +- bpf_probe_read(&ipv6_key.daddr0, sizeof(ipv6_key.daddr0), +- 
&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]); +- bpf_probe_read(&ipv6_key.daddr1, sizeof(ipv6_key.daddr1), +- &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]); ++ bpf_probe_read_kernel(&ipv6_key.saddr, sizeof(ipv6_key.saddr), ++ &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); ++ bpf_probe_read_kernel(&ipv6_key.daddr, sizeof(ipv6_key.daddr), ++ &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + ipv6_key.lport = sk->__sk_common.skc_num; + dport = sk->__sk_common.skc_dport; + ipv6_key.dport = ntohs(dport); +- val = ipv6_recv_bytes.lookup_or_init(&ipv6_key, &zero); +- if (val) { +- (*val) += copied; +- } ++ ipv6_recv_bytes.increment(ipv6_key, copied); + } + // else drop ++ + return 0; + } +diff --git a/src/pmdas/bcc/modules/tcptop.python b/src/pmdas/bcc/modules/tcptop.python +index 45063dff3..db1c1da15 100644 +--- a/src/pmdas/bcc/modules/tcptop.python ++++ b/src/pmdas/bcc/modules/tcptop.python +@@ -120,13 +120,14 @@ class PCPBCCModule(PCPBCCBase): + with open(path.dirname(__file__) + '/../' + bpf_src) as src: + self.bpf_text = src.read() + +- self.bpf_text = self.bpf_text.replace("FILTER", "FILTER_PID") +- + if not self.pids and self.proc_filter and self.proc_refresh: + self.log("No process to attach found, activation postponed.") + return + + bpf_text = self.apply_pid_filter(self.bpf_text, self.pids, False) ++ bpf_text = bpf_text.replace('FILTER_PID', '') ++ bpf_text = bpf_text.replace('FILTER_FAMILY', '') ++ bpf_text = bpf_text.replace('container_should_be_filtered()', '0') + + if self.debug: + self.log("BPF to be compiled:\n" + bpf_text.strip()) +@@ -155,21 +156,31 @@ class PCPBCCModule(PCPBCCBase): + + @staticmethod + def ipv4_table_to_dict(table): +- """ Build hashable dict from IPv4 BPF table """ +- return {TCPSessionKey(pid=k.pid, +- laddr=inet_ntop(AF_INET, pack("I", k.saddr)), +- lport=k.lport, +- daddr=inet_ntop(AF_INET, pack("I", k.daddr)), +- dport=k.dport):v.value for k, v in table.items()} ++ """Build hashable dict from IPv4 BPF table""" ++ 
return { ++ TCPSessionKey( ++ pid=k.pid, ++ laddr=inet_ntop(AF_INET, pack("I", k.saddr)), ++ lport=k.lport, ++ daddr=inet_ntop(AF_INET, pack("I", k.daddr)), ++ dport=k.dport, ++ ): v.value ++ for k, v in table.items() ++ } + + @staticmethod + def ipv6_table_to_dict(table): +- """ Build hashable dict from IPv6 BPF table """ +- return {TCPSessionKey(pid=k.pid, +- laddr=inet_ntop(AF_INET6, pack("QQ", k.saddr0, k.saddr1)), +- lport=k.lport, +- daddr=inet_ntop(AF_INET6, pack("QQ", k.daddr0, k.daddr1)), +- dport=k.dport):v.value for k, v in table.items()} ++ """Build hashable dict from IPv6 BPF table""" ++ return { ++ TCPSessionKey( ++ pid=k.pid, ++ laddr=inet_ntop(AF_INET6, k.saddr), ++ lport=k.lport, ++ daddr=inet_ntop(AF_INET6, k.daddr), ++ dport=k.dport, ++ ): v.value ++ for k, v in table.items() ++ } + + def refresh_stats(self): + """ Refresh statistics from BPF table """ +commit d45ce8e85035cc95ba897cd19967fad6d5d741be (cherry-picked) +Author: Andreas Gerstmayr +Date: Wed Dec 15 08:03:40 2021 +0100 + + qa: update qa/1118 to add new log output of runqlat bcc module + +diff --git a/qa/1118 b/qa/1118 +index 4123495b5..bcaec0a0d 100755 +--- a/qa/1118 ++++ b/qa/1118 +@@ -21,12 +21,19 @@ _label_filter() + grep '"0-1"' | grep '"statistic":"histogram"' | grep '"lower_bound":0' | grep 'upper_bound":1' > /dev/null && echo 'OK' + } + ++_install_filter() ++{ ++ sed \ ++ -e "s/Using BPF source file .\+/Using BPF source file X/g" \ ++ #end ++} ++ + _prepare_pmda bcc + trap "_pmdabcc_cleanup; exit \$status" 0 1 2 3 15 + _stop_auto_restart pmcd + + # real QA test starts here +-cat < -Date: Wed Dec 8 15:24:49 2021 +1100 - - Resolve inconsistencies in new 'farm' and other systemd units - - This change most importantly introduces the Wants= line Mark - (and Jan earlier, indirectly) proposed to make pmlogger_farm - handling function as end-users will expect when manipulating - the pmlogger.service. Ditto for pmie. 
- - There's also several cleanups of things that are inconsistent - and just plain wrong or missing, particularly in spec files. - - This supercedes PR #1492 and PR #1489. - This resolves Red Hat BZ #2027753. - -diff --git a/src/pmie/pmie.service.in b/src/pmie/pmie.service.in -index d234c8a5e5..bf4e64980a 100644 ---- a/src/pmie/pmie.service.in -+++ b/src/pmie/pmie.service.in -@@ -4,7 +4,7 @@ Documentation=man:pmie(1) - After=network-online.target pmcd.service - Before=pmie_check.timer pmie_daily.timer - BindsTo=pmie_check.timer pmie_daily.timer --Wants=pmcd.service -+Wants=pmcd.service pmie_farm.service +diff -Naurp pcp-5.3.5.orig/src/pmie/GNUmakefile pcp-5.3.5/src/pmie/GNUmakefile +--- pcp-5.3.5.orig/src/pmie/GNUmakefile 2021-11-09 10:50:58.000000000 +1100 ++++ pcp-5.3.5/src/pmie/GNUmakefile 2021-12-15 16:51:51.306200748 +1100 +@@ -84,7 +84,6 @@ pmie.service : pmie.service.in + + pmie_farm.service : pmie_farm.service.in + $(SED) <$< >$@ \ +- -e 's;@CRONTAB_PATH@;'$(CRONTAB_PATH)';' \ + -e 's;@PCP_SYSCONFIG_DIR@;'$(PCP_SYSCONFIG_DIR)';' \ + -e 's;@PCP_BINADM_DIR@;'$(PCP_BINADM_DIR)';' \ + -e 's;@PCP_VAR_DIR@;'$(PCP_VAR_DIR)';' \ +@@ -95,7 +94,9 @@ pmie_farm.service : pmie_farm.service.in + + pmie_farm_check.service : pmie_farm_check.service.in + $(SED) <$< >$@ \ ++ -e 's;@CRONTAB_PATH@;'$(CRONTAB_PATH)';' \ + -e 's;@PCP_BIN_DIR@;'$(PCP_BIN_DIR)';' \ ++ -e 's;@PCP_VAR_DIR@;'$(PCP_VAR_DIR)';' \ + # END + + pmie_check.service : pmie_check.service.in +diff -Naurp pcp-5.3.5.orig/src/pmie/pmie_farm_check.service.in pcp-5.3.5/src/pmie/pmie_farm_check.service.in +--- pcp-5.3.5.orig/src/pmie/pmie_farm_check.service.in 2021-11-09 10:50:58.000000000 +1100 ++++ pcp-5.3.5/src/pmie/pmie_farm_check.service.in 2021-12-15 16:51:51.306200748 +1100 +@@ -1,7 +1,7 @@ + [Unit] +-Description=Check and migrate non-primary pmie instances to pmie_farm +-Documentation=man:pmie_check(1) +-# TODO non-systemd ConditionPathExists=!/etc/cron.d/pcp-pmie ++Description=Check and migrate non-primary 
pmie farm instances ++Documentation=man:pmiectl(1) ++ConditionPathExists=!@CRONTAB_PATH@ [Service] - Type=notify -diff --git a/src/pmie/pmie_farm.service.in b/src/pmie/pmie_farm.service.in -index 6679e48ba1..5459adb310 100644 ---- a/src/pmie/pmie_farm.service.in -+++ b/src/pmie/pmie_farm.service.in -@@ -22,6 +22,3 @@ User=@PCP_USER@ - - [Install] - WantedBy=multi-user.target + Type=exec +@@ -9,8 +9,7 @@ Restart=no + TimeoutStartSec=4h + TimeoutStopSec=120 + ExecStart=@PCP_BIN_DIR@/pmiectl -m check +-WorkingDirectory=/var/lib/pcp - --# This dependency will be removed in PCPv6. --WantedBy=pmie.service -diff --git a/src/pmlogger/pmlogger.service.in b/src/pmlogger/pmlogger.service.in -index de0df29db1..59299ac15d 100644 ---- a/src/pmlogger/pmlogger.service.in -+++ b/src/pmlogger/pmlogger.service.in -@@ -4,7 +4,7 @@ Documentation=man:pmlogger(1) - After=network-online.target pmcd.service - Before=pmlogger_check.timer pmlogger_daily.timer - BindsTo=pmlogger_check.timer pmlogger_daily.timer --Wants=pmcd.service -+Wants=pmcd.service pmlogger_farm.service - - [Service] - Type=notify -diff --git a/src/pmlogger/pmlogger_farm.service.in b/src/pmlogger/pmlogger_farm.service.in -index fe753afdf6..3bfa2e7098 100644 ---- a/src/pmlogger/pmlogger_farm.service.in -+++ b/src/pmlogger/pmlogger_farm.service.in -@@ -22,6 +22,3 @@ User=@PCP_USER@ - - [Install] - WantedBy=multi-user.target -- --# This dependency will be removed in PCPv6. --WantedBy=pmlogger.service - -commit cc2dddfb7a04d98f97bdf759f057bae2727260ff -Author: Nathan Scott -Date: Thu Dec 9 10:41:22 2021 +1100 - - Resolve inconsistencies in new 'farm' systemd timers - - When the farm systemd timers were introduced the check interval - was drastically reduced from half hourly to 5 minutely. There - wasn't any discussion about rationales for this and its now not - consistent (does not dovetail at all) with the primary pmlogger - and pmie service. 
If startup takes a long time (large farms or - slow networks) these will likely overlap constantly, and timing - should be such that we work with the primary services in mind. - - Reset to half hourly for these checks, and lets revisit this in - the new year when the other systemd changes are being proposed. - - Related to https://github.com/performancecopilot/pcp/pull/1495 - -diff --git a/src/pmie/pmie_farm_check.timer b/src/pmie/pmie_farm_check.timer -index ee7aa21242..97dc061af2 100644 ---- a/src/pmie/pmie_farm_check.timer -+++ b/src/pmie/pmie_farm_check.timer ++WorkingDirectory=@PCP_VAR_DIR@ + # root so pmiectl can migrate pmie processes to the pmie_farm service + Group=root + User=root +diff -Naurp pcp-5.3.5.orig/src/pmie/pmie_farm_check.timer pcp-5.3.5/src/pmie/pmie_farm_check.timer +--- pcp-5.3.5.orig/src/pmie/pmie_farm_check.timer 2021-11-04 08:26:15.000000000 +1100 ++++ pcp-5.3.5/src/pmie/pmie_farm_check.timer 2021-12-15 16:51:49.649229430 +1100 @@ -1,10 +1,11 @@ [Unit] -Description=5 minute check of pmie farm instances @@ -102,10 +61,87 @@ index ee7aa21242..97dc061af2 100644 [Install] WantedBy=timers.target -diff --git a/src/pmlogger/pmlogger_farm_check.timer b/src/pmlogger/pmlogger_farm_check.timer -index 094fb4505d..f234ef7839 100644 ---- a/src/pmlogger/pmlogger_farm_check.timer -+++ b/src/pmlogger/pmlogger_farm_check.timer +diff -Naurp pcp-5.3.5.orig/src/pmie/pmie_farm.service.in pcp-5.3.5/src/pmie/pmie_farm.service.in +--- pcp-5.3.5.orig/src/pmie/pmie_farm.service.in 2021-11-04 08:26:15.000000000 +1100 ++++ pcp-5.3.5/src/pmie/pmie_farm.service.in 2021-12-15 16:51:51.306200748 +1100 +@@ -1,9 +1,8 @@ + [Unit] + Description=pmie farm service +-Documentation=man:pmie(1) +-After=network-online.target pmcd.service +-Before=pmie_check.timer pmie_daily.timer +-BindsTo=pmie_farm_check.timer pmie_check.timer pmie_daily.timer ++Documentation=man:pmie_check(1) ++Before=pmie_farm_check.timer ++BindsTo=pmie_farm_check.timer + + [Service] + Type=@SD_SERVICE_TYPE@ 
+@@ -15,13 +14,9 @@ TimeoutStopSec=120 + Environment="PMIE_CHECK_PARAMS=--skip-primary" + EnvironmentFile=-@PCP_SYSCONFIG_DIR@/pmie_timers + ExecStart=@PCP_BINADM_DIR@/pmie_farm $PMIE_CHECK_PARAMS +- + WorkingDirectory=@PCP_VAR_DIR@ + Group=@PCP_GROUP@ + User=@PCP_USER@ + + [Install] +-WantedBy=multi-user.target +- +-# This dependency will be removed in PCPv6. +-WantedBy=pmie.service ++RequiredBy=pmie.service +diff -Naurp pcp-5.3.5.orig/src/pmlogger/GNUmakefile pcp-5.3.5/src/pmlogger/GNUmakefile +--- pcp-5.3.5.orig/src/pmlogger/GNUmakefile 2021-11-09 09:08:40.000000000 +1100 ++++ pcp-5.3.5/src/pmlogger/GNUmakefile 2021-12-15 16:51:51.306200748 +1100 +@@ -99,7 +99,6 @@ pmlogger.service : pmlogger.service.in + + pmlogger_farm.service : pmlogger_farm.service.in + $(SED) <$< >$@ \ +- -e 's;@CRONTAB_PATH@;'$(CRONTAB_PATH)';' \ + -e 's;@PCP_SYSCONFIG_DIR@;'$(PCP_SYSCONFIG_DIR)';' \ + -e 's;@PCP_BINADM_DIR@;'$(PCP_BINADM_DIR)';' \ + -e 's;@PCP_VAR_DIR@;'$(PCP_VAR_DIR)';' \ +@@ -110,7 +109,9 @@ pmlogger_farm.service : pmlogger_farm.se + + pmlogger_farm_check.service : pmlogger_farm_check.service.in + $(SED) <$< >$@ \ ++ -e 's;@CRONTAB_PATH@;'$(CRONTAB_PATH)';' \ + -e 's;@PCP_BIN_DIR@;'$(PCP_BIN_DIR)';' \ ++ -e 's;@PCP_VAR_DIR@;'$(PCP_VAR_DIR)';' \ + # END + + pmlogger_daily.service : pmlogger_daily.service.in +diff -Naurp pcp-5.3.5.orig/src/pmlogger/pmlogger_farm_check.service.in pcp-5.3.5/src/pmlogger/pmlogger_farm_check.service.in +--- pcp-5.3.5.orig/src/pmlogger/pmlogger_farm_check.service.in 2021-11-09 09:08:40.000000000 +1100 ++++ pcp-5.3.5/src/pmlogger/pmlogger_farm_check.service.in 2021-12-15 16:51:51.307200731 +1100 +@@ -1,7 +1,7 @@ + [Unit] +-Description=Check and migrate non-primary pmlogger instances to pmlogger_farm +-Documentation=man:pmlogger_check(1) +-# TODO non-systemd ConditionPathExists=!/etc/cron.d/pcp-pmlogger ++Description=Check and migrate non-primary pmlogger farm instances ++Documentation=man:pmlogctl(1) ++ConditionPathExists=!@CRONTAB_PATH@ + + 
[Service] + Type=exec +@@ -9,9 +9,8 @@ Restart=no + TimeoutStartSec=4h + TimeoutStopSec=120 + ExecStart=@PCP_BIN_DIR@/pmlogctl -m check +-WorkingDirectory=/var/lib/pcp +- +-# root so pmlogctl can migrate pmloggers to the pmlogger_farm service ++WorkingDirectory=@PCP_VAR_DIR@ ++# root so pmlogctl can migrate pmlogger processes to the pmlogger_farm service + Group=root + User=root + +diff -Naurp pcp-5.3.5.orig/src/pmlogger/pmlogger_farm_check.timer pcp-5.3.5/src/pmlogger/pmlogger_farm_check.timer +--- pcp-5.3.5.orig/src/pmlogger/pmlogger_farm_check.timer 2021-11-04 08:26:15.000000000 +1100 ++++ pcp-5.3.5/src/pmlogger/pmlogger_farm_check.timer 2021-12-15 16:51:49.649229430 +1100 @@ -1,10 +1,11 @@ [Unit] -Description=5 minute check of pmlogger farm instances @@ -121,3 +157,216 @@ index 094fb4505d..f234ef7839 100644 [Install] WantedBy=timers.target +diff -Naurp pcp-5.3.5.orig/src/pmlogger/pmlogger_farm.service.in pcp-5.3.5/src/pmlogger/pmlogger_farm.service.in +--- pcp-5.3.5.orig/src/pmlogger/pmlogger_farm.service.in 2021-11-04 08:26:15.000000000 +1100 ++++ pcp-5.3.5/src/pmlogger/pmlogger_farm.service.in 2021-12-15 16:51:51.306200748 +1100 +@@ -1,9 +1,8 @@ + [Unit] + Description=pmlogger farm service +-Documentation=man:pmlogger(1) +-After=network-online.target pmcd.service +-Before=pmlogger_check.timer pmlogger_daily.timer +-BindsTo=pmlogger_farm_check.timer pmlogger_check.timer pmlogger_daily.timer ++Documentation=man:pmlogger_check(1) ++Before=pmlogger_farm_check.timer ++BindsTo=pmlogger_farm_check.timer + + [Service] + Type=@SD_SERVICE_TYPE@ +@@ -15,13 +14,9 @@ TimeoutStopSec=120 + Environment="PMLOGGER_CHECK_PARAMS=--skip-primary" + EnvironmentFile=-@PCP_SYSCONFIG_DIR@/pmlogger_timers + ExecStart=@PCP_BINADM_DIR@/pmlogger_farm $PMLOGGER_CHECK_PARAMS +- + WorkingDirectory=@PCP_VAR_DIR@ + Group=@PCP_GROUP@ + User=@PCP_USER@ + + [Install] +-WantedBy=multi-user.target +- +-# This dependency will be removed in PCPv6. 
+-WantedBy=pmlogger.service ++RequiredBy=pmlogger.service +commit 7d6e266d0a15ba0fd0894a059257a502f19b7fe9 +Author: Nathan Scott +Date: Thu Dec 16 17:38:33 2021 +1100 + + pmlogger/pmie farms: further consistency changes and hard dep fix + + Several inconsistencies between various pmie/pmlogger systemd units + resolved. Most importantly, add BindsTo lines to ensure the farms + get started and stopped along with the underlying service, always. + +diff --git a/src/pmie/pmie.service.in b/src/pmie/pmie.service.in +index d234c8a5e5..d659c39c65 100644 +--- a/src/pmie/pmie.service.in ++++ b/src/pmie/pmie.service.in +@@ -2,8 +2,8 @@ + Description=Performance Metrics Inference Engine + Documentation=man:pmie(1) + After=network-online.target pmcd.service +-Before=pmie_check.timer pmie_daily.timer +-BindsTo=pmie_check.timer pmie_daily.timer ++Before=pmie_farm.service pmie_check.timer pmie_daily.timer ++BindsTo=pmie_farm.service pmie_check.timer pmie_daily.timer + Wants=pmcd.service + + [Service] +diff --git a/src/pmie/pmie_daily.service.in b/src/pmie/pmie_daily.service.in +index e93446ce83..ca49945905 100644 +--- a/src/pmie/pmie_daily.service.in ++++ b/src/pmie/pmie_daily.service.in +@@ -2,13 +2,18 @@ + Description=Process PMIE logs + Documentation=man:pmie_daily(1) + ConditionPathExists=!@CRONTAB_PATH@ ++PartOf=pmie.service + + [Service] + Type=@SD_SERVICE_TYPE@ + Restart=no ++TimeoutStartSec=1h + Environment="PMIE_DAILY_PARAMS=-X xz -x 3" + EnvironmentFile=-@PCP_SYSCONFIG_DIR@/pmie_timers + ExecStart=@PCP_BINADM_DIR@/pmie_daily $PMIE_DAILY_PARAMS + WorkingDirectory=@PCP_VAR_DIR@ + Group=@PCP_GROUP@ + User=@PCP_USER@ ++ ++[Install] ++RequiredBy=pmie.service +diff --git a/src/pmie/pmie_daily.timer b/src/pmie/pmie_daily.timer +index 42b86333be..67742a070e 100644 +--- a/src/pmie/pmie_daily.timer ++++ b/src/pmie/pmie_daily.timer +@@ -1,7 +1,9 @@ + [Unit] + Description=Daily processing of PMIE logs ++PartOf=pmie.service + + [Timer] ++Persistent=true + OnCalendar=*-*-* 00:08:00 + + 
[Install] +diff --git a/src/pmie/pmie_farm.service.in b/src/pmie/pmie_farm.service.in +index 0f7e7e46b8..e119388b4b 100644 +--- a/src/pmie/pmie_farm.service.in ++++ b/src/pmie/pmie_farm.service.in +@@ -3,6 +3,7 @@ Description=pmie farm service + Documentation=man:pmie_check(1) + Before=pmie_farm_check.timer + BindsTo=pmie_farm_check.timer ++PartOf=pmie.service + + [Service] + Type=@SD_SERVICE_TYPE@ +diff --git a/src/pmie/pmie_farm.sh b/src/pmie/pmie_farm.sh +index f2771ad43b..400e790cae 100644 +--- a/src/pmie/pmie_farm.sh ++++ b/src/pmie/pmie_farm.sh +@@ -12,8 +12,8 @@ + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + # for more details. + # +-# Administrative script to start the pmlogger_farm service. +-# All arguments to pmlogger_check are passed from pmlogger_farm.service. ++# Administrative script to start the pmie_farm service. ++# All arguments to pmie_check are passed from pmie_farm.service. + # + . $PCP_DIR/etc/pcp.env + +diff --git a/src/pmie/pmie_farm_check.service.in b/src/pmie/pmie_farm_check.service.in +index 64f822ad53..01d83390a9 100644 +--- a/src/pmie/pmie_farm_check.service.in ++++ b/src/pmie/pmie_farm_check.service.in +@@ -2,6 +2,7 @@ + Description=Check and migrate non-primary pmie farm instances + Documentation=man:pmiectl(1) + ConditionPathExists=!@CRONTAB_PATH@ ++PartOf=pmie_farm.service + + [Service] + Type=exec +diff --git a/src/pmlogger/pmlogger.service.in b/src/pmlogger/pmlogger.service.in +index de0df29db1..b0d706027a 100644 +--- a/src/pmlogger/pmlogger.service.in ++++ b/src/pmlogger/pmlogger.service.in +@@ -2,8 +2,8 @@ + Description=Performance Metrics Archive Logger + Documentation=man:pmlogger(1) + After=network-online.target pmcd.service +-Before=pmlogger_check.timer pmlogger_daily.timer +-BindsTo=pmlogger_check.timer pmlogger_daily.timer ++Before=pmlogger_farm.service pmlogger_check.timer pmlogger_daily.timer ++BindsTo=pmlogger_farm.service pmlogger_check.timer pmlogger_daily.timer + Wants=pmcd.service + + 
[Service] +diff --git a/src/pmlogger/pmlogger_check.service.in b/src/pmlogger/pmlogger_check.service.in +index 8fcef57e0f..db7edf0b0f 100644 +--- a/src/pmlogger/pmlogger_check.service.in ++++ b/src/pmlogger/pmlogger_check.service.in +@@ -2,6 +2,7 @@ + Description=Check pmlogger instances are running + Documentation=man:pmlogger_check(1) + ConditionPathExists=!@CRONTAB_PATH@ ++PartOf=pmlogger.service + + [Service] + Type=@SD_SERVICE_TYPE@ +diff --git a/src/pmlogger/pmlogger_check.timer b/src/pmlogger/pmlogger_check.timer +index e3f3c4fc40..ddffc7584a 100644 +--- a/src/pmlogger/pmlogger_check.timer ++++ b/src/pmlogger/pmlogger_check.timer +@@ -1,5 +1,6 @@ + [Unit] + Description=Half-hourly check of pmlogger instances ++PartOf=pmlogger.service + + [Timer] + # if enabled, runs 1m after boot and every half hour +diff --git a/src/pmlogger/pmlogger_daily.service.in b/src/pmlogger/pmlogger_daily.service.in +index 8a07f5caa3..9a3cbf42b1 100644 +--- a/src/pmlogger/pmlogger_daily.service.in ++++ b/src/pmlogger/pmlogger_daily.service.in +@@ -2,6 +2,7 @@ + Description=Process archive logs + Documentation=man:pmlogger_daily(1) + ConditionPathExists=!@CRONTAB_PATH@ ++PartOf=pmlogger.service + + [Service] + Type=@SD_SERVICE_TYPE@ +@@ -13,3 +14,6 @@ ExecStart=@PCP_BINADM_DIR@/pmlogger_daily $PMLOGGER_DAILY_PARAMS + WorkingDirectory=@PCP_VAR_DIR@ + Group=@PCP_GROUP@ + User=@PCP_USER@ ++ ++[Install] ++RequiredBy=pmlogger.service +diff --git a/src/pmlogger/pmlogger_farm.service.in b/src/pmlogger/pmlogger_farm.service.in +index a194ac39c7..a67e25f312 100644 +--- a/src/pmlogger/pmlogger_farm.service.in ++++ b/src/pmlogger/pmlogger_farm.service.in +@@ -3,6 +3,7 @@ Description=pmlogger farm service + Documentation=man:pmlogger_check(1) + Before=pmlogger_farm_check.timer + BindsTo=pmlogger_farm_check.timer ++PartOf=pmlogger.service + + [Service] + Type=@SD_SERVICE_TYPE@ +diff --git a/src/pmlogger/pmlogger_farm_check.service.in b/src/pmlogger/pmlogger_farm_check.service.in +index 
2ec3a5446b..de5e59a7f5 100644 +--- a/src/pmlogger/pmlogger_farm_check.service.in ++++ b/src/pmlogger/pmlogger_farm_check.service.in +@@ -2,6 +2,7 @@ + Description=Check and migrate non-primary pmlogger farm instances + Documentation=man:pmlogctl(1) + ConditionPathExists=!@CRONTAB_PATH@ ++PartOf=pmlogger_farm.service + + [Service] + Type=exec diff --git a/SPECS/pcp.spec b/SPECS/pcp.spec index ad764b4..7023211 100644 --- a/SPECS/pcp.spec +++ b/SPECS/pcp.spec @@ -1,6 +1,6 @@ Name: pcp Version: 5.3.5 -Release: 3%{?dist} +Release: 6%{?dist} Summary: System-level performance monitoring and performance management License: GPLv2+ and LGPLv2+ and CC-BY URL: https://pcp.io @@ -13,6 +13,9 @@ Patch1: redhat-bugzilla-2029301.patch Patch2: redhat-bugzilla-2030121.patch Patch3: redhat-bugzilla-2027753.patch Patch4: redhat-bugzilla-2030140.patch +Patch5: redhat-bugzilla-2003956.patch +Patch6: redhat-bugzilla-2003956-pmdabcc-update-kernel-version-check-due-to-backporting.patch +Patch7: redhat-bugzilla-1973833.patch %if 0%{?fedora} >= 26 || 0%{?rhel} > 7 %global __python2 python2 @@ -2283,6 +2286,9 @@ updated policy package. %patch2 -p1 %patch3 -p1 %patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 %build # the buildsubdir macro gets defined in %setup and is apparently only available in the next step (i.e. 
the %build step) @@ -2962,40 +2968,36 @@ then # stop daemons before erasing the package %if !%{disable_systemd} %systemd_preun pmlogger.service + %systemd_preun pmlogger_check.timer + %systemd_preun pmlogger_daily.timer %systemd_preun pmlogger_farm.service + %systemd_preun pmlogger_farm_check.service + %systemd_preun pmlogger_farm_check.timer %systemd_preun pmie.service + %systemd_preun pmie_check.timer + %systemd_preun pmie_daily.timer %systemd_preun pmie_farm.service + %systemd_preun pmie_farm_check.service + %systemd_preun pmie_farm_check.timer %systemd_preun pmproxy.service %systemd_preun pmfind.service %systemd_preun pmcd.service - %systemd_preun pmlogger_daily.timer - %systemd_preun pmlogger_check.timer - %systemd_preun pmlogger_farm_check.timer - %systemd_preun pmie_daily.timer - %systemd_preun pmie_check.timer - %systemd_preun pmie_farm_check.timer systemctl stop pmlogger.service >/dev/null 2>&1 - systemctl stop pmlogger_farm.service >/dev/null 2>&1 systemctl stop pmie.service >/dev/null 2>&1 - systemctl stop pmie_farm.service >/dev/null 2>&1 systemctl stop pmproxy.service >/dev/null 2>&1 systemctl stop pmfind.service >/dev/null 2>&1 systemctl stop pmcd.service >/dev/null 2>&1 %else /sbin/service pmlogger stop >/dev/null 2>&1 - /sbin/service pmlogger_farm stop >/dev/null 2>&1 /sbin/service pmie stop >/dev/null 2>&1 - /sbin/service pmie_farm stop >/dev/null 2>&1 /sbin/service pmproxy stop >/dev/null 2>&1 /sbin/service pmcd stop >/dev/null 2>&1 /sbin/chkconfig --del pcp >/dev/null 2>&1 /sbin/chkconfig --del pmcd >/dev/null 2>&1 /sbin/chkconfig --del pmlogger >/dev/null 2>&1 - /sbin/chkconfig --del pmlogger_farm >/dev/null 2>&1 /sbin/chkconfig --del pmie >/dev/null 2>&1 - /sbin/chkconfig --del pmie_farm >/dev/null 2>&1 /sbin/chkconfig --del pmproxy >/dev/null 2>&1 %endif # cleanup namespace state/flag, may still exist @@ -3068,12 +3070,8 @@ PCP_LOG_DIR=%{_logsdir} /sbin/service pmcd condrestart /sbin/chkconfig --add pmlogger >/dev/null 2>&1 /sbin/service 
pmlogger condrestart - /sbin/chkconfig --add pmlogger_farm >/dev/null 2>&1 - /sbin/service pmlogger_farm condrestart /sbin/chkconfig --add pmie >/dev/null 2>&1 /sbin/service pmie condrestart - /sbin/chkconfig --add pmie_farm >/dev/null 2>&1 - /sbin/service pmie_farm condrestart /sbin/chkconfig --add pmproxy >/dev/null 2>&1 /sbin/service pmproxy condrestart %endif @@ -3373,6 +3371,18 @@ PCP_LOG_DIR=%{_logsdir} %files zeroconf -f pcp-zeroconf-files.rpm %changelog +* Thu Dec 16 2021 Andreas Gerstmayr - 5.3.5-6 +- pmdabcc: update qa/1118 testcase to match new output (BZ 2003956) + +* Thu Dec 16 2021 Nathan Scott - 5.3.5-5 +- Further improvements to the farm systemd services (BZ 2027753) + +* Wed Dec 15 2021 Nathan Scott - 5.3.5-4 +- pmdabcc: resolve compilation issues of some bcc PMDA modules on + aarch64, ppc64le and s390x (BZ 2003956) +- Further improve pmlogger service startup latency (BZ 1973833) +- Additional improvements to farm systemd services (BZ 2027753) + * Thu Dec 09 2021 Nathan Scott - 5.3.5-3 - Resolve failure in the Nvidia metrics agent (BZ 2029301) - PMDA indom cache loading performance improvements (BZ 2030121)