From eaeed6cca46a0223617ead834aaa576dd5ad07ff Mon Sep 17 00:00:00 2001
From: Klaus Wenninger
Date: Fri, 31 May 2019 16:11:16 +0200
Subject: [PATCH] Fix: sbd-common: query rt-budget > 0 otherwise try moving to root-slice

---
 src/sbd-common.c     | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sbd-inquisitor.c |  18 +++++++
 src/sbd.h            |   2 +
 src/sbd.sysconfig    |  14 +++++++
 4 files changed, 176 insertions(+)

diff --git a/src/sbd-common.c b/src/sbd-common.c
index 873a76e..ebfdaa3 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -662,6 +662,144 @@ static void sbd_memlock(int stackgrowK, int heapgrowK)
 #endif
 }
 
+/*
+ * Read the realtime budget (cpu.rt_runtime_us) of the cgroup this process
+ * currently belongs to.
+ *
+ * The cgroup is taken from the line of /proc/PID/cgroup whose controller
+ * list contains "cpuacct"; the budget is then read from
+ * "/sys/fs/cgroup/cpu" + cgroup + "/cpu.rt_runtime_us".
+ *
+ * Returns the value read from that file, or -1 if it could not be
+ * determined (a warning is logged in that case).
+ */
+static int get_realtime_budget(void)
+{
+    FILE *f;
+    char fname[PATH_MAX];
+    int res = -1, lnum = 0, len;
+    char *cgroup = NULL, *namespecs = NULL, *line = NULL;
+    size_t linecap = 0;
+
+    snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    /*
+     * Parse line by line: scanning the stream directly never reaches EOF
+     * once a line doesn't match the format (e.g. the cgroup-v2 entry
+     * "0::/path" with its empty controller list), because the unmatched
+     * input stays in the stream and is hit again by every following call.
+     */
+    while (getline(&line, &linecap, f) != -1) {
+        int matched = sscanf(line, "%d:%m[^:]:%m[^\n]",
+                             &lnum, &namespecs, &cgroup);
+
+        if ((matched == 3) && strstr(namespecs, "cpuacct")) {
+            free(namespecs);
+            break;
+        }
+        /* not the line we are looking for -> drop (partial) matches */
+        free(cgroup);
+        cgroup = NULL;
+        free(namespecs);
+        namespecs = NULL;
+    }
+    free(line);
+    fclose(f);
+    if (cgroup == NULL) {
+        cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    len = snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us",
+                   cgroup);
+    if ((len < 0) || (len >= PATH_MAX)) {
+        cl_log(LOG_WARNING, "Path of cpu.rt_runtime_us too long for '%s'",
+               cgroup);
+        goto exit_res;
+    }
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
+            "doesn't for '%s'", cgroup);
+        goto exit_res;
+    }
+    if (fscanf(f, "%d", &res) != 1) {
+        cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
+    } else {
+        cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
+    }
+    fclose(f);
+
+exit_res:
+    free(cgroup);
+    return res;
+}
+
+/*
+ * Move this process to the root cgroup of the cpu controller
+ * (stolen from corosync).
+ *
+ * With enforce_root_cgroup == false the process stays in its current cgroup
+ * if get_realtime_budget() reports an rt-budget > 0 for it.
+ *
+ * Returns 0 if the process was moved or nothing had to be done,
+ * -1 if moving failed.
+ */
+static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
+    FILE *f;
+    int res = -1;
+    bool written;
+
+    /*
+     * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
+     * using systemd and systemd uses hardcoded path of cgroup mount point.
+     *
+     * This feature is expected to be removed as soon as systemd gets support
+     * for managing RT configuration.
+     */
+    f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
+    if (f == NULL) {
+        cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
+            "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
+        res = 0;
+        goto exit_res;
+    }
+    fclose(f);
+
+    if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
+        cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
+                          "-> skip moving to root-slice");
+        res = 0;
+        goto exit_res;
+    }
+
+    f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
+        goto exit_res;
+    }
+
+    written = (fprintf(f, "%jd\n", (intmax_t)getpid()) > 0);
+    if (!written) {
+        cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
+    }
+
+    /* stdio may buffer the pid, so a rejected write can show up only here */
+    if (fclose(f) != 0) {
+        cl_log(LOG_WARNING, "Can't close cgroups tasks file");
+    } else if (written) {
+        res = 0;
+    }
+
+exit_res:
+    return (res);
+}
+
 void
 sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
 {
@@ -670,6 +808,10 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
     }
 
 #ifdef SCHED_RR
+    if (move_to_root_cgroup) {
+        sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
+    }
+
     {
         int pcurrent = 0;
         int pmin = sched_get_priority_min(SCHED_RR);
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index abde4e5..cef5cc7 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -33,6 +33,8 @@ int start_mode = 0;
 char*	pidfile = NULL;
 bool do_flush = true;
 char timeout_sysrq_char = 'b';
+bool move_to_root_cgroup = true;
+bool enforce_moving_to_root_cgroup = false;
 
 int parse_device_line(const char *line);
 
@@ -965,6 +967,22 @@ int main(int argc, char **argv, char **envp)
             timeout_action = strdup(value);
         }
 
+        /* true (as per crm_is_true) -> always move to root-slice,
+         * "auto" -> move only if the current slice has no rt-budget,
+         * anything else -> never move */
+        value = getenv("SBD_MOVE_TO_ROOT_CGROUP");
+        if(value) {
+            move_to_root_cgroup = crm_is_true(value);
+
+            if (move_to_root_cgroup) {
+                enforce_moving_to_root_cgroup = true;
+            } else {
+                if (strcmp(value, "auto") == 0) {
+                    move_to_root_cgroup = true;
+                }
+            }
+        }
+
 	while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
 		switch (c) {
 		case 'D':
diff --git a/src/sbd.h b/src/sbd.h
index 3b05a11..ac30ec7 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -159,6 +159,8 @@ extern bool watchdogdev_is_default;
 extern char*  local_uname;
 extern bool do_flush;
 extern char timeout_sysrq_char;
+extern bool move_to_root_cgroup;
+extern bool enforce_moving_to_root_cgroup;
 
 /* Global, non-tunable variables: */
 extern int  sector_size;
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
index f163f21..e1a60ed 100644
--- a/src/sbd.sysconfig
+++ b/src/sbd.sysconfig
@@ -91,6 +91,20 @@ SBD_WATCHDOG_TIMEOUT=5
 #
 SBD_TIMEOUT_ACTION=flush,reboot
 
+## Type: yesno / auto
+## Default: auto
+#
+# If CPUAccounting is enabled, the default is not to assign any RT-budget
+# to the system.slice, which prevents sbd from running RR-scheduled.
+#
+# One way to escape that issue is to move sbd-processes from the
+# slice they were originally started in to the root-slice.
+# Of course starting sbd in a certain slice might be intentional.
+# Thus in auto-mode sbd will check if the slice has RT-budget assigned.
+# If that is the case sbd will stay in that slice, while it will
+# be moved to the root-slice otherwise.
+SBD_MOVE_TO_ROOT_CGROUP=auto
+
 ## Type: string
 ## Default: ""
 #
-- 
1.8.3.1