216 lines
6.3 KiB
Diff
216 lines
6.3 KiB
Diff
From eaeed6cca46a0223617ead834aaa576dd5ad07ff Mon Sep 17 00:00:00 2001
|
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
Date: Fri, 31 May 2019 16:11:16 +0200
|
|
Subject: [PATCH] Fix: sbd-common: query rt-budget > 0 otherwise try moving to
|
|
root-slice
|
|
|
|
---
|
|
src/sbd-common.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
src/sbd-inquisitor.c | 15 +++++++
|
|
src/sbd.h | 2 +
|
|
src/sbd.sysconfig | 14 +++++++
|
|
4 files changed, 141 insertions(+)
|
|
|
|
diff --git a/src/sbd-common.c b/src/sbd-common.c
|
|
index 873a76e..ebfdaa3 100644
|
|
--- a/src/sbd-common.c
|
|
+++ b/src/sbd-common.c
|
|
@@ -662,6 +662,112 @@ static void sbd_memlock(int stackgrowK, int heapgrowK)
|
|
#endif
|
|
}
|
|
|
|
+/* Read cpu.rt_runtime_us of the cgroup /proc/<pid>/cgroup lists for "cpuacct".
+ * Returns that value, or -1 if it can't be determined. */
+static int get_realtime_budget(void)
+{
+    FILE *f;
+    char fname[PATH_MAX];
+    int res = -1, lnum = 0, num;
+    char *cgroup = NULL, *namespecs = NULL;
+
+    snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    /* lines are "<id>:<controllers>:<path>"; we want the cpuacct path */
+    while ((num = fscanf(f, "%d:%m[^:]:%m[^\n]",
+                         &lnum, &namespecs, &cgroup)) != EOF) {
+        if (namespecs && strstr(namespecs, "cpuacct")) {
+            break;
+        }
+        free(cgroup);
+        free(namespecs);
+        cgroup = namespecs = NULL;
+        /* fscanf stops at the first char it can't match (e.g. cgroup-v2
+         * "0::/path") -> skip the rest of that line or we'd loop forever */
+        if ((num < 3) && (fscanf(f, "%*[^\n]") == EOF)) {
+            break;
+        }
+    }
+    free(namespecs);
+    fclose(f);
+    if (cgroup == NULL) {
+        cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us", cgroup);
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
+                            "doesn't for '%s'", cgroup);
+        goto exit_res;
+    }
+    if (fscanf(f, "%d", &res) != 1) {
+        cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
+    } else {
+        cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
+    }
+    fclose(f);
+exit_res:
+    free(cgroup);
+    return res;
+}
|
|
+
|
|
+/*
+ * Move sbd to the root cpu-cgroup (stolen from corosync). Returns -1 on failure,
+ * 0 if moved or not needed (no cpu.rt_runtime_us / unenforced and budget > 0).
+ */
+static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
+    FILE *f;
+    int res = -1;
+
+    /*
+     * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
+     * using systemd and systemd uses hardcoded path of cgroup mount point.
+     *
+     * This feature is expected to be removed as soon as systemd gets support
+     * for managing RT configuration.
+     */
+    f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
+    if (f == NULL) {
+        cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
+               "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
+        res = 0;
+        goto exit_res;
+    }
+    fclose(f);
+
+    if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
+        cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
+               "-> skip moving to root-slice");
+        res = 0;
+        goto exit_res;
+    }
+
+    f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
+        goto exit_res;
+    }
+    if (fprintf(f, "%jd\n", (intmax_t)getpid()) > 0) {
+        res = 0;
+    } else {
+        cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
+    }
+    /* stdio buffers the pid, so a rejected write only shows up on fclose */
+    if (fclose(f) != 0) {
+        cl_log(LOG_WARNING, "Can't close cgroups tasks file");
+        res = -1;
+    }
+exit_res:
+    return (res);
+}
|
|
+
|
|
void
|
|
sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
|
|
{
|
|
@@ -670,6 +776,10 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
|
|
}
|
|
|
|
#ifdef SCHED_RR
|
|
+ if (move_to_root_cgroup) {
|
|
+ sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
|
|
+ }
|
|
+
|
|
{
|
|
int pcurrent = 0;
|
|
int pmin = sched_get_priority_min(SCHED_RR);
|
|
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
|
|
index abde4e5..cef5cc7 100644
|
|
--- a/src/sbd-inquisitor.c
|
|
+++ b/src/sbd-inquisitor.c
|
|
@@ -33,6 +33,8 @@ int start_mode = 0;
|
|
char* pidfile = NULL;
|
|
bool do_flush = true;
|
|
char timeout_sysrq_char = 'b';
|
|
+bool move_to_root_cgroup = true;
|
|
+bool enforce_moving_to_root_cgroup = false;
|
|
|
|
int parse_device_line(const char *line);
|
|
|
|
@@ -965,6 +967,19 @@ int main(int argc, char **argv, char **envp)
|
|
timeout_action = strdup(value);
|
|
}
|
|
|
|
+ value = getenv("SBD_MOVE_TO_ROOT_CGROUP");
|
|
+ if(value) {
|
|
+ move_to_root_cgroup = crm_is_true(value);
|
|
+
|
|
+ if (move_to_root_cgroup) {
|
|
+ enforce_moving_to_root_cgroup = true;
|
|
+ } else {
|
|
+ if (strcmp(value, "auto") == 0) {
|
|
+ move_to_root_cgroup = true;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
|
|
switch (c) {
|
|
case 'D':
|
|
diff --git a/src/sbd.h b/src/sbd.h
|
|
index 3b05a11..ac30ec7 100644
|
|
--- a/src/sbd.h
|
|
+++ b/src/sbd.h
|
|
@@ -159,6 +159,8 @@ extern bool watchdogdev_is_default;
|
|
extern char* local_uname;
|
|
extern bool do_flush;
|
|
extern char timeout_sysrq_char;
|
|
+extern bool move_to_root_cgroup;
|
|
+extern bool enforce_moving_to_root_cgroup;
|
|
|
|
/* Global, non-tunable variables: */
|
|
extern int sector_size;
|
|
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
|
|
index f163f21..e1a60ed 100644
|
|
--- a/src/sbd.sysconfig
|
|
+++ b/src/sbd.sysconfig
|
|
@@ -91,6 +91,20 @@ SBD_WATCHDOG_TIMEOUT=5
|
|
#
|
|
SBD_TIMEOUT_ACTION=flush,reboot
|
|
|
|
+## Type: yesno / auto
|
|
+## Default: auto
|
|
+#
|
|
+# If CPUAccounting is enabled, the default is not to assign any RT-budget
|
|
+# to the system.slice, which prevents sbd from running RR-scheduled.
|
|
+#
|
|
+# One way to escape that issue is to move sbd-processes from the
|
|
+# slice they were originally started in to the root-slice.
|
|
+# Of course starting sbd in a certain slice might be intentional.
|
|
+# Thus in auto-mode sbd will check if the slice has RT-budget assigned.
|
|
+# If that is the case, sbd will stay in that slice, while it will
|
|
+# be moved to root-slice otherwise.
|
|
+SBD_MOVE_TO_ROOT_CGROUP=auto
|
|
+
|
|
## Type: string
|
|
## Default: ""
|
|
#
|
|
--
|
|
1.8.3.1
|
|
|