- Related: rhbz#1948974
This commit is contained in:
parent
026d94bfc7
commit
0aa25de1e2
1
.gitignore
vendored
1
.gitignore
vendored
@ -46,3 +46,4 @@ corosync-1.2.7.tar.gz
|
||||
/corosync-3.1.2.tar.gz
|
||||
/corosync-3.1.3.tar.gz
|
||||
/corosync-3.1.4.tar.gz
|
||||
/corosync-3.1.5.tar.gz
|
||||
|
@ -1,317 +0,0 @@
|
||||
From c9996fdd0f4fa1fbf113b740eea01bcc70b235aa Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Mon, 3 May 2021 15:29:04 +0200
|
||||
Subject: [PATCH] main: Add support for cgroup v2 and auto mode
|
||||
|
||||
Support for cgroup v2 is very similar to cgroup v1 just checking (and
|
||||
writing) different file.
|
||||
|
||||
Because of all the problems described later with cgroup v2 new "auto"
|
||||
mode (new default) is added. This mode first tries to set rr scheduling
|
||||
and moves Corosync to root cgroup only if it fails.
|
||||
|
||||
Testing this feature is a bit harder than with cgroup v1 so it's
|
||||
probably worth noting in this commit message.
|
||||
|
||||
1. Copy some service file (I've used httpd service) and set
|
||||
CPUQuota=30% in the [Service] section.
|
||||
2. Check /sys/fs/cgroup/cgroup.subtree_control - there should be no
|
||||
"cpu"
|
||||
3. Start modified service
|
||||
4. Check /sys/fs/cgroup/cgroup.subtree_control - there should be "cpu"
|
||||
5. Start corosync - It should be able to get rt priority
|
||||
|
||||
When move_to_root_cgroup is disabled (applies only for kernels
|
||||
with CONFIG_RT_GROUP_SCHED enabled), behavior differs:
|
||||
- If corosync is started before modified service, so
|
||||
there is no "cpu" in /sys/fs/cgroup/cgroup.subtree_control
|
||||
corosync starts without problem and gets rt priority.
|
||||
Starting modified service later will never add "cpu" into
|
||||
/sys/fs/cgroup/cgroup.subtree_control (because corosync is holding
|
||||
rt priority and it is placed in the non-root cgroup by systemd).
|
||||
|
||||
- When corosync is started after modified service, so "cpu"
|
||||
is in /sys/fs/cgroup/cgroup.subtree_control, corosync is not
|
||||
able to get RT priority.
|
||||
|
||||
It's worth noting problems when cgroup v2 is used together with systemd
|
||||
logging described in corosync.conf(5) man page.
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
exec/coroparse.c | 3 +-
|
||||
exec/main.c | 98 ++++++++++++++++++++++++++++++++++-----------
|
||||
man/corosync.conf.5 | 38 +++++++++++++++---
|
||||
3 files changed, 109 insertions(+), 30 deletions(-)
|
||||
|
||||
diff --git a/exec/coroparse.c b/exec/coroparse.c
|
||||
index 741f3741..56b8034e 100644
|
||||
--- a/exec/coroparse.c
|
||||
+++ b/exec/coroparse.c
|
||||
@@ -828,7 +828,8 @@ static int main_config_parser_cb(const char *path,
|
||||
}
|
||||
if (strcmp(path, "system.move_to_root_cgroup") == 0) {
|
||||
if ((strcmp(value, "yes") != 0) &&
|
||||
- (strcmp(value, "no") != 0)) {
|
||||
+ (strcmp(value, "no") != 0) &&
|
||||
+ (strcmp(value, "auto") != 0)) {
|
||||
*error_string = "Invalid system.move_to_root_cgroup";
|
||||
|
||||
return (0);
|
||||
diff --git a/exec/main.c b/exec/main.c
|
||||
index aa6d9fbf..5fb4d47c 100644
|
||||
--- a/exec/main.c
|
||||
+++ b/exec/main.c
|
||||
@@ -169,6 +169,12 @@ static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf"
|
||||
|
||||
static int lockfile_fd = -1;
|
||||
|
||||
+enum move_to_root_cgroup_mode {
|
||||
+ MOVE_TO_ROOT_CGROUP_MODE_OFF = 0,
|
||||
+ MOVE_TO_ROOT_CGROUP_MODE_ON = 1,
|
||||
+ MOVE_TO_ROOT_CGROUP_MODE_AUTO = 2,
|
||||
+};
|
||||
+
|
||||
qb_loop_t *cs_poll_handle_get (void)
|
||||
{
|
||||
return (corosync_poll_handle);
|
||||
@@ -859,7 +865,12 @@ static void timer_function_scheduler_timeout (void *data)
|
||||
}
|
||||
|
||||
|
||||
-static int corosync_set_rr_scheduler (void)
|
||||
+/*
|
||||
+ * Set main pid RR scheduler.
|
||||
+ * silent: don't log sched_get_priority_max and sched_setscheduler errors
|
||||
+ * Returns: 0 - success, -1 failure, -2 platform doesn't support SCHED_RR
|
||||
+ */
|
||||
+static int corosync_set_rr_scheduler (int silent)
|
||||
{
|
||||
int ret_val = 0;
|
||||
|
||||
@@ -871,9 +882,11 @@ static int corosync_set_rr_scheduler (void)
|
||||
global_sched_param.sched_priority = sched_priority;
|
||||
res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
|
||||
if (res == -1) {
|
||||
- LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
|
||||
- "Could not set SCHED_RR at priority %d",
|
||||
- global_sched_param.sched_priority);
|
||||
+ if (!silent) {
|
||||
+ LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
|
||||
+ "Could not set SCHED_RR at priority %d",
|
||||
+ global_sched_param.sched_priority);
|
||||
+ }
|
||||
|
||||
global_sched_param.sched_priority = 0;
|
||||
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
|
||||
@@ -898,15 +911,17 @@ static int corosync_set_rr_scheduler (void)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
- LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
|
||||
- "Could not get maximum scheduler priority");
|
||||
+ if (!silent) {
|
||||
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
|
||||
+ "Could not get maximum scheduler priority");
|
||||
+ }
|
||||
sched_priority = 0;
|
||||
ret_val = -1;
|
||||
}
|
||||
#else
|
||||
log_printf(LOGSYS_LEVEL_WARNING,
|
||||
"The Platform is missing process priority setting features. Leaving at default.");
|
||||
- ret_val = -1;
|
||||
+ ret_val = -2;
|
||||
#endif
|
||||
|
||||
return (ret_val);
|
||||
@@ -1173,6 +1188,7 @@ error_close:
|
||||
static int corosync_move_to_root_cgroup(void) {
|
||||
FILE *f;
|
||||
int res = -1;
|
||||
+ const char *cgroup_task_fname = NULL;
|
||||
|
||||
/*
|
||||
* /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
|
||||
@@ -1183,15 +1199,29 @@ static int corosync_move_to_root_cgroup(void) {
|
||||
*/
|
||||
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
|
||||
if (f == NULL) {
|
||||
- log_printf(LOGSYS_LEVEL_DEBUG, "cpu.rt_runtime_us doesn't exists -> "
|
||||
- "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
|
||||
+ /*
|
||||
+ * Try cgroup v2
|
||||
+ */
|
||||
+ f = fopen("/sys/fs/cgroup/cgroup.procs", "rt");
|
||||
+ if (f == NULL) {
|
||||
+ log_printf(LOG_DEBUG, "cpu.rt_runtime_us or cgroup.procs doesn't exist -> "
|
||||
+ "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
|
||||
|
||||
- res = 0;
|
||||
- goto exit_res;
|
||||
+ res = 0;
|
||||
+ goto exit_res;
|
||||
+ } else {
|
||||
+ log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v2 root cgroup");
|
||||
+
|
||||
+ cgroup_task_fname = "/sys/fs/cgroup/cgroup.procs";
|
||||
+ }
|
||||
+ } else {
|
||||
+ log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v1 root cgroup");
|
||||
+
|
||||
+ cgroup_task_fname = "/sys/fs/cgroup/cpu/tasks";
|
||||
}
|
||||
(void)fclose(f);
|
||||
|
||||
- f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
|
||||
+ f = fopen(cgroup_task_fname, "w");
|
||||
if (f == NULL) {
|
||||
log_printf(LOGSYS_LEVEL_WARNING, "Can't open cgroups tasks file for writing");
|
||||
|
||||
@@ -1256,7 +1286,8 @@ int main (int argc, char **argv, char **envp)
|
||||
const char *error_string;
|
||||
struct totem_config totem_config;
|
||||
int res, ch;
|
||||
- int background, sched_rr, prio, testonly, move_to_root_cgroup;
|
||||
+ int background, sched_rr, prio, testonly;
|
||||
+ enum move_to_root_cgroup_mode move_to_root_cgroup;
|
||||
enum e_corosync_done flock_err;
|
||||
uint64_t totem_config_warnings;
|
||||
struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
|
||||
@@ -1264,6 +1295,7 @@ int main (int argc, char **argv, char **envp)
|
||||
char *ep;
|
||||
char *tmp_str;
|
||||
int log_subsys_id_totem;
|
||||
+ int silent;
|
||||
|
||||
/* default configuration
|
||||
*/
|
||||
@@ -1417,21 +1449,19 @@ int main (int argc, char **argv, char **envp)
|
||||
}
|
||||
|
||||
|
||||
- move_to_root_cgroup = 1;
|
||||
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_AUTO;
|
||||
if (icmap_get_string("system.move_to_root_cgroup", &tmp_str) == CS_OK) {
|
||||
- if (strcmp(tmp_str, "yes") != 0) {
|
||||
- move_to_root_cgroup = 0;
|
||||
+ /*
|
||||
+ * Validity of move_to_root_cgroup values checked in coroparse.c
|
||||
+ */
|
||||
+ if (strcmp(tmp_str, "yes") == 0) {
|
||||
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_ON;
|
||||
+ } else if (strcmp(tmp_str, "no") == 0) {
|
||||
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_OFF;
|
||||
}
|
||||
free(tmp_str);
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Try to move corosync into root cpu cgroup. Failure is not fatal and
|
||||
- * error is deliberately ignored.
|
||||
- */
|
||||
- if (move_to_root_cgroup) {
|
||||
- (void)corosync_move_to_root_cgroup();
|
||||
- }
|
||||
|
||||
sched_rr = 1;
|
||||
if (icmap_get_string("system.sched_rr", &tmp_str) == CS_OK) {
|
||||
@@ -1462,11 +1492,31 @@ int main (int argc, char **argv, char **envp)
|
||||
free(tmp_str);
|
||||
}
|
||||
|
||||
+ if (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_ON) {
|
||||
+ /*
|
||||
+ * Try to move corosync into root cpu cgroup. Failure is not fatal and
|
||||
+ * error is deliberately ignored.
|
||||
+ */
|
||||
+ (void)corosync_move_to_root_cgroup();
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* Set round robin realtime scheduling with priority 99
|
||||
*/
|
||||
if (sched_rr) {
|
||||
- if (corosync_set_rr_scheduler () != 0) {
|
||||
+ silent = (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO);
|
||||
+ res = corosync_set_rr_scheduler (silent);
|
||||
+
|
||||
+ if (res == -1 && move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO) {
|
||||
+ /*
|
||||
+ * Try to move process to root cgroup and try set priority again
|
||||
+ */
|
||||
+ (void)corosync_move_to_root_cgroup();
|
||||
+
|
||||
+ res = corosync_set_rr_scheduler (0);
|
||||
+ }
|
||||
+
|
||||
+ if (res != 0) {
|
||||
prio = INT_MIN;
|
||||
} else {
|
||||
prio = 0;
|
||||
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
|
||||
index 25289ba4..0588ad1e 100644
|
||||
--- a/man/corosync.conf.5
|
||||
+++ b/man/corosync.conf.5
|
||||
@@ -1,6 +1,6 @@
|
||||
.\"/*
|
||||
.\" * Copyright (c) 2005 MontaVista Software, Inc.
|
||||
-.\" * Copyright (c) 2006-2020 Red Hat, Inc.
|
||||
+.\" * Copyright (c) 2006-2021 Red Hat, Inc.
|
||||
.\" *
|
||||
.\" * All rights reserved.
|
||||
.\" *
|
||||
@@ -32,7 +32,7 @@
|
||||
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
.\" * THE POSSIBILITY OF SUCH DAMAGE.
|
||||
.\" */
|
||||
-.TH COROSYNC_CONF 5 2021-04-09 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
|
||||
+.TH COROSYNC_CONF 5 2021-07-23 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
|
||||
.SH NAME
|
||||
corosync.conf - corosync executive configuration file
|
||||
|
||||
@@ -799,9 +799,37 @@ meaning maximal / minimal priority (so minimal / maximal nice value).
|
||||
|
||||
.TP
|
||||
move_to_root_cgroup
|
||||
-Should be set to yes (default) if corosync should try to move itself to root
|
||||
-cgroup. This feature is available only for systems with cgroups with RT
|
||||
-sched enabled (Linux with CONFIG_RT_GROUP_SCHED kernel option).
|
||||
+Can be one of
|
||||
+.B yes
|
||||
+(Corosync always moves itself to root cgroup),
|
||||
+.B no
|
||||
+(Corosync never tries to move itself to root cgroup) or
|
||||
+.B auto
|
||||
+(Corosync first checks if sched_rr is enabled, and if
|
||||
+so, it tries to set round robin realtime scheduling with maximal priority to itself.
|
||||
+If setting of priority fails, corosync tries to move itself to root
|
||||
+cgroup and retries setting of priority).
|
||||
+
|
||||
+This feature is available only for systems with cgroups v1 with RT
|
||||
+sched enabled (Linux with CONFIG_RT_GROUP_SCHED kernel option) and cgroups v2.
|
||||
+
|
||||
+It's worth noting that currently (May 3 2021) cgroup2 doesn’t yet
|
||||
+support control of realtime processes and the cpu controller can only be
|
||||
+enabled when all RT processes are in the root cgroup (applies only for kernel
|
||||
+with CONFIG_RT_GROUP_SCHED enabled). So when move_to_root_cgroup
|
||||
+is disabled, kernel is compiled with CONFIG_RT_GROUP_SCHED and systemd is used,
|
||||
+it may be impossible to make systemd options
|
||||
+like CPUQuota working correctly until corosync is stopped.
|
||||
+
|
||||
+Also when moving to root cgroup is enforced and used together with cgroup2 and systemd
|
||||
+it makes it impossible (most of the time) for journald to add systemd specific
|
||||
+metadata (most importantly _SYSTEMD_UNIT) properly, because corosync is
|
||||
+moved out of cgroup created by systemd. This means
|
||||
+it is not possible to filter corosync logged messages based on these metadata
|
||||
+(for example using -u or _SYSTEMD_UNIT=UNIT pattern) and also running
|
||||
+systemctl status doesn't display (all) corosync log messages.
|
||||
+The problem is even worse because journald caches pid for some time
|
||||
+(approx. 5 sec) so initial corosync messages have correct metadata.
|
||||
|
||||
.TP
|
||||
allow_knet_handle_fallback
|
||||
--
|
||||
2.27.0
|
||||
|
@ -17,14 +17,12 @@
|
||||
|
||||
Name: corosync
|
||||
Summary: The Corosync Cluster Engine and Application Programming Interfaces
|
||||
Version: 3.1.4
|
||||
Release: 3%{?gitver}%{?dist}
|
||||
Version: 3.1.5
|
||||
Release: 1%{?gitver}%{?dist}
|
||||
License: BSD
|
||||
URL: http://corosync.github.io/corosync/
|
||||
Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz
|
||||
|
||||
Patch0: bz1948974-1-main-Add-support-for-cgroup-v2-and-auto-mode.patch
|
||||
|
||||
# Runtime bits
|
||||
# The automatic dependency overridden in favor of explicit version lock
|
||||
Requires: corosynclib%{?_isa} = %{version}-%{release}
|
||||
@ -73,7 +71,6 @@ BuildRequires: make
|
||||
|
||||
%prep
|
||||
%setup -q -n %{name}-%{version}%{?gittarver}
|
||||
%patch0 -p1 -b .bz1948974-1
|
||||
|
||||
%build
|
||||
%if %{with runautogen}
|
||||
@ -292,6 +289,11 @@ network splits)
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Wed Aug 04 2021 Jan Friesse <jfriesse@redhat.com> - 3.1.5-1
|
||||
- Related: rhbz#1948974
|
||||
|
||||
- New upstream release (rhbz#1948974)
|
||||
|
||||
* Fri Jul 23 2021 Jan Friesse <jfriesse@redhat.com> - 3.1.4-3
|
||||
- Related: rhbz#1948974
|
||||
|
||||
|
2
sources
2
sources
@ -1 +1 @@
|
||||
SHA512 (corosync-3.1.4.tar.gz) = 3e8ed70653d2559449765dac91f3ad87b8a474b81529895a7b33dee044946e05b08dbf9f36da5be60d93527976ac0e390beb3ce2f77d088613fdb45e940558ec
|
||||
SHA512 (corosync-3.1.5.tar.gz) = eb974a32f60c52564057ed41c1ebf31fe4332a5a082ebbd5fa2540af8fa9e8c0c42d4ef9066abcb9d7dd04c12b97cd13642289c65b5b6b65cfd30c12641ada1d
|
||||
|
Loading…
Reference in New Issue
Block a user