- Related: rhbz#1948974
This commit is contained in:
parent
026d94bfc7
commit
0aa25de1e2
1
.gitignore
vendored
1
.gitignore
vendored
@ -46,3 +46,4 @@ corosync-1.2.7.tar.gz
|
|||||||
/corosync-3.1.2.tar.gz
|
/corosync-3.1.2.tar.gz
|
||||||
/corosync-3.1.3.tar.gz
|
/corosync-3.1.3.tar.gz
|
||||||
/corosync-3.1.4.tar.gz
|
/corosync-3.1.4.tar.gz
|
||||||
|
/corosync-3.1.5.tar.gz
|
||||||
|
@ -1,317 +0,0 @@
|
|||||||
From c9996fdd0f4fa1fbf113b740eea01bcc70b235aa Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jan Friesse <jfriesse@redhat.com>
|
|
||||||
Date: Mon, 3 May 2021 15:29:04 +0200
|
|
||||||
Subject: [PATCH] main: Add support for cgroup v2 and auto mode
|
|
||||||
|
|
||||||
Support for cgroup v2 is very similar to cgroup v1 just checking (and
|
|
||||||
writing) different file.
|
|
||||||
|
|
||||||
Because of all the problems described later with cgroup v2 new "auto"
|
|
||||||
mode (new default) is added. This mode first tries to set rr scheduling
|
|
||||||
and moves Corosync to root cgroup only if it fails.
|
|
||||||
|
|
||||||
Testing this feature is a bit harder than with cgroup v1 so it's
|
|
||||||
probably worth noting in this commit message.
|
|
||||||
|
|
||||||
1. Copy some service file (I've used httpd service) and set
|
|
||||||
CPUQuota=30% in the [service] section.
|
|
||||||
2. Check /sys/fs/cgroup/cgroup.subtree_control - there should be no
|
|
||||||
"cpu"
|
|
||||||
3. Start modified service
|
|
||||||
4. Check /sys/fs/cgroup/cgroup.subtree_control - there should be "cpu"
|
|
||||||
5. Start corosync - It should be able to get rt priority
|
|
||||||
|
|
||||||
When move_to_root_cgroup is disabled (applies only for kernels
|
|
||||||
with CONFIG_RT_GROUP_SCHED enabled), behavior differs:
|
|
||||||
- If corosync is started before modified service, so
|
|
||||||
there is no "cpu" in /sys/fs/cgroup/cgroup.subtree_control
|
|
||||||
corosync starts without problem and gets rt priority.
|
|
||||||
Starting modified service later will never add "cpu" into
|
|
||||||
/sys/fs/cgroup/cgroup.subtree_control (because corosync is holding
|
|
||||||
rt priority and it is placed in the non-root cgroup by systemd).
|
|
||||||
|
|
||||||
- When corosync is started after modified service, so "cpu"
|
|
||||||
is in /sys/fs/cgroup/cgroup.subtree_control, corosync is not
|
|
||||||
able to get RT priority.
|
|
||||||
|
|
||||||
It's worth noting problems when cgroup v2 is used together with systemd
|
|
||||||
logging described in corosync.conf(5) man page.
|
|
||||||
|
|
||||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
|
||||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
|
||||||
---
|
|
||||||
exec/coroparse.c | 3 +-
|
|
||||||
exec/main.c | 98 ++++++++++++++++++++++++++++++++++-----------
|
|
||||||
man/corosync.conf.5 | 38 +++++++++++++++---
|
|
||||||
3 files changed, 109 insertions(+), 30 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/exec/coroparse.c b/exec/coroparse.c
|
|
||||||
index 741f3741..56b8034e 100644
|
|
||||||
--- a/exec/coroparse.c
|
|
||||||
+++ b/exec/coroparse.c
|
|
||||||
@@ -828,7 +828,8 @@ static int main_config_parser_cb(const char *path,
|
|
||||||
}
|
|
||||||
if (strcmp(path, "system.move_to_root_cgroup") == 0) {
|
|
||||||
if ((strcmp(value, "yes") != 0) &&
|
|
||||||
- (strcmp(value, "no") != 0)) {
|
|
||||||
+ (strcmp(value, "no") != 0) &&
|
|
||||||
+ (strcmp(value, "auto") != 0)) {
|
|
||||||
*error_string = "Invalid system.move_to_root_cgroup";
|
|
||||||
|
|
||||||
return (0);
|
|
||||||
diff --git a/exec/main.c b/exec/main.c
|
|
||||||
index aa6d9fbf..5fb4d47c 100644
|
|
||||||
--- a/exec/main.c
|
|
||||||
+++ b/exec/main.c
|
|
||||||
@@ -169,6 +169,12 @@ static char corosync_config_file[PATH_MAX + 1] = COROSYSCONFDIR "/corosync.conf"
|
|
||||||
|
|
||||||
static int lockfile_fd = -1;
|
|
||||||
|
|
||||||
+enum move_to_root_cgroup_mode {
|
|
||||||
+ MOVE_TO_ROOT_CGROUP_MODE_OFF = 0,
|
|
||||||
+ MOVE_TO_ROOT_CGROUP_MODE_ON = 1,
|
|
||||||
+ MOVE_TO_ROOT_CGROUP_MODE_AUTO = 2,
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
qb_loop_t *cs_poll_handle_get (void)
|
|
||||||
{
|
|
||||||
return (corosync_poll_handle);
|
|
||||||
@@ -859,7 +865,12 @@ static void timer_function_scheduler_timeout (void *data)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
-static int corosync_set_rr_scheduler (void)
|
|
||||||
+/*
|
|
||||||
+ * Set main pid RR scheduler.
|
|
||||||
+ * silent: don't log sched_get_priority_max and sched_setscheduler errors
|
|
||||||
+ * Returns: 0 - success, -1 failure, -2 platform doesn't support SCHED_RR
|
|
||||||
+ */
|
|
||||||
+static int corosync_set_rr_scheduler (int silent)
|
|
||||||
{
|
|
||||||
int ret_val = 0;
|
|
||||||
|
|
||||||
@@ -871,9 +882,11 @@ static int corosync_set_rr_scheduler (void)
|
|
||||||
global_sched_param.sched_priority = sched_priority;
|
|
||||||
res = sched_setscheduler (0, SCHED_RR, &global_sched_param);
|
|
||||||
if (res == -1) {
|
|
||||||
- LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
|
|
||||||
- "Could not set SCHED_RR at priority %d",
|
|
||||||
- global_sched_param.sched_priority);
|
|
||||||
+ if (!silent) {
|
|
||||||
+ LOGSYS_PERROR(errno, LOGSYS_LEVEL_WARNING,
|
|
||||||
+ "Could not set SCHED_RR at priority %d",
|
|
||||||
+ global_sched_param.sched_priority);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
global_sched_param.sched_priority = 0;
|
|
||||||
#ifdef HAVE_QB_LOG_THREAD_PRIORITY_SET
|
|
||||||
@@ -898,15 +911,17 @@ static int corosync_set_rr_scheduler (void)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
- LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
|
|
||||||
- "Could not get maximum scheduler priority");
|
|
||||||
+ if (!silent) {
|
|
||||||
+ LOGSYS_PERROR (errno, LOGSYS_LEVEL_WARNING,
|
|
||||||
+ "Could not get maximum scheduler priority");
|
|
||||||
+ }
|
|
||||||
sched_priority = 0;
|
|
||||||
ret_val = -1;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
log_printf(LOGSYS_LEVEL_WARNING,
|
|
||||||
"The Platform is missing process priority setting features. Leaving at default.");
|
|
||||||
- ret_val = -1;
|
|
||||||
+ ret_val = -2;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return (ret_val);
|
|
||||||
@@ -1173,6 +1188,7 @@ error_close:
|
|
||||||
static int corosync_move_to_root_cgroup(void) {
|
|
||||||
FILE *f;
|
|
||||||
int res = -1;
|
|
||||||
+ const char *cgroup_task_fname = NULL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
|
|
||||||
@@ -1183,15 +1199,29 @@ static int corosync_move_to_root_cgroup(void) {
|
|
||||||
*/
|
|
||||||
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
|
|
||||||
if (f == NULL) {
|
|
||||||
- log_printf(LOGSYS_LEVEL_DEBUG, "cpu.rt_runtime_us doesn't exists -> "
|
|
||||||
- "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
|
|
||||||
+ /*
|
|
||||||
+ * Try cgroup v2
|
|
||||||
+ */
|
|
||||||
+ f = fopen("/sys/fs/cgroup/cgroup.procs", "rt");
|
|
||||||
+ if (f == NULL) {
|
|
||||||
+ log_printf(LOG_DEBUG, "cpu.rt_runtime_us or cgroup.procs doesn't exist -> "
|
|
||||||
+ "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
|
|
||||||
|
|
||||||
- res = 0;
|
|
||||||
- goto exit_res;
|
|
||||||
+ res = 0;
|
|
||||||
+ goto exit_res;
|
|
||||||
+ } else {
|
|
||||||
+ log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v2 root cgroup");
|
|
||||||
+
|
|
||||||
+ cgroup_task_fname = "/sys/fs/cgroup/cgroup.procs";
|
|
||||||
+ }
|
|
||||||
+ } else {
|
|
||||||
+ log_printf(LOGSYS_LEVEL_DEBUG, "Moving main pid to cgroup v1 root cgroup");
|
|
||||||
+
|
|
||||||
+ cgroup_task_fname = "/sys/fs/cgroup/cpu/tasks";
|
|
||||||
}
|
|
||||||
(void)fclose(f);
|
|
||||||
|
|
||||||
- f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
|
|
||||||
+ f = fopen(cgroup_task_fname, "w");
|
|
||||||
if (f == NULL) {
|
|
||||||
log_printf(LOGSYS_LEVEL_WARNING, "Can't open cgroups tasks file for writing");
|
|
||||||
|
|
||||||
@@ -1256,7 +1286,8 @@ int main (int argc, char **argv, char **envp)
|
|
||||||
const char *error_string;
|
|
||||||
struct totem_config totem_config;
|
|
||||||
int res, ch;
|
|
||||||
- int background, sched_rr, prio, testonly, move_to_root_cgroup;
|
|
||||||
+ int background, sched_rr, prio, testonly;
|
|
||||||
+ enum move_to_root_cgroup_mode move_to_root_cgroup;
|
|
||||||
enum e_corosync_done flock_err;
|
|
||||||
uint64_t totem_config_warnings;
|
|
||||||
struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
|
|
||||||
@@ -1264,6 +1295,7 @@ int main (int argc, char **argv, char **envp)
|
|
||||||
char *ep;
|
|
||||||
char *tmp_str;
|
|
||||||
int log_subsys_id_totem;
|
|
||||||
+ int silent;
|
|
||||||
|
|
||||||
/* default configuration
|
|
||||||
*/
|
|
||||||
@@ -1417,21 +1449,19 @@ int main (int argc, char **argv, char **envp)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
- move_to_root_cgroup = 1;
|
|
||||||
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_AUTO;
|
|
||||||
if (icmap_get_string("system.move_to_root_cgroup", &tmp_str) == CS_OK) {
|
|
||||||
- if (strcmp(tmp_str, "yes") != 0) {
|
|
||||||
- move_to_root_cgroup = 0;
|
|
||||||
+ /*
|
|
||||||
+ * Validity of move_to_root_cgroup values checked in coroparse.c
|
|
||||||
+ */
|
|
||||||
+ if (strcmp(tmp_str, "yes") == 0) {
|
|
||||||
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_ON;
|
|
||||||
+ } else if (strcmp(tmp_str, "no") == 0) {
|
|
||||||
+ move_to_root_cgroup = MOVE_TO_ROOT_CGROUP_MODE_OFF;
|
|
||||||
}
|
|
||||||
free(tmp_str);
|
|
||||||
}
|
|
||||||
|
|
||||||
- /*
|
|
||||||
- * Try to move corosync into root cpu cgroup. Failure is not fatal and
|
|
||||||
- * error is deliberately ignored.
|
|
||||||
- */
|
|
||||||
- if (move_to_root_cgroup) {
|
|
||||||
- (void)corosync_move_to_root_cgroup();
|
|
||||||
- }
|
|
||||||
|
|
||||||
sched_rr = 1;
|
|
||||||
if (icmap_get_string("system.sched_rr", &tmp_str) == CS_OK) {
|
|
||||||
@@ -1462,11 +1492,31 @@ int main (int argc, char **argv, char **envp)
|
|
||||||
free(tmp_str);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_ON) {
|
|
||||||
+ /*
|
|
||||||
+ * Try to move corosync into root cpu cgroup. Failure is not fatal and
|
|
||||||
+ * error is deliberately ignored.
|
|
||||||
+ */
|
|
||||||
+ (void)corosync_move_to_root_cgroup();
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* Set round robin realtime scheduling with priority 99
|
|
||||||
*/
|
|
||||||
if (sched_rr) {
|
|
||||||
- if (corosync_set_rr_scheduler () != 0) {
|
|
||||||
+ silent = (move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO);
|
|
||||||
+ res = corosync_set_rr_scheduler (silent);
|
|
||||||
+
|
|
||||||
+ if (res == -1 && move_to_root_cgroup == MOVE_TO_ROOT_CGROUP_MODE_AUTO) {
|
|
||||||
+ /*
|
|
||||||
+ * Try to move process to root cgroup and try set priority again
|
|
||||||
+ */
|
|
||||||
+ (void)corosync_move_to_root_cgroup();
|
|
||||||
+
|
|
||||||
+ res = corosync_set_rr_scheduler (0);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (res != 0) {
|
|
||||||
prio = INT_MIN;
|
|
||||||
} else {
|
|
||||||
prio = 0;
|
|
||||||
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
|
|
||||||
index 25289ba4..0588ad1e 100644
|
|
||||||
--- a/man/corosync.conf.5
|
|
||||||
+++ b/man/corosync.conf.5
|
|
||||||
@@ -1,6 +1,6 @@
|
|
||||||
.\"/*
|
|
||||||
.\" * Copyright (c) 2005 MontaVista Software, Inc.
|
|
||||||
-.\" * Copyright (c) 2006-2020 Red Hat, Inc.
|
|
||||||
+.\" * Copyright (c) 2006-2021 Red Hat, Inc.
|
|
||||||
.\" *
|
|
||||||
.\" * All rights reserved.
|
|
||||||
.\" *
|
|
||||||
@@ -32,7 +32,7 @@
|
|
||||||
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
||||||
.\" * THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
.\" */
|
|
||||||
-.TH COROSYNC_CONF 5 2021-04-09 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
|
|
||||||
+.TH COROSYNC_CONF 5 2021-07-23 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
|
|
||||||
.SH NAME
|
|
||||||
corosync.conf - corosync executive configuration file
|
|
||||||
|
|
||||||
@@ -799,9 +799,37 @@ meaning maximal / minimal priority (so minimal / maximal nice value).
|
|
||||||
|
|
||||||
.TP
|
|
||||||
move_to_root_cgroup
|
|
||||||
-Should be set to yes (default) if corosync should try to move itself to root
|
|
||||||
-cgroup. This feature is available only for systems with cgroups with RT
|
|
||||||
-sched enabled (Linux with CONFIG_RT_GROUP_SCHED kernel option).
|
|
||||||
+Can be one of
|
|
||||||
+.B yes
|
|
||||||
+(Corosync always moves itself to root cgroup),
|
|
||||||
+.B no
|
|
||||||
+(Corosync never tries to move itself to root cgroup) or
|
|
||||||
+.B auto
|
|
||||||
+(Corosync first checks if sched_rr is enabled, and if
|
|
||||||
+so, it tries to set round robin realtime scheduling with maximal priority to itself.
|
|
||||||
+If setting of priority fails, corosync tries to move itself to root
|
|
||||||
+cgroup and retries setting of priority).
|
|
||||||
+
|
|
||||||
+This feature is available only for systems with cgroups v1 with RT
|
|
||||||
+sched enabled (Linux with CONFIG_RT_GROUP_SCHED kernel option) and cgroups v2.
|
|
||||||
+
|
|
||||||
+It's worth noting that currently (May 3 2021) cgroup2 doesn’t yet
|
|
||||||
+support control of realtime processes and the cpu controller can only be
|
|
||||||
+enabled when all RT processes are in the root cgroup (applies only for kernel
|
|
||||||
+with CONFIG_RT_GROUP_SCHED enabled). So when move_to_root_cgroup
|
|
||||||
+is disabled, kernel is compiled with CONFIG_RT_GROUP_SCHED and systemd is used,
|
|
||||||
+it may be impossible to make systemd options
|
|
||||||
+like CPUQuota work correctly until corosync is stopped.
|
|
||||||
+
|
|
||||||
+Also when moving to root cgroup is enforced and used together with cgroup2 and systemd
|
|
||||||
+it makes it impossible (most of the time) for journald to add systemd-specific
|
|
||||||
+metadata (most importantly _SYSTEMD_UNIT) properly, because corosync is
|
|
||||||
+moved out of cgroup created by systemd. This means
|
|
||||||
+it is not possible to filter corosync logged messages based on these metadata
|
|
||||||
+(for example using -u or _SYSTEMD_UNIT=UNIT pattern) and also running
|
|
||||||
+systemctl status doesn't display (all) corosync log messages.
|
|
||||||
+The problem is even worse because journald caches pid for some time
|
|
||||||
+(approx. 5 sec) so initial corosync messages have correct metadata.
|
|
||||||
|
|
||||||
.TP
|
|
||||||
allow_knet_handle_fallback
|
|
||||||
--
|
|
||||||
2.27.0
|
|
||||||
|
|
@ -17,14 +17,12 @@
|
|||||||
|
|
||||||
Name: corosync
|
Name: corosync
|
||||||
Summary: The Corosync Cluster Engine and Application Programming Interfaces
|
Summary: The Corosync Cluster Engine and Application Programming Interfaces
|
||||||
Version: 3.1.4
|
Version: 3.1.5
|
||||||
Release: 3%{?gitver}%{?dist}
|
Release: 1%{?gitver}%{?dist}
|
||||||
License: BSD
|
License: BSD
|
||||||
URL: http://corosync.github.io/corosync/
|
URL: http://corosync.github.io/corosync/
|
||||||
Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz
|
Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz
|
||||||
|
|
||||||
Patch0: bz1948974-1-main-Add-support-for-cgroup-v2-and-auto-mode.patch
|
|
||||||
|
|
||||||
# Runtime bits
|
# Runtime bits
|
||||||
# The automatic dependency overridden in favor of explicit version lock
|
# The automatic dependency overridden in favor of explicit version lock
|
||||||
Requires: corosynclib%{?_isa} = %{version}-%{release}
|
Requires: corosynclib%{?_isa} = %{version}-%{release}
|
||||||
@ -73,7 +71,6 @@ BuildRequires: make
|
|||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -q -n %{name}-%{version}%{?gittarver}
|
%setup -q -n %{name}-%{version}%{?gittarver}
|
||||||
%patch0 -p1 -b .bz1948974-1
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%if %{with runautogen}
|
%if %{with runautogen}
|
||||||
@ -292,6 +289,11 @@ network splits)
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Aug 04 2021 Jan Friesse <jfriesse@redhat.com> - 3.1.5-1
|
||||||
|
- Related: rhbz#1948974
|
||||||
|
|
||||||
|
- New upstream release (rhbz#1948974)
|
||||||
|
|
||||||
* Fri Jul 23 2021 Jan Friesse <jfriesse@redhat.com> - 3.1.4-3
|
* Fri Jul 23 2021 Jan Friesse <jfriesse@redhat.com> - 3.1.4-3
|
||||||
- Related: rhbz#1948974
|
- Related: rhbz#1948974
|
||||||
|
|
||||||
|
2
sources
2
sources
@ -1 +1 @@
|
|||||||
SHA512 (corosync-3.1.4.tar.gz) = 3e8ed70653d2559449765dac91f3ad87b8a474b81529895a7b33dee044946e05b08dbf9f36da5be60d93527976ac0e390beb3ce2f77d088613fdb45e940558ec
|
SHA512 (corosync-3.1.5.tar.gz) = eb974a32f60c52564057ed41c1ebf31fe4332a5a082ebbd5fa2540af8fa9e8c0c42d4ef9066abcb9d7dd04c12b97cd13642289c65b5b6b65cfd30c12641ada1d
|
||||||
|
Loading…
Reference in New Issue
Block a user