Switch network locking backend default to nftables

Resolves: RHEL-58354

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber 2024-10-29 12:35:29 +01:00
parent 0793de91e2
commit d866d1994f
No known key found for this signature in database
GPG Key ID: 82C9378ED3C4906A
7 changed files with 351 additions and 22 deletions

View File

@ -0,0 +1,87 @@
From 089345f77a34d1bc7ef146d650636afcd3cdda21 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Wed, 10 Jul 2024 18:34:50 +0200
Subject: [PATCH] Adjust to glibc __rseq_size semantic change
In commit 2e456ccf0c34a056e3ccafac4a0c7effef14d918 ("Linux: Make
__rseq_size useful for feature detection (bug 31965)") glibc 2.40
changed the meaning of __rseq_size slightly: it is now the size
of the active/feature area (20 bytes initially), and not the size
of the entire initially defined struct (32 bytes including padding).
The reason for the change is that the size including padding does not
allow detection of newly added features while previously unused
padding is consumed.
The prep_libc_rseq_info change in criu/cr-restore.c is not necessary
on kernels which have full ptrace support for obtaining rseq
information because the code is not used. On older kernels, it is
a correctness fix because with size 20 (the new value), rseq
registration would fail.
The two other changes are required to make rseq unregistration work
in tests.
Signed-off-by: Florian Weimer <fweimer@redhat.com>
---
criu/cr-restore.c | 8 ++++++++
test/zdtm/static/rseq00.c | 5 ++++-
test/zdtm/transition/rseq01.c | 5 ++++-
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 4db2f4ecfc..b95d4f134b 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2618,7 +2618,15 @@ static void prep_libc_rseq_info(struct rst_rseq_param *rseq)
if (!kdat.has_ptrace_get_rseq_conf) {
#if defined(__GLIBC__) && defined(RSEQ_SIG)
rseq->rseq_abi_pointer = encode_pointer(__criu_thread_pointer() + __rseq_offset);
+ /*
+ * Current glibc reports the feature/active size in
+ * __rseq_size, not the size passed to the kernel.
+ * This could be 20, but older kernels expect 32 for
+ * the size argument even if only 20 bytes are used.
+ */
rseq->rseq_abi_size = __rseq_size;
+ if (rseq->rseq_abi_size < 32)
+ rseq->rseq_abi_size = 32;
rseq->signature = RSEQ_SIG;
#else
rseq->rseq_abi_pointer = 0;
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
index 471ad6a43f..7add7801eb 100644
--- a/test/zdtm/static/rseq00.c
+++ b/test/zdtm/static/rseq00.c
@@ -46,12 +46,15 @@ static inline void *__criu_thread_pointer(void)
static inline void unregister_glibc_rseq(void)
{
struct rseq *rseq = (struct rseq *)((char *)__criu_thread_pointer() + __rseq_offset);
+ unsigned int size = __rseq_size;
/* hack: mark glibc rseq structure as failed to register */
rseq->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
/* unregister rseq */
- syscall(__NR_rseq, (void *)rseq, __rseq_size, 1, RSEQ_SIG);
+ if (__rseq_size < 32)
+ size = 32;
+ syscall(__NR_rseq, (void *)rseq, size, 1, RSEQ_SIG);
}
#else
static inline void unregister_glibc_rseq(void)
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
index 0fbcc2dca0..08a7a8e1a6 100644
--- a/test/zdtm/transition/rseq01.c
+++ b/test/zdtm/transition/rseq01.c
@@ -33,7 +33,10 @@ static inline void *thread_pointer(void)
static inline void unregister_old_rseq(void)
{
/* unregister rseq */
- syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
+ unsigned int size = __rseq_size;
+ if (__rseq_size < 32)
+ size = 32;
+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), size, 1, RSEQ_SIG);
}
#else
static inline void unregister_old_rseq(void)

View File

@ -12,11 +12,24 @@
Name: criu
Version: 3.19
Release: 6%{?dist}
Release: 7%{?dist}
Summary: Tool for Checkpoint/Restore in User-space
License: GPL-2.0-only AND LGPL-2.1-only AND MIT
URL: http://criu.org/
Source0: https://github.com/checkpoint-restore/criu/archive/v%{version}/criu-%{version}.tar.gz
# This switches the default network locking backend from
# iptables to nftables
Patch0: network.lock.nftables.patch
# Adjust restartable sequences (rseq) handling to the glibc 2.40 __rseq_size semantic change (upstream commit)
Patch1: https://github.com/checkpoint-restore/criu/commit/089345f77a34d1bc7ef146d650636afcd3cdda21.patch
# Upstream tracked as
# https://github.com/checkpoint-restore/criu/pull/2549
# https://github.com/checkpoint-restore/criu/pull/2550
Patch2: nftables.chain.patch
# Unfortunately crun added code to always force
# iptables-backed network locking. This patch makes a
# request for iptables network locking via RPC select nftables instead.
Patch3: disable.network.locking.via.rpc.patch
# Add protobuf-c as a dependency.
# We use this patch because the protobuf-c package name
@ -37,11 +50,11 @@ BuildRequires: asciidoctor
BuildRequires: perl-interpreter
BuildRequires: libselinux-devel
BuildRequires: gnutls-devel
BuildRequires: nftables-devel
# Checkpointing containers with a tmpfs requires tar
Recommends: tar
%if 0%{?fedora}
BuildRequires: libbsd-devel
BuildRequires: nftables-devel
%endif
BuildRequires: make
@ -97,6 +110,10 @@ This script can help to workaround the so called "PID mismatch" problem.
%prep
%setup -q
%patch -P 0 -p1
%patch -P 1 -p1
%patch -P 2 -p1
%patch -P 3 -p1
%patch -P 99 -p1
%build
@ -156,6 +173,9 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/libcriu.a
%doc %{_mandir}/man1/criu-ns.1*
%changelog
* Tue Dec 10 2024 Adrian Reber <areber@redhat.com> - 3.19-7
- Switch network locking backend default to nftables
* Tue Oct 29 2024 Troy Dawson <tdawson@redhat.com> - 3.19-6
- Bump release for October 2024 mass rebuild:
Resolves: RHEL-64018

View File

@ -0,0 +1,12 @@
diff -ur ../criu-3.19/criu/cr-service.c criu-3.19/criu/cr-service.c
--- ../criu-3.19/criu/cr-service.c 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/criu/cr-service.c 2024-12-17 19:53:43.865616992 +0100
@@ -570,7 +570,7 @@
if (req->has_network_lock) {
switch (req->network_lock) {
case CRIU_NETWORK_LOCK_METHOD__IPTABLES:
- opts.network_lock_method = NETWORK_LOCK_IPTABLES;
+ opts.network_lock_method = NETWORK_LOCK_NFTABLES;
break;
case CRIU_NETWORK_LOCK_METHOD__NFTABLES:
opts.network_lock_method = NETWORK_LOCK_NFTABLES;

View File

@ -0,0 +1,11 @@
--- a/criu/include/cr_options.h.orig 2024-12-10 16:57:20.061293476 +0100
+++ b/criu/include/cr_options.h 2024-12-10 16:57:34.789131372 +0100
@@ -70,7 +70,7 @@
NETWORK_LOCK_SKIP,
};
-#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_IPTABLES
+#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_NFTABLES
/*
* Ghost file size we allow to carry by default.

205
nftables.chain.patch Normal file
View File

@ -0,0 +1,205 @@
diff -ur ../criu-3.19/criu/cr-dump.c criu-3.19/criu/cr-dump.c
--- ../criu-3.19/criu/cr-dump.c 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/criu/cr-dump.c 2024-12-17 09:53:58.545908685 +0100
@@ -2182,7 +2182,7 @@
if (collect_pstree_ids())
goto err;
- if (network_lock())
+ if (network_lock(&he))
goto err;
if (rpc_query_external_files())
diff -ur ../criu-3.19/criu/cr-restore.c criu-3.19/criu/cr-restore.c
--- ../criu-3.19/criu/cr-restore.c 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/criu/cr-restore.c 2024-12-17 09:29:47.771542239 +0100
@@ -2359,7 +2359,7 @@
* the '--empty-ns net' mode no iptables C/R is done and we
* need to return these rules by hands.
*/
- ret = network_lock_internal();
+ ret = network_lock_internal(NULL);
if (ret)
goto out_kill;
}
diff -ur ../criu-3.19/criu/image.c criu-3.19/criu/image.c
--- ../criu-3.19/criu/image.c 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/criu/image.c 2024-12-17 09:56:13.751949657 +0100
@@ -25,6 +25,7 @@
TaskKobjIdsEntry *root_ids;
u32 root_cg_set;
Lsmtype image_lsm;
+char nft_lock_table[32];
int check_img_inventory(bool restore)
{
@@ -99,6 +100,9 @@
} else {
opts.network_lock_method = he->network_lock_method;
}
+
+ if (he->nft_lock_table)
+ strncpy(nft_lock_table, he->nft_lock_table, sizeof(nft_lock_table) - 1);
}
ret = 0;
diff -ur ../criu-3.19/criu/include/net.h criu-3.19/criu/include/net.h
--- ../criu-3.19/criu/include/net.h 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/criu/include/net.h 2024-12-17 09:27:40.578168778 +0100
@@ -29,9 +29,10 @@
extern int collect_net_namespaces(bool for_dump);
-extern int network_lock(void);
+#include "images/inventory.pb-c.h"
+extern int network_lock(InventoryEntry *he);
extern void network_unlock(void);
-extern int network_lock_internal(void);
+extern int network_lock_internal(InventoryEntry *he);
extern struct ns_desc net_ns_desc;
diff -ur ../criu-3.19/criu/net.c criu-3.19/criu/net.c
--- ../criu-3.19/criu/net.c 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/criu/net.c 2024-12-17 09:53:25.370199544 +0100
@@ -229,6 +229,8 @@
"max_dgram_qlen",
};
+extern char nft_lock_table[32];
+
/*
* MAX_CONF_UNIX_PATH = (sizeof(CONF_UNIX_FMT) - strlen("%s"))
* + MAX_CONF_UNIX_OPT_PATH
@@ -3053,21 +3055,34 @@
return ret;
}
-static inline int nftables_lock_network_internal(void)
+static inline int nftables_lock_network_internal(InventoryEntry *he)
{
#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
struct nft_ctx *nft;
int ret = 0;
char table[32];
char buf[128];
+ FILE *fp;
if (nftables_get_table(table, sizeof(table)))
return -1;
+ if (he) {
+ he->nft_lock_table = strdup(table);
+ }
+
nft = nft_ctx_new(NFT_CTX_DEFAULT);
if (!nft)
return -1;
+ fp = fdopen(log_get_fd(), "w");
+ if (!fp) {
+ pr_perror("fdopen() failed");
+ goto err3;
+ }
+ nft_ctx_set_output(nft, fp);
+ nft_ctx_set_error(nft, fp);
+
snprintf(buf, sizeof(buf), "create table %s", table);
if (NFT_RUN_CMD(nft, buf))
goto err2;
@@ -3094,6 +3109,9 @@
snprintf(buf, sizeof(buf), "delete table %s", table);
NFT_RUN_CMD(nft, buf);
err2:
+ fflush(fp);
+ fclose(fp);
+err3:
ret = -1;
pr_err("Locking network failed using nftables\n");
out:
@@ -3130,7 +3148,7 @@
return ret;
}
-int network_lock_internal(void)
+int network_lock_internal(InventoryEntry *he)
{
int ret = 0, nsret;
@@ -3143,7 +3161,7 @@
if (opts.network_lock_method == NETWORK_LOCK_IPTABLES)
ret = iptables_network_lock_internal();
else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
- ret = nftables_lock_network_internal();
+ ret = nftables_lock_network_internal(he);
if (restore_ns(nsret, &net_ns_desc))
ret = -1;
@@ -3158,18 +3176,34 @@
struct nft_ctx *nft;
char table[32];
char buf[128];
+ FILE *fp;
- if (nftables_get_table(table, sizeof(table)))
- return -1;
+ if (nft_lock_table[0] != 0) {
+ strncpy(table, nft_lock_table, sizeof(table));
+ } else {
+ if (nftables_get_table(table, sizeof(table)))
+ return -1;
+ }
nft = nft_ctx_new(NFT_CTX_DEFAULT);
if (!nft)
return -1;
+ fp = fdopen(log_get_fd(), "w");
+ if (!fp) {
+ pr_perror("fdopen() failed");
+ nft_ctx_free(nft);
+ return -1;
+ }
+ nft_ctx_set_output(nft, fp);
+ nft_ctx_set_error(nft, fp);
+
snprintf(buf, sizeof(buf), "delete table %s", table);
if (NFT_RUN_CMD(nft, buf))
ret = -1;
+ fflush(fp);
+ fclose(fp);
nft_ctx_free(nft);
return ret;
#else
@@ -3216,7 +3250,7 @@
return ret;
}
-int network_lock(void)
+int network_lock(InventoryEntry *he)
{
pr_info("Lock network\n");
@@ -3230,10 +3264,10 @@
if (run_scripts(ACT_NET_LOCK))
return -1;
- return network_lock_internal();
+ return network_lock_internal(he);
}
-void network_unlock(void)
+void network_unlock()
{
pr_info("Unlock network\n");
diff -ur ../criu-3.19/images/inventory.proto criu-3.19/images/inventory.proto
--- ../criu-3.19/images/inventory.proto 2023-11-28 01:47:16.000000000 +0100
+++ criu-3.19/images/inventory.proto 2024-12-17 09:21:55.378011178 +0100
@@ -21,4 +21,5 @@
optional uint32 pre_dump_mode = 9;
optional bool tcp_close = 10;
optional uint32 network_lock_method = 11;
+ optional string nft_lock_table = 13;
}

View File

@ -4,24 +4,20 @@ set -x
uname -a
# These zdtm tests are skipped because they fail only in CI system
# These zdtm tests are skipped because most of them rely
# on the iptables binary.
EXCLUDES=" \
-x zdtm/static/socket-tcp-reseted \
-x zdtm/static/socket-tcp-closed \
-x zdtm/static/net_lock_socket_iptables \
-x zdtm/static/net_lock_socket_iptables6 \
-x zdtm/static/netns-nf \
-x zdtm/static/netns_lock_iptables \
-x zdtm/static/socket-tcp-closed-last-ack \
-x zdtm/static/socket-tcp6-closed \
-x zdtm/static/socket-tcp4v6-closed \
-x zdtm/static/maps01 \
-x zdtm/static/maps04 \
-x zdtm/static/cgroup04 \
-x zdtm/static/cgroup_ifpriomap \
-x zdtm/static/netns_sub \
-x zdtm/static/netns_sub_veth \
-x zdtm/static/file_locks01 \
-x zdtm/static/socket-tcp-nfconntrack \
-x zdtm/static/socket-tcp-reseted \
-x zdtm/static/socket-tcp-syn-sent \
-x zdtm/static/mntns_link_remap \
-x zdtm/static/unlink_fstat03 \
-x zdtm/static/unlink_regular00 \
-x zdtm/static/cgroup02 "
-x zdtm/static/unlink_regular00 "
run_test() {
./zdtm.py run --criu-bin /usr/sbin/criu ${EXCLUDES} \
@ -33,11 +29,7 @@ run_test() {
RESULT=42
# F30, F29 do not provide python -> python3 symlink
test -e /usr/bin/python || ln -sf /usr/bin/python3 /usr/bin/python
python -V
# this socket brakes CRIU's test cases
# this socket breaks CRIU's test cases
rm -f /var/lib/sss/pipes/nss
cd source
@ -45,6 +37,9 @@ cd source
echo "Build CRIU"
make
which criu
rpm -qf `which criu`
cd test
echo "Run the actual CRIU test suite"

View File

@ -26,7 +26,6 @@
- nftables-devel
- python3-pyyaml
- python3-protobuf
- python-unversioned-command
tests:
- zdtm:
dir: .