diff --git a/089345f77a34d1bc7ef146d650636afcd3cdda21.patch b/089345f77a34d1bc7ef146d650636afcd3cdda21.patch new file mode 100644 index 0000000..d8aef52 --- /dev/null +++ b/089345f77a34d1bc7ef146d650636afcd3cdda21.patch @@ -0,0 +1,87 @@ +From 089345f77a34d1bc7ef146d650636afcd3cdda21 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Wed, 10 Jul 2024 18:34:50 +0200 +Subject: [PATCH] Adjust to glibc __rseq_size semantic change + +In commit 2e456ccf0c34a056e3ccafac4a0c7effef14d918 ("Linux: Make +__rseq_size useful for feature detection (bug 31965)") glibc 2.40 +changed the meaning of __rseq_size slightly: it is now the size +of the active/feature area (20 bytes initially), and not the size +of the entire initially defined struct (32 bytes including padding). +The reason for the change is that the size including padding does not +allow detection of newly added features while previously unused +padding is consumed. + +The prep_libc_rseq_info change in criu/cr-restore.c is not necessary +on kernels which have full ptrace support for obtaining rseq +information because the code is not used. On older kernels, it is +a correctness fix because with size 20 (the new value), rseq +registration would fail. + +The two other changes are required to make rseq unregistration work +in tests. 
+ +Signed-off-by: Florian Weimer +--- + criu/cr-restore.c | 8 ++++++++ + test/zdtm/static/rseq00.c | 5 ++++- + test/zdtm/transition/rseq01.c | 5 ++++- + 3 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/criu/cr-restore.c b/criu/cr-restore.c +index 4db2f4ecfc..b95d4f134b 100644 +--- a/criu/cr-restore.c ++++ b/criu/cr-restore.c +@@ -2618,7 +2618,15 @@ static void prep_libc_rseq_info(struct rst_rseq_param *rseq) + if (!kdat.has_ptrace_get_rseq_conf) { + #if defined(__GLIBC__) && defined(RSEQ_SIG) + rseq->rseq_abi_pointer = encode_pointer(__criu_thread_pointer() + __rseq_offset); ++ /* ++ * Current glibc reports the feature/active size in ++ * __rseq_size, not the size passed to the kernel. ++ * This could be 20, but older kernels expect 32 for ++ * the size argument even if only 20 bytes are used. ++ */ + rseq->rseq_abi_size = __rseq_size; ++ if (rseq->rseq_abi_size < 32) ++ rseq->rseq_abi_size = 32; + rseq->signature = RSEQ_SIG; + #else + rseq->rseq_abi_pointer = 0; +diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c +index 471ad6a43f..7add7801eb 100644 +--- a/test/zdtm/static/rseq00.c ++++ b/test/zdtm/static/rseq00.c +@@ -46,12 +46,15 @@ static inline void *__criu_thread_pointer(void) + static inline void unregister_glibc_rseq(void) + { + struct rseq *rseq = (struct rseq *)((char *)__criu_thread_pointer() + __rseq_offset); ++ unsigned int size = __rseq_size; + + /* hack: mark glibc rseq structure as failed to register */ + rseq->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; + + /* unregister rseq */ +- syscall(__NR_rseq, (void *)rseq, __rseq_size, 1, RSEQ_SIG); ++ if (__rseq_size < 32) ++ size = 32; ++ syscall(__NR_rseq, (void *)rseq, size, 1, RSEQ_SIG); + } + #else + static inline void unregister_glibc_rseq(void) +diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c +index 0fbcc2dca0..08a7a8e1a6 100644 +--- a/test/zdtm/transition/rseq01.c ++++ b/test/zdtm/transition/rseq01.c +@@ -33,7 +33,10 @@ static inline void 
*thread_pointer(void) + static inline void unregister_old_rseq(void) + { + /* unregister rseq */ +- syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); ++ unsigned int size = __rseq_size; ++ if (__rseq_size < 32) ++ size = 32; ++ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), size, 1, RSEQ_SIG); + } + #else + static inline void unregister_old_rseq(void) diff --git a/criu.spec b/criu.spec index ed43038..cc51007 100644 --- a/criu.spec +++ b/criu.spec @@ -12,11 +12,24 @@ Name: criu Version: 3.19 -Release: 6%{?dist} +Release: 7%{?dist} Summary: Tool for Checkpoint/Restore in User-space License: GPL-2.0-only AND LGPL-2.1-only AND MIT URL: http://criu.org/ Source0: https://github.com/checkpoint-restore/criu/archive/v%{version}/criu-%{version}.tar.gz +# This switches the default network locking backend from +# iptables to nftables +Patch0: network.lock.nftables.patch +# Update restartable sequences to latest upstream code +Patch1: https://github.com/checkpoint-restore/criu/commit/089345f77a34d1bc7ef146d650636afcd3cdda21.patch +# Upstream tracked as +# https://github.com/checkpoint-restore/criu/pull/2549 +# https://github.com/checkpoint-restore/criu/pull/2550 +Patch2: nftables.chain.patch +# Unfortunately crun added code to always force +# iptables backed network locking. This disables +# setting the network locking to iptables via RPC. +Patch3: disable.network.locking.via.rpc.patch # Add protobuf-c as a dependency. # We use this patch because the protobuf-c package name @@ -37,11 +50,11 @@ BuildRequires: asciidoctor BuildRequires: perl-interpreter BuildRequires: libselinux-devel BuildRequires: gnutls-devel +BuildRequires: nftables-devel # Checkpointing containers with a tmpfs requires tar Recommends: tar %if 0%{?fedora} BuildRequires: libbsd-devel -BuildRequires: nftables-devel %endif BuildRequires: make @@ -97,6 +110,10 @@ This script can help to workaround the so called "PID mismatch" problem. 
%prep %setup -q +%patch -P 0 -p1 +%patch -P 1 -p1 +%patch -P 2 -p1 +%patch -P 3 -p1 %patch -P 99 -p1 %build @@ -156,6 +173,9 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/libcriu.a %doc %{_mandir}/man1/criu-ns.1* %changelog +* Tue Dec 10 2024 Adrian Reber - 3.19-7 +- Switch network locking backend default to nftables + * Tue Oct 29 2024 Troy Dawson - 3.19-6 - Bump release for October 2024 mass rebuild: Resolves: RHEL-64018 diff --git a/disable.network.locking.via.rpc.patch b/disable.network.locking.via.rpc.patch new file mode 100644 index 0000000..1a25b1d --- /dev/null +++ b/disable.network.locking.via.rpc.patch @@ -0,0 +1,12 @@ +diff -ur ../criu-3.19/criu/cr-service.c criu-3.19/criu/cr-service.c +--- ../criu-3.19/criu/cr-service.c 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/criu/cr-service.c 2024-12-17 19:53:43.865616992 +0100 +@@ -570,7 +570,7 @@ + if (req->has_network_lock) { + switch (req->network_lock) { + case CRIU_NETWORK_LOCK_METHOD__IPTABLES: +- opts.network_lock_method = NETWORK_LOCK_IPTABLES; ++ opts.network_lock_method = NETWORK_LOCK_NFTABLES; + break; + case CRIU_NETWORK_LOCK_METHOD__NFTABLES: + opts.network_lock_method = NETWORK_LOCK_NFTABLES; diff --git a/network.lock.nftables.patch b/network.lock.nftables.patch new file mode 100644 index 0000000..c74ca04 --- /dev/null +++ b/network.lock.nftables.patch @@ -0,0 +1,11 @@ +--- a/criu/include/cr_options.h.orig 2024-12-10 16:57:20.061293476 +0100 ++++ b/criu/include/cr_options.h 2024-12-10 16:57:34.789131372 +0100 +@@ -70,7 +70,7 @@ + NETWORK_LOCK_SKIP, + }; + +-#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_IPTABLES ++#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_NFTABLES + + /* + * Ghost file size we allow to carry by default. 
diff --git a/nftables.chain.patch b/nftables.chain.patch new file mode 100644 index 0000000..f61d339 --- /dev/null +++ b/nftables.chain.patch @@ -0,0 +1,205 @@ +diff -ur ../criu-3.19/criu/cr-dump.c criu-3.19/criu/cr-dump.c +--- ../criu-3.19/criu/cr-dump.c 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/criu/cr-dump.c 2024-12-17 09:53:58.545908685 +0100 +@@ -2182,7 +2182,7 @@ + if (collect_pstree_ids()) + goto err; + +- if (network_lock()) ++ if (network_lock(&he)) + goto err; + + if (rpc_query_external_files()) +diff -ur ../criu-3.19/criu/cr-restore.c criu-3.19/criu/cr-restore.c +--- ../criu-3.19/criu/cr-restore.c 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/criu/cr-restore.c 2024-12-17 09:29:47.771542239 +0100 +@@ -2359,7 +2359,7 @@ + * the '--empty-ns net' mode no iptables C/R is done and we + * need to return these rules by hands. + */ +- ret = network_lock_internal(); ++ ret = network_lock_internal(NULL); + if (ret) + goto out_kill; + } +diff -ur ../criu-3.19/criu/image.c criu-3.19/criu/image.c +--- ../criu-3.19/criu/image.c 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/criu/image.c 2024-12-17 09:56:13.751949657 +0100 +@@ -25,6 +25,7 @@ + TaskKobjIdsEntry *root_ids; + u32 root_cg_set; + Lsmtype image_lsm; ++char nft_lock_table[32]; + + int check_img_inventory(bool restore) + { +@@ -99,6 +100,9 @@ + } else { + opts.network_lock_method = he->network_lock_method; + } ++ ++ if (he->nft_lock_table) ++ strncpy(nft_lock_table, he->nft_lock_table, sizeof(nft_lock_table) - 1); + } + + ret = 0; +diff -ur ../criu-3.19/criu/include/net.h criu-3.19/criu/include/net.h +--- ../criu-3.19/criu/include/net.h 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/criu/include/net.h 2024-12-17 09:27:40.578168778 +0100 +@@ -29,9 +29,10 @@ + + extern int collect_net_namespaces(bool for_dump); + +-extern int network_lock(void); ++#include "images/inventory.pb-c.h" ++extern int network_lock(InventoryEntry *he); + extern void network_unlock(void); +-extern int 
network_lock_internal(void); ++extern int network_lock_internal(InventoryEntry *he); + + extern struct ns_desc net_ns_desc; + +diff -ur ../criu-3.19/criu/net.c criu-3.19/criu/net.c +--- ../criu-3.19/criu/net.c 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/criu/net.c 2024-12-17 09:53:25.370199544 +0100 +@@ -229,6 +229,8 @@ + "max_dgram_qlen", + }; + ++extern char nft_lock_table[32]; ++ + /* + * MAX_CONF_UNIX_PATH = (sizeof(CONF_UNIX_FMT) - strlen("%s")) + * + MAX_CONF_UNIX_OPT_PATH +@@ -3053,21 +3055,34 @@ + return ret; + } + +-static inline int nftables_lock_network_internal(void) ++static inline int nftables_lock_network_internal(InventoryEntry *he) + { + #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + struct nft_ctx *nft; + int ret = 0; + char table[32]; + char buf[128]; ++ FILE *fp; + + if (nftables_get_table(table, sizeof(table))) + return -1; + ++ if (he) { ++ he->nft_lock_table = strdup(table); ++ } ++ + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + return -1; + ++ fp = fdopen(log_get_fd(), "w"); ++ if (!fp) { ++ pr_perror("fdopen() failed"); ++ goto err3; ++ } ++ nft_ctx_set_output(nft, fp); ++ nft_ctx_set_error(nft, fp); ++ + snprintf(buf, sizeof(buf), "create table %s", table); + if (NFT_RUN_CMD(nft, buf)) + goto err2; +@@ -3094,6 +3109,9 @@ + snprintf(buf, sizeof(buf), "delete table %s", table); + NFT_RUN_CMD(nft, buf); + err2: ++ fflush(fp); ++ fclose(fp); ++err3: + ret = -1; + pr_err("Locking network failed using nftables\n"); + out: +@@ -3130,7 +3148,7 @@ + return ret; + } + +-int network_lock_internal(void) ++int network_lock_internal(InventoryEntry *he) + { + int ret = 0, nsret; + +@@ -3143,7 +3161,7 @@ + if (opts.network_lock_method == NETWORK_LOCK_IPTABLES) + ret = iptables_network_lock_internal(); + else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES) +- ret = nftables_lock_network_internal(); ++ ret = nftables_lock_network_internal(he); + + if (restore_ns(nsret, &net_ns_desc)) + ret = -1; +@@ 
-3158,18 +3176,34 @@ + struct nft_ctx *nft; + char table[32]; + char buf[128]; ++ FILE *fp; + +- if (nftables_get_table(table, sizeof(table))) +- return -1; ++ if (nft_lock_table[0] != 0) { ++ strncpy(table, nft_lock_table, sizeof(table)); ++ } else { ++ if (nftables_get_table(table, sizeof(table))) ++ return -1; ++ } + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + return -1; + ++ fp = fdopen(log_get_fd(), "w"); ++ if (!fp) { ++ pr_perror("fdopen() failed"); ++ nft_ctx_free(nft); ++ return -1; ++ } ++ nft_ctx_set_output(nft, fp); ++ nft_ctx_set_error(nft, fp); ++ + snprintf(buf, sizeof(buf), "delete table %s", table); + if (NFT_RUN_CMD(nft, buf)) + ret = -1; + ++ fflush(fp); ++ fclose(fp); + nft_ctx_free(nft); + return ret; + #else +@@ -3216,7 +3250,7 @@ + return ret; + } + +-int network_lock(void) ++int network_lock(InventoryEntry *he) + { + pr_info("Lock network\n"); + +@@ -3230,10 +3264,10 @@ + if (run_scripts(ACT_NET_LOCK)) + return -1; + +- return network_lock_internal(); ++ return network_lock_internal(he); + } + +-void network_unlock(void) ++void network_unlock() + { + pr_info("Unlock network\n"); + +diff -ur ../criu-3.19/images/inventory.proto criu-3.19/images/inventory.proto +--- ../criu-3.19/images/inventory.proto 2023-11-28 01:47:16.000000000 +0100 ++++ criu-3.19/images/inventory.proto 2024-12-17 09:21:55.378011178 +0100 +@@ -21,4 +21,5 @@ + optional uint32 pre_dump_mode = 9; + optional bool tcp_close = 10; + optional uint32 network_lock_method = 11; ++ optional string nft_lock_table = 13; + } diff --git a/tests/run-zdtm.sh b/tests/run-zdtm.sh index 56fc8ec..c720595 100755 --- a/tests/run-zdtm.sh +++ b/tests/run-zdtm.sh @@ -4,24 +4,20 @@ set -x uname -a -# These zdtm tests are skipped because they fail only in CI system +# These zdtm tests are skipped because most of them rely +# on the iptables binary. 
EXCLUDES=" \ - -x zdtm/static/socket-tcp-reseted \ - -x zdtm/static/socket-tcp-closed \ + -x zdtm/static/net_lock_socket_iptables \ + -x zdtm/static/net_lock_socket_iptables6 \ + -x zdtm/static/netns-nf \ + -x zdtm/static/netns_lock_iptables \ -x zdtm/static/socket-tcp-closed-last-ack \ - -x zdtm/static/socket-tcp6-closed \ - -x zdtm/static/socket-tcp4v6-closed \ - -x zdtm/static/maps01 \ - -x zdtm/static/maps04 \ - -x zdtm/static/cgroup04 \ - -x zdtm/static/cgroup_ifpriomap \ - -x zdtm/static/netns_sub \ - -x zdtm/static/netns_sub_veth \ - -x zdtm/static/file_locks01 \ + -x zdtm/static/socket-tcp-nfconntrack \ + -x zdtm/static/socket-tcp-reseted \ + -x zdtm/static/socket-tcp-syn-sent \ -x zdtm/static/mntns_link_remap \ -x zdtm/static/unlink_fstat03 \ - -x zdtm/static/unlink_regular00 \ - -x zdtm/static/cgroup02 " + -x zdtm/static/unlink_regular00 " run_test() { ./zdtm.py run --criu-bin /usr/sbin/criu ${EXCLUDES} \ @@ -33,11 +29,7 @@ run_test() { RESULT=42 -# F30, F29 do not provide python -> python3 symlink -test -e /usr/bin/python || ln -sf /usr/bin/python3 /usr/bin/python -python -V - -# this socket brakes CRIU's test cases +# this socket breaks CRIU's test cases rm -f /var/lib/sss/pipes/nss cd source @@ -45,6 +37,9 @@ cd source echo "Build CRIU" make +which criu +rpm -qf `which criu` + cd test echo "Run the actual CRIU test suite" diff --git a/tests/tests.yml b/tests/tests.yml index 6e498e2..c9e4055 100644 --- a/tests/tests.yml +++ b/tests/tests.yml @@ -26,7 +26,6 @@ - nftables-devel - python3-pyyaml - python3-protobuf - - python-unversioned-command tests: - zdtm: dir: .