Update to 4.1

Resolves: RHEL-89354

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber 2025-05-12 11:38:24 +02:00
parent 3bb91bc1af
commit a5fe1ad3da
13 changed files with 345 additions and 1212 deletions

1
.gitignore vendored
View File

@ -56,3 +56,4 @@
/criu-3.17.1.tar.gz
/criu-3.18.tar.gz
/criu-3.19.tar.gz
/criu-4.1.tar.gz

View File

@ -1,87 +0,0 @@
From 089345f77a34d1bc7ef146d650636afcd3cdda21 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Wed, 10 Jul 2024 18:34:50 +0200
Subject: [PATCH] Adjust to glibc __rseq_size semantic change
In commit 2e456ccf0c34a056e3ccafac4a0c7effef14d918 ("Linux: Make
__rseq_size useful for feature detection (bug 31965)") glibc 2.40
changed the meaning of __rseq_size slightly: it is now the size
of the active/feature area (20 bytes initially), and not the size
of the entire initially defined struct (32 bytes including padding).
The reason for the change is that the size including padding does not
allow detection of newly added features while previously unused
padding is consumed.
The prep_libc_rseq_info change in criu/cr-restore.c is not necessary
on kernels which have full ptrace support for obtaining rseq
information because the code is not used. On older kernels, it is
a correctness fix because with size 20 (the new value), rseq
registration would fail.
The two other changes are required to make rseq unregistration work
in tests.
Signed-off-by: Florian Weimer <fweimer@redhat.com>
---
criu/cr-restore.c | 8 ++++++++
test/zdtm/static/rseq00.c | 5 ++++-
test/zdtm/transition/rseq01.c | 5 ++++-
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 4db2f4ecfc..b95d4f134b 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2618,7 +2618,15 @@ static void prep_libc_rseq_info(struct rst_rseq_param *rseq)
if (!kdat.has_ptrace_get_rseq_conf) {
#if defined(__GLIBC__) && defined(RSEQ_SIG)
rseq->rseq_abi_pointer = encode_pointer(__criu_thread_pointer() + __rseq_offset);
+ /*
+ * Current glibc reports the feature/active size in
+ * __rseq_size, not the size passed to the kernel.
+ * This could be 20, but older kernels expect 32 for
+ * the size argument even if only 20 bytes are used.
+ */
rseq->rseq_abi_size = __rseq_size;
+ if (rseq->rseq_abi_size < 32)
+ rseq->rseq_abi_size = 32;
rseq->signature = RSEQ_SIG;
#else
rseq->rseq_abi_pointer = 0;
diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c
index 471ad6a43f..7add7801eb 100644
--- a/test/zdtm/static/rseq00.c
+++ b/test/zdtm/static/rseq00.c
@@ -46,12 +46,15 @@ static inline void *__criu_thread_pointer(void)
static inline void unregister_glibc_rseq(void)
{
struct rseq *rseq = (struct rseq *)((char *)__criu_thread_pointer() + __rseq_offset);
+ unsigned int size = __rseq_size;
/* hack: mark glibc rseq structure as failed to register */
rseq->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
/* unregister rseq */
- syscall(__NR_rseq, (void *)rseq, __rseq_size, 1, RSEQ_SIG);
+ if (__rseq_size < 32)
+ size = 32;
+ syscall(__NR_rseq, (void *)rseq, size, 1, RSEQ_SIG);
}
#else
static inline void unregister_glibc_rseq(void)
diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c
index 0fbcc2dca0..08a7a8e1a6 100644
--- a/test/zdtm/transition/rseq01.c
+++ b/test/zdtm/transition/rseq01.c
@@ -33,7 +33,10 @@ static inline void *thread_pointer(void)
static inline void unregister_old_rseq(void)
{
/* unregister rseq */
- syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG);
+ unsigned int size = __rseq_size;
+ if (__rseq_size < 32)
+ size = 32;
+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), size, 1, RSEQ_SIG);
}
#else
static inline void unregister_old_rseq(void)

View File

@ -1,128 +0,0 @@
From 0a17c4160580d9bc7092ba9eb7db86952921d221 Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Thu, 16 Jan 2025 07:52:42 +0000
Subject: [PATCH 1/2] util: added cleanup_file attribute.
Signed-off-by: Adrian Reber <areber@redhat.com>
---
criu/include/util.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/criu/include/util.h b/criu/include/util.h
index ae293a68c8..4793f7f20e 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -406,6 +406,14 @@ static inline void cleanup_freep(void *p)
free(*pp);
}
+#define cleanup_file __attribute__((cleanup(cleanup_filep)))
+static inline void cleanup_filep(FILE **f)
+{
+ FILE *file = *f;
+ if (file)
+ (void)fclose(file);
+}
+
extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args);
/*
From 1ed4109958644fbe1cbadf7c72472c82a12834b0 Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Tue, 17 Dec 2024 08:52:46 +0100
Subject: [PATCH 2/2] net: redirect nftables stdout and stderr to CRIU's log
file
When using the nftables network locking backend and restoring a process
a second time the network locking has already been deleted by the first
restore. The second restore will print out to the console text like:
Error: Could not process rule: No such file or directory
delete table inet CRIU-202621
With this change CRIU's log FD is used by libnftables stdout and stderr.
Signed-off-by: Adrian Reber <areber@redhat.com>
---
criu/net.c | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/criu/net.c b/criu/net.c
index eee3311087..efd52db327 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -3066,9 +3066,43 @@ static int iptables_restore(bool ipv6, char *buf, int size)
return ret;
}
+#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+static inline FILE *redirect_nftables_output(struct nft_ctx *nft)
+{
+ FILE *fp;
+ int fd;
+
+ fd = dup(log_get_fd());
+ if (fd < 0) {
+ pr_perror("dup() to redirect nftables output failed");
+ return NULL;
+ }
+
+ fp = fdopen(fd, "w");
+ if (!fp) {
+ pr_perror("fdopen() to redirect nftables output failed");
+ return NULL;
+ }
+
+ /**
+ * Without setvbuf() the output from libnftables will be
+ * somewhere in the log file, probably at the end.
+ * With setvbuf() potential output will be at the correct
+ * position.
+ */
+ setvbuf(fp, NULL, _IONBF, 0);
+
+ nft_ctx_set_output(nft, fp);
+ nft_ctx_set_error(nft, fp);
+
+ return fp;
+}
+#endif
+
static inline int nftables_lock_network_internal(void)
{
#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
+ cleanup_file FILE *fp = NULL;
struct nft_ctx *nft;
int ret = 0;
char table[32];
@@ -3081,6 +3115,10 @@ static inline int nftables_lock_network_internal(void)
if (!nft)
return -1;
+ fp = redirect_nftables_output(nft);
+ if (!fp)
+ goto out;
+
snprintf(buf, sizeof(buf), "create table %s", table);
if (NFT_RUN_CMD(nft, buf))
goto err2;
@@ -3168,6 +3206,7 @@ static inline int nftables_network_unlock(void)
{
#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
int ret = 0;
+ cleanup_file FILE *fp = NULL;
struct nft_ctx *nft;
char table[32];
char buf[128];
@@ -3179,6 +3218,10 @@ static inline int nftables_network_unlock(void)
if (!nft)
return -1;
+ fp = redirect_nftables_output(nft);
+ if (!fp)
+ return -1;
+
snprintf(buf, sizeof(buf), "delete table %s", table);
if (NFT_RUN_CMD(nft, buf))
ret = -1;

View File

@ -1,473 +0,0 @@
From 9a2b7d6b3baa2b3183489ed9cebece039f9f488f Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Thu, 23 Jan 2025 09:26:15 +0000
Subject: [PATCH 1/2] criu: use libuuid for criu_run_id generation
criu_run_id will be used in upcoming changes to create and remove
network rules for network locking. Instead of trying to come up with
a way to create unique IDs, just use an existing library.
libuuid should be installed on most systems as it is indirectly required
by systemd (via libmount).
Signed-off-by: Adrian Reber <areber@redhat.com>
---
.cirrus.yml | 2 +-
.github/workflows/check-commits.yml | 2 +-
compel/include/uapi/infect-util.h | 11 ++++++++++-
compel/src/lib/infect-util.c | 2 +-
compel/src/lib/infect.c | 2 +-
criu/Makefile.packages | 4 +++-
criu/fdstore.c | 2 +-
criu/files.c | 2 +-
criu/include/util.h | 4 +++-
criu/pidfd-store.c | 2 +-
criu/unittest/mock.c | 4 +++-
criu/util.c | 17 +++++++----------
scripts/build/Dockerfile.alpine | 3 ++-
scripts/build/Dockerfile.amd-rocm | 1 +
scripts/build/Dockerfile.archlinux | 1 +
scripts/build/Dockerfile.hotspot-alpine | 1 +
scripts/build/Dockerfile.hotspot-ubuntu | 1 +
scripts/build/Dockerfile.linux32.tmpl | 1 +
scripts/build/Dockerfile.openj9-ubuntu | 1 +
.../build/Dockerfile.riscv64-stable-cross.tmpl | 1 +
scripts/build/Dockerfile.stable-cross.tmpl | 1 +
scripts/build/Dockerfile.tmpl | 1 +
scripts/build/Dockerfile.unstable-cross.tmpl | 1 +
scripts/ci/prepare-for-fedora-rawhide.sh | 1 +
scripts/ci/run-ci-tests.sh | 2 +-
scripts/ci/vagrant.sh | 2 +-
26 files changed, 48 insertions(+), 24 deletions(-)
diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h
index ace6f6b6b1..658df9393d 100644
--- a/compel/include/uapi/infect-util.h
+++ b/compel/include/uapi/infect-util.h
@@ -3,11 +3,20 @@
#include "common/compiler.h"
+/**
+ * The length of the hash is based on what libuuid provides.
+ * According to the manpage this is:
+ *
+ * The uuid_unparse() function converts the supplied UUID uu from the binary
+ * representation into a 36-byte string (plus trailing '\0')
+ */
+#define RUN_ID_HASH_LENGTH 37
+
/*
* compel_run_id is a unique value of the current run. It can be used to
* generate resource ID-s to avoid conflicts with other processes.
*/
-extern uint64_t compel_run_id;
+extern char compel_run_id[RUN_ID_HASH_LENGTH];
struct parasite_ctl;
extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd);
diff --git a/compel/src/lib/infect-util.c b/compel/src/lib/infect-util.c
index 00a7c83f7d..dc57e28f7c 100644
--- a/compel/src/lib/infect-util.c
+++ b/compel/src/lib/infect-util.c
@@ -7,7 +7,7 @@
#include "infect-rpc.h"
#include "infect-util.h"
-uint64_t compel_run_id;
+char compel_run_id[RUN_ID_HASH_LENGTH];
int compel_util_send_fd(struct parasite_ctl *ctl, int fd)
{
diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c
index 1e3ffb9670..caf54e03fd 100644
--- a/compel/src/lib/infect.c
+++ b/compel/src/lib/infect.c
@@ -427,7 +427,7 @@ static int gen_parasite_saddr(struct sockaddr_un *saddr, int key)
int sun_len;
saddr->sun_family = AF_UNIX;
- snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%" PRIx64, key, compel_run_id);
+ snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%s", key, compel_run_id);
sun_len = SUN_LEN(saddr);
*saddr->sun_path = '\0';
diff --git a/criu/Makefile.packages b/criu/Makefile.packages
index 7f6113c8f1..3e2e6efd18 100644
--- a/criu/Makefile.packages
+++ b/criu/Makefile.packages
@@ -6,6 +6,7 @@ REQ-RPM-PKG-NAMES += protobuf-devel
REQ-RPM-PKG-NAMES += protobuf-python
REQ-RPM-PKG-NAMES += libnl3-devel
REQ-RPM-PKG-NAMES += libcap-devel
+REQ-RPM-PKG-NAMES += libuuid-devel
REQ-RPM-PKG-TEST-NAMES += libaio-devel
@@ -16,6 +17,7 @@ REQ-DEB-PKG-NAMES += protobuf-compiler
REQ-DEB-PKG-NAMES += $(PYTHON)-protobuf
REQ-DEB-PKG-NAMES += libnl-3-dev
REQ-DEB-PKG-NAMES += libcap-dev
+REQ-DEB-PKG-NAMES += uuid-dev
REQ-DEB-PKG-TEST-NAMES += $(PYTHON)-yaml
REQ-DEB-PKG-TEST-NAMES += libaio-dev
@@ -25,7 +27,7 @@ REQ-DEB-PKG-TEST-NAMES += libaio-dev
REQ-RPM-PKG-TEST-NAMES += $(PYTHON)-PyYAML
-export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
+export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -luuid
check-packages-failed:
$(warning Can not find some of the required libraries)
diff --git a/criu/fdstore.c b/criu/fdstore.c
index d615ad15d0..6ac639c553 100644
--- a/criu/fdstore.c
+++ b/criu/fdstore.c
@@ -58,7 +58,7 @@ int fdstore_init(void)
}
addr.sun_family = AF_UNIX;
- addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%" PRIx64, st.st_ino,
+ addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%s", st.st_ino,
criu_run_id);
addrlen += sizeof(addr.sun_family);
diff --git a/criu/files.c b/criu/files.c
index 31e705bcc5..f16ec32a23 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -978,7 +978,7 @@ static int receive_fd(struct fdinfo_list_entry *fle);
static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
{
addr->sun_family = AF_UNIX;
- snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%" PRIx64, pid, criu_run_id);
+ snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%s", pid, criu_run_id);
*len = SUN_LEN(addr);
*addr->sun_path = '\0';
}
diff --git a/criu/include/util.h b/criu/include/util.h
index 4793f7f20e..194e94deeb 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -21,6 +21,8 @@
#include "log.h"
#include "common/err.h"
+#include "compel/infect-util.h"
+
#define PREF_SHIFT_OP(pref, op, size) ((size)op(pref##BYTES_SHIFT))
#define KBYTES_SHIFT 10
#define MBYTES_SHIFT 20
@@ -420,7 +422,7 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void
* criu_run_id is a unique value of the current run. It can be used to
* generate resource ID-s to avoid conflicts with other CRIU processes.
*/
-extern uint64_t criu_run_id;
+extern char criu_run_id[RUN_ID_HASH_LENGTH];
extern void util_init(void);
extern char *resolve_mountpoint(char *path);
diff --git a/criu/pidfd-store.c b/criu/pidfd-store.c
index 9fdc74cb74..110f7802a2 100644
--- a/criu/pidfd-store.c
+++ b/criu/pidfd-store.c
@@ -99,7 +99,7 @@ int init_pidfd_store_sk(pid_t pid, int sk)
goto err;
}
- addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%" PRIx64, pid, sk,
+ addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%s", pid, sk,
criu_run_id);
addrlen += sizeof(addr.sun_family);
diff --git a/criu/unittest/mock.c b/criu/unittest/mock.c
index e517720e42..b2d5072787 100644
--- a/criu/unittest/mock.c
+++ b/criu/unittest/mock.c
@@ -5,6 +5,8 @@
#include <stdint.h>
#include <stdlib.h>
+#include "compel/infect-util.h"
+
int add_external(char *key)
{
return 0;
@@ -141,4 +143,4 @@ int check_mount_v2(void)
return 0;
}
-uint64_t compel_run_id;
+char compel_run_id[RUN_ID_HASH_LENGTH];
diff --git a/criu/util.c b/criu/util.c
index d2bc9a8657..58c18e20be 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -28,6 +28,7 @@
#include <ftw.h>
#include <time.h>
#include <libgen.h>
+#include <uuid/uuid.h>
#include "linux/mount.h"
@@ -2026,20 +2027,16 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
return fret;
}
-uint64_t criu_run_id;
+char criu_run_id[RUN_ID_HASH_LENGTH];
void util_init(void)
{
- struct stat statbuf;
+ uuid_t uuid;
- criu_run_id = getpid();
- if (!stat("/proc/self/ns/pid", &statbuf))
- criu_run_id |= (uint64_t)statbuf.st_ino << 32;
- else if (errno != ENOENT)
- pr_perror("Can't stat /proc/self/ns/pid - CRIU run id might not be unique");
-
- compel_run_id = criu_run_id;
- pr_info("CRIU run id = %#" PRIx64 "\n", criu_run_id);
+ uuid_generate(uuid);
+ uuid_unparse(uuid, criu_run_id);
+ pr_info("CRIU run id = %s\n", criu_run_id);
+ memcpy(compel_run_id, criu_run_id, sizeof(criu_run_id));
}
/*
diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine
index 329d7791de..d843793ea2 100644
--- a/scripts/build/Dockerfile.alpine
+++ b/scripts/build/Dockerfile.alpine
@@ -24,7 +24,8 @@ RUN apk update && apk add \
sudo \
libcap-utils \
libdrm-dev \
- util-linux
+ util-linux \
+ util-linux-dev
COPY . /criu
WORKDIR /criu
diff --git a/scripts/build/Dockerfile.amd-rocm b/scripts/build/Dockerfile.amd-rocm
index c466a73d2d..ed66ae4fec 100644
--- a/scripts/build/Dockerfile.amd-rocm
+++ b/scripts/build/Dockerfile.amd-rocm
@@ -56,6 +56,7 @@ RUN apt-get clean -qqy && apt-get update -qqy && apt-get install -qqy --no-insta
python-protobuf \
python3-minimal \
python-ipaddress \
+ uuid-dev \
curl \
wget \
vim \
diff --git a/scripts/build/Dockerfile.archlinux b/scripts/build/Dockerfile.archlinux
index 4056514891..9d11194bb0 100644
--- a/scripts/build/Dockerfile.archlinux
+++ b/scripts/build/Dockerfile.archlinux
@@ -35,6 +35,7 @@ RUN pacman -Syu --noconfirm \
python-junit-xml \
python-importlib-metadata \
libdrm \
+ util-linux-libs \
diffutils
COPY . /criu
diff --git a/scripts/build/Dockerfile.hotspot-alpine b/scripts/build/Dockerfile.hotspot-alpine
index cb9332fd0c..6caf9d0b1b 100644
--- a/scripts/build/Dockerfile.hotspot-alpine
+++ b/scripts/build/Dockerfile.hotspot-alpine
@@ -19,6 +19,7 @@ RUN apk update && apk add \
maven \
ip6tables \
iptables \
+ util-linux-dev \
bash
COPY . /criu
diff --git a/scripts/build/Dockerfile.hotspot-ubuntu b/scripts/build/Dockerfile.hotspot-ubuntu
index 0318f650f3..67de916acb 100644
--- a/scripts/build/Dockerfile.hotspot-ubuntu
+++ b/scripts/build/Dockerfile.hotspot-ubuntu
@@ -22,6 +22,7 @@ RUN apt-install protobuf-c-compiler \
pkg-config \
iptables \
gcc \
+ uuid-dev \
maven
COPY . /criu
diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl
index 13e9926424..d218e06414 100644
--- a/scripts/build/Dockerfile.linux32.tmpl
+++ b/scripts/build/Dockerfile.linux32.tmpl
@@ -21,6 +21,7 @@ RUN apt-install \
pkg-config \
protobuf-c-compiler \
protobuf-compiler \
+ uuid-dev \
python3-minimal
COPY . /criu
diff --git a/scripts/ci/prepare-for-fedora-rawhide.sh b/scripts/ci/prepare-for-fedora-rawhide.sh
index 09085c403b..42252c93c9 100755
--- a/scripts/ci/prepare-for-fedora-rawhide.sh
+++ b/scripts/ci/prepare-for-fedora-rawhide.sh
@@ -36,6 +36,7 @@ dnf install -y \
e2fsprogs \
rubygem-asciidoctor \
libdrm-devel \
+ libuuid-devel \
kmod
# /tmp is no longer 755 in the rawhide container image and breaks CI - fix it
From c39bce3cf17782784d1a14cf40a4cedd059059fa Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Thu, 23 Jan 2025 17:42:45 +0000
Subject: [PATCH 2/2] net: remember the name of the lock chain (nftables)
Using libnftables the chain to lock the network is composed of
("CRIU-%d", real_pid). This leads to around 40 zdtm tests failing
with errors like this:
Error: No such file or directory; did you mean table 'CRIU-62' in family inet?
delete table inet CRIU-86
The reason is that as soon as a process is running in a namespace the
real PID can be anything and only the PID in the namespace is restored
correctly. Relying on the real PID does not work for the chain name.
Using the PID of the innermost namespace would lead to the chain being
called 'CRIU-1' most of the time, which is also not really unique.
With this commit the chain is now named using the already existing CRIU
run ID. To be able to correctly restore the process and delete the
locking table, the CRIU run id during checkpointing is now stored in the
inventory as dump_criu_run_id.
Signed-off-by: Adrian Reber <areber@redhat.com>
---
criu/image.c | 30 ++++++++++++++++++++++++++++++
criu/include/util.h | 2 ++
criu/netfilter.c | 20 +++++++++++++++++++-
images/inventory.proto | 4 ++++
4 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/criu/image.c b/criu/image.c
index 9589167fb1..f3747d6ff5 100644
--- a/criu/image.c
+++ b/criu/image.c
@@ -25,6 +25,7 @@ bool img_common_magic = true;
TaskKobjIdsEntry *root_ids;
u32 root_cg_set;
Lsmtype image_lsm;
+char dump_criu_run_id[RUN_ID_HASH_LENGTH];
int check_img_inventory(bool restore)
{
@@ -120,6 +121,24 @@ int check_img_inventory(bool restore)
} else {
opts.network_lock_method = he->network_lock_method;
}
+
+ /**
+ * This contains the criu_run_id during dumping of the process.
+ * For things like removing network locking (nftables) this
+ * information is needed to identify the name of the network
+ * locking table.
+ */
+ if (he->dump_criu_run_id) {
+ strncpy(dump_criu_run_id, he->dump_criu_run_id, sizeof(dump_criu_run_id) - 1);
+ pr_info("Dump CRIU run id = %s\n", dump_criu_run_id);
+ } else {
+ /**
+ * If restoring from an old image this is a marker
+ * that no dump_criu_run_id exists.
+ */
+ dump_criu_run_id[0] = NO_DUMP_CRIU_RUN_ID;
+ }
+
}
ret = 0;
@@ -367,6 +386,17 @@ int prepare_inventory(InventoryEntry *he)
he->has_network_lock_method = true;
he->network_lock_method = opts.network_lock_method;
+ /**
+ * This contains the criu_run_id during dumping of the process.
+ * For things like removing network locking (nftables) this
+ * information is needed to identify the name of the network
+ * locking table.
+ */
+ he->dump_criu_run_id = xstrdup(criu_run_id);
+
+ if (!he->dump_criu_run_id)
+ return -1;
+
return 0;
}
diff --git a/criu/include/util.h b/criu/include/util.h
index 194e94deeb..55ad5b63cf 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -424,6 +424,8 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void
*/
extern char criu_run_id[RUN_ID_HASH_LENGTH];
extern void util_init(void);
+#define NO_DUMP_CRIU_RUN_ID 0x7f
+extern char dump_criu_run_id[RUN_ID_HASH_LENGTH];
extern char *resolve_mountpoint(char *path);
diff --git a/criu/netfilter.c b/criu/netfilter.c
index 9e78dc4b03..e2c82764f2 100644
--- a/criu/netfilter.c
+++ b/criu/netfilter.c
@@ -299,7 +299,25 @@ int nftables_lock_connection(struct inet_sk_desc *sk)
int nftables_get_table(char *table, int n)
{
- if (snprintf(table, n, "inet CRIU-%d", root_item->pid->real) < 0) {
+ int ret;
+
+ switch(dump_criu_run_id[0]) {
+ case 0:
+ /* This is not a restore.*/
+ ret = snprintf(table, n, "inet CRIU-%s", criu_run_id);
+ break;
+ case NO_DUMP_CRIU_RUN_ID:
+ /**
+ * This is a restore from an older image with no
+ * dump_criu_run_id available. Let's use the old ID.
+ */
+ ret = snprintf(table, n, "inet CRIU-%d", root_item->pid->real);
+ break;
+ default:
+ ret = snprintf(table, n, "inet CRIU-%s", dump_criu_run_id);
+ }
+
+ if (ret < 0) {
pr_err("Cannot generate CRIU's nftables table name\n");
return -1;
}
diff --git a/images/inventory.proto b/images/inventory.proto
index 7f655031bc..1e18815bb9 100644
--- a/images/inventory.proto
+++ b/images/inventory.proto
@@ -29,4 +29,8 @@ message inventory_entry {
optional uint32 pre_dump_mode = 9;
optional bool tcp_close = 10;
optional uint32 network_lock_method = 11;
+ // Remember the criu_run_id when CRIU dumped the process.
+ // This is currently used to delete the correct nftables
+ // network locking rule.
+ optional string dump_criu_run_id = 13;
}

View File

@ -1,452 +0,0 @@
From ed2468f0a3c1c3c3b40b41047ffd97ce32346a4e Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Wed, 22 Jan 2025 14:35:26 +0100
Subject: [PATCH] vdso: switch from DT_HASH to DT_GNU_HASH (aarch64)
Trying to run latest CRIU on CentOS Stream 10 or Ubuntu 24.04 (aarch64)
fails like this:
# criu/criu check -v4
[...]
(00.096460) vdso: Parsing at ffffb2e2a000 ffffb2e2c000
(00.096539) vdso: PT_LOAD p_vaddr: 0
(00.096567) vdso: DT_STRTAB: 1d0
(00.096592) vdso: DT_SYMTAB: 128
(00.096616) vdso: DT_STRSZ: 8a
(00.096640) vdso: DT_SYMENT: 18
(00.096663) Error (criu/pie-util-vdso.c:193): vdso: Not all dynamic entries are present
(00.096688) Error (criu/vdso.c:627): vdso: Failed to fill self vdso symtable
(00.096713) Error (criu/kerndat.c:1906): kerndat_vdso_fill_symtable failed when initializing kerndat.
(00.096812) Found mmap_min_addr 0x10000
(00.096881) files stat: fs/nr_open 1073741816
(00.096908) Error (criu/crtools.c:267): Could not initialize kernel features detection.
This seems to be related to the kernel (6.12.0-41.el10.aarch64). The
Ubuntu user-space is running in a container on the same kernel.
Looking at the kernel this seems to be related to:
commit 48f6430505c0b0498ee9020ce3cf9558b1caaaeb
Author: Fangrui Song <i@maskray.me>
Date: Thu Jul 18 10:34:23 2024 -0700
arm64/vdso: Remove --hash-style=sysv
glibc added support for .gnu.hash in 2006 and .hash has been obsoleted
for more than one decade in many Linux distributions. Using
--hash-style=sysv might imply unaddressed issues and confuse readers.
Just drop the option and rely on the linker default, which is likely
"both", or "gnu" when the distribution really wants to eliminate sysv
hash overhead.
Similar to commit 6b7e26547fad ("x86/vdso: Emit a GNU hash").
The commit basically does:
-ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
+ldflags-y := -shared -soname=linux-vdso.so.1 \
Which results in only a GNU hash being added to the ELF header. This
change has been merged with 6.11.
Looking at the referenced x86 commit:
commit 6b7e26547fad7ace3dcb27a5babd2317fb9d1e12
Author: Andy Lutomirski <luto@amacapital.net>
Date: Thu Aug 6 14:45:45 2015 -0700
x86/vdso: Emit a GNU hash
Some dynamic loaders may be slightly faster if a GNU hash is
available. Strangely, this seems to have no effect at all on
the vdso size.
This is unlikely to have any measurable effect on the time it
takes to resolve vdso symbols (since there are so few of them).
In some contexts, it can be a win for a different reason: if
every DSO has a GNU hash section, then libc can avoid
calculating SysV hashes at all. Both musl and glibc appear to
have this optimization.
It's plausible that this breaks some ancient glibc version. If
so, then, depending on what glibc versions break, we could
either require COMPAT_VDSO for them or consider reverting.
Which is also a really simple change:
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \
The big difference here is that for x86 both hash sections are
generated. For aarch64 only the newer GNU hash is generated. That is why
we only see this error on kernel >= 6.11 and aarch64.
Changing from DT_HASH to DT_GNU_HASH seems to work on aarch64. The test
suite runs without any errors.
Unfortunately I am not aware of all implications of this change, or whether a
successful test suite run means that it still works.
Looking at the kernel I see following hash styles for the VDSO:
aarch64: not specified (only GNU hash style)
arm: --hash-style=sysv
loongarch: --hash-style=sysv
mips: --hash-style=sysv
powerpc: --hash-style=both
riscv: --hash-style=both
s390: --hash-style=both
x86: --hash-style=both
Only aarch64 on kernels >= 6.11 is a problem right now, because all
other platforms provide the old style hashing.
Signed-off-by: Adrian Reber <areber@redhat.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Co-authored-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
criu/pie/util-vdso.c | 245 ++++++++++++++++++++++++++++++++++---------
1 file changed, 198 insertions(+), 47 deletions(-)
diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c
index f1e3239ff5..9819335d81 100644
--- a/criu/pie/util-vdso.c
+++ b/criu/pie/util-vdso.c
@@ -5,6 +5,7 @@
#include <fcntl.h>
#include <errno.h>
#include <stdint.h>
+#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -48,10 +49,25 @@ static bool __ptr_struct_oob(uintptr_t ptr, size_t struct_size, uintptr_t start,
return __ptr_oob(ptr, start, size) || __ptr_struct_end_oob(ptr, struct_size, start, size);
}
+/* Local strlen implementation */
+static size_t __strlen(const char *str)
+{
+ const char *ptr;
+
+ if (!str)
+ return 0;
+
+ ptr = str;
+ while (*ptr != '\0')
+ ptr++;
+
+ return ptr - str;
+}
+
/*
* Elf hash, see format specification.
*/
-static unsigned long elf_hash(const unsigned char *name)
+static unsigned long elf_sysv_hash(const unsigned char *name)
{
unsigned long h = 0, g;
@@ -65,6 +81,15 @@ static unsigned long elf_hash(const unsigned char *name)
return h;
}
+/* * The GNU hash format. Taken from glibc. */
+static unsigned long elf_gnu_hash(const unsigned char *name)
+{
+ unsigned long h = 5381;
+ for (unsigned char c = *name; c != '\0'; c = *++name)
+ h = h * 33 + c;
+ return h;
+}
+
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define BORD ELFDATA2MSB /* 0x02 */
#else
@@ -149,11 +174,14 @@ static int parse_elf_phdr(uintptr_t mem, size_t size, Phdr_t **dynamic, Phdr_t *
* Output parameters are:
* @dyn_strtab - address of the symbol table
* @dyn_symtab - address of the string table section
- * @dyn_hash - address of the symbol hash table
+ * @dyn_hash - address of the symbol hash table
+ * @use_gnu_hash - the format of hash DT_HASH or DT_GNU_HASH
*/
-static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t **dyn_strtab, Dyn_t **dyn_symtab,
- Dyn_t **dyn_hash)
+static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic,
+ Dyn_t **dyn_strtab, Dyn_t **dyn_symtab,
+ Dyn_t **dyn_hash, bool *use_gnu_hash)
{
+ Dyn_t *dyn_gnu_hash = NULL, *dyn_sysv_hash = NULL;
Dyn_t *dyn_syment = NULL;
Dyn_t *dyn_strsz = NULL;
uintptr_t addr;
@@ -184,16 +212,52 @@ static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t
dyn_syment = d;
pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val);
} else if (d->d_tag == DT_HASH) {
- *dyn_hash = d;
+ dyn_sysv_hash = d;
pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
+ } else if (d->d_tag == DT_GNU_HASH) {
+ /*
+ * This is complicated.
+ *
+ * Looking at the Linux kernel source, the following can be seen
+ * regarding which hashing style the VDSO uses on each arch:
+ *
+ * aarch64: not specified (depends on linker, can be
+ * only GNU hash style)
+ * arm: --hash-style=sysv
+ * loongarch: --hash-style=sysv
+ * mips: --hash-style=sysv
+ * powerpc: --hash-style=both
+ * riscv: --hash-style=both
+ * s390: --hash-style=both
+ * x86: --hash-style=both
+ *
+ * Some architectures are using both hash-styles, that
+ * is the easiest for CRIU. Some architectures are only
+ * using the old style (sysv), that is what CRIU supports.
+ *
+ * Starting with Linux 6.11, aarch64 unfortunately decided
+ * to switch from '--hash-style=sysv' to ''. Specifying
+ * nothing unfortunately may mean GNU hash style only and not
+ * 'both' (depending on the linker).
+ */
+ dyn_gnu_hash = d;
+ pr_debug("DT_GNU_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
}
}
- if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || !*dyn_hash) {
+ if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment ||
+ (!dyn_gnu_hash && !dyn_sysv_hash)) {
pr_err("Not all dynamic entries are present\n");
return -EINVAL;
}
+ /*
+ * Prefer DT_HASH over DT_GNU_HASH as it's been more tested and
+ * as a result more stable.
+ */
+ *use_gnu_hash = !dyn_sysv_hash;
+ *dyn_hash = dyn_sysv_hash ?: dyn_gnu_hash;
+
return 0;
err_oob:
@@ -208,60 +272,141 @@ typedef unsigned long Hash_t;
typedef Word_t Hash_t;
#endif
-static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, struct vdso_symtable *t,
- uintptr_t dynsymbol_names, Hash_t *hash, Dyn_t *dyn_symtab)
+static bool elf_symbol_match(uintptr_t mem, size_t size,
+ uintptr_t dynsymbol_names, Sym_t *sym,
+ const char *symbol, const size_t vdso_symbol_length)
{
- ARCH_VDSO_SYMBOLS_LIST
-
- const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS };
- const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1;
+ uintptr_t addr = (uintptr_t)sym;
+ char *name;
- Hash_t nbucket, nchain;
- Hash_t *bucket, *chain;
+ if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size))
+ return false;
- unsigned int i, j, k;
- uintptr_t addr;
+ if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL)
+ return false;
- nbucket = hash[0];
- nchain = hash[1];
- bucket = &hash[2];
- chain = &hash[nbucket + 2];
+ addr = dynsymbol_names + sym->st_name;
+ if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size))
+ return false;
+ name = (void *)addr;
- pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", (long)nbucket, (long)nchain, (unsigned long)bucket,
- (unsigned long)chain);
+ return !std_strncmp(name, symbol, vdso_symbol_length);
+}
- for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
- const char *symbol = vdso_symbols[i];
- k = elf_hash((const unsigned char *)symbol);
- for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) {
- Sym_t *sym;
- char *name;
+static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size,
+ const char *symbol, uint32_t symbol_hash, unsigned int sym_off,
+ uintptr_t dynsymbol_names, Dyn_t *dyn_symtab, Phdr_t *load,
+ Hash_t nbucket, Hash_t nchain, Hash_t *bucket, Hash_t *chain,
+ const size_t vdso_symbol_length, bool use_gnu_hash)
+{
+ unsigned int j;
+ uintptr_t addr;
- addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
+ j = bucket[symbol_hash % nbucket];
+ if (j == STN_UNDEF)
+ return 0;
+
+ addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
+
+ if (use_gnu_hash) {
+ uint32_t *h = bucket + nbucket + (j - sym_off);
+ uint32_t hash_val;
+
+ symbol_hash |= 1;
+ do {
+ Sym_t *sym = (void *)addr + sizeof(Sym_t) * j;
+
+ hash_val = *h++;
+ if ((hash_val | 1) == symbol_hash &&
+ elf_symbol_match(mem, size, dynsymbol_names, sym,
+ symbol, vdso_symbol_length))
+ return sym->st_value;
+ j++;
+ } while (!(hash_val & 1));
+ } else {
+ for (; j < nchain && j != STN_UNDEF; j = chain[j]) {
+ Sym_t *sym = (void *)addr + sizeof(Sym_t) * j;
+
+ if (elf_symbol_match(mem, size, dynsymbol_names, sym,
+ symbol, vdso_symbol_length))
+ return sym->st_value;
+ }
+ }
+ return 0;
+}
- addr += sizeof(Sym_t) * j;
- if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size))
- continue;
- sym = (void *)addr;
+static int parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load,
+ struct vdso_symtable *t, uintptr_t dynsymbol_names,
+ Hash_t *hash, Dyn_t *dyn_symtab, bool use_gnu_hash)
+{
+ ARCH_VDSO_SYMBOLS_LIST
- if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL)
- continue;
+ const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS };
+ const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1;
- addr = dynsymbol_names + sym->st_name;
- if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size))
- continue;
- name = (void *)addr;
+ Hash_t *bucket = NULL;
+ Hash_t *chain = NULL;
+ Hash_t nbucket = 0;
+ Hash_t nchain = 0;
+
+ unsigned int sym_off = 0;
+ unsigned int i = 0;
+
+ unsigned long (*elf_hash)(const unsigned char *);
+
+ if (use_gnu_hash) {
+ uint32_t *gnu_hash = (uint32_t *)hash;
+ uint32_t bloom_sz;
+ size_t *bloom;
+
+ nbucket = gnu_hash[0];
+ sym_off = gnu_hash[1];
+ bloom_sz = gnu_hash[2];
+ bloom = (size_t *)&gnu_hash[4];
+ bucket = (Hash_t *)(&bloom[bloom_sz]);
+ elf_hash = &elf_gnu_hash;
+ pr_debug("nbucket %lx sym_off %lx bloom_sz %lx bloom %lx bucket %lx\n",
+ (unsigned long)nbucket, (unsigned long)sym_off,
+ (unsigned long)bloom_sz, (unsigned long)bloom,
+ (unsigned long)bucket);
+ } else {
+ nbucket = hash[0];
+ nchain = hash[1];
+ bucket = &hash[2];
+ chain = &hash[nbucket + 2];
+ elf_hash = &elf_sysv_hash;
+ pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n",
+ (unsigned long)nbucket, (unsigned long)nchain,
+ (unsigned long)bucket, (unsigned long)chain);
+ }
- if (std_strncmp(name, symbol, vdso_symbol_length))
- continue;
- /* XXX: provide strncpy() implementation for PIE */
- memcpy(t->symbols[i].name, name, vdso_symbol_length);
- t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
- break;
+ for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
+ const char *symbol = vdso_symbols[i];
+ unsigned long addr, symbol_hash;
+ const size_t symbol_length = __strlen(symbol);
+
+ symbol_hash = elf_hash((const unsigned char *)symbol);
+ addr = elf_symbol_lookup(mem, size, symbol, symbol_hash,
+ sym_off, dynsymbol_names, dyn_symtab, load,
+ nbucket, nchain, bucket, chain,
+ vdso_symbol_length, use_gnu_hash);
+ pr_debug("symbol %s at address %lx\n", symbol, addr);
+ if (!addr)
+ continue;
+
+ /* XXX: provide strncpy() implementation for PIE */
+ if (symbol_length > vdso_symbol_length) {
+ pr_err("strlen(%s) %zd, only %zd bytes available\n",
+ symbol, symbol_length, vdso_symbol_length);
+ return -EINVAL;
}
+ memcpy(t->symbols[i].name, symbol, symbol_length);
+ t->symbols[i].offset = addr - load->p_vaddr;
}
+
+ return 0;
}
int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
@@ -271,6 +416,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
Dyn_t *dyn_symtab = NULL;
Dyn_t *dyn_hash = NULL;
Hash_t *hash = NULL;
+ bool use_gnu_hash;
uintptr_t dynsymbol_names;
uintptr_t addr;
@@ -296,7 +442,8 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
* needed. Note that we're interested in a small set of tags.
*/
- ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, &dyn_hash);
+ ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab,
+ &dyn_hash, &use_gnu_hash);
if (ret < 0)
return ret;
@@ -310,7 +457,11 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t)
goto err_oob;
hash = (void *)addr;
- parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab);
+ ret = parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab,
+ use_gnu_hash);
+
+ if (ret <0)
+ return ret;
return 0;

View File

@ -1,38 +0,0 @@
From de5dba8c47ffe4e16fae17539270d55e1a8604d1 Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Fri, 7 Feb 2025 09:24:19 +0100
Subject: [PATCH] vdso: handle s390x correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
On s390x there is currently a build failure:
criu/pie/util-vdso.c: In function elf_symbol_lookup:
criu/pie/util-vdso.c:313:31: error: initialization of uint32_t * {aka unsigned int *} from incompatible pointer type Hash_t * {aka long unsigned int *} [-Wincompatible-pointer-types]
313 | uint32_t *h = bucket + nbucket + (j - sym_off);
| ^~~~~~
Replacing uint32_t with Hash_t, which is defined behind an
architecture-specific ifdef, solves this error.
Signed-off-by: Adrian Reber <areber@redhat.com>
---
criu/pie/util-vdso.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c
index 9819335d81..d16fd85f43 100644
--- a/criu/pie/util-vdso.c
+++ b/criu/pie/util-vdso.c
@@ -310,8 +310,8 @@ static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size,
addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr;
if (use_gnu_hash) {
- uint32_t *h = bucket + nbucket + (j - sym_off);
- uint32_t hash_val;
+ Hash_t *h = bucket + nbucket + (j - sym_off);
+ Hash_t hash_val;
symbol_hash |= 1;
do {

154
2648.patch Normal file
View File

@ -0,0 +1,154 @@
From 5813fcabd6a42eaecdb9972e064f176660fd0e6c Mon Sep 17 00:00:00 2001
From: Younes Manton <ymanton@ca.ibm.com>
Date: Tue, 23 Jan 2024 08:22:07 -0800
Subject: [PATCH] s390: Fix FP reg restore after parasite code runs
Currently we save FP regs before parasite code runs, and restore after
for --leave-running, --check-only, and in case of errors. In case of
errors the error may have happened before FP regs were saved, so we
should only restore them if they were actually saved.
Signed-off-by: Younes Manton <ymanton@ca.ibm.com>
---
criu/arch/s390/crtools.c | 90 +++++++++++++++++++++++-----------------
1 file changed, 52 insertions(+), 38 deletions(-)
diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c
index 96cef819e3..e08c838783 100644
--- a/criu/arch/s390/crtools.c
+++ b/criu/arch/s390/crtools.c
@@ -142,6 +142,29 @@ static void print_core_fp_regs(const char *msg, CoreEntry *core)
print_core_ri_cb(core);
}
+/*
+ * Allocate floating point registers
+ */
+static UserS390FpregsEntry *allocate_fp_regs(void)
+{
+ UserS390FpregsEntry *fpregs;
+
+ fpregs = xmalloc(sizeof(*fpregs));
+ if (!fpregs)
+ return NULL;
+ user_s390_fpregs_entry__init(fpregs);
+
+ fpregs->n_fprs = 16;
+ fpregs->fprs = xzalloc(16 * sizeof(uint64_t));
+ if (!fpregs->fprs)
+ goto fail_free_fpregs;
+ return fpregs;
+
+fail_free_fpregs:
+ xfree(fpregs);
+ return NULL;
+}
+
/*
* Allocate VxrsLow registers
*/
@@ -294,7 +317,13 @@ int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_stru
CoreEntry *core = arg;
gpregs = CORE_THREAD_ARCH_INFO(core)->gpregs;
- fpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
+ /*
+ * We delay allocating this until now because checkpointing can fail earlier.
+ * When it fails we need to know if we reached here or not so that the cleanup
+ * code doesn't restore FPRs that were never saved in the first place.
+ */
+ fpregs = allocate_fp_regs();
+ CORE_THREAD_ARCH_INFO(core)->fpregs = fpregs;
/* Vector registers */
if (f->flags & USER_FPREGS_VXRS) {
@@ -399,36 +428,15 @@ int restore_fpu(struct rt_sigframe *f, CoreEntry *core)
return 0;
}
-/*
- * Allocate floating point registers
- */
-static UserS390FpregsEntry *allocate_fp_regs(void)
-{
- UserS390FpregsEntry *fpregs;
-
- fpregs = xmalloc(sizeof(*fpregs));
- if (!fpregs)
- return NULL;
- user_s390_fpregs_entry__init(fpregs);
-
- fpregs->n_fprs = 16;
- fpregs->fprs = xzalloc(16 * sizeof(uint64_t));
- if (!fpregs->fprs)
- goto fail_free_fpregs;
- return fpregs;
-
-fail_free_fpregs:
- xfree(fpregs);
- return NULL;
-}
-
/*
* Free floating point registers
*/
static void free_fp_regs(UserS390FpregsEntry *fpregs)
{
- xfree(fpregs->fprs);
- xfree(fpregs);
+ if (fpregs) {
+ xfree(fpregs->fprs);
+ xfree(fpregs);
+ }
}
/*
@@ -487,15 +495,17 @@ int arch_alloc_thread_info(CoreEntry *core)
ti_s390->gpregs = allocate_gp_regs();
if (!ti_s390->gpregs)
goto fail_free_ti_s390;
- ti_s390->fpregs = allocate_fp_regs();
- if (!ti_s390->fpregs)
- goto fail_free_gp_regs;
+
+ /*
+ * Delay allocating space until needed. Checkpointing can fail before that
+ * and the cleanup code needs to be able to tell if FPRs were saved or not
+ * before trying to restore the register state.
+ */
+ ti_s390->fpregs = NULL;
CORE_THREAD_ARCH_INFO(core) = ti_s390;
return 0;
-fail_free_gp_regs:
- free_gp_regs(ti_s390->gpregs);
fail_free_ti_s390:
xfree(ti_s390);
return -1;
@@ -678,14 +688,18 @@ static int set_task_regs(pid_t pid, CoreEntry *core)
user_fpregs_struct_t fpregs;
memset(&fpregs, 0, sizeof(fpregs));
- /* Floating point registers */
+ /*
+ * Floating point registers
+ * Optional on checkpoint; checkpoint may have failed and we may reach here as part of cleanup
+ * so there's no guarantee that we saved FPRs for this thread.
+ */
cfpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
- if (!cfpregs)
- return -1;
- fpregs.prfpreg.fpc = cfpregs->fpc;
- memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs));
- if (set_fp_regs(pid, &fpregs) < 0)
- return -1;
+ if (cfpregs) {
+ fpregs.prfpreg.fpc = cfpregs->fpc;
+ memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs));
+ if (set_fp_regs(pid, &fpregs) < 0)
+ return -1;
+ }
/* Vector registers (optional) */
cvxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low;
if (cvxrs_low != NULL) {

134
2653.patch Normal file
View File

@ -0,0 +1,134 @@
From 22fdffbdde9476b27988b3ee0a4013a4453784c9 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Mon, 21 Apr 2025 06:33:41 +0000
Subject: [PATCH] net: nftables: avoid restore failure if the CRIU nft table
already exist
CRIU locks the network during restore in an "empty" network namespace.
However, "empty" in this context means CRIU isn't restoring the
namespace. This network namespace can be the same namespace where
processes have been dumped and so the network is already locked in it.
Fixes #2650
Signed-off-by: Andrei Vagin <avagin@gmail.com>
---
criu/cr-restore.c | 2 +-
criu/include/net.h | 2 +-
criu/net.c | 30 +++++++++++++++++-------------
3 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 583b446e0b..30932f60a2 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2119,7 +2119,7 @@ static int restore_root_task(struct pstree_item *init)
* the '--empty-ns net' mode no iptables C/R is done and we
* need to return these rules by hands.
*/
- ret = network_lock_internal();
+ ret = network_lock_internal(/* restore = */ true);
if (ret)
goto out_kill;
}
diff --git a/criu/include/net.h b/criu/include/net.h
index 5e8a848620..7c5ede21e1 100644
--- a/criu/include/net.h
+++ b/criu/include/net.h
@@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump);
extern int network_lock(void);
extern void network_unlock(void);
-extern int network_lock_internal(void);
+extern int network_lock_internal(bool restore);
extern struct ns_desc net_ns_desc;
diff --git a/criu/net.c b/criu/net.c
index ee46f1c495..300df480b0 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -3206,12 +3206,12 @@ static inline FILE *redirect_nftables_output(struct nft_ctx *nft)
}
#endif
-static inline int nftables_lock_network_internal(void)
+static inline int nftables_lock_network_internal(bool restore)
{
#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
cleanup_file FILE *fp = NULL;
struct nft_ctx *nft;
- int ret = 0;
+ int ret = 0, exit_code = -1;
char table[32];
char buf[128];
@@ -3224,11 +3224,16 @@ static inline int nftables_lock_network_internal(void)
fp = redirect_nftables_output(nft);
if (!fp)
- goto out;
+ goto err2;
snprintf(buf, sizeof(buf), "create table %s", table);
- if (NFT_RUN_CMD(nft, buf))
+ ret = NFT_RUN_CMD(nft, buf);
+ if (ret) {
+ /* The network has been locked on dump. */
+ if (restore && errno == EEXIST)
+ return 0;
goto err2;
+ }
snprintf(buf, sizeof(buf), "add chain %s output { type filter hook output priority 0; policy drop; }", table);
if (NFT_RUN_CMD(nft, buf))
@@ -3246,17 +3251,16 @@ static inline int nftables_lock_network_internal(void)
if (NFT_RUN_CMD(nft, buf))
goto err1;
- goto out;
-
+ exit_code = 0;
+out:
+ nft_ctx_free(nft);
+ return exit_code;
err1:
snprintf(buf, sizeof(buf), "delete table %s", table);
NFT_RUN_CMD(nft, buf);
err2:
- ret = -1;
pr_err("Locking network failed using nftables\n");
-out:
- nft_ctx_free(nft);
- return ret;
+ goto out;
#else
pr_err("CRIU was built without libnftables support\n");
return -1;
@@ -3288,7 +3292,7 @@ static int iptables_network_lock_internal(void)
return ret;
}
-int network_lock_internal(void)
+int network_lock_internal(bool restore)
{
int ret = 0, nsret;
@@ -3301,7 +3305,7 @@ int network_lock_internal(void)
if (opts.network_lock_method == NETWORK_LOCK_IPTABLES)
ret = iptables_network_lock_internal();
else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
- ret = nftables_lock_network_internal();
+ ret = nftables_lock_network_internal(restore);
if (restore_ns(nsret, &net_ns_desc))
ret = -1;
@@ -3427,7 +3431,7 @@ int network_lock(void)
if (run_scripts(ACT_NET_LOCK))
return -1;
- return network_lock_internal();
+ return network_lock_internal(false);
}
void network_unlock(void)

36
2662.patch Normal file
View File

@ -0,0 +1,36 @@
From 45d187f9147a9081cdd2df3f8ac6518eee14c9c0 Mon Sep 17 00:00:00 2001
From: Radostin Stoyanov <rstoyanov@fedoraproject.org>
Date: Wed, 7 May 2025 14:06:55 +0100
Subject: [PATCH] sk-inet: add message how to disable MPTCP in Go
With Go version 1.24, ListenConfig now uses MPTCP by default [1].
Checkpoint/restore for this protocol is not currently supported
and adding support requires kernel changes that are not trivial
to implement. As a result, checkpointing of many containers that
run Go programs is likely to fail with the following error [2]:
(00.026522) Error (criu/sk-inet.c:130): inet: Unsupported proto 262 for socket 2f9bc5
This patch adds a message with a suggested workaround for this problem.
[1] https://go.dev/doc/go1.24#netpkgnet
[2] https://github.com/checkpoint-restore/criu/issues/2655
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
---
criu/sk-inet.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index 92f53e5697..a191e78c48 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -128,6 +128,8 @@ static int can_dump_ipproto(unsigned int ino, int proto, int type)
break;
default:
pr_err("Unsupported proto %d for socket %x\n", proto, ino);
+ if (proto == IPPROTO_MPTCP)
+ pr_err("For Go programs, consider using \"GODEBUG=multipathtcp=0\" to disable MPTCP\n");
return 0;
}

View File

@ -11,30 +11,22 @@
%undefine _auto_set_build_flags
Name: criu
Version: 3.19
Release: 9%{?dist}
Version: 4.1
Release: 1%{?dist}
Summary: Tool for Checkpoint/Restore in User-space
License: GPL-2.0-only AND LGPL-2.1-only AND MIT
URL: http://criu.org/
Source0: https://github.com/checkpoint-restore/criu/archive/v%{version}/criu-%{version}.tar.gz
# This switches the default network locking backend from
# iptables to nftables
Patch0: network.lock.nftables.patch
# Update restartable sequences to latest upstream code
Patch1: https://github.com/checkpoint-restore/criu/commit/089345f77a34d1bc7ef146d650636afcd3cdda21.patch
# net: nftables: avoid restore failure if the CRIU nft table already exists
Patch0: https://github.com/checkpoint-restore/criu/pull/2653.patch
# s390: Fix FP reg restore after parasite code runs
Patch1: https://github.com/checkpoint-restore/criu/pull/2648.patch
# sk-inet: add message how to disable MPTCP in Go
Patch2: https://github.com/checkpoint-restore/criu/pull/2662.patch
# Unfortunately crun added code to always force
# iptables backed network locking. This disables
# setting the network locking to iptables via RPC.
Patch2: disable.network.locking.via.rpc.patch
# net: redirect nftables stdout and stderr to CRIU's log file #2549
Patch3: https://patch-diff.githubusercontent.com/raw/checkpoint-restore/criu/pull/2549.patch
# net: remember the name of the lock chain (nftables) #2550
# based on https://patch-diff.githubusercontent.com/raw/checkpoint-restore/criu/pull/2550.patch
Patch4: 2550.patch
# vdso: switch from DT_HASH to DT_GNU_HASH (aarch64) #2570
Patch5: https://patch-diff.githubusercontent.com/raw/checkpoint-restore/criu/pull/2570.patch
# vdso: handle s390x correctly #2590
Patch6: https://github.com/checkpoint-restore/criu/pull/2590.patch
Patch3: disable.network.locking.via.rpc.patch
# Add protobuf-c as a dependency.
# We use this patch because the protobuf-c package name
@ -120,9 +112,6 @@ This script can help to workaround the so called "PID mismatch" problem.
%patch -P 1 -p1
%patch -P 2 -p1
%patch -P 3 -p1
%patch -P 4 -p1
%patch -P 5 -p1
%patch -P 6 -p1
%patch -P 99 -p1
%build
@ -132,15 +121,15 @@ This script can help to workaround the so called "PID mismatch" problem.
# %{?_smp_mflags} does not work
# -fstack-protector breaks build
CFLAGS+=`echo %{optflags} | sed -e 's,-fstack-protector\S*,,g'` make V=1 WERROR=0 PREFIX=%{_prefix} RUNDIR=/run/criu PYTHON=%{py_binary}
CFLAGS+=`echo %{optflags} | sed -e 's,-fstack-protector\S*,,g'` make V=1 WERROR=0 PREFIX=%{_prefix} RUNDIR=/run/criu PYTHON=%{py_binary} PLUGINDIR=%{_libdir}/criu NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES
make docs V=1
%install
sed -e "s,--upgrade --ignore-installed,--no-index --no-deps -v --no-build-isolation,g" -i lib/Makefile -i crit/Makefile
make install-criu DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir}
make install-lib DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=%{py_binary}
make install-crit DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=%{py_binary}
make install-lib DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=%{py_binary} PIPFLAGS="--no-build-isolation --no-index --no-deps --progress-bar off --upgrade --ignore-installed"
make install-crit DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} BINDIR=%{_bindir} SBINDIR=%{_sbindir} PYTHON=%{py_binary} PIPFLAGS="--no-build-isolation --no-index --no-deps --progress-bar off --upgrade --ignore-installed"
make install-man DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir}
rm -f $RPM_BUILD_ROOT%{_mandir}/man1/compel.1
rm -f $RPM_BUILD_ROOT%{_mandir}/man1/criu-amdgpu-plugin.1
@ -182,6 +171,9 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/libcriu.a
%doc %{_mandir}/man1/criu-ns.1*
%changelog
* Mon May 12 2025 Adrian Reber <areber@redhat.com> - 4.1-1
- Update to 4.1
* Fri Feb 07 2025 Adrian Reber <areber@redhat.com> - 3.19-9
- Fix VDSO compile error on s390x

View File

@ -1,11 +0,0 @@
--- a/criu/include/cr_options.h.orig 2024-12-10 16:57:20.061293476 +0100
+++ b/criu/include/cr_options.h 2024-12-10 16:57:34.789131372 +0100
@@ -70,7 +70,7 @@
NETWORK_LOCK_SKIP,
};
-#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_IPTABLES
+#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_NFTABLES
/*
* Ghost file size we allow to carry by default.

4
rpminspect.yaml Normal file
View File

@ -0,0 +1,4 @@
---
annocheck:
jobs:
- hardened: --verbose --skip-dynamic-tags --skip-property-note --skip-bind-now --skip-pie --skip-cf-protection

View File

@ -1 +1,2 @@
SHA512 (criu-3.19.tar.gz) = d243818cdac51580c952a80e9164786a67ce5e294c0faa6dc700f5e8da8e36495f0b64f5c27b345ede7d6697ed7a69fa4e9a85cef451f32e3ffeb78564884571
SHA512 (criu-4.1.tar.gz) = 769001a7e527c129fe73509fd0c7d3fc3b9b1080dc69929032cb84f60f95256f5d145ed4b7ea11f090a7f468f2bb2a0ecf56475eb292966cad26d643f0e46816