diff --git a/.gitignore b/.gitignore index 0d113cc..9e1ddb3 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,4 @@ /criu-3.17.1.tar.gz /criu-3.18.tar.gz /criu-3.19.tar.gz +/criu-4.1.tar.gz diff --git a/089345f77a34d1bc7ef146d650636afcd3cdda21.patch b/089345f77a34d1bc7ef146d650636afcd3cdda21.patch deleted file mode 100644 index d8aef52..0000000 --- a/089345f77a34d1bc7ef146d650636afcd3cdda21.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 089345f77a34d1bc7ef146d650636afcd3cdda21 Mon Sep 17 00:00:00 2001 -From: Florian Weimer -Date: Wed, 10 Jul 2024 18:34:50 +0200 -Subject: [PATCH] Adjust to glibc __rseq_size semantic change - -In commit 2e456ccf0c34a056e3ccafac4a0c7effef14d918 ("Linux: Make -__rseq_size useful for feature detection (bug 31965)") glibc 2.40 -changed the meaning of __rseq_size slightly: it is now the size -of the active/feature area (20 bytes initially), and not the size -of the entire initially defined struct (32 bytes including padding). -The reason for the change is that the size including padding does not -allow detection of newly added features while previously unused -padding is consumed. - -The prep_libc_rseq_info change in criu/cr-restore.c is not necessary -on kernels which have full ptrace support for obtaining rseq -information because the code is not used. On older kernels, it is -a correctness fix because with size 20 (the new value), rseq -registeration would fail. - -The two other changes are required to make rseq unregistration work -in tests. - -Signed-off-by: Florian Weimer ---- - criu/cr-restore.c | 8 ++++++++ - test/zdtm/static/rseq00.c | 5 ++++- - test/zdtm/transition/rseq01.c | 5 ++++- - 3 files changed, 16 insertions(+), 2 deletions(-) - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 4db2f4ecfc..b95d4f134b 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -2618,7 +2618,15 @@ static void prep_libc_rseq_info(struct rst_rseq_param *rseq) - if (!kdat.has_ptrace_get_rseq_conf) { - #if defined(__GLIBC__) && defined(RSEQ_SIG) - rseq->rseq_abi_pointer = encode_pointer(__criu_thread_pointer() + __rseq_offset); -+ /* -+ * Current glibc reports the feature/active size in -+ * __rseq_size, not the size passed to the kernel. -+ * This could be 20, but older kernels expect 32 for -+ * the size argument even if only 20 bytes are used. -+ */ - rseq->rseq_abi_size = __rseq_size; -+ if (rseq->rseq_abi_size < 32) -+ rseq->rseq_abi_size = 32; - rseq->signature = RSEQ_SIG; - #else - rseq->rseq_abi_pointer = 0; -diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c -index 471ad6a43f..7add7801eb 100644 ---- a/test/zdtm/static/rseq00.c -+++ b/test/zdtm/static/rseq00.c -@@ -46,12 +46,15 @@ static inline void *__criu_thread_pointer(void) - static inline void unregister_glibc_rseq(void) - { - struct rseq *rseq = (struct rseq *)((char *)__criu_thread_pointer() + __rseq_offset); -+ unsigned int size = __rseq_size; - - /* hack: mark glibc rseq structure as failed to register */ - rseq->cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; - - /* unregister rseq */ -- syscall(__NR_rseq, (void *)rseq, __rseq_size, 1, RSEQ_SIG); -+ if (__rseq_size < 32) -+ size = 32; -+ syscall(__NR_rseq, (void *)rseq, size, 1, RSEQ_SIG); - } - #else - static inline void unregister_glibc_rseq(void) -diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c -index 0fbcc2dca0..08a7a8e1a6 100644 ---- a/test/zdtm/transition/rseq01.c -+++ b/test/zdtm/transition/rseq01.c -@@ -33,7 +33,10 @@ static inline void *thread_pointer(void) - static inline void unregister_old_rseq(void) - { - /* unregister rseq */ -- syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+ unsigned int size = __rseq_size; -+ if (__rseq_size < 32) -+ size = 32; -+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), size, 1, RSEQ_SIG); - } - #else - static inline void unregister_old_rseq(void) diff --git a/2549.patch b/2549.patch deleted file mode 100644 index 4cbf648..0000000 --- a/2549.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 0a17c4160580d9bc7092ba9eb7db86952921d221 Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Thu, 16 Jan 2025 07:52:42 +0000 -Subject: [PATCH 1/2] util: added cleanup_file attribute. - -Signed-off-by: Adrian Reber ---- - criu/include/util.h | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/criu/include/util.h b/criu/include/util.h -index ae293a68c8..4793f7f20e 100644 ---- a/criu/include/util.h -+++ b/criu/include/util.h -@@ -406,6 +406,14 @@ static inline void cleanup_freep(void *p) - free(*pp); - } - -+#define cleanup_file __attribute__((cleanup(cleanup_filep))) -+static inline void cleanup_filep(FILE **f) -+{ -+ FILE *file = *f; -+ if (file) -+ (void)fclose(file); -+} -+ - extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args); - - /* - -From 1ed4109958644fbe1cbadf7c72472c82a12834b0 Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Tue, 17 Dec 2024 08:52:46 +0100 -Subject: [PATCH 2/2] net: redirect nftables stdout and stderr to CRIU's log - file - -When using the nftables network locking backend and restoring a process -a second time the network locking has already been deleted by the first -restore. The second restore will print out to the console text like: - -Error: Could not process rule: No such file or directory -delete table inet CRIU-202621 - -With this change CRIU's log FD is used by libnftables stdout and stderr. - -Signed-off-by: Adrian Reber ---- - criu/net.c | 43 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 43 insertions(+) - -diff --git a/criu/net.c b/criu/net.c -index eee3311087..efd52db327 100644 ---- a/criu/net.c -+++ b/criu/net.c -@@ -3066,9 +3066,43 @@ static int iptables_restore(bool ipv6, char *buf, int size) - return ret; - } - -+#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) -+static inline FILE *redirect_nftables_output(struct nft_ctx *nft) -+{ -+ FILE *fp; -+ int fd; -+ -+ fd = dup(log_get_fd()); -+ if (fd < 0) { -+ pr_perror("dup() to redirect nftables output failed"); -+ return NULL; -+ } -+ -+ fp = fdopen(fd, "w"); -+ if (!fp) { -+ pr_perror("fdopen() to redirect nftables output failed"); -+ return NULL; -+ } -+ -+ /** -+ * Without setvbuf() the output from libnftables will be -+ * somewhere in the log file, probably at the end. -+ * With setvbuf() potential output will be at the correct -+ * position. -+ */ -+ setvbuf(fp, NULL, _IONBF, 0); -+ -+ nft_ctx_set_output(nft, fp); -+ nft_ctx_set_error(nft, fp); -+ -+ return fp; -+} -+#endif -+ - static inline int nftables_lock_network_internal(void) - { - #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) -+ cleanup_file FILE *fp = NULL; - struct nft_ctx *nft; - int ret = 0; - char table[32]; -@@ -3081,6 +3115,10 @@ static inline int nftables_lock_network_internal(void) - if (!nft) - return -1; - -+ fp = redirect_nftables_output(nft); -+ if (!fp) -+ goto out; -+ - snprintf(buf, sizeof(buf), "create table %s", table); - if (NFT_RUN_CMD(nft, buf)) - goto err2; -@@ -3168,6 +3206,7 @@ static inline int nftables_network_unlock(void) - { - #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) - int ret = 0; -+ cleanup_file FILE *fp = NULL; - struct nft_ctx *nft; - char table[32]; - char buf[128]; -@@ -3179,6 +3218,10 @@ static inline int nftables_network_unlock(void) - if (!nft) - return -1; - -+ fp = redirect_nftables_output(nft); -+ if (!fp) -+ return -1; -+ - snprintf(buf, sizeof(buf), "delete table %s", table); - if (NFT_RUN_CMD(nft, buf)) - ret = -1; diff --git a/2550.patch b/2550.patch deleted file mode 100644 index 5f67f71..0000000 --- a/2550.patch +++ /dev/null @@ -1,473 +0,0 @@ -From 9a2b7d6b3baa2b3183489ed9cebece039f9f488f Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Thu, 23 Jan 2025 09:26:15 +0000 -Subject: [PATCH 1/2] criu: use libuuid for criu_run_id generation - -criu_run_id will be used in upcoming changes to create and remove -network rules for network locking. Instead of trying to come up with -a way to create unique IDs, just use an existing library. - -libuuid should be installed on most systems as it is indirectly required -by systemd (via libmount). - -Signed-off-by: Adrian Reber ---- - .cirrus.yml | 2 +- - .github/workflows/check-commits.yml | 2 +- - compel/include/uapi/infect-util.h | 11 ++++++++++- - compel/src/lib/infect-util.c | 2 +- - compel/src/lib/infect.c | 2 +- - criu/Makefile.packages | 4 +++- - criu/fdstore.c | 2 +- - criu/files.c | 2 +- - criu/include/util.h | 4 +++- - criu/pidfd-store.c | 2 +- - criu/unittest/mock.c | 4 +++- - criu/util.c | 17 +++++++---------- - scripts/build/Dockerfile.alpine | 3 ++- - scripts/build/Dockerfile.amd-rocm | 1 + - scripts/build/Dockerfile.archlinux | 1 + - scripts/build/Dockerfile.hotspot-alpine | 1 + - scripts/build/Dockerfile.hotspot-ubuntu | 1 + - scripts/build/Dockerfile.linux32.tmpl | 1 + - scripts/build/Dockerfile.openj9-ubuntu | 1 + - .../build/Dockerfile.riscv64-stable-cross.tmpl | 1 + - scripts/build/Dockerfile.stable-cross.tmpl | 1 + - scripts/build/Dockerfile.tmpl | 1 + - scripts/build/Dockerfile.unstable-cross.tmpl | 1 + - scripts/ci/prepare-for-fedora-rawhide.sh | 1 + - scripts/ci/run-ci-tests.sh | 2 +- - scripts/ci/vagrant.sh | 2 +- - 26 files changed, 48 insertions(+), 24 deletions(-) - -diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h -index ace6f6b6b1..658df9393d 100644 ---- a/compel/include/uapi/infect-util.h -+++ b/compel/include/uapi/infect-util.h -@@ -3,11 +3,20 @@ - - #include "common/compiler.h" - -+/** -+ * The length of the hash is based on what libuuid provides. -+ * According to the manpage this is: -+ * -+ * The uuid_unparse() function converts the supplied UUID uu from the binary -+ * representation into a 36-byte string (plus trailing '\0') -+ */ -+#define RUN_ID_HASH_LENGTH 37 -+ - /* - * compel_run_id is a unique value of the current run. It can be used to - * generate resource ID-s to avoid conflicts with other processes. - */ --extern uint64_t compel_run_id; -+extern char compel_run_id[RUN_ID_HASH_LENGTH]; - - struct parasite_ctl; - extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd); -diff --git a/compel/src/lib/infect-util.c b/compel/src/lib/infect-util.c -index 00a7c83f7d..dc57e28f7c 100644 ---- a/compel/src/lib/infect-util.c -+++ b/compel/src/lib/infect-util.c -@@ -7,7 +7,7 @@ - #include "infect-rpc.h" - #include "infect-util.h" - --uint64_t compel_run_id; -+char compel_run_id[RUN_ID_HASH_LENGTH]; - - int compel_util_send_fd(struct parasite_ctl *ctl, int fd) - { -diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c -index 1e3ffb9670..caf54e03fd 100644 ---- a/compel/src/lib/infect.c -+++ b/compel/src/lib/infect.c -@@ -427,7 +427,7 @@ static int gen_parasite_saddr(struct sockaddr_un *saddr, int key) - int sun_len; - - saddr->sun_family = AF_UNIX; -- snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%" PRIx64, key, compel_run_id); -+ snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%s", key, compel_run_id); - - sun_len = SUN_LEN(saddr); - *saddr->sun_path = '\0'; -diff --git a/criu/Makefile.packages b/criu/Makefile.packages -index 7f6113c8f1..3e2e6efd18 100644 ---- a/criu/Makefile.packages -+++ b/criu/Makefile.packages -@@ -6,6 +6,7 @@ REQ-RPM-PKG-NAMES += protobuf-devel - REQ-RPM-PKG-NAMES += protobuf-python - REQ-RPM-PKG-NAMES += libnl3-devel - REQ-RPM-PKG-NAMES += libcap-devel -+REQ-RPM-PKG-NAMES += libuuid-devel - - REQ-RPM-PKG-TEST-NAMES += libaio-devel - -@@ -16,6 +17,7 @@ REQ-DEB-PKG-NAMES += protobuf-compiler - REQ-DEB-PKG-NAMES += $(PYTHON)-protobuf - REQ-DEB-PKG-NAMES += libnl-3-dev - REQ-DEB-PKG-NAMES += libcap-dev -+REQ-DEB-PKG-NAMES += uuid-dev - - REQ-DEB-PKG-TEST-NAMES += $(PYTHON)-yaml - REQ-DEB-PKG-TEST-NAMES += libaio-dev -@@ -25,7 +27,7 @@ REQ-DEB-PKG-TEST-NAMES += libaio-dev - REQ-RPM-PKG-TEST-NAMES += $(PYTHON)-PyYAML - - --export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -+export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -luuid - - check-packages-failed: - $(warning Can not find some of the required libraries) -diff --git a/criu/fdstore.c b/criu/fdstore.c -index d615ad15d0..6ac639c553 100644 ---- a/criu/fdstore.c -+++ b/criu/fdstore.c -@@ -58,7 +58,7 @@ int fdstore_init(void) - } - - addr.sun_family = AF_UNIX; -- addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%" PRIx64, st.st_ino, -+ addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%s", st.st_ino, - criu_run_id); - addrlen += sizeof(addr.sun_family); - -diff --git a/criu/files.c b/criu/files.c -index 31e705bcc5..f16ec32a23 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -978,7 +978,7 @@ static int receive_fd(struct fdinfo_list_entry *fle); - static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid) - { - addr->sun_family = AF_UNIX; -- snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%" PRIx64, pid, criu_run_id); -+ snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%s", pid, criu_run_id); - *len = SUN_LEN(addr); - *addr->sun_path = '\0'; - } -diff --git a/criu/include/util.h b/criu/include/util.h -index 4793f7f20e..194e94deeb 100644 ---- a/criu/include/util.h -+++ b/criu/include/util.h -@@ -21,6 +21,8 @@ - #include "log.h" - #include "common/err.h" - -+#include "compel/infect-util.h" -+ - #define PREF_SHIFT_OP(pref, op, size) ((size)op(pref##BYTES_SHIFT)) - #define KBYTES_SHIFT 10 - #define MBYTES_SHIFT 20 -@@ -420,7 +422,7 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void - * criu_run_id is a unique value of the current run. It can be used to - * generate resource ID-s to avoid conflicts with other CRIU processes. - */ --extern uint64_t criu_run_id; -+extern char criu_run_id[RUN_ID_HASH_LENGTH]; - extern void util_init(void); - - extern char *resolve_mountpoint(char *path); -diff --git a/criu/pidfd-store.c b/criu/pidfd-store.c -index 9fdc74cb74..110f7802a2 100644 ---- a/criu/pidfd-store.c -+++ b/criu/pidfd-store.c -@@ -99,7 +99,7 @@ int init_pidfd_store_sk(pid_t pid, int sk) - goto err; - } - -- addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%" PRIx64, pid, sk, -+ addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%s", pid, sk, - criu_run_id); - addrlen += sizeof(addr.sun_family); - -diff --git a/criu/unittest/mock.c b/criu/unittest/mock.c -index e517720e42..b2d5072787 100644 ---- a/criu/unittest/mock.c -+++ b/criu/unittest/mock.c -@@ -5,6 +5,8 @@ - #include - #include - -+#include "compel/infect-util.h" -+ - int add_external(char *key) - { - return 0; -@@ -141,4 +143,4 @@ int check_mount_v2(void) - return 0; - } - --uint64_t compel_run_id; -+char compel_run_id[RUN_ID_HASH_LENGTH]; -diff --git a/criu/util.c b/criu/util.c -index d2bc9a8657..58c18e20be 100644 ---- a/criu/util.c -+++ b/criu/util.c -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - #include "linux/mount.h" - -@@ -2026,20 +2027,16 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args) - return fret; - } - --uint64_t criu_run_id; -+char criu_run_id[RUN_ID_HASH_LENGTH]; - - void util_init(void) - { -- struct stat statbuf; -+ uuid_t uuid; - -- criu_run_id = getpid(); -- if (!stat("/proc/self/ns/pid", &statbuf)) -- criu_run_id |= (uint64_t)statbuf.st_ino << 32; -- else if (errno != ENOENT) -- pr_perror("Can't stat /proc/self/ns/pid - CRIU run id might not be unique"); -- -- compel_run_id = criu_run_id; -- pr_info("CRIU run id = %#" PRIx64 "\n", criu_run_id); -+ uuid_generate(uuid); -+ uuid_unparse(uuid, criu_run_id); -+ pr_info("CRIU run id = %s\n", criu_run_id); -+ memcpy(compel_run_id, criu_run_id, sizeof(criu_run_id)); - } - - /* -diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine -index 329d7791de..d843793ea2 100644 ---- a/scripts/build/Dockerfile.alpine -+++ b/scripts/build/Dockerfile.alpine -@@ -24,7 +24,8 @@ RUN apk update && apk add \ - sudo \ - libcap-utils \ - libdrm-dev \ -- util-linux -+ util-linux \ -+ util-linux-dev - - COPY . /criu - WORKDIR /criu -diff --git a/scripts/build/Dockerfile.amd-rocm b/scripts/build/Dockerfile.amd-rocm -index c466a73d2d..ed66ae4fec 100644 ---- a/scripts/build/Dockerfile.amd-rocm -+++ b/scripts/build/Dockerfile.amd-rocm -@@ -56,6 +56,7 @@ RUN apt-get clean -qqy && apt-get update -qqy && apt-get install -qqy --no-insta - python-protobuf \ - python3-minimal \ - python-ipaddress \ -+ uuid-dev \ - curl \ - wget \ - vim \ -diff --git a/scripts/build/Dockerfile.archlinux b/scripts/build/Dockerfile.archlinux -index 4056514891..9d11194bb0 100644 ---- a/scripts/build/Dockerfile.archlinux -+++ b/scripts/build/Dockerfile.archlinux -@@ -35,6 +35,7 @@ RUN pacman -Syu --noconfirm \ - python-junit-xml \ - python-importlib-metadata \ - libdrm \ -+ util-linux-libs \ - diffutils - - COPY . /criu -diff --git a/scripts/build/Dockerfile.hotspot-alpine b/scripts/build/Dockerfile.hotspot-alpine -index cb9332fd0c..6caf9d0b1b 100644 ---- a/scripts/build/Dockerfile.hotspot-alpine -+++ b/scripts/build/Dockerfile.hotspot-alpine -@@ -19,6 +19,7 @@ RUN apk update && apk add \ - maven \ - ip6tables \ - iptables \ -+ util-linux-dev \ - bash - - COPY . /criu -diff --git a/scripts/build/Dockerfile.hotspot-ubuntu b/scripts/build/Dockerfile.hotspot-ubuntu -index 0318f650f3..67de916acb 100644 ---- a/scripts/build/Dockerfile.hotspot-ubuntu -+++ b/scripts/build/Dockerfile.hotspot-ubuntu -@@ -22,6 +22,7 @@ RUN apt-install protobuf-c-compiler \ - pkg-config \ - iptables \ - gcc \ -+ uuid-dev \ - maven - - COPY . /criu -diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl -index 13e9926424..d218e06414 100644 ---- a/scripts/build/Dockerfile.linux32.tmpl -+++ b/scripts/build/Dockerfile.linux32.tmpl -@@ -21,6 +21,7 @@ RUN apt-install \ - pkg-config \ - protobuf-c-compiler \ - protobuf-compiler \ -+ uuid-dev \ - python3-minimal - - COPY . /criu -diff --git a/scripts/ci/prepare-for-fedora-rawhide.sh b/scripts/ci/prepare-for-fedora-rawhide.sh -index 09085c403b..42252c93c9 100755 ---- a/scripts/ci/prepare-for-fedora-rawhide.sh -+++ b/scripts/ci/prepare-for-fedora-rawhide.sh -@@ -36,6 +36,7 @@ dnf install -y \ - e2fsprogs \ - rubygem-asciidoctor \ - libdrm-devel \ -+ libuuid-devel \ - kmod - - # /tmp is no longer 755 in the rawhide container image and breaks CI - fix it - -From c39bce3cf17782784d1a14cf40a4cedd059059fa Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Thu, 23 Jan 2025 17:42:45 +0000 -Subject: [PATCH 2/2] net: remember the name of the lock chain (nftables) - -Using libnftables the chain to lock the network is composed of -("CRIU-%d", real_pid). This leads to around 40 zdtm tests failing -with errors like this: - -Error: No such file or directory; did you mean table 'CRIU-62' in family inet? -delete table inet CRIU-86 - -The reason is that as soon as a process is running in a namespace the -real PID can be anything and only the PID in the namespace is restored -correctly. Relying on the real PID does not work for the chain name. - -Using the PID of the innermost namespace would lead to the chain be -called 'CRIU-1' most of the time which is also not really unique. - -With this commit the change is now named using the already existing CRIU -run ID. To be able to correctly restore the process and delete the -locking table, the CRIU run id during checkpointing is now stored in the -inventory as dump_criu_run_id. - -Signed-off-by: Adrian Reber ---- - criu/image.c | 30 ++++++++++++++++++++++++++++++ - criu/include/util.h | 2 ++ - criu/netfilter.c | 20 +++++++++++++++++++- - images/inventory.proto | 4 ++++ - 4 files changed, 55 insertions(+), 1 deletion(-) - -diff --git a/criu/image.c b/criu/image.c -index 9589167fb1..f3747d6ff5 100644 ---- a/criu/image.c -+++ b/criu/image.c -@@ -25,6 +25,7 @@ bool img_common_magic = true; - TaskKobjIdsEntry *root_ids; - u32 root_cg_set; - Lsmtype image_lsm; -+char dump_criu_run_id[RUN_ID_HASH_LENGTH]; - - int check_img_inventory(bool restore) - { -@@ -120,6 +121,24 @@ int check_img_inventory(bool restore) - } else { - opts.network_lock_method = he->network_lock_method; - } -+ -+ /** -+ * This contains the criu_run_id during dumping of the process. -+ * For things like removing network locking (nftables) this -+ * information is needed to identify the name of the network -+ * locking table. -+ */ -+ if (he->dump_criu_run_id) { -+ strncpy(dump_criu_run_id, he->dump_criu_run_id, sizeof(dump_criu_run_id) - 1); -+ pr_info("Dump CRIU run id = %s\n", dump_criu_run_id); -+ } else { -+ /** -+ * If restoring from an old image this is a marker -+ * that no dump_criu_run_id exists. -+ */ -+ dump_criu_run_id[0] = NO_DUMP_CRIU_RUN_ID; -+ } -+ - } - - ret = 0; -@@ -367,6 +386,17 @@ int prepare_inventory(InventoryEntry *he) - he->has_network_lock_method = true; - he->network_lock_method = opts.network_lock_method; - -+ /** -+ * This contains the criu_run_id during dumping of the process. -+ * For things like removing network locking (nftables) this -+ * information is needed to identify the name of the network -+ * locking table. -+ */ -+ he->dump_criu_run_id = xstrdup(criu_run_id); -+ -+ if (!he->dump_criu_run_id) -+ return -1; -+ - return 0; - } - -diff --git a/criu/include/util.h b/criu/include/util.h -index 194e94deeb..55ad5b63cf 100644 ---- a/criu/include/util.h -+++ b/criu/include/util.h -@@ -424,6 +424,8 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void - */ - extern char criu_run_id[RUN_ID_HASH_LENGTH]; - extern void util_init(void); -+#define NO_DUMP_CRIU_RUN_ID 0x7f -+extern char dump_criu_run_id[RUN_ID_HASH_LENGTH]; - - extern char *resolve_mountpoint(char *path); - -diff --git a/criu/netfilter.c b/criu/netfilter.c -index 9e78dc4b03..e2c82764f2 100644 ---- a/criu/netfilter.c -+++ b/criu/netfilter.c -@@ -299,7 +299,25 @@ int nftables_lock_connection(struct inet_sk_desc *sk) - - int nftables_get_table(char *table, int n) - { -- if (snprintf(table, n, "inet CRIU-%d", root_item->pid->real) < 0) { -+ int ret; -+ -+ switch(dump_criu_run_id[0]) { -+ case 0: -+ /* This is not a restore.*/ -+ ret = snprintf(table, n, "inet CRIU-%s", criu_run_id); -+ break; -+ case NO_DUMP_CRIU_RUN_ID: -+ /** -+ * This is a restore from an older image with no -+ * dump_criu_run_id available. Let's use the old ID. -+ */ -+ ret = snprintf(table, n, "inet CRIU-%d", root_item->pid->real); -+ break; -+ default: -+ ret = snprintf(table, n, "inet CRIU-%s", dump_criu_run_id); -+ } -+ -+ if (ret < 0) { - pr_err("Cannot generate CRIU's nftables table name\n"); - return -1; - } -diff --git a/images/inventory.proto b/images/inventory.proto -index 7f655031bc..1e18815bb9 100644 ---- a/images/inventory.proto -+++ b/images/inventory.proto -@@ -29,4 +29,8 @@ message inventory_entry { - optional uint32 pre_dump_mode = 9; - optional bool tcp_close = 10; - optional uint32 network_lock_method = 11; -+ // Remember the criu_run_id when CRIU dumped the process. -+ // This is currently used to delete the correct nftables -+ // network locking rule. -+ optional string dump_criu_run_id = 13; - } diff --git a/2570.patch b/2570.patch deleted file mode 100644 index d504860..0000000 --- a/2570.patch +++ /dev/null @@ -1,452 +0,0 @@ -From ed2468f0a3c1c3c3b40b41047ffd97ce32346a4e Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Wed, 22 Jan 2025 14:35:26 +0100 -Subject: [PATCH] vdso: switch from DT_HASH to DT_GNU_HASH (aarch64) - -Trying to run latest CRIU on CentOS Stream 10 or Ubuntu 24.04 (aarch64) -fails like this: - - # criu/criu check -v4 - [...] - (00.096460) vdso: Parsing at ffffb2e2a000 ffffb2e2c000 - (00.096539) vdso: PT_LOAD p_vaddr: 0 - (00.096567) vdso: DT_STRTAB: 1d0 - (00.096592) vdso: DT_SYMTAB: 128 - (00.096616) vdso: DT_STRSZ: 8a - (00.096640) vdso: DT_SYMENT: 18 - (00.096663) Error (criu/pie-util-vdso.c:193): vdso: Not all dynamic entries are present - (00.096688) Error (criu/vdso.c:627): vdso: Failed to fill self vdso symtable - (00.096713) Error (criu/kerndat.c:1906): kerndat_vdso_fill_symtable failed when initializing kerndat. - (00.096812) Found mmap_min_addr 0x10000 - (00.096881) files stat: fs/nr_open 1073741816 - (00.096908) Error (criu/crtools.c:267): Could not initialize kernel features detection. - -This seems to be related to the kernel (6.12.0-41.el10.aarch64). The -Ubuntu user-space is running in a container on the same kernel. - -Looking at the kernel this seems to be related to: - - commit 48f6430505c0b0498ee9020ce3cf9558b1caaaeb - Author: Fangrui Song - Date: Thu Jul 18 10:34:23 2024 -0700 - - arm64/vdso: Remove --hash-style=sysv - - glibc added support for .gnu.hash in 2006 and .hash has been obsoleted - for more than one decade in many Linux distributions. Using - --hash-style=sysv might imply unaddressed issues and confuse readers. - - Just drop the option and rely on the linker default, which is likely - "both", or "gnu" when the distribution really wants to eliminate sysv - hash overhead. - - Similar to commit 6b7e26547fad ("x86/vdso: Emit a GNU hash"). - -The commit basically does: - - -ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ - +ldflags-y := -shared -soname=linux-vdso.so.1 \ - -Which results in only a GNU hash being added to the ELF header. This -change has been merged with 6.11. - -Looking at the referenced x86 commit: - - commit 6b7e26547fad7ace3dcb27a5babd2317fb9d1e12 - Author: Andy Lutomirski - Date: Thu Aug 6 14:45:45 2015 -0700 - - x86/vdso: Emit a GNU hash - - Some dynamic loaders may be slightly faster if a GNU hash is - available. Strangely, this seems to have no effect at all on - the vdso size. - - This is unlikely to have any measurable effect on the time it - takes to resolve vdso symbols (since there are so few of them). - In some contexts, it can be a win for a different reason: if - every DSO has a GNU hash section, then libc can avoid - calculating SysV hashes at all. Both musl and glibc appear to - have this optimization. - - It's plausible that this breaks some ancient glibc version. If - so, then, depending on what glibc versions break, we could - either require COMPAT_VDSO for them or consider reverting. - -Which is also a really simple change: - - -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ - +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ - -The big difference here is that for x86 both hash sections are -generated. For aarch64 only the newer GNU hash is generated. That is why -we only see this error on kernel >= 6.11 and aarch64. - -Changing from DT_HASH to DT_GNU_HASH seems to work on aarch64. The test -suite runs without any errors. - -Unfortunately I am not aware of all implication of this change and if a -successful test suite run means that it still works. - -Looking at the kernel I see following hash styles for the VDSO: - -aarch64: not specified (only GNU hash style) -arm: --hash-style=sysv -loongarch: --hash-style=sysv -mips: --hash-style=sysv -powerpc: --hash-style=both -riscv: --hash-style=both -s390: --hash-style=both -x86: --hash-style=both - -Only aarch64 on kernels >= 6.11 is a problem right now, because all -other platforms provide the old style hashing. - -Signed-off-by: Adrian Reber -Co-developed-by: Dmitry Safonov -Co-authored-by: Dmitry Safonov -Signed-off-by: Dmitry Safonov ---- - criu/pie/util-vdso.c | 245 ++++++++++++++++++++++++++++++++++--------- - 1 file changed, 198 insertions(+), 47 deletions(-) - -diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c -index f1e3239ff5..9819335d81 100644 ---- a/criu/pie/util-vdso.c -+++ b/criu/pie/util-vdso.c -@@ -5,6 +5,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -48,10 +49,25 @@ static bool __ptr_struct_oob(uintptr_t ptr, size_t struct_size, uintptr_t start, - return __ptr_oob(ptr, start, size) || __ptr_struct_end_oob(ptr, struct_size, start, size); - } - -+/* Local strlen implementation */ -+static size_t __strlen(const char *str) -+{ -+ const char *ptr; -+ -+ if (!str) -+ return 0; -+ -+ ptr = str; -+ while (*ptr != '\0') -+ ptr++; -+ -+ return ptr - str; -+} -+ - /* - * Elf hash, see format specification. - */ --static unsigned long elf_hash(const unsigned char *name) -+static unsigned long elf_sysv_hash(const unsigned char *name) - { - unsigned long h = 0, g; - -@@ -65,6 +81,15 @@ static unsigned long elf_hash(const unsigned char *name) - return h; - } - -+/* * The GNU hash format. Taken from glibc. */ -+static unsigned long elf_gnu_hash(const unsigned char *name) -+{ -+ unsigned long h = 5381; -+ for (unsigned char c = *name; c != '\0'; c = *++name) -+ h = h * 33 + c; -+ return h; -+} -+ - #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - #define BORD ELFDATA2MSB /* 0x02 */ - #else -@@ -149,11 +174,14 @@ static int parse_elf_phdr(uintptr_t mem, size_t size, Phdr_t **dynamic, Phdr_t * - * Output parameters are: - * @dyn_strtab - address of the symbol table - * @dyn_symtab - address of the string table section -- * @dyn_hash - address of the symbol hash table -+ * @dyn_hash - address of the symbol hash table -+ * @use_gnu_hash - the format of hash DT_HASH or DT_GNU_HASH - */ --static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t **dyn_strtab, Dyn_t **dyn_symtab, -- Dyn_t **dyn_hash) -+static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, -+ Dyn_t **dyn_strtab, Dyn_t **dyn_symtab, -+ Dyn_t **dyn_hash, bool *use_gnu_hash) - { -+ Dyn_t *dyn_gnu_hash = NULL, *dyn_sysv_hash = NULL; - Dyn_t *dyn_syment = NULL; - Dyn_t *dyn_strsz = NULL; - uintptr_t addr; -@@ -184,16 +212,52 @@ static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t - dyn_syment = d; - pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val); - } else if (d->d_tag == DT_HASH) { -- *dyn_hash = d; -+ dyn_sysv_hash = d; - pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr); -+ } else if (d->d_tag == DT_GNU_HASH) { -+ /* -+ * This is complicated. -+ * -+ * Looking at the Linux kernel source, the following can be seen -+ * regarding which hashing style the VDSO uses on each arch: -+ * -+ * aarch64: not specified (depends on linker, can be -+ * only GNU hash style) -+ * arm: --hash-style=sysv -+ * loongarch: --hash-style=sysv -+ * mips: --hash-style=sysv -+ * powerpc: --hash-style=both -+ * riscv: --hash-style=both -+ * s390: --hash-style=both -+ * x86: --hash-style=both -+ * -+ * Some architectures are using both hash-styles, that -+ * is the easiest for CRIU. Some architectures are only -+ * using the old style (sysv), that is what CRIU supports. -+ * -+ * Starting with Linux 6.11, aarch64 unfortunately decided -+ * to switch from '--hash-style=sysv' to ''. Specifying -+ * nothing unfortunately may mean GNU hash style only and not -+ * 'both' (depending on the linker). -+ */ -+ dyn_gnu_hash = d; -+ pr_debug("DT_GNU_HASH: %lx\n", (unsigned long)d->d_un.d_ptr); - } - } - -- if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || !*dyn_hash) { -+ if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || -+ (!dyn_gnu_hash && !dyn_sysv_hash)) { - pr_err("Not all dynamic entries are present\n"); - return -EINVAL; - } - -+ /* -+ * Prefer DT_HASH over DT_GNU_HASH as it's been more tested and -+ * as a result more stable. -+ */ -+ *use_gnu_hash = !dyn_sysv_hash; -+ *dyn_hash = dyn_sysv_hash ?: dyn_gnu_hash; -+ - return 0; - - err_oob: -@@ -208,60 +272,141 @@ typedef unsigned long Hash_t; - typedef Word_t Hash_t; - #endif - --static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, struct vdso_symtable *t, -- uintptr_t dynsymbol_names, Hash_t *hash, Dyn_t *dyn_symtab) -+static bool elf_symbol_match(uintptr_t mem, size_t size, -+ uintptr_t dynsymbol_names, Sym_t *sym, -+ const char *symbol, const size_t vdso_symbol_length) - { -- ARCH_VDSO_SYMBOLS_LIST -- -- const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS }; -- const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1; -+ uintptr_t addr = (uintptr_t)sym; -+ char *name; - -- Hash_t nbucket, nchain; -- Hash_t *bucket, *chain; -+ if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) -+ return false; - -- unsigned int i, j, k; -- uintptr_t addr; -+ if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL) -+ return false; - -- nbucket = hash[0]; -- nchain = hash[1]; -- bucket = &hash[2]; -- chain = &hash[nbucket + 2]; -+ addr = dynsymbol_names + sym->st_name; -+ if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size)) -+ return false; -+ name = (void *)addr; - -- pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", (long)nbucket, (long)nchain, (unsigned long)bucket, -- (unsigned long)chain); -+ return !std_strncmp(name, symbol, vdso_symbol_length); -+} - -- for (i = 0; i < VDSO_SYMBOL_MAX; i++) { -- const char *symbol = vdso_symbols[i]; -- k = elf_hash((const unsigned char *)symbol); - -- for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { -- Sym_t *sym; -- char *name; -+static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size, -+ const char *symbol, uint32_t symbol_hash, unsigned int sym_off, -+ uintptr_t dynsymbol_names, Dyn_t *dyn_symtab, Phdr_t *load, -+ Hash_t nbucket, Hash_t nchain, Hash_t *bucket, Hash_t *chain, -+ const size_t vdso_symbol_length, bool use_gnu_hash) -+{ -+ unsigned int j; -+ uintptr_t addr; - -- addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; -+ j = bucket[symbol_hash % nbucket]; -+ if (j == STN_UNDEF) -+ return 0; -+ -+ addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; -+ -+ if (use_gnu_hash) { -+ uint32_t *h = bucket + nbucket + (j - sym_off); -+ uint32_t hash_val; -+ -+ symbol_hash |= 1; -+ do { -+ Sym_t *sym = (void *)addr + sizeof(Sym_t) * j; -+ -+ hash_val = *h++; -+ if ((hash_val | 1) == symbol_hash && -+ elf_symbol_match(mem, size, dynsymbol_names, sym, -+ symbol, vdso_symbol_length)) -+ return sym->st_value; -+ j++; -+ } while (!(hash_val & 1)); -+ } else { -+ for (; j < nchain && j != STN_UNDEF; j = chain[j]) { -+ Sym_t *sym = (void *)addr + sizeof(Sym_t) * j; -+ -+ if (elf_symbol_match(mem, size, dynsymbol_names, sym, -+ symbol, vdso_symbol_length)) -+ return sym->st_value; -+ } -+ } -+ return 0; -+} - -- addr += sizeof(Sym_t) * j; -- if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) -- continue; -- sym = (void *)addr; -+static int parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, -+ struct vdso_symtable *t, uintptr_t dynsymbol_names, -+ Hash_t *hash, Dyn_t *dyn_symtab, bool use_gnu_hash) -+{ -+ ARCH_VDSO_SYMBOLS_LIST - -- if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL) -- continue; -+ const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS }; -+ const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1; - -- addr = dynsymbol_names + sym->st_name; -- if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size)) -- continue; -- name = (void *)addr; -+ Hash_t *bucket = NULL; -+ Hash_t *chain = NULL; -+ Hash_t nbucket = 0; -+ Hash_t nchain = 0; -+ -+ unsigned int sym_off = 0; -+ unsigned int i = 0; -+ -+ unsigned long (*elf_hash)(const unsigned char *); -+ -+ if (use_gnu_hash) { -+ uint32_t *gnu_hash = (uint32_t *)hash; -+ uint32_t bloom_sz; -+ size_t *bloom; -+ -+ nbucket = gnu_hash[0]; -+ sym_off = gnu_hash[1]; -+ bloom_sz = gnu_hash[2]; -+ bloom = (size_t *)&gnu_hash[4]; -+ bucket = (Hash_t *)(&bloom[bloom_sz]); -+ elf_hash = &elf_gnu_hash; -+ pr_debug("nbucket %lx sym_off %lx bloom_sz %lx bloom %lx bucket %lx\n", -+ (unsigned long)nbucket, (unsigned long)sym_off, -+ (unsigned long)bloom_sz, (unsigned long)bloom, -+ (unsigned long)bucket); -+ } else { -+ nbucket = hash[0]; -+ nchain = hash[1]; -+ bucket = &hash[2]; -+ chain = &hash[nbucket + 2]; -+ elf_hash = &elf_sysv_hash; -+ pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", -+ (unsigned long)nbucket, (unsigned long)nchain, -+ (unsigned long)bucket, (unsigned long)chain); -+ } - -- if (std_strncmp(name, symbol, vdso_symbol_length)) -- continue; - -- /* XXX: provide strncpy() implementation for PIE */ -- memcpy(t->symbols[i].name, name, vdso_symbol_length); -- t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr; -- break; -+ for (i = 0; i < VDSO_SYMBOL_MAX; i++) { -+ const char *symbol = vdso_symbols[i]; -+ unsigned long addr, symbol_hash; -+ const size_t symbol_length = __strlen(symbol); -+ -+ symbol_hash = elf_hash((const unsigned char *)symbol); -+ addr = elf_symbol_lookup(mem, size, symbol, symbol_hash, -+ sym_off, dynsymbol_names, dyn_symtab, load, -+ nbucket, nchain, bucket, chain, -+ vdso_symbol_length, use_gnu_hash); -+ pr_debug("symbol %s at address %lx\n", symbol, addr); -+ if (!addr) -+ continue; -+ -+ /* XXX: provide strncpy() implementation for PIE */ -+ if (symbol_length > vdso_symbol_length) { -+ pr_err("strlen(%s) %zd, only %zd bytes available\n", -+ symbol, symbol_length, vdso_symbol_length); -+ return -EINVAL; - } -+ memcpy(t->symbols[i].name, symbol, symbol_length); -+ t->symbols[i].offset = addr - load->p_vaddr; - } -+ -+ return 0; - } - - int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) -@@ -271,6 +416,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) - Dyn_t *dyn_symtab = NULL; - Dyn_t *dyn_hash = NULL; - Hash_t *hash = NULL; -+ bool use_gnu_hash; - - uintptr_t dynsymbol_names; - uintptr_t addr; -@@ -296,7 +442,8 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) - * needed. Note that we're interested in a small set of tags. - */ - -- ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, &dyn_hash); -+ ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, -+ &dyn_hash, &use_gnu_hash); - if (ret < 0) - return ret; - -@@ -310,7 +457,11 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) - goto err_oob; - hash = (void *)addr; - -- parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab); -+ ret = parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab, -+ use_gnu_hash); -+ -+ if (ret <0) -+ return ret; - - return 0; - diff --git a/2590.patch b/2590.patch deleted file mode 100644 index b890e68..0000000 --- a/2590.patch +++ /dev/null @@ -1,38 +0,0 @@ -From de5dba8c47ffe4e16fae17539270d55e1a8604d1 Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Fri, 7 Feb 2025 09:24:19 +0100 -Subject: [PATCH] vdso: handle s390x correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -On s390x there is currently a build failure: - -criu/pie/util-vdso.c: In function ‘elf_symbol_lookup’: -criu/pie/util-vdso.c:313:31: error: initialization of ‘uint32_t *’ {aka ‘unsigned int *’} from incompatible pointer type ‘Hash_t *’ {aka ‘long unsigned int *’} [-Wincompatible-pointer-types] - 313 | uint32_t *h = bucket + nbucket + (j - sym_off); - | ^~~~~~ - -Replacing uint32_t with Hash_t which is defined behind a architecture -specific ifdef solves this error. - -Signed-off-by: Adrian Reber ---- - criu/pie/util-vdso.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c -index 9819335d81..d16fd85f43 100644 ---- a/criu/pie/util-vdso.c -+++ b/criu/pie/util-vdso.c -@@ -310,8 +310,8 @@ static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size, - addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; - - if (use_gnu_hash) { -- uint32_t *h = bucket + nbucket + (j - sym_off); -- uint32_t hash_val; -+ Hash_t *h = bucket + nbucket + (j - sym_off); -+ Hash_t hash_val; - - symbol_hash |= 1; - do { diff --git a/2648.patch b/2648.patch new file mode 100644 index 0000000..274ac95 --- /dev/null +++ b/2648.patch @@ -0,0 +1,154 @@ +From 5813fcabd6a42eaecdb9972e064f176660fd0e6c Mon Sep 17 00:00:00 2001 +From: Younes Manton +Date: Tue, 23 Jan 2024 08:22:07 -0800 +Subject: [PATCH] s390: Fix FP reg restore after parasite code runs + +Currently we save FP regs before parasite code runs, and restore after +for --leave-running, --check-only, and in case of errors. In case of +errors the error may have happened before FP regs were saved, so we +should only restore them if they were actually saved. + +Signed-off-by: Younes Manton +--- + criu/arch/s390/crtools.c | 90 +++++++++++++++++++++++----------------- + 1 file changed, 52 insertions(+), 38 deletions(-) + +diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c +index 96cef819e3..e08c838783 100644 +--- a/criu/arch/s390/crtools.c ++++ b/criu/arch/s390/crtools.c +@@ -142,6 +142,29 @@ static void print_core_fp_regs(const char *msg, CoreEntry *core) + print_core_ri_cb(core); + } + ++/* ++ * Allocate floating point registers ++ */ ++static UserS390FpregsEntry *allocate_fp_regs(void) ++{ ++ UserS390FpregsEntry *fpregs; ++ ++ fpregs = xmalloc(sizeof(*fpregs)); ++ if (!fpregs) ++ return NULL; ++ user_s390_fpregs_entry__init(fpregs); ++ ++ fpregs->n_fprs = 16; ++ fpregs->fprs = xzalloc(16 * sizeof(uint64_t)); ++ if (!fpregs->fprs) ++ goto fail_free_fpregs; ++ return fpregs; ++ ++fail_free_fpregs: ++ xfree(fpregs); ++ return NULL; ++} ++ + /* + * Allocate VxrsLow registers + */ +@@ -294,7 +317,13 @@ int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_stru + CoreEntry *core = arg; + + gpregs = CORE_THREAD_ARCH_INFO(core)->gpregs; +- fpregs = CORE_THREAD_ARCH_INFO(core)->fpregs; ++ /* ++ * We delay allocating this until now because checkpointing can fail earlier. ++ * When it fails we need to know if we reached here or not so that the cleanup ++ * code doesn't restore FPRs that were never saved in the first place. ++ */ ++ fpregs = allocate_fp_regs(); ++ CORE_THREAD_ARCH_INFO(core)->fpregs = fpregs; + + /* Vector registers */ + if (f->flags & USER_FPREGS_VXRS) { +@@ -399,36 +428,15 @@ int restore_fpu(struct rt_sigframe *f, CoreEntry *core) + return 0; + } + +-/* +- * Allocate floating point registers +- */ +-static UserS390FpregsEntry *allocate_fp_regs(void) +-{ +- UserS390FpregsEntry *fpregs; +- +- fpregs = xmalloc(sizeof(*fpregs)); +- if (!fpregs) +- return NULL; +- user_s390_fpregs_entry__init(fpregs); +- +- fpregs->n_fprs = 16; +- fpregs->fprs = xzalloc(16 * sizeof(uint64_t)); +- if (!fpregs->fprs) +- goto fail_free_fpregs; +- return fpregs; +- +-fail_free_fpregs: +- xfree(fpregs); +- return NULL; +-} +- + /* + * Free floating point registers + */ + static void free_fp_regs(UserS390FpregsEntry *fpregs) + { +- xfree(fpregs->fprs); +- xfree(fpregs); ++ if (fpregs) { ++ xfree(fpregs->fprs); ++ xfree(fpregs); ++ } + } + + /* +@@ -487,15 +495,17 @@ int arch_alloc_thread_info(CoreEntry *core) + ti_s390->gpregs = allocate_gp_regs(); + if (!ti_s390->gpregs) + goto fail_free_ti_s390; +- ti_s390->fpregs = allocate_fp_regs(); +- if (!ti_s390->fpregs) +- goto fail_free_gp_regs; ++ ++ /* ++ * Delay allocating space until needed. Checkpointing can fail before that ++ * and the cleanup code needs to be able to tell if FPRs were saved or not ++ * before trying to restore the register state. ++ */ ++ ti_s390->fpregs = NULL; + + CORE_THREAD_ARCH_INFO(core) = ti_s390; + return 0; + +-fail_free_gp_regs: +- free_gp_regs(ti_s390->gpregs); + fail_free_ti_s390: + xfree(ti_s390); + return -1; +@@ -678,14 +688,18 @@ static int set_task_regs(pid_t pid, CoreEntry *core) + user_fpregs_struct_t fpregs; + + memset(&fpregs, 0, sizeof(fpregs)); +- /* Floating point registers */ ++ /* ++ * Floating point registers ++ * Optional on checkpoint; checkpoint may have failed and we may reach here as part of cleanup ++ * so there's no guarantee that we saved FPRs for this thread. ++ */ + cfpregs = CORE_THREAD_ARCH_INFO(core)->fpregs; +- if (!cfpregs) +- return -1; +- fpregs.prfpreg.fpc = cfpregs->fpc; +- memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs)); +- if (set_fp_regs(pid, &fpregs) < 0) +- return -1; ++ if (cfpregs) { ++ fpregs.prfpreg.fpc = cfpregs->fpc; ++ memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs)); ++ if (set_fp_regs(pid, &fpregs) < 0) ++ return -1; ++ } + /* Vector registers (optional) */ + cvxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low; + if (cvxrs_low != NULL) { diff --git a/2653.patch b/2653.patch new file mode 100644 index 0000000..bcb96fd --- /dev/null +++ b/2653.patch @@ -0,0 +1,134 @@ +From 22fdffbdde9476b27988b3ee0a4013a4453784c9 Mon Sep 17 00:00:00 2001 +From: Andrei Vagin +Date: Mon, 21 Apr 2025 06:33:41 +0000 +Subject: [PATCH] net: nftables: avoid restore failure if the CRIU nft table + already exist + +CRIU locks the network during restore in an "empty" network namespace. +However, "empty" in this context means CRIU isn't restoring the +namespace. This network namespace can be the same namespace where +processes have been dumped and so the network is already locked in it. + +Fixes #2650 + +Signed-off-by: Andrei Vagin +--- + criu/cr-restore.c | 2 +- + criu/include/net.h | 2 +- + criu/net.c | 30 +++++++++++++++++------------- + 3 files changed, 19 insertions(+), 15 deletions(-) + +diff --git a/criu/cr-restore.c b/criu/cr-restore.c +index 583b446e0b..30932f60a2 100644 +--- a/criu/cr-restore.c ++++ b/criu/cr-restore.c +@@ -2119,7 +2119,7 @@ static int restore_root_task(struct pstree_item *init) + * the '--empty-ns net' mode no iptables C/R is done and we + * need to return these rules by hands. + */ +- ret = network_lock_internal(); ++ ret = network_lock_internal(/* restore = */ true); + if (ret) + goto out_kill; + } +diff --git a/criu/include/net.h b/criu/include/net.h +index 5e8a848620..7c5ede21e1 100644 +--- a/criu/include/net.h ++++ b/criu/include/net.h +@@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump); + + extern int network_lock(void); + extern void network_unlock(void); +-extern int network_lock_internal(void); ++extern int network_lock_internal(bool restore); + + extern struct ns_desc net_ns_desc; + +diff --git a/criu/net.c b/criu/net.c +index ee46f1c495..300df480b0 100644 +--- a/criu/net.c ++++ b/criu/net.c +@@ -3206,12 +3206,12 @@ static inline FILE *redirect_nftables_output(struct nft_ctx *nft) + } + #endif + +-static inline int nftables_lock_network_internal(void) ++static inline int nftables_lock_network_internal(bool restore) + { + #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + cleanup_file FILE *fp = NULL; + struct nft_ctx *nft; +- int ret = 0; ++ int ret = 0, exit_code = -1; + char table[32]; + char buf[128]; + +@@ -3224,11 +3224,16 @@ static inline int nftables_lock_network_internal(void) + + fp = redirect_nftables_output(nft); + if (!fp) +- goto out; ++ goto err2; + + snprintf(buf, sizeof(buf), "create table %s", table); +- if (NFT_RUN_CMD(nft, buf)) ++ ret = NFT_RUN_CMD(nft, buf); ++ if (ret) { ++ /* The network has been locked on dump. */ ++ if (restore && errno == EEXIST) ++ return 0; + goto err2; ++ } + + snprintf(buf, sizeof(buf), "add chain %s output { type filter hook output priority 0; policy drop; }", table); + if (NFT_RUN_CMD(nft, buf)) +@@ -3246,17 +3251,16 @@ static inline int nftables_lock_network_internal(void) + if (NFT_RUN_CMD(nft, buf)) + goto err1; + +- goto out; +- ++ exit_code = 0; ++out: ++ nft_ctx_free(nft); ++ return exit_code; + err1: + snprintf(buf, sizeof(buf), "delete table %s", table); + NFT_RUN_CMD(nft, buf); + err2: +- ret = -1; + pr_err("Locking network failed using nftables\n"); +-out: +- nft_ctx_free(nft); +- return ret; ++ goto out; + #else + pr_err("CRIU was built without libnftables support\n"); + return -1; +@@ -3288,7 +3292,7 @@ static int iptables_network_lock_internal(void) + return ret; + } + +-int network_lock_internal(void) ++int network_lock_internal(bool restore) + { + int ret = 0, nsret; + +@@ -3301,7 +3305,7 @@ int network_lock_internal(void) + if (opts.network_lock_method == NETWORK_LOCK_IPTABLES) + ret = iptables_network_lock_internal(); + else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES) +- ret = nftables_lock_network_internal(); ++ ret = nftables_lock_network_internal(restore); + + if (restore_ns(nsret, &net_ns_desc)) + ret = -1; +@@ -3427,7 +3431,7 @@ int network_lock(void) + if (run_scripts(ACT_NET_LOCK)) + return -1; + +- return network_lock_internal(); ++ return network_lock_internal(false); + } + + void network_unlock(void) diff --git a/2662.patch b/2662.patch new file mode 100644 index 0000000..fa1f095 --- /dev/null +++ b/2662.patch @@ -0,0 +1,36 @@ +From 45d187f9147a9081cdd2df3f8ac6518eee14c9c0 Mon Sep 17 00:00:00 2001 +From: Radostin Stoyanov +Date: Wed, 7 May 2025 14:06:55 +0100 +Subject: [PATCH] sk-inet: add message how to disable MPTCP in Go + +With Go version 1.24, ListenConfig now uses MPTCP by default [1]. +Checkpoint/restore for this protocol is not currently supported +and adding support requires kernel changes that are not trivial +to implement. As a result, checkpointing of many containers that +run Go programs is likely to fail with the following error [2]: + +(00.026522) Error (criu/sk-inet.c:130): inet: Unsupported proto 262 for socket 2f9bc5 + +This patch adds a message with suggested workaround for this problem. + +[1] https://go.dev/doc/go1.24#netpkgnet +[2] https://github.com/checkpoint-restore/criu/issues/2655 + +Signed-off-by: Radostin Stoyanov +--- + criu/sk-inet.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/criu/sk-inet.c b/criu/sk-inet.c +index 92f53e5697..a191e78c48 100644 +--- a/criu/sk-inet.c ++++ b/criu/sk-inet.c +@@ -128,6 +128,8 @@ static int can_dump_ipproto(unsigned int ino, int proto, int type) + break; + default: + pr_err("Unsupported proto %d for socket %x\n", proto, ino); ++ if (proto == IPPROTO_MPTCP) ++ pr_err("For Go programs, consider using \"GODEBUG=multipathtcp=0\" to disable MPTCP\n"); + return 0; + } + diff --git a/criu.spec b/criu.spec index d066bc9..513eeae 100644 --- a/criu.spec +++ b/criu.spec @@ -11,30 +11,22 @@ %undefine _auto_set_build_flags Name: criu -Version: 3.19 -Release: 9%{?dist} +Version: 4.1 +Release: 1%{?dist} Summary: Tool for Checkpoint/Restore in User-space License: GPL-2.0-only AND LGPL-2.1-only AND MIT URL: http://criu.org/ Source0: https://github.com/checkpoint-restore/criu/archive/v%{version}/criu-%{version}.tar.gz -# This switches the default network locking backend from -# iptables to nftables -Patch0: network.lock.nftables.patch -# Update restartable sequences to latest upstream code -Patch1: https://github.com/checkpoint-restore/criu/commit/089345f77a34d1bc7ef146d650636afcd3cdda21.patch +# net: nftables: avoid restore failure if the CRIU nft table already exist +Patch0: https://github.com/checkpoint-restore/criu/pull/2653.patch +# s390: Fix FP reg restore after parasite code runs +Patch1: https://github.com/checkpoint-restore/criu/pull/2648.patch +# sk-inet: add message how to disable MPTCP in Go +Patch2: https://github.com/checkpoint-restore/criu/pull/2662.patch # Unfortunately crun added code to always force # iptables backed network locking. This disables # setting the network locking to iptables via RPC. -Patch2: disable.network.locking.via.rpc.patch -# net: redirect nftables stdout and stderr to CRIU's log file #2549 -Patch3: https://patch-diff.githubusercontent.com/raw/checkpoint-restore/criu/pull/2549.patch -# net: remember the name of the lock chain (nftables) #2550 -# based on https://patch-diff.githubusercontent.com/raw/checkpoint-restore/criu/pull/2550.patch -Patch4: 2550.patch -# vdso: switch from DT_HASH to DT_GNU_HASH (aarch64) #2570 -Patch5: https://patch-diff.githubusercontent.com/raw/checkpoint-restore/criu/pull/2570.patch -# vdso: handle s390x correctly #2590 -Patch6: https://github.com/checkpoint-restore/criu/pull/2590.patch +Patch3: disable.network.locking.via.rpc.patch # Add protobuf-c as a dependency. # We use this patch because the protobuf-c package name @@ -120,9 +112,6 @@ This script can help to workaround the so called "PID mismatch" problem. %patch -P 1 -p1 %patch -P 2 -p1 %patch -P 3 -p1 -%patch -P 4 -p1 -%patch -P 5 -p1 -%patch -P 6 -p1 %patch -P 99 -p1 %build @@ -132,15 +121,15 @@ This script can help to workaround the so called "PID mismatch" problem. # %{?_smp_mflags} does not work # -fstack-protector breaks build -CFLAGS+=`echo %{optflags} | sed -e 's,-fstack-protector\S*,,g'` make V=1 WERROR=0 PREFIX=%{_prefix} RUNDIR=/run/criu PYTHON=%{py_binary} +CFLAGS+=`echo %{optflags} | sed -e 's,-fstack-protector\S*,,g'` make V=1 WERROR=0 PREFIX=%{_prefix} RUNDIR=/run/criu PYTHON=%{py_binary} PLUGINDIR=%{_libdir}/criu NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES make docs V=1 %install sed -e "s,--upgrade --ignore-installed,--no-index --no-deps -v --no-build-isolation,g" -i lib/Makefile -i crit/Makefile make install-criu DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} -make install-lib DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=%{py_binary} -make install-crit DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=%{py_binary} +make install-lib DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=%{py_binary} PIPFLAGS="--no-build-isolation --no-index --no-deps --progress-bar off --upgrade --ignore-installed" +make install-crit DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} BINDIR=%{_bindir} SBINDIR=%{_sbindir} PYTHON=%{py_binary} PIPFLAGS="--no-build-isolation --no-index --no-deps --progress-bar off --upgrade --ignore-installed" make install-man DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} rm -f $RPM_BUILD_ROOT%{_mandir}/man1/compel.1 rm -f $RPM_BUILD_ROOT%{_mandir}/man1/criu-amdgpu-plugin.1 @@ -182,6 +171,9 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/libcriu.a %doc %{_mandir}/man1/criu-ns.1* %changelog +* Mon May 12 2025 Adrian Reber - 4.1-1 +- Update to 4.1 + * Fri Feb 07 2025 Adrian Reber - 3.19-9 - Fix VDSO compile error on s390x diff --git a/network.lock.nftables.patch b/network.lock.nftables.patch deleted file mode 100644 index c74ca04..0000000 --- a/network.lock.nftables.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- a/criu/include/cr_options.h.orig 2024-12-10 16:57:20.061293476 +0100 -+++ b/criu/include/cr_options.h 2024-12-10 16:57:34.789131372 +0100 -@@ -70,7 +70,7 @@ - NETWORK_LOCK_SKIP, - }; - --#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_IPTABLES -+#define NETWORK_LOCK_DEFAULT NETWORK_LOCK_NFTABLES - - /* - * Ghost file size we allow to carry by default. diff --git a/rpminspect.yaml b/rpminspect.yaml new file mode 100644 index 0000000..6b8f937 --- /dev/null +++ b/rpminspect.yaml @@ -0,0 +1,4 @@ +--- +annocheck: + jobs: + - hardened: --verbose --skip-dynamic-tags --skip-property-note --skip-bind-now --skip-pie --skip-cf-protection diff --git a/sources b/sources index bb80cdd..f7464d1 100644 --- a/sources +++ b/sources @@ -1 +1,2 @@ SHA512 (criu-3.19.tar.gz) = d243818cdac51580c952a80e9164786a67ce5e294c0faa6dc700f5e8da8e36495f0b64f5c27b345ede7d6697ed7a69fa4e9a85cef451f32e3ffeb78564884571 +SHA512 (criu-4.1.tar.gz) = 769001a7e527c129fe73509fd0c7d3fc3b9b1080dc69929032cb84f60f95256f5d145ed4b7ea11f090a7f468f2bb2a0ecf56475eb292966cad26d643f0e46816