criu/2550.patch
Adrian Reber 4d542e727d
Adapt patches to patches merged upstream
Fix running on aarch64

Resolves: RHEL-58354

Signed-off-by: Adrian Reber <areber@redhat.com>
2025-02-04 11:58:15 +01:00

474 lines
16 KiB
Diff

From 9a2b7d6b3baa2b3183489ed9cebece039f9f488f Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Thu, 23 Jan 2025 09:26:15 +0000
Subject: [PATCH 1/2] criu: use libuuid for criu_run_id generation
criu_run_id will be used in upcoming changes to create and remove
network rules for network locking. Instead of trying to come up with
a way to create unique IDs, just use an existing library.
libuuid should be installed on most systems as it is indirectly required
by systemd (via libmount).
Signed-off-by: Adrian Reber <areber@redhat.com>
---
.cirrus.yml | 2 +-
.github/workflows/check-commits.yml | 2 +-
compel/include/uapi/infect-util.h | 11 ++++++++++-
compel/src/lib/infect-util.c | 2 +-
compel/src/lib/infect.c | 2 +-
criu/Makefile.packages | 4 +++-
criu/fdstore.c | 2 +-
criu/files.c | 2 +-
criu/include/util.h | 4 +++-
criu/pidfd-store.c | 2 +-
criu/unittest/mock.c | 4 +++-
criu/util.c | 17 +++++++----------
scripts/build/Dockerfile.alpine | 3 ++-
scripts/build/Dockerfile.amd-rocm | 1 +
scripts/build/Dockerfile.archlinux | 1 +
scripts/build/Dockerfile.hotspot-alpine | 1 +
scripts/build/Dockerfile.hotspot-ubuntu | 1 +
scripts/build/Dockerfile.linux32.tmpl | 1 +
scripts/build/Dockerfile.openj9-ubuntu | 1 +
.../build/Dockerfile.riscv64-stable-cross.tmpl | 1 +
scripts/build/Dockerfile.stable-cross.tmpl | 1 +
scripts/build/Dockerfile.tmpl | 1 +
scripts/build/Dockerfile.unstable-cross.tmpl | 1 +
scripts/ci/prepare-for-fedora-rawhide.sh | 1 +
scripts/ci/run-ci-tests.sh | 2 +-
scripts/ci/vagrant.sh | 2 +-
26 files changed, 48 insertions(+), 24 deletions(-)
diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h
index ace6f6b6b1..658df9393d 100644
--- a/compel/include/uapi/infect-util.h
+++ b/compel/include/uapi/infect-util.h
@@ -3,11 +3,20 @@
#include "common/compiler.h"
+/**
+ * The length of the hash is based on what libuuid provides.
+ * According to the manpage this is:
+ *
+ * The uuid_unparse() function converts the supplied UUID uu from the binary
+ * representation into a 36-byte string (plus trailing '\0')
+ */
+#define RUN_ID_HASH_LENGTH 37
+
/*
* compel_run_id is a unique value of the current run. It can be used to
* generate resource ID-s to avoid conflicts with other processes.
*/
-extern uint64_t compel_run_id;
+extern char compel_run_id[RUN_ID_HASH_LENGTH];
struct parasite_ctl;
extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd);
diff --git a/compel/src/lib/infect-util.c b/compel/src/lib/infect-util.c
index 00a7c83f7d..dc57e28f7c 100644
--- a/compel/src/lib/infect-util.c
+++ b/compel/src/lib/infect-util.c
@@ -7,7 +7,7 @@
#include "infect-rpc.h"
#include "infect-util.h"
-uint64_t compel_run_id;
+char compel_run_id[RUN_ID_HASH_LENGTH];
int compel_util_send_fd(struct parasite_ctl *ctl, int fd)
{
diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c
index 1e3ffb9670..caf54e03fd 100644
--- a/compel/src/lib/infect.c
+++ b/compel/src/lib/infect.c
@@ -427,7 +427,7 @@ static int gen_parasite_saddr(struct sockaddr_un *saddr, int key)
int sun_len;
saddr->sun_family = AF_UNIX;
- snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%" PRIx64, key, compel_run_id);
+ snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%s", key, compel_run_id);
sun_len = SUN_LEN(saddr);
*saddr->sun_path = '\0';
diff --git a/criu/Makefile.packages b/criu/Makefile.packages
index 7f6113c8f1..3e2e6efd18 100644
--- a/criu/Makefile.packages
+++ b/criu/Makefile.packages
@@ -6,6 +6,7 @@ REQ-RPM-PKG-NAMES += protobuf-devel
REQ-RPM-PKG-NAMES += protobuf-python
REQ-RPM-PKG-NAMES += libnl3-devel
REQ-RPM-PKG-NAMES += libcap-devel
+REQ-RPM-PKG-NAMES += libuuid-devel
REQ-RPM-PKG-TEST-NAMES += libaio-devel
@@ -16,6 +17,7 @@ REQ-DEB-PKG-NAMES += protobuf-compiler
REQ-DEB-PKG-NAMES += $(PYTHON)-protobuf
REQ-DEB-PKG-NAMES += libnl-3-dev
REQ-DEB-PKG-NAMES += libcap-dev
+REQ-DEB-PKG-NAMES += uuid-dev
REQ-DEB-PKG-TEST-NAMES += $(PYTHON)-yaml
REQ-DEB-PKG-TEST-NAMES += libaio-dev
@@ -25,7 +27,7 @@ REQ-DEB-PKG-TEST-NAMES += libaio-dev
REQ-RPM-PKG-TEST-NAMES += $(PYTHON)-PyYAML
-export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
+export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -luuid
check-packages-failed:
$(warning Can not find some of the required libraries)
diff --git a/criu/fdstore.c b/criu/fdstore.c
index d615ad15d0..6ac639c553 100644
--- a/criu/fdstore.c
+++ b/criu/fdstore.c
@@ -58,7 +58,7 @@ int fdstore_init(void)
}
addr.sun_family = AF_UNIX;
- addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%" PRIx64, st.st_ino,
+ addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%s", st.st_ino,
criu_run_id);
addrlen += sizeof(addr.sun_family);
diff --git a/criu/files.c b/criu/files.c
index 31e705bcc5..f16ec32a23 100644
--- a/criu/files.c
+++ b/criu/files.c
@@ -978,7 +978,7 @@ static int receive_fd(struct fdinfo_list_entry *fle);
static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
{
addr->sun_family = AF_UNIX;
- snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%" PRIx64, pid, criu_run_id);
+ snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%s", pid, criu_run_id);
*len = SUN_LEN(addr);
*addr->sun_path = '\0';
}
diff --git a/criu/include/util.h b/criu/include/util.h
index 4793f7f20e..194e94deeb 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -21,6 +21,8 @@
#include "log.h"
#include "common/err.h"
+#include "compel/infect-util.h"
+
#define PREF_SHIFT_OP(pref, op, size) ((size)op(pref##BYTES_SHIFT))
#define KBYTES_SHIFT 10
#define MBYTES_SHIFT 20
@@ -420,7 +422,7 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void
* criu_run_id is a unique value of the current run. It can be used to
* generate resource ID-s to avoid conflicts with other CRIU processes.
*/
-extern uint64_t criu_run_id;
+extern char criu_run_id[RUN_ID_HASH_LENGTH];
extern void util_init(void);
extern char *resolve_mountpoint(char *path);
diff --git a/criu/pidfd-store.c b/criu/pidfd-store.c
index 9fdc74cb74..110f7802a2 100644
--- a/criu/pidfd-store.c
+++ b/criu/pidfd-store.c
@@ -99,7 +99,7 @@ int init_pidfd_store_sk(pid_t pid, int sk)
goto err;
}
- addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%" PRIx64, pid, sk,
+ addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%s", pid, sk,
criu_run_id);
addrlen += sizeof(addr.sun_family);
diff --git a/criu/unittest/mock.c b/criu/unittest/mock.c
index e517720e42..b2d5072787 100644
--- a/criu/unittest/mock.c
+++ b/criu/unittest/mock.c
@@ -5,6 +5,8 @@
#include <stdint.h>
#include <stdlib.h>
+#include "compel/infect-util.h"
+
int add_external(char *key)
{
return 0;
@@ -141,4 +143,4 @@ int check_mount_v2(void)
return 0;
}
-uint64_t compel_run_id;
+char compel_run_id[RUN_ID_HASH_LENGTH];
diff --git a/criu/util.c b/criu/util.c
index d2bc9a8657..58c18e20be 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -28,6 +28,7 @@
#include <ftw.h>
#include <time.h>
#include <libgen.h>
+#include <uuid/uuid.h>
#include "linux/mount.h"
@@ -2026,20 +2027,16 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
return fret;
}
-uint64_t criu_run_id;
+char criu_run_id[RUN_ID_HASH_LENGTH];
void util_init(void)
{
- struct stat statbuf;
+ uuid_t uuid;
- criu_run_id = getpid();
- if (!stat("/proc/self/ns/pid", &statbuf))
- criu_run_id |= (uint64_t)statbuf.st_ino << 32;
- else if (errno != ENOENT)
- pr_perror("Can't stat /proc/self/ns/pid - CRIU run id might not be unique");
-
- compel_run_id = criu_run_id;
- pr_info("CRIU run id = %#" PRIx64 "\n", criu_run_id);
+ uuid_generate(uuid);
+ uuid_unparse(uuid, criu_run_id);
+ pr_info("CRIU run id = %s\n", criu_run_id);
+ memcpy(compel_run_id, criu_run_id, sizeof(criu_run_id));
}
/*
diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine
index 329d7791de..d843793ea2 100644
--- a/scripts/build/Dockerfile.alpine
+++ b/scripts/build/Dockerfile.alpine
@@ -24,7 +24,8 @@ RUN apk update && apk add \
sudo \
libcap-utils \
libdrm-dev \
- util-linux
+ util-linux \
+ util-linux-dev
COPY . /criu
WORKDIR /criu
diff --git a/scripts/build/Dockerfile.amd-rocm b/scripts/build/Dockerfile.amd-rocm
index c466a73d2d..ed66ae4fec 100644
--- a/scripts/build/Dockerfile.amd-rocm
+++ b/scripts/build/Dockerfile.amd-rocm
@@ -56,6 +56,7 @@ RUN apt-get clean -qqy && apt-get update -qqy && apt-get install -qqy --no-insta
python-protobuf \
python3-minimal \
python-ipaddress \
+ uuid-dev \
curl \
wget \
vim \
diff --git a/scripts/build/Dockerfile.archlinux b/scripts/build/Dockerfile.archlinux
index 4056514891..9d11194bb0 100644
--- a/scripts/build/Dockerfile.archlinux
+++ b/scripts/build/Dockerfile.archlinux
@@ -35,6 +35,7 @@ RUN pacman -Syu --noconfirm \
python-junit-xml \
python-importlib-metadata \
libdrm \
+ util-linux-libs \
diffutils
COPY . /criu
diff --git a/scripts/build/Dockerfile.hotspot-alpine b/scripts/build/Dockerfile.hotspot-alpine
index cb9332fd0c..6caf9d0b1b 100644
--- a/scripts/build/Dockerfile.hotspot-alpine
+++ b/scripts/build/Dockerfile.hotspot-alpine
@@ -19,6 +19,7 @@ RUN apk update && apk add \
maven \
ip6tables \
iptables \
+ util-linux-dev \
bash
COPY . /criu
diff --git a/scripts/build/Dockerfile.hotspot-ubuntu b/scripts/build/Dockerfile.hotspot-ubuntu
index 0318f650f3..67de916acb 100644
--- a/scripts/build/Dockerfile.hotspot-ubuntu
+++ b/scripts/build/Dockerfile.hotspot-ubuntu
@@ -22,6 +22,7 @@ RUN apt-install protobuf-c-compiler \
pkg-config \
iptables \
gcc \
+ uuid-dev \
maven
COPY . /criu
diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl
index 13e9926424..d218e06414 100644
--- a/scripts/build/Dockerfile.linux32.tmpl
+++ b/scripts/build/Dockerfile.linux32.tmpl
@@ -21,6 +21,7 @@ RUN apt-install \
pkg-config \
protobuf-c-compiler \
protobuf-compiler \
+ uuid-dev \
python3-minimal
COPY . /criu
diff --git a/scripts/ci/prepare-for-fedora-rawhide.sh b/scripts/ci/prepare-for-fedora-rawhide.sh
index 09085c403b..42252c93c9 100755
--- a/scripts/ci/prepare-for-fedora-rawhide.sh
+++ b/scripts/ci/prepare-for-fedora-rawhide.sh
@@ -36,6 +36,7 @@ dnf install -y \
e2fsprogs \
rubygem-asciidoctor \
libdrm-devel \
+ libuuid-devel \
kmod
# /tmp is no longer 755 in the rawhide container image and breaks CI - fix it
From c39bce3cf17782784d1a14cf40a4cedd059059fa Mon Sep 17 00:00:00 2001
From: Adrian Reber <areber@redhat.com>
Date: Thu, 23 Jan 2025 17:42:45 +0000
Subject: [PATCH 2/2] net: remember the name of the lock chain (nftables)
Using libnftables the chain to lock the network is composed of
("CRIU-%d", real_pid). This leads to around 40 zdtm tests failing
with errors like this:
Error: No such file or directory; did you mean table 'CRIU-62' in family inet?
delete table inet CRIU-86
The reason is that as soon as a process is running in a namespace the
real PID can be anything and only the PID in the namespace is restored
correctly. Relying on the real PID does not work for the chain name.
Using the PID of the innermost namespace would lead to the chain being
called 'CRIU-1' most of the time, which is also not really unique.
With this commit the chain is now named using the already existing CRIU
run ID. To be able to correctly restore the process and delete the
locking table, the CRIU run id during checkpointing is now stored in the
inventory as dump_criu_run_id.
Signed-off-by: Adrian Reber <areber@redhat.com>
---
criu/image.c | 30 ++++++++++++++++++++++++++++++
criu/include/util.h | 2 ++
criu/netfilter.c | 20 +++++++++++++++++++-
images/inventory.proto | 4 ++++
4 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/criu/image.c b/criu/image.c
index 9589167fb1..f3747d6ff5 100644
--- a/criu/image.c
+++ b/criu/image.c
@@ -25,6 +25,7 @@ bool img_common_magic = true;
TaskKobjIdsEntry *root_ids;
u32 root_cg_set;
Lsmtype image_lsm;
+char dump_criu_run_id[RUN_ID_HASH_LENGTH];
int check_img_inventory(bool restore)
{
@@ -120,6 +121,24 @@ int check_img_inventory(bool restore)
} else {
opts.network_lock_method = he->network_lock_method;
}
+
+ /**
+ * This contains the criu_run_id during dumping of the process.
+ * For things like removing network locking (nftables) this
+ * information is needed to identify the name of the network
+ * locking table.
+ */
+ if (he->dump_criu_run_id) {
+ strncpy(dump_criu_run_id, he->dump_criu_run_id, sizeof(dump_criu_run_id) - 1);
+ pr_info("Dump CRIU run id = %s\n", dump_criu_run_id);
+ } else {
+ /**
+ * If restoring from an old image this is a marker
+ * that no dump_criu_run_id exists.
+ */
+ dump_criu_run_id[0] = NO_DUMP_CRIU_RUN_ID;
+ }
+
}
ret = 0;
@@ -367,6 +386,17 @@ int prepare_inventory(InventoryEntry *he)
he->has_network_lock_method = true;
he->network_lock_method = opts.network_lock_method;
+ /**
+ * This contains the criu_run_id during dumping of the process.
+ * For things like removing network locking (nftables) this
+ * information is needed to identify the name of the network
+ * locking table.
+ */
+ he->dump_criu_run_id = xstrdup(criu_run_id);
+
+ if (!he->dump_criu_run_id)
+ return -1;
+
return 0;
}
diff --git a/criu/include/util.h b/criu/include/util.h
index 194e94deeb..55ad5b63cf 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -424,6 +424,8 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void
*/
extern char criu_run_id[RUN_ID_HASH_LENGTH];
extern void util_init(void);
+#define NO_DUMP_CRIU_RUN_ID 0x7f
+extern char dump_criu_run_id[RUN_ID_HASH_LENGTH];
extern char *resolve_mountpoint(char *path);
diff --git a/criu/netfilter.c b/criu/netfilter.c
index 9e78dc4b03..e2c82764f2 100644
--- a/criu/netfilter.c
+++ b/criu/netfilter.c
@@ -299,7 +299,25 @@ int nftables_lock_connection(struct inet_sk_desc *sk)
int nftables_get_table(char *table, int n)
{
- if (snprintf(table, n, "inet CRIU-%d", root_item->pid->real) < 0) {
+ int ret;
+
+ switch(dump_criu_run_id[0]) {
+ case 0:
+ /* This is not a restore.*/
+ ret = snprintf(table, n, "inet CRIU-%s", criu_run_id);
+ break;
+ case NO_DUMP_CRIU_RUN_ID:
+ /**
+ * This is a restore from an older image with no
+ * dump_criu_run_id available. Let's use the old ID.
+ */
+ ret = snprintf(table, n, "inet CRIU-%d", root_item->pid->real);
+ break;
+ default:
+ ret = snprintf(table, n, "inet CRIU-%s", dump_criu_run_id);
+ }
+
+ if (ret < 0) {
pr_err("Cannot generate CRIU's nftables table name\n");
return -1;
}
diff --git a/images/inventory.proto b/images/inventory.proto
index 7f655031bc..1e18815bb9 100644
--- a/images/inventory.proto
+++ b/images/inventory.proto
@@ -29,4 +29,8 @@ message inventory_entry {
optional uint32 pre_dump_mode = 9;
optional bool tcp_close = 10;
optional uint32 network_lock_method = 11;
+ // Remember the criu_run_id when CRIU dumped the process.
+ // This is currently used to delete the correct nftables
+ // network locking rule.
+ optional string dump_criu_run_id = 13;
}