65 lines
2.2 KiB
Diff
65 lines
2.2 KiB
Diff
|
From 52d09b57a499ed7b3757e0e2954c2783198d5b23 Mon Sep 17 00:00:00 2001
|
||
|
From: Damien Ciabrini <damien.ciabrini@gmail.com>
|
||
|
Date: Mon, 9 Nov 2020 20:42:19 +0100
|
||
|
Subject: [PATCH] podman: recover from podman's storage being out of sync
|
||
|
|
||
|
If a system crashes while podman is stopping a container (e.g. a fencing action
|
||
|
took place), it might happen that on reboot, podman is not able to recreate
|
||
|
a container as requested by the resource agent.
|
||
|
|
||
|
When such a start operation fails, it might be because the internal storage
|
||
|
layer still references an old container with the same name, even though podman
|
||
|
itself thinks there is no such container. If so, purge the storage layer to try
|
||
|
to clean the corruption and try recreating the container.
|
||
|
---
|
||
|
heartbeat/podman | 29 +++++++++++++++++++++++++++--
|
||
|
1 file changed, 27 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/heartbeat/podman b/heartbeat/podman
|
||
|
index 81b00ee6f..d4d608ca3 100755
|
||
|
--- a/heartbeat/podman
|
||
|
+++ b/heartbeat/podman
|
||
|
@@ -345,6 +345,32 @@ create_transient_drop_in_dependency()
|
||
|
}
|
||
|
|
||
|
|
||
|
+run_new_container()
|
||
|
+{
|
||
|
+ local opts=$1
|
||
|
+ local image=$2
|
||
|
+ local cmd=$3
|
||
|
+ local rc
|
||
|
+
|
||
|
+ ocf_log info "running container $CONTAINER for the first time"
|
||
|
+ ocf_run podman run $opts $image $cmd
|
||
|
+ rc=$?
|
||
|
+ if [ $rc -eq 125 ]; then
|
||
|
+ # Exit status 125 is treated here as an internal podman error.
|
||
|
+ # A possible cause is the internal storage layer still referencing
|
||
|
+ # an old container with the same name, even though podman itself
|
||
|
+ # thinks there is no such container. Purge that leftover with
|
||
|
+ # 'podman rm --storage', retry the run once, and return its status.
|
||
|
+ ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying."
|
||
|
+ ocf_run podman rm --storage $CONTAINER
|
||
|
+ ocf_run podman run $opts $image $cmd
|
||
|
+ rc=$?
|
||
|
+ fi
|
||
|
+
|
||
|
+ return $rc
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
podman_start()
|
||
|
{
|
||
|
local cid
|
||
|
@@ -378,8 +404,7 @@ podman_start()
|
||
|
# make sure any previous container matching our container name is cleaned up first.
|
||
|
# we already know at this point it wouldn't be running
|
||
|
remove_container
|
||
|
- ocf_log info "running container $CONTAINER for the first time"
|
||
|
- ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
|
||
|
+ run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
|
||
|
fi
|
||
|
rc=$?
|
||
|
|