From 462ada6164cb77c81f5291d88287d68506d38056 Mon Sep 17 00:00:00 2001 From: Damien Ciabrini Date: Tue, 9 Jul 2019 23:14:21 +0200 Subject: [PATCH] Generate additional drop-in dependencies for podman containers When podman creates a container, it creates two additional systemd scope files dynamically: - libpod-conmon-<CONTAINERID>.scope - runs a conmon process that tracks a container's pid1 into a dedicated pidfile. - libpod-<CONTAINERID>.scope - created dynamically by runc, for cgroups accounting On shutdown, it can happen that systemd stops those scopes early, which in turn sends a SIGTERM to pacemaker-managed containers before pacemaker has scheduled any stop operation. That confuses the cluster and may break shutdown. Add a new option in the resource-agent to inject additional dependencies into the dynamically created scope files, so that systemd is not allowed to stop scopes before the pacemaker service itself is stopped. When that option is enabled, the scopes look like: # podman ps | grep galera c329819a1227 192.168.122.8:8787/rhosp15/openstack-mariadb:latest dumb-init -- /bin... About an hour ago Up About an hour ago galera-bundle-podman-0 # systemctl cat libpod*c329819a1227* # /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope # This is a transient unit file, created programmatically via the systemd API. Do not edit. [Scope] Slice=machine.slice Delegate=yes [Unit] DefaultDependencies=no # /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf [Unit] Before=pacemaker.service # /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope # This is a transient unit file, created programmatically via the systemd API. Do not edit. 
[Unit] Description=libcontainer container c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b [Scope] Slice=machine.slice Delegate=yes MemoryAccounting=yes CPUAccounting=yes BlockIOAccounting=yes [Unit] DefaultDependencies=no # /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf [Unit] Before=pacemaker.service Effectively, this prevents systemd from managing the shutdown of any pacemaker-managed podman container. Related: rhbz#1726442 --- heartbeat/podman | 82 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/heartbeat/podman b/heartbeat/podman index 8fc2c4695..8a916eb8c 100755 --- a/heartbeat/podman +++ b/heartbeat/podman @@ -158,6 +158,16 @@ to have the particular one persist when this happens. reuse container + + + +Use transient drop-in files to add extra dependencies to the systemd +scopes associated to the container. During reboot, this prevents systemd +to stop the container before pacemaker. + +drop-in dependency + + @@ -273,8 +283,57 @@ podman_create_mounts() { IFS="$oldIFS" } +podman_container_id() +{ + # Retrieve the container ID by doing a "podman ps" rather than + # a "podman inspect", because the latter has performance issues + # under IO load. + # We could have run "podman start $CONTAINER" to get the ID back + # but if the container is stopped, the command will return a + # name instead of a container ID. This would break us. + podman ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$CONTAINER" | cut -d' ' -f1 +} + + +create_transient_drop_in_dependency() +{ + local cid=$1 + local rc=$OCF_SUCCESS + + if [ -z "$cid" ]; then + ocf_log error "Container ID not found for \"$CONTAINER\". 
Not creating drop-in dependency" + return $OCF_ERR_GENERIC + fi + + ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)" + for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do + if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then + mkdir -p /run/systemd/transient/"$scope" && \ + echo -e "[Unit]\nBefore=pacemaker.service" > /run/systemd/transient/"$scope"/dep.conf && \ + chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf + rc=$? + fi + done + + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)" + else + systemctl daemon-reload + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\"" + fi + fi + + return $rc +} + + podman_start() { + local cid + local rc + podman_create_mounts local run_opts="-d --name=${CONTAINER}" # check to see if the container has already started @@ -306,8 +365,17 @@ podman_start() ocf_log info "running container $CONTAINER for the first time" ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd fi + rc=$? - if [ $? -ne 0 ]; then + # if the container was stopped or didn't exist before, systemd + # removed the libpod* scopes. So always try to recreate the drop-ins + if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then + cid=$(podman_container_id) + create_transient_drop_in_dependency "$cid" + rc=$? + fi + + if [ $rc -ne 0 ]; then ocf_exit_reason "podman failed to launch container" return $OCF_ERR_GENERIC fi @@ -353,6 +421,8 @@ podman_stop() else ocf_log debug "waiting $timeout second[s] before killing container" ocf_run podman stop -t=$timeout $CONTAINER + # on stop, systemd will automatically delete any transient + # drop-in conf that has been created earlier fi if [ $? 
-ne 0 ]; then @@ -456,6 +526,16 @@ CONTAINER=$OCF_RESKEY_name # exec command to be non-empty : ${OCF_RESKEY_monitor_cmd:=/bin/true} +# When OCF_RESKEY_drop_in_dependency is not populated, we +# look at another file-based way of enabling the option. +# Otherwise, consider it disabled. +if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then + if [ -f "/etc/sysconfig/podman_drop_in" ] || \ + [ -f "/etc/default/podman_drop_in" ]; then + OCF_RESKEY_drop_in_dependency=yes + fi +fi + case $__OCF_ACTION in meta-data) meta_data exit $OCF_SUCCESS;;