# Wait until the ssh dump target can be reached and the save path created.
#
# An IPv6 host address may take a long time to become ready. Instead of
# checking for the address itself, network reachability is probed through
# the exit status of 'ssh', retrying once per second until the timeout
# expires.
#
# Globals:   SSH_KEY_LOCATION, DUMP_TARGET, SAVE_PATH (read)
# Arguments: $1 - optional timeout in seconds (default: 60)
# Returns:   0 once the remote 'mkdir -p' succeeds;
#            1 if ssh reports a non-255 failure or the timeout expires
check_and_wait_network_ready()
{
    local timeout=${1:-60}
    local start_time
    local cur
    local diff
    local ret

    # Assigned separately from 'local' so a failing 'date' is not masked.
    start_time=$(date +%s)

    while true; do
        ssh -q -i "$SSH_KEY_LOCATION" -o BatchMode=yes "$DUMP_TARGET" mkdir -p "$SAVE_PATH"
        # Save the status right away: every later command, including '[',
        # overwrites $?.
        ret=$?

        # ssh exits with the exit status of the remote command or with 255
        # if an error occurred
        if [ "$ret" -eq 0 ]; then
            return 0
        elif [ "$ret" -ne 255 ]; then
            # The status comes from the remote command, not from an ssh
            # error, so waiting for the network will not help.
            return 1
        fi

        cur=$(date +%s)
        # Arithmetic expansion; '$( ... )' would try to run a command.
        diff=$(( cur - start_time ))
        if [ "$diff" -gt "$timeout" ]; then
            break
        fi
        sleep 1
    done

    return 1
}