61 lines
2.3 KiB
Diff
61 lines
2.3 KiB
Diff
|
From cf1e7bfab984b5e9451a63c25b39c0932e0d9116 Mon Sep 17 00:00:00 2001
|
||
|
From: Michele Baldessari <michele@acksyn.org>
|
||
|
Date: Wed, 6 May 2020 16:11:36 +0200
|
||
|
Subject: [PATCH] Increase the rabbitmqctl wait timeout during start()
|
||
|
|
||
|
After we start the rabbitmq process we wait for the pid to show up
|
||
|
and then declare the server to be started successfully.
|
||
|
This wait is done via 'rabbitmqctl wait'.
|
||
|
From https://www.rabbitmq.com/rabbitmqctl.8.html we have:
|
||
|
|
||
|
If the specified pidfile is not created or erlang node is not started
|
||
|
within --timeout the command will fail. Default timeout is 10 seconds.
|
||
|
|
||
|
This default of 10 seconds might not be enough in overloaded
|
||
|
environments. So what we want to do here is wait for as much time as
|
||
|
the start() operation allows us. So we wait for OCF_RESKEY_CRM_meta_timeout
|
||
|
minus 5 seconds. In the rare and nonsensical case that the result is no more than
|
||
|
10s we do not pass a timeout string at all to rabbitmqctl.
|
||
|
|
||
|
Co-Authored-By: John Eckersberg <jeckersb@redhat.com>
|
||
|
---
|
||
|
heartbeat/rabbitmq-cluster | 15 +++++++++++++--
|
||
|
1 file changed, 13 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
|
||
|
index a9ebd37ad..f7d48120c 100755
|
||
|
--- a/heartbeat/rabbitmq-cluster
|
||
|
+++ b/heartbeat/rabbitmq-cluster
|
||
|
@@ -294,6 +294,8 @@ rmq_monitor() {
|
||
|
rmq_init_and_wait()
|
||
|
{
|
||
|
local rc
|
||
|
+ local wait_timeout
|
||
|
+ local timeout_string
|
||
|
|
||
|
prepare_dir $RMQ_PID_DIR
|
||
|
prepare_dir $RMQ_LOG_DIR
|
||
|
@@ -305,11 +307,20 @@ rmq_init_and_wait()
|
||
|
setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &
|
||
|
|
||
|
ocf_log info "Waiting for server to start"
|
||
|
- $RMQ_CTL wait $RMQ_PID_FILE
|
||
|
+ # We want to give the wait command almost the full startup timeout we are given
|
||
|
+ # So we use the start operation timeout (in ms), convert it and subtract 5 seconds
|
||
|
+ # In the silly case that the result is 10 seconds or less we just skip setting the timeout
|
||
|
+ wait_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / 1000 - 5`
|
||
|
+ if [ $wait_timeout -gt 10 ]; then
|
||
|
+ timeout_string="--timeout ${wait_timeout}"
|
||
|
+ else
|
||
|
+ timeout_string=""
|
||
|
+ fi
|
||
|
+ $RMQ_CTL $timeout_string wait $RMQ_PID_FILE
|
||
|
rc=$?
|
||
|
if [ $rc -ne $OCF_SUCCESS ]; then
|
||
|
remove_pid
|
||
|
- ocf_log info "rabbitmq-server start failed: $rc"
|
||
|
+ ocf_log info "rabbitmq-server start failed with a timeout of ($timeout_string): $rc"
|
||
|
return $OCF_ERR_GENERIC
|
||
|
fi
|
||
|
|