274 lines
9.7 KiB
Diff
274 lines
9.7 KiB
Diff
|
--- ClusterLabs-resource-agents-e711383f/heartbeat/aws-vpc-route53.in 2018-06-29 14:05:02.000000000 +0200
|
||
|
+++ /home/oalbrigt/src/resource-agents/heartbeat/aws-vpc-route53.in 2019-11-07 12:24:18.822111495 +0100
|
||
|
@@ -152,9 +152,15 @@
|
||
|
END
|
||
|
}
|
||
|
|
||
|
-ec2ip_validate() {
|
||
|
+r53_validate() {
|
||
|
ocf_log debug "function: validate"
|
||
|
|
||
|
+ # Check for required binaries
|
||
|
+ ocf_log debug "Checking for required binaries"
|
||
|
+ for command in curl dig; do
|
||
|
+ check_binary "$command"
|
||
|
+ done
|
||
|
+
|
||
|
# Full name
|
||
|
[[ -z "$OCF_RESKEY_fullname" ]] && ocf_log error "Full name parameter not set $OCF_RESKEY_fullname!" && exit $OCF_ERR_CONFIGURED
|
||
|
|
||
|
@@ -175,32 +181,111 @@
|
||
|
ocf_log debug "ok"
|
||
|
|
||
|
if [ -n "$OCF_RESKEY_profile" ]; then
|
||
|
- AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile"
|
||
|
+ AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10"
|
||
|
else
|
||
|
- AWS_PROFILE_OPT="--profile default"
|
||
|
+ AWS_PROFILE_OPT="--profile default --cli-connect-timeout 10"
|
||
|
fi
|
||
|
|
||
|
return $OCF_SUCCESS
|
||
|
}
|
||
|
|
||
|
-ec2ip_monitor() {
|
||
|
- ec2ip_validate
|
||
|
+r53_monitor() {
|
||
|
+ #
|
||
|
+ # For every start action (and probe) the agent will call the Route53 API to check for the DNS record,
|
||
|
+ # otherwise it will try to get results directly by querying the DNS using "dig".
|
||
|
+ # Due to complexity in some DNS architectures "dig" can fail, and if this happens
|
||
|
+ # the monitor will fallback to the Route53 API call.
|
||
|
+ #
|
||
|
+ # There will be no failure, failover or restart of the agent if the monitor operation fails
|
||
|
+ # hence we only return $OCF_SUCCESS in this function
|
||
|
+ #
|
||
|
+ # In case the monitor operation detects a wrong or non-existent Route53 DNS entry
|
||
|
+ # it will try to fix the existing one, or create it again
|
||
|
+ #
|
||
|
+ #
|
||
|
+ ARECORD=""
|
||
|
+ IPREGEX="^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"
|
||
|
+ r53_validate
|
||
|
ocf_log debug "Checking Route53 record sets"
|
||
|
- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')"
|
||
|
- ARECORD="$(aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" | grep RESOURCERECORDS | /usr/bin/awk '{ print $2 }' )"
|
||
|
- ocf_log debug "Found IP address: $ARECORD ."
|
||
|
- if [ "${ARECORD}" == "${IPADDRESS}" ]; then
|
||
|
- ocf_log debug "ARECORD $ARECORD found"
|
||
|
+ #
|
||
|
+ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)"
|
||
|
+ #
|
||
|
+ if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then
|
||
|
+ #
|
||
|
+ cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']"
|
||
|
+ ocf_log info "Route53 Agent Starting or probing - executing monitoring API call: $cmd"
|
||
|
+ CLIRES="$($cmd 2>&1)"
|
||
|
+ rc=$?
|
||
|
+ ocf_log debug "awscli returned code: $rc"
|
||
|
+ if [ $rc -ne 0 ]; then
|
||
|
+ CLIRES=$(echo $CLIRES | grep -v '^$')
|
||
|
+ ocf_log warn "Route53 API returned an error: $CLIRES"
|
||
|
+ ocf_log warn "Skipping cluster action due to API call error"
|
||
|
+ return $OCF_ERR_GENERIC
|
||
|
+ fi
|
||
|
+ ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }')
|
||
|
+ #
|
||
|
+ if ocf_is_probe; then
|
||
|
+ #
|
||
|
+ # Prevent R53 record change during probe
|
||
|
+ #
|
||
|
+ if [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then
|
||
|
+ ocf_log debug "Route53 DNS record $ARECORD found at probing, disregarding"
|
||
|
+ return $OCF_NOT_RUNNING
|
||
|
+ fi
|
||
|
+ fi
|
||
|
+ else
|
||
|
+ #
|
||
|
+ cmd="dig +retries=3 +time=5 +short $OCF_RESKEY_fullname 2>/dev/null"
|
||
|
+ ocf_log info "executing monitoring command : $cmd"
|
||
|
+ ARECORD="$($cmd)"
|
||
|
+ rc=$?
|
||
|
+ ocf_log debug "dig return code: $rc"
|
||
|
+ #
|
||
|
+ if [[ ! $ARECORD =~ $IPREGEX ]] || [ $rc -ne 0 ]; then
|
||
|
+ ocf_log info "Fallback to Route53 API query due to DNS resolution failure"
|
||
|
+ cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']"
|
||
|
+ ocf_log debug "executing monitoring API call: $cmd"
|
||
|
+ CLIRES="$($cmd 2>&1)"
|
||
|
+ rc=$?
|
||
|
+ ocf_log debug "awscli return code: $rc"
|
||
|
+ if [ $rc -ne 0 ]; then
|
||
|
+ CLIRES=$(echo $CLIRES | grep -v '^$')
|
||
|
+ ocf_log warn "Route53 API returned an error: $CLIRES"
|
||
|
+ ocf_log warn "Monitor skipping cluster action due to API call error"
|
||
|
+ return $OCF_SUCCESS
|
||
|
+ fi
|
||
|
+ ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }')
|
||
|
+ fi
|
||
|
+ #
|
||
|
+ fi
|
||
|
+ ocf_log info "Route53 DNS record pointing $OCF_RESKEY_fullname to IP address $ARECORD"
|
||
|
+ #
|
||
|
+ if [ "$ARECORD" == "$IPADDRESS" ]; then
|
||
|
+ ocf_log info "Route53 DNS record $ARECORD found"
|
||
|
+ return $OCF_SUCCESS
|
||
|
+ elif [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then
|
||
|
+ ocf_log info "Route53 DNS record points to a different host, setting DNS record on Route53 to this host"
|
||
|
+ _update_record "UPSERT" "$IPADDRESS"
|
||
|
return $OCF_SUCCESS
|
||
|
else
|
||
|
- ocf_log debug "No ARECORD found"
|
||
|
- return $OCF_NOT_RUNNING
|
||
|
+ ocf_log info "No Route53 DNS record found, setting DNS record on Route53 to this host"
|
||
|
+ _update_record "UPSERT" "$IPADDRESS"
|
||
|
+ return $OCF_SUCCESS
|
||
|
fi
|
||
|
|
||
|
return $OCF_SUCCESS
|
||
|
}
|
||
|
|
||
|
_update_record() {
|
||
|
+ #
|
||
|
+ # This function is the one that will actually execute Route53's API call
|
||
|
+ # and configure the DNS record using the correct API calls and parameters
|
||
|
+ #
|
||
|
+ # It creates a temporary JSON file under /tmp with the required API payload
|
||
|
+ #
|
||
|
+ # Failures in this function are critical and will cause the agent to fail
|
||
|
+ #
|
||
|
update_action="$1"
|
||
|
IPADDRESS="$2"
|
||
|
ocf_log info "Updating Route53 $OCF_RESKEY_hostedzoneid with $IPADDRESS for $OCF_RESKEY_fullname"
|
||
|
@@ -209,19 +294,19 @@
|
||
|
ocf_exit_reason "Failed to create temporary file for record update"
|
||
|
exit $OCF_ERR_GENERIC
|
||
|
fi
|
||
|
- cat >>"${ROUTE53RECORD}" <<-EOF
|
||
|
+ cat >>"$ROUTE53RECORD" <<-EOF
|
||
|
{
|
||
|
"Comment": "Update record to reflect new IP address for a system ",
|
||
|
"Changes": [
|
||
|
{
|
||
|
- "Action": "${update_action}",
|
||
|
+ "Action": "$update_action",
|
||
|
"ResourceRecordSet": {
|
||
|
- "Name": "${OCF_RESKEY_fullname}",
|
||
|
+ "Name": "$OCF_RESKEY_fullname",
|
||
|
"Type": "A",
|
||
|
- "TTL": ${OCF_RESKEY_ttl},
|
||
|
+ "TTL": $OCF_RESKEY_ttl,
|
||
|
"ResourceRecords": [
|
||
|
{
|
||
|
- "Value": "${IPADDRESS}"
|
||
|
+ "Value": "$IPADDRESS"
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
@@ -229,46 +314,53 @@
|
||
|
]
|
||
|
}
|
||
|
EOF
|
||
|
- cmd="aws --profile ${OCF_RESKEY_profile} route53 change-resource-record-sets --hosted-zone-id ${OCF_RESKEY_hostedzoneid} \
|
||
|
- --change-batch file://${ROUTE53RECORD} "
|
||
|
+ cmd="aws --profile $OCF_RESKEY_profile route53 change-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --change-batch file://$ROUTE53RECORD "
|
||
|
ocf_log debug "Executing command: $cmd"
|
||
|
- CHANGEID=$($cmd | grep CHANGEINFO | /usr/bin/awk -F'\t' '{ print $3 }' )
|
||
|
- ocf_log debug "Change id: ${CHANGEID}"
|
||
|
- rmtempfile ${ROUTE53RECORD}
|
||
|
- CHANGEID=$(echo $CHANGEID |cut -d'/' -f 3 |cut -d'"' -f 1 )
|
||
|
- ocf_log debug "Change id: ${CHANGEID}"
|
||
|
+ CLIRES="$($cmd 2>&1)"
|
||
|
+ rc=$?
|
||
|
+ ocf_log debug "awscli returned code: $rc"
|
||
|
+ if [ $rc -ne 0 ]; then
|
||
|
+ CLIRES=$(echo $CLIRES | grep -v '^$')
|
||
|
+ ocf_log warn "Route53 API returned an error: $CLIRES"
|
||
|
+ ocf_log warn "Skipping cluster action due to API call error"
|
||
|
+ return $OCF_ERR_GENERIC
|
||
|
+ fi
|
||
|
+ CHANGEID=$(echo $CLIRES | awk '{ print $12 }')
|
||
|
+ ocf_log debug "Change id: $CHANGEID"
|
||
|
+ rmtempfile $ROUTE53RECORD
|
||
|
+ CHANGEID=$(echo $CHANGEID | cut -d'/' -f 3 | cut -d'"' -f 1 )
|
||
|
+ ocf_log debug "Change id: $CHANGEID"
|
||
|
STATUS="PENDING"
|
||
|
- MYSECONDS=2
|
||
|
+ MYSECONDS=20
|
||
|
while [ "$STATUS" = 'PENDING' ]; do
|
||
|
- sleep ${MYSECONDS}
|
||
|
- STATUS="$(aws --profile ${OCF_RESKEY_profile} route53 get-change --id $CHANGEID | grep CHANGEINFO | /usr/bin/awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )"
|
||
|
- ocf_log debug "Waited for ${MYSECONDS} seconds and checked execution of Route 53 update status: ${STATUS} "
|
||
|
+ sleep $MYSECONDS
|
||
|
+ STATUS="$(aws --profile $OCF_RESKEY_profile route53 get-change --id $CHANGEID | grep CHANGEINFO | awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )"
|
||
|
+ ocf_log debug "Waited for $MYSECONDS seconds and checked execution of Route 53 update status: $STATUS "
|
||
|
done
|
||
|
}
|
||
|
|
||
|
-ec2ip_stop() {
|
||
|
- ocf_log info "Bringing down Route53 agent. (Will remove ARECORD)"
|
||
|
- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')"
|
||
|
- ARECORD="$(aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" | grep RESOURCERECORDS | /usr/bin/awk '{ print $2 }' )"
|
||
|
- ocf_log debug "Found IP address: $ARECORD ."
|
||
|
- if [ ${ARECORD} != ${IPADDRESS} ]; then
|
||
|
- ocf_log debug "No ARECORD found"
|
||
|
- return $OCF_SUCCESS
|
||
|
- else
|
||
|
- # determine IP address
|
||
|
- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')"
|
||
|
- # Patch file
|
||
|
- ocf_log debug "Deleting IP address to ${IPADDRESS}"
|
||
|
- return $OCF_SUCCESS
|
||
|
- fi
|
||
|
-
|
||
|
- _update_record "DELETE" "$IPADDRESS"
|
||
|
+r53_stop() {
|
||
|
+ #
|
||
|
+ # Stop operation doesn't perform any API call or try to remove the DNS record
|
||
|
+ # this is mostly because removal is not necessarily mandatory or desired
|
||
|
+ # the start and monitor functions will take care of changing the DNS record
|
||
|
+ # if the agent starts in a different cluster node
|
||
|
+ #
|
||
|
+ ocf_log info "Bringing down Route53 agent. (Will NOT remove Route53 DNS record)"
|
||
|
return $OCF_SUCCESS
|
||
|
}
|
||
|
|
||
|
-ec2ip_start() {
|
||
|
- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')"
|
||
|
- _update_record "UPSERT" "$IPADDRESS"
|
||
|
+r53_start() {
|
||
|
+ #
|
||
|
+ # Start agent and configure DNS in Route53
|
||
|
+ #
|
||
|
+ ocf_log info "Starting Route53 DNS update...."
|
||
|
+ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)"
|
||
|
+ r53_monitor
|
||
|
+ if [ $? != $OCF_SUCCESS ]; then
|
||
|
+ ocf_log info "Could not start agent - check configurations"
|
||
|
+ return $OCF_ERR_GENERIC
|
||
|
+ fi
|
||
|
return $OCF_SUCCESS
|
||
|
}
|
||
|
|
||
|
@@ -284,16 +376,16 @@
|
||
|
exit $OCF_SUCCESS
|
||
|
;;
|
||
|
monitor)
|
||
|
- ec2ip_monitor
|
||
|
+ r53_monitor
|
||
|
;;
|
||
|
stop)
|
||
|
- ec2ip_stop
|
||
|
+ r53_stop
|
||
|
;;
|
||
|
validate-all)
|
||
|
- ec2ip_validate
|
||
|
+ r53_validate
|
||
|
;;
|
||
|
start)
|
||
|
- ec2ip_start
|
||
|
+ r53_start
|
||
|
;;
|
||
|
*)
|
||
|
usage
|