diff --git a/.gitignore b/.gitignore index f79624a..9640a61 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,6 @@ SOURCES/aliyun-python-sdk-ecs-4.9.3.tar.gz SOURCES/aliyun-python-sdk-vpc-3.0.2.tar.gz SOURCES/colorama-0.3.3.tar.gz SOURCES/google-cloud-sdk-241.0.0-linux-x86_64.tar.gz +SOURCES/httplib2-0.18.1.tar.gz SOURCES/pycryptodome-3.6.4.tar.gz SOURCES/pyroute2-0.4.13.tar.gz diff --git a/.resource-agents.metadata b/.resource-agents.metadata index e21f8a9..546b20f 100644 --- a/.resource-agents.metadata +++ b/.resource-agents.metadata @@ -5,5 +5,6 @@ c2a98b9a1562d223a76514f05028488ca000c395 SOURCES/aliyun-python-sdk-ecs-4.9.3.tar f14647a4d37a9a254c4e711b95a7654fc418e41e SOURCES/aliyun-python-sdk-vpc-3.0.2.tar.gz 0fe5bd8bca54dd71223778a1e0bcca9af324abb1 SOURCES/colorama-0.3.3.tar.gz 876e2b0c0e3031c6e6101745acd08e4e9f53d6a9 SOURCES/google-cloud-sdk-241.0.0-linux-x86_64.tar.gz +c5d22ce6660999633154927684eb9b799123e569 SOURCES/httplib2-0.18.1.tar.gz 326a73f58a62ebee00c11a12cfdd838b196e0e8e SOURCES/pycryptodome-3.6.4.tar.gz 147149db11104c06d405fd077dcd2aa1c345f109 SOURCES/pyroute2-0.4.13.tar.gz diff --git a/SOURCES/7-gcp-bundled.patch b/SOURCES/7-gcp-bundled.patch index b341dac..b1b8a50 100644 --- a/SOURCES/7-gcp-bundled.patch +++ b/SOURCES/7-gcp-bundled.patch @@ -10,14 +10,26 @@ diff -uNr a/heartbeat/gcp-vpc-move-ip.in b/heartbeat/gcp-vpc-move-ip.in OCF_RESKEY_configuration_default="default" OCF_RESKEY_vpc_network_default="default" OCF_RESKEY_interface_default="eth0" +diff -uNr a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in +--- a/heartbeat/gcp-vpc-move-vip.in 2020-06-25 13:21:42.090334894 +0200 ++++ b/heartbeat/gcp-vpc-move-vip.in 2020-06-25 13:14:16.668092817 +0200 +@@ -28,6 +28,7 @@ + from ocf import * + + try: ++ sys.path.insert(0, '/usr/lib/resource-agents/bundled/gcp/google-cloud-sdk/lib/third_party') + import googleapiclient.discovery + except ImportError: + pass diff -uNr a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in ---- a/heartbeat/gcp-vpc-move-route.in 2019-04-05 09:20:26.180739624 +0200 -+++ b/heartbeat/gcp-vpc-move-route.in 2019-04-05 09:22:28.648649593 +0200 -@@ -45,6 +45,7 @@ +--- a/heartbeat/gcp-vpc-move-route.in 2020-06-25 13:22:03.216301380 +0200 ++++ b/heartbeat/gcp-vpc-move-route.in 2020-06-25 13:13:19.864183380 +0200 +@@ -45,6 +45,8 @@ from ocf import * try: + sys.path.insert(0, '/usr/lib/resource-agents/bundled/gcp') ++ sys.path.insert(0, '/usr/lib/resource-agents/bundled/gcp/google-cloud-sdk/lib/third_party') import googleapiclient.discovery import pyroute2 except ImportError: diff --git a/SOURCES/aliyun-vpc-move-ip-4-bundled.patch b/SOURCES/aliyun-vpc-move-ip-4-bundled.patch index b52e7c8..29a92b9 100644 --- a/SOURCES/aliyun-vpc-move-ip-4-bundled.patch +++ b/SOURCES/aliyun-vpc-move-ip-4-bundled.patch @@ -1,62 +1,15 @@ -diff -uNr a/heartbeat/aliyun-vpc-move-ip b/heartbeat/aliyun-vpc-move-ip ---- a/heartbeat/aliyun-vpc-move-ip 2018-08-03 15:21:34.869664678 +0200 -+++ b/heartbeat/aliyun-vpc-move-ip 2018-08-03 15:22:48.632185198 +0200 -@@ -36,13 +36,13 @@ - ip_drop - fi +--- a/heartbeat/aliyun-vpc-move-ip 2020-06-09 13:45:38.432860930 +0200 ++++ b/heartbeat/aliyun-vpc-move-ip 2020-06-09 13:51:06.341211557 +0200 +@@ -35,10 +35,10 @@ + USAGE="usage: $0 {start|stop|status|meta-data}"; -- cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" -+ cmd="aliyuncli-ra vpc CreateRouteEntry --RouteTableId 
$OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" - ocf_log debug "executing command: $cmd" - $cmd - rc=$? - while [ $rc -ne 0 ]; do - sleep 1 -- cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" -+ cmd="aliyuncli-ra vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" - ocf_log debug "executing command: $cmd" - $cmd - rc=$? -@@ -75,7 +75,7 @@ - return $OCF_ERR_GENERIC - fi + if [ "${OCF_RESKEY_aliyuncli}" = "detect" ]; then +- OCF_RESKEY_aliyuncli="$(which aliyuncli 2> /dev/null || which aliyun 2> /dev/null)" ++ OCF_RESKEY_aliyuncli="$(which aliyuncli-ra 2> /dev/null || which aliyuncli 2> /dev/null || which aliyun 2> /dev/null)" + fi -- cmd="aliyuncli vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text" -+ cmd="aliyuncli-ra vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text" - ocf_log debug "executing command: $cmd" - $cmd - if [ $? -ne 0 ]; then -@@ -90,13 +90,13 @@ - } - - wait_for_started() { -- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" -+ cmd="aliyuncli-ra vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" - ocf_log debug "executing command: $cmd" - ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')" - - while [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; do - sleep 3 -- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" -+ cmd="aliyuncli-ra vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" - ocf_log debug "executing command: $cmd" - ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')" - done -@@ -107,7 +107,7 @@ - - while [ ! 
-z "$ROUTE_TO_INSTANCE" ]; do - sleep 1 -- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" -+ cmd="aliyuncli-ra vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" - ocf_log debug "executing command: $cmd" - ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" - done -@@ -248,7 +248,7 @@ - - ecs_ip_monitor() { - ocf_log debug "function: ecsip_monitor: check routing table" -- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" -+ cmd="aliyuncli-ra vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" - ocf_log debug "executing command: $cmd" - - ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" +-if [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli' ]]; then ++if [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli-ra' ]] || [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli' ]]; then + OUTPUT="text" + EXECUTING='{ print $3 }' + IFS_=" " diff --git a/SOURCES/bz1633251-gcp-pd-move-1.patch b/SOURCES/bz1633251-gcp-pd-move-1.patch new file mode 100644 index 0000000..c7cbe8e --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-1.patch @@ -0,0 +1,425 @@ +From dedf420b8aa7e7e64fa56eeda2d7aeb5b2a5fcd9 Mon Sep 17 00:00:00 2001 +From: Gustavo Serra Scalet +Date: Mon, 17 Sep 2018 12:29:51 -0300 +Subject: [PATCH] Add gcp-pd-move python script + +--- + configure.ac | 1 + + doc/man/Makefile.am | 1 + + heartbeat/Makefile.am | 1 + + heartbeat/gcp-pd-move.in | 370 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 373 insertions(+) + create mode 100755 heartbeat/gcp-pd-move.in + +diff --git a/configure.ac b/configure.ac +index 10f5314da..b7ffb99f3 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -958,6 +958,7 @@ AC_CONFIG_FILES([heartbeat/conntrackd], [chmod +x heartbeat/conntrackd]) + AC_CONFIG_FILES([heartbeat/dnsupdate], [chmod +x heartbeat/dnsupdate]) + AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88]) + AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio]) ++AC_CONFIG_FILES([heartbeat/gcp-pd-move], [chmod +x heartbeat/gcp-pd-move]) + AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip]) + AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip]) + AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route]) +diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am +index 0bef88740..0235c9af6 100644 +--- a/doc/man/Makefile.am ++++ b/doc/man/Makefile.am +@@ -115,6 +115,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_fio.7 \ + ocf_heartbeat_galera.7 \ + ocf_heartbeat_garbd.7 \ ++ ocf_heartbeat_gcp-pd-move.7 \ + ocf_heartbeat_gcp-vpc-move-ip.7 \ + ocf_heartbeat_gcp-vpc-move-vip.7 \ + ocf_heartbeat_gcp-vpc-move-route.7 \ +diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am +index 993bff042..843186c98 100644 +--- a/heartbeat/Makefile.am ++++ b/heartbeat/Makefile.am +@@ -111,6 +111,7 @@ ocf_SCRIPTS = AoEtarget \ + fio \ + galera \ + garbd \ ++ gcp-pd-move \ + gcp-vpc-move-ip \ + gcp-vpc-move-vip \ + gcp-vpc-move-route \ +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +new file mode 100755 +index 000000000..f9f6c3163 +--- /dev/null ++++ b/heartbeat/gcp-pd-move.in +@@ -0,0 +1,370 @@ ++#!@PYTHON@ -tt ++# - *- coding: utf- 8 - *- ++# ++# --------------------------------------------------------------------- ++# Copyright 2018 Google Inc. 
++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# --------------------------------------------------------------------- ++# Description: Google Cloud Platform - Disk attach ++# --------------------------------------------------------------------- ++ ++import json ++import logging ++import os ++import re ++import sys ++import time ++ ++OCF_FUNCTIONS_DIR = "%s/lib/heartbeat" % os.environ.get("OCF_ROOT") ++sys.path.append(OCF_FUNCTIONS_DIR) ++ ++import ocf ++ ++try: ++ import googleapiclient.discovery ++except ImportError: ++ pass ++ ++if sys.version_info >= (3, 0): ++ # Python 3 imports. ++ import urllib.parse as urlparse ++ import urllib.request as urlrequest ++else: ++ # Python 2 imports. ++ import urllib as urlparse ++ import urllib2 as urlrequest ++ ++ ++CONN = None ++PROJECT = None ++ZONE = None ++REGION = None ++LIST_DISK_ATTACHED_INSTANCES = None ++INSTANCE_NAME = None ++ ++PARAMETERS = { ++ 'disk_name': None, ++ 'disk_scope': None, ++ 'disk_csek_file': None, ++ 'mode': None, ++ 'device_name': None, ++} ++ ++MANDATORY_PARAMETERS = ['disk_name', 'disk_scope'] ++ ++METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' ++METADATA_HEADERS = {'Metadata-Flavor': 'Google'} ++METADATA = '''<?xml version="1.0"?> ++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> ++<resource-agent name="gcp-pd-move"> ++<version>1.0</version> ++<longdesc lang="en"> ++Resource Agent that can attach or detach a regional/zonal disk on current GCP ++instance. ++Requirements : ++- Disk has to be properly created as regional/zonal in order to be used ++correctly. ++</longdesc> ++<shortdesc lang="en">Attach/Detach a persistent disk on current GCP instance</shortdesc> ++<parameters> ++<parameter name="disk_name" required="1"> ++<longdesc lang="en">The name of the GCP disk.</longdesc> ++<shortdesc lang="en">Disk name</shortdesc> ++<content type="string" default="" /> ++</parameter> ++<parameter name="disk_scope" required="1"> ++<longdesc lang="en">Disk scope</longdesc> ++<shortdesc lang="en">Network name</shortdesc> ++<content type="string" default="" /> ++</parameter> ++<parameter name="disk_csek_file"> ++<longdesc lang="en">Path to a Customer-Supplied Encryption Key (CSEK) key file</longdesc> ++<shortdesc lang="en">Customer-Supplied Encryption Key file</shortdesc> ++<content type="string" default="" /> ++</parameter> ++<parameter name="mode"> ++<longdesc lang="en">Attachment mode (rw, ro)</longdesc> ++<shortdesc lang="en">Attachment mode</shortdesc> ++<content type="string" default="rw" /> ++</parameter> ++<parameter name="device_name"> ++<longdesc lang="en">An optional name that indicates the disk name the guest operating system will see.</longdesc> ++<shortdesc lang="en">Optional device name</shortdesc> ++<content type="string" default="" /> ++</parameter> ++</parameters> ++<actions> ++<action name="start" /> ++<action name="stop" /> ++<action name="monitor" /> ++<action name="meta-data" /> ++</actions> ++</resource-agent>''' ++ ++ ++def get_metadata(metadata_key, params=None, timeout=None): ++ """Performs a GET request with the metadata headers. ++ ++ Args: ++ metadata_key: string, the metadata to perform a GET request on. ++ params: dictionary, the query parameters in the GET request. ++ timeout: int, timeout in seconds for metadata requests. ++ ++ Returns: ++ HTTP response from the GET request. ++ ++ Raises: ++ urlerror.HTTPError: raises when the GET request fails.
++ """ ++ timeout = timeout or 60 ++ metadata_url = os.path.join(METADATA_SERVER, metadata_key) ++ params = urlparse.urlencode(params or {}) ++ url = '%s?%s' % (metadata_url, params) ++ request = urlrequest.Request(url, headers=METADATA_HEADERS) ++ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) ++ return request_opener.open(request, timeout=timeout * 1.1).read() ++ ++ ++def populate_vars(): ++ global CONN ++ global INSTANCE_NAME ++ global PROJECT ++ global ZONE ++ global REGION ++ global LIST_DISK_ATTACHED_INSTANCES ++ ++ global PARAMETERS ++ ++ # Populate global vars ++ try: ++ CONN = googleapiclient.discovery.build('compute', 'v1') ++ except Exception as e: ++ logger.error('Couldn\'t connect with google api: ' + str(e)) ++ sys.exit(ocf.OCF_ERR_CONFIGURED) ++ ++ for param in PARAMETERS: ++ value = os.environ.get('OCF_RESKEY_%s' % param, None) ++ if not value and param in MANDATORY_PARAMETERS: ++ logger.error('Missing %s mandatory parameter' % param) ++ sys.exit(ocf.OCF_ERR_CONFIGURED) ++ PARAMETERS[param] = value ++ ++ try: ++ INSTANCE_NAME = get_metadata('instance/name') ++ except Exception as e: ++ logger.error( ++ 'Couldn\'t get instance name, is this running inside GCE?: ' + str(e)) ++ sys.exit(ocf.OCF_ERR_CONFIGURED) ++ ++ PROJECT = get_metadata('project/project-id') ++ ZONE = get_metadata('instance/zone').split('/')[-1] ++ REGION = ZONE[:-2] ++ LIST_DISK_ATTACHED_INSTANCES = get_disk_attached_instances( ++ PARAMETERS['disk_name']) ++ ++ ++def configure_logs(): ++ # Prepare logging ++ global logger ++ logging.getLogger('googleapiclient').setLevel(logging.WARN) ++ logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging') ++ if logging_env: ++ logging_env = logging_env.lower() ++ if any(x in logging_env for x in ['yes', 'true', 'enabled']): ++ try: ++ import google.cloud.logging.handlers ++ client = google.cloud.logging.Client() ++ handler = google.cloud.logging.handlers.CloudLoggingHandler( ++ client, name=INSTANCE_NAME) ++ handler.setLevel(logging.INFO) ++ formatter = logging.Formatter('gcp:alias "%(message)s"') ++ handler.setFormatter(formatter) ++ ocf.log.addHandler(handler) ++ logger = logging.LoggerAdapter( ++ ocf.log, {'OCF_RESOURCE_INSTANCE': ocf.OCF_RESOURCE_INSTANCE}) ++ except ImportError: ++ logger.error('Couldn\'t import google.cloud.logging, ' ++ 'disabling Stackdriver-logging support') ++ ++ ++def wait_for_operation(operation): ++ while True: ++ result = CONN.zoneOperations().get( ++ project=PROJECT, ++ zone=ZONE, ++ operation=operation['name']).execute() ++ ++ if result['status'] == 'DONE': ++ if 'error' in result: ++ raise Exception(result['error']) ++ return ++ time.sleep(1) ++ ++ ++def get_disk_attached_instances(disk): ++ def get_users_list(): ++ fl = 'name="%s"' % disk ++ request = CONN.disks().aggregatedList(project=PROJECT, filter=fl) ++ while request is not None: ++ response = request.execute() ++ locations = response.get('items', {}) ++ for location in locations.values(): ++ for d in location.get('disks', []): ++ if d['name'] == disk: ++ return d.get('users', []) ++ request = CONN.instances().aggregatedList_next( ++ previous_request=request, previous_response=response) ++ raise Exception("Unable to find disk %s" % disk) ++ ++ def get_only_instance_name(user): ++ return re.sub('.*/instances/', '', user) ++ ++ return map(get_only_instance_name, get_users_list()) ++ ++ ++def is_disk_attached(instance): ++ return instance in LIST_DISK_ATTACHED_INSTANCES ++ ++ ++def detach_disk(instance, disk_name): ++ # Python API misses disk-scope 
argument. ++ ++ # Detaching a disk is only possible by using deviceName, which is retrieved ++ # as a disk parameter when listing the instance information ++ request = CONN.instances().get( ++ project=PROJECT, zone=ZONE, instance=instance) ++ response = request.execute() ++ ++ device_name = None ++ for disk in response['disks']: ++ if disk_name in disk['source']: ++ device_name = disk['deviceName'] ++ break ++ ++ if not device_name: ++ logger.error("Didn't find %(d)s deviceName attached to %(i)s" % { ++ 'd': disk_name, ++ 'i': instance, ++ }) ++ return ++ ++ request = CONN.instances().detachDisk( ++ project=PROJECT, zone=ZONE, instance=instance, deviceName=device_name) ++ wait_for_operation(request.execute()) ++ ++ ++def attach_disk(instance, disk_name): ++ location = 'zones/%s' % ZONE ++ if PARAMETERS['disk_scope'] == 'regional': ++ location = 'regions/%s' % REGION ++ prefix = 'https://www.googleapis.com/compute/v1' ++ body = { ++ 'source': '%(prefix)s/projects/%(project)s/%(location)s/disks/%(disk)s' % { ++ 'prefix': prefix, ++ 'project': PROJECT, ++ 'location': location, ++ 'disk': disk_name, ++ }, ++ } ++ ++ # Customer-Supplied Encryption Key (CSEK) ++ if PARAMETERS['disk_csek_file']: ++ with open(PARAMETERS['disk_csek_file']) as csek_file: ++ body['diskEncryptionKey'] = { ++ 'rawKey': csek_file.read(), ++ } ++ ++ if PARAMETERS['device_name']: ++ body['deviceName'] = PARAMETERS['device_name'] ++ ++ if PARAMETERS['mode']: ++ body['mode'] = PARAMETERS['mode'] ++ ++ force_attach = None ++ if PARAMETERS['disk_scope'] == 'regional': ++ # Python API misses disk-scope argument. ++ force_attach = True ++ else: ++ # If this disk is attached to some instance, detach it first. ++ for other_instance in LIST_DISK_ATTACHED_INSTANCES: ++ logger.info("Detaching disk %(disk_name)s from other instance %(i)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'i': other_instance, ++ }) ++ detach_disk(other_instance, PARAMETERS['disk_name']) ++ ++ request = CONN.instances().attachDisk( ++ project=PROJECT, zone=ZONE, instance=instance, body=body, ++ forceAttach=force_attach) ++ wait_for_operation(request.execute()) ++ ++ ++def fetch_data(): ++ configure_logs() ++ populate_vars() ++ ++ ++def gcp_pd_move_start(): ++ fetch_data() ++ if not is_disk_attached(INSTANCE_NAME): ++ logger.info("Attaching disk %(disk_name)s to %(instance)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'instance': INSTANCE_NAME, ++ }) ++ attach_disk(INSTANCE_NAME, PARAMETERS['disk_name']) ++ ++ ++def gcp_pd_move_stop(): ++ fetch_data() ++ if is_disk_attached(INSTANCE_NAME): ++ logger.info("Detaching disk %(disk_name)s to %(instance)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'instance': INSTANCE_NAME, ++ }) ++ detach_disk(INSTANCE_NAME, PARAMETERS['disk_name']) ++ ++ ++def gcp_pd_move_status(): ++ fetch_data() ++ if is_disk_attached(INSTANCE_NAME): ++ logger.info("Disk %(disk_name)s is correctly attached to %(instance)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'instance': INSTANCE_NAME, ++ }) ++ else: ++ sys.exit(ocf.OCF_NOT_RUNNING) ++ ++ ++def main(): ++ if len(sys.argv) < 2: ++ logger.error('Missing argument') ++ return ++ ++ command = sys.argv[1] ++ if 'meta-data' in command: ++ print(METADATA) ++ return ++ ++ if command in 'start': ++ gcp_pd_move_start() ++ elif command in 'stop': ++ gcp_pd_move_stop() ++ elif command in ('monitor', 'status'): ++ gcp_pd_move_status() ++ else: ++ configure_logs() ++ logger.error('no such function %s' % str(command)) ++ ++ ++if __name__ == "__main__": ++ main() diff --git 
a/SOURCES/bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch b/SOURCES/bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch new file mode 100644 index 0000000..9a9681c --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch @@ -0,0 +1,18 @@ +commit cbe0e6507992b50afbaebc46dfaf8955cc02e5ec +Author: Oyvind Albrigtsen + + Python agents: use OCF_FUNCTIONS_DIR env variable when available + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +index f9f6c316..c5007a43 100755 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -25,7 +25,7 @@ import re + import sys + import time + +-OCF_FUNCTIONS_DIR = "%s/lib/heartbeat" % os.environ.get("OCF_ROOT") ++OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) + sys.path.append(OCF_FUNCTIONS_DIR) + + import ocf diff --git a/SOURCES/bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch b/SOURCES/bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch new file mode 100644 index 0000000..5819b94 --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch @@ -0,0 +1,48 @@ +From 4fa41a1d7b4bee31526649c40cc4c58bc6333917 Mon Sep 17 00:00:00 2001 +From: masaki-tamura +Date: Wed, 2 Oct 2019 17:12:42 +0900 +Subject: [PATCH 1/2] add parameter stackdriver_logging + +--- + heartbeat/gcp-pd-move.in | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +index c5007a43c..fac5c9744 100755 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -102,6 +102,11 @@ correctly. + <shortdesc lang="en">Optional device name</shortdesc> + <content type="string" default="" /> + </parameter> ++<parameter name="stackdriver_logging"> ++<longdesc lang="en">Use stackdriver_logging output to global resource (yes, true, enabled)</longdesc> ++<shortdesc lang="en">Use stackdriver_logging</shortdesc> ++<content type="string" default="" /> ++</parameter> + </parameters> + <actions> + <action name="start" /> + +From f762ce3da00e1775587a04751a8828ba004fb534 Mon Sep 17 00:00:00 2001 +From: masaki-tamura +Date: Wed, 2 Oct 2019 17:44:30 +0900 +Subject: [PATCH 2/2] defautl no + +--- + heartbeat/gcp-pd-move.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +index fac5c9744..7fabc80dc 100755 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -105,7 +105,7 @@ correctly. + <parameter name="stackdriver_logging"> + <longdesc lang="en">Use stackdriver_logging output to global resource (yes, true, enabled)</longdesc> + <shortdesc lang="en">Use stackdriver_logging</shortdesc> +-<content type="string" default="" /> ++<content type="string" default="no" /> + </parameter> + </parameters> + <actions> diff --git a/SOURCES/bz1633251-gcp-pd-move-4-fixes-and-improvements.patch b/SOURCES/bz1633251-gcp-pd-move-4-fixes-and-improvements.patch new file mode 100644 index 0000000..79e1bc0 --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-4-fixes-and-improvements.patch @@ -0,0 +1,176 @@ +From 9dedf4d4ad3a94e4ce75e0f29ffdd018e3709ae3 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 28 May 2020 11:39:20 +0200 +Subject: [PATCH] gcp-pd-move: fixes and improvements + +- Fixed Python 3 encoding issue +- Improved metadata +- Change monitor loglevel to debug +- Removed "regional" functionality that doesnt work with attachDisk() +- Updated rw/ro to READ_WRITE/READ_ONLY in metadata/default value +--- + heartbeat/gcp-pd-move.in | 63 ++++++++++++++++++++-------------------- + 1 file changed, 32 insertions(+), 31 deletions(-) + mode change 100755 => 100644 heartbeat/gcp-pd-move.in + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +old mode 100755 +new mode 100644 +index 7fabc80dc..f82bd25e5 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -29,6 +29,7 @@ OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.
+ sys.path.append(OCF_FUNCTIONS_DIR) + + import ocf ++from ocf import logger + + try: + import googleapiclient.discovery +@@ -48,16 +49,16 @@ else: + CONN = None + PROJECT = None + ZONE = None +-REGION = None + LIST_DISK_ATTACHED_INSTANCES = None + INSTANCE_NAME = None + + PARAMETERS = { +- 'disk_name': None, +- 'disk_scope': None, +- 'disk_csek_file': None, +- 'mode': None, +- 'device_name': None, ++ 'disk_name': '', ++ 'disk_scope': 'detect', ++ 'disk_csek_file': '', ++ 'mode': "READ_WRITE", ++ 'device_name': '', ++ 'stackdriver_logging': 'no', + } + + MANDATORY_PARAMETERS = ['disk_name', 'disk_scope'] +@@ -80,32 +81,32 @@ correctly. + + The name of the GCP disk. + Disk name +- ++ + +- +-Disk scope ++ ++Disk scope + Network name +- ++ + +- ++ + Path to a Customer-Supplied Encryption Key (CSEK) key file + Customer-Supplied Encryption Key file +- ++ + +- +-Attachment mode (rw, ro) ++ ++Attachment mode (READ_WRITE, READ_ONLY) + Attachment mode +- ++ + +- ++ + An optional name that indicates the disk name the guest operating system will see. + Optional device name +- ++ + +- ++ + Use stackdriver_logging output to global resource (yes, true, enabled) + Use stackdriver_logging +- ++ + + + +@@ -114,7 +115,9 @@ correctly. + + + +-''' ++'''.format(PARAMETERS['disk_name'], PARAMETERS['disk_scope'], ++ PARAMETERS['disk_csek_file'], PARAMETERS['mode'], PARAMETERS['device_name'], ++ PARAMETERS['stackdriver_logging']) + + + def get_metadata(metadata_key, params=None, timeout=None): +@@ -137,7 +140,7 @@ def get_metadata(metadata_key, params=None, timeout=None): + url = '%s?%s' % (metadata_url, params) + request = urlrequest.Request(url, headers=METADATA_HEADERS) + request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) +- return request_opener.open(request, timeout=timeout * 1.1).read() ++ return request_opener.open(request, timeout=timeout * 1.1).read().decode("utf-8") + + + def populate_vars(): +@@ -145,11 +148,8 @@ def populate_vars(): + global INSTANCE_NAME + global PROJECT + global ZONE +- global REGION + global LIST_DISK_ATTACHED_INSTANCES + +- global PARAMETERS +- + # Populate global vars + try: + CONN = googleapiclient.discovery.build('compute', 'v1') +@@ -158,11 +158,12 @@ def populate_vars(): + sys.exit(ocf.OCF_ERR_CONFIGURED) + + for param in PARAMETERS: +- value = os.environ.get('OCF_RESKEY_%s' % param, None) ++ value = os.environ.get('OCF_RESKEY_%s' % param, PARAMETERS[param]) + if not value and param in MANDATORY_PARAMETERS: + logger.error('Missing %s mandatory parameter' % param) + sys.exit(ocf.OCF_ERR_CONFIGURED) +- PARAMETERS[param] = value ++ elif value: ++ PARAMETERS[param] = value + + try: + INSTANCE_NAME = get_metadata('instance/name') +@@ -172,8 +173,10 @@ def populate_vars(): + sys.exit(ocf.OCF_ERR_CONFIGURED) + + PROJECT = get_metadata('project/project-id') +- ZONE = get_metadata('instance/zone').split('/')[-1] +- REGION = ZONE[:-2] ++ if PARAMETERS['disk_scope'] in ['detect', 'regional']: ++ ZONE = get_metadata('instance/zone').split('/')[-1] ++ else: ++ ZONE = PARAMETERS['disk_scope'] + LIST_DISK_ATTACHED_INSTANCES = get_disk_attached_instances( + PARAMETERS['disk_name']) + +@@ -270,8 +273,6 @@ def detach_disk(instance, disk_name): + + def attach_disk(instance, disk_name): + location = 'zones/%s' % ZONE +- if PARAMETERS['disk_scope'] == 'regional': +- location = 'regions/%s' % REGION + prefix = 'https://www.googleapis.com/compute/v1' + body = { + 'source': '%(prefix)s/projects/%(project)s/%(location)s/disks/%(disk)s' % { +@@ -342,7 +343,7 @@ def 
gcp_pd_move_stop(): + def gcp_pd_move_status(): + fetch_data() + if is_disk_attached(INSTANCE_NAME): +- logger.info("Disk %(disk_name)s is correctly attached to %(instance)s" % { ++ logger.debug("Disk %(disk_name)s is correctly attached to %(instance)s" % { + 'disk_name': PARAMETERS['disk_name'], + 'instance': INSTANCE_NAME, + }) diff --git a/SOURCES/bz1633251-gcp-pd-move-5-bundle.patch b/SOURCES/bz1633251-gcp-pd-move-5-bundle.patch new file mode 100644 index 0000000..6d6b244 --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-5-bundle.patch @@ -0,0 +1,10 @@ +--- ClusterLabs-resource-agents-e711383f/heartbeat/gcp-pd-move.in 2020-05-28 14:46:28.396220588 +0200 ++++ /home/oalbrigt/src/resource-agents/gcp-pd-move.rhel8 2020-05-28 14:16:25.845308597 +0200 +@@ -32,6 +32,7 @@ + from ocf import logger + + try: ++ sys.path.insert(0, '/usr/lib/resource-agents/bundled/gcp/google-cloud-sdk/lib/third_party') + import googleapiclient.discovery + except ImportError: + pass diff --git a/SOURCES/bz1744190-pgsql-1-set-primary-standby-initial-score.patch b/SOURCES/bz1744190-pgsql-1-set-primary-standby-initial-score.patch new file mode 100644 index 0000000..d11f12d --- /dev/null +++ b/SOURCES/bz1744190-pgsql-1-set-primary-standby-initial-score.patch @@ -0,0 +1,34 @@ +From f8e1b1407b613657ebd90381d53e6a567b92b241 Mon Sep 17 00:00:00 2001 +From: Kazutomo Nakahira +Date: Mon, 17 Dec 2018 14:15:24 +0900 +Subject: [PATCH] Medium: pgsql: Set initial score for primary and hot standby + in the probe. + +--- + heartbeat/pgsql | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/heartbeat/pgsql b/heartbeat/pgsql +index 842dc0ac4..8ef84dd3e 100755 +--- a/heartbeat/pgsql ++++ b/heartbeat/pgsql +@@ -974,11 +974,19 @@ pgsql_real_monitor() { + case "$output" in + f) ocf_log debug "PostgreSQL is running as a primary." + if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then ++ if ocf_is_probe; then ++ # Set initial score for primary. ++ exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME ++ fi + return $OCF_RUNNING_MASTER + fi + ;; + + t) ocf_log debug "PostgreSQL is running as a hot standby." ++ if ocf_is_probe; then ++ # Set initial score for hot standby. ++ exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE ++ fi + return $OCF_SUCCESS;; + + *) ocf_exit_reason "$CHECK_MS_SQL output is $output" diff --git a/SOURCES/bz1744190-pgsql-2-improve-start-checks.patch b/SOURCES/bz1744190-pgsql-2-improve-start-checks.patch new file mode 100644 index 0000000..daca241 --- /dev/null +++ b/SOURCES/bz1744190-pgsql-2-improve-start-checks.patch @@ -0,0 +1,34 @@ +From ac430f79c333d73e6cd59ae59178c7040e7dbfda Mon Sep 17 00:00:00 2001 +From: Kazunori INOUE +Date: Wed, 8 May 2019 18:23:59 +0900 +Subject: [PATCH] pgsql: enhance checks in pgsql_real_start to prevent + incorrect status gets + +--- + heartbeat/pgsql | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/pgsql b/heartbeat/pgsql +index 842dc0ac4..5d04618e6 100755 +--- a/heartbeat/pgsql ++++ b/heartbeat/pgsql +@@ -483,7 +483,7 @@ runasowner() { + "-q") + quietrun="-q" + shift 1;; +- "warn"|"err") ++ "info"|"warn"|"err") + loglevel="-$1" + shift 1;; + *) +@@ -544,7 +544,9 @@ pgsql_real_start() { + local postgres_options + local rc + +- if pgsql_status; then ++ pgsql_real_monitor info ++ rc=$? ++ if [ $rc -eq $OCF_SUCCESS -o $rc -eq $OCF_RUNNING_MASTER ]; then + ocf_log info "PostgreSQL is already running. 
PID=`cat $PIDFILE`" + if is_replication; then + return $OCF_ERR_GENERIC diff --git a/SOURCES/bz1744224-IPsrcaddr-3-fix-probe-issues.patch b/SOURCES/bz1744224-IPsrcaddr-3-fix-probe-issues.patch new file mode 100644 index 0000000..b9f8e7e --- /dev/null +++ b/SOURCES/bz1744224-IPsrcaddr-3-fix-probe-issues.patch @@ -0,0 +1,45 @@ +From 7afc581f6cd8fc37c3e14ece12fb16d31f1886f9 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 10 Jan 2020 14:35:56 +0100 +Subject: [PATCH] IPsrcaddr: fixes to avoid failing during probe + +--- + heartbeat/IPsrcaddr | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr +index f9085f082..0ef8b391f 100755 +--- a/heartbeat/IPsrcaddr ++++ b/heartbeat/IPsrcaddr +@@ -75,6 +75,10 @@ USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; + CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination" + CMDCHANGE="$IP2UTIL route change to " + ++if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then ++ CMDSHOW="$CMDSHOW src $OCF_RESKEY_ipaddress" ++fi ++ + if [ "$OCF_RESKEY_table" = "local" ]; then + TABLE="$TABLE local" + fi +@@ -183,7 +187,7 @@ export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress + + srca_read() { + # Capture matching route - doublequotes prevent word splitting... +- ROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed" ++ ROUTE="`$CMDSHOW 2> /dev/null`" || errorexit "command '$CMDSHOW' failed" + + # ... so we can make sure there is only 1 matching route + [ 1 -eq `echo "$ROUTE" | wc -l` ] || \ +@@ -199,6 +203,11 @@ srca_read() { + # and what remains after stripping out the source ip address clause + ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\5/"` + ++ # using "src " only returns output if there's a match ++ if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then ++ [ -z "$ROUTE" ] && return 1 || return 0 ++ fi ++ + [ -z "$SRCIP" ] && return 1 + [ $SRCIP = $1 ] && return 0 + return 2 diff --git a/SOURCES/bz1744224-IPsrcaddr-4-fix-hardcoded-device.patch b/SOURCES/bz1744224-IPsrcaddr-4-fix-hardcoded-device.patch new file mode 100644 index 0000000..e0e1d04 --- /dev/null +++ b/SOURCES/bz1744224-IPsrcaddr-4-fix-hardcoded-device.patch @@ -0,0 +1,23 @@ +From 5f0d15ad70098510a3782d6fd18d6eacfb51b0cf Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 16 Jan 2020 14:59:26 +0100 +Subject: [PATCH] IPsrcaddr: remove hardcoded device when using destination + parameter + +--- + heartbeat/IPsrcaddr | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr +index 0ef8b391f..7cdc3a9fe 100755 +--- a/heartbeat/IPsrcaddr ++++ b/heartbeat/IPsrcaddr +@@ -262,7 +262,7 @@ srca_stop() { + + OPTS="" + if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] ;then +- PRIMARY_IP="$($IP2UTIL -4 -o addr show dev eth0 primary | awk '{split($4,a,"/");print a[1]}')" ++ PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')" + OPTS="proto kernel scope host src $PRIMARY_IP" + fi + diff --git a/SOURCES/bz1751949-1-SAPInstance-add-reload-action.patch b/SOURCES/bz1751949-1-SAPInstance-add-reload-action.patch deleted file mode 100644 index 9f036c0..0000000 --- a/SOURCES/bz1751949-1-SAPInstance-add-reload-action.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 70a28e8130be863a9073b0a80e0511e971e205c4 Mon Sep 17 00:00:00 2001 -From: Fabian Herschel -Date: Fri, 27 Jul 2018 12:33:19 +0200 -Subject: [PATCH 1/2] SAPInstance: implemeted reload method The reload method - is needed to avoid resource restarts 
after a non-unique parameter has been - changed. This is in special for interest of the MONITOR_SERVICES parameter. - ---- - heartbeat/SAPInstance | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance -index 8de7cee8c..c25839f0c 100755 ---- a/heartbeat/SAPInstance -+++ b/heartbeat/SAPInstance -@@ -61,6 +61,7 @@ sapinstance_usage() { - The 'monitor' operation reports whether the instance seems to be working - The 'promote' operation starts the primary instance in a Master/Slave configuration - The 'demote' operation stops the primary instance and starts the ERS instance -+ The 'reload' operation allows changed parameters (non-unique only) without restarting the service - The 'notify' operation always returns SUCCESS - The 'validate-all' operation reports whether the parameters are valid - The 'methods' operation reports on the methods $0 supports -@@ -224,6 +225,7 @@ The name of the SAP START profile. Specify this parameter, if you have changed t - - - -+ - - - -@@ -244,6 +246,7 @@ sapinstance_methods() { - monitor - promote - demote -+ reload - notify - validate-all - methods -@@ -965,6 +968,9 @@ case "$ACTION" in - exit $?;; - validate-all) sapinstance_validate - exit $?;; -+ reload ) -+ ocf_log info "reloading SAPInstance parameters" -+ exit $OCF_SUCCESS;; - *) sapinstance_methods - exit $OCF_ERR_UNIMPLEMENTED;; - esac - -From ee529b088cc1111656e94dea56b9fcfa6d813313 Mon Sep 17 00:00:00 2001 -From: Fabian Herschel -Date: Fri, 27 Jul 2018 13:02:39 +0200 -Subject: [PATCH 2/2] SAPInstance: Improved indents - ---- - heartbeat/SAPInstance | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance -index c25839f0c..174ea36ef 100755 ---- a/heartbeat/SAPInstance -+++ b/heartbeat/SAPInstance -@@ -61,7 +61,7 @@ sapinstance_usage() { - The 'monitor' operation reports whether the instance seems to be working - The 'promote' operation starts the primary instance in a Master/Slave configuration - The 'demote' operation stops the primary instance and starts the ERS instance -- The 'reload' operation allows changed parameters (non-unique only) without restarting the service -+ The 'reload' operation allows changed parameters (non-unique only) without restarting the service - The 'notify' operation always returns SUCCESS - The 'validate-all' operation reports whether the parameters are valid - The 'methods' operation reports on the methods $0 supports -@@ -246,7 +246,7 @@ sapinstance_methods() { - monitor - promote - demote -- reload -+ reload - notify - validate-all - methods -@@ -969,8 +969,8 @@ case "$ACTION" in - validate-all) sapinstance_validate - exit $?;; - reload ) -- ocf_log info "reloading SAPInstance parameters" -- exit $OCF_SUCCESS;; -+ ocf_log info "reloading SAPInstance parameters" -+ exit $OCF_SUCCESS;; - *) sapinstance_methods - exit $OCF_ERR_UNIMPLEMENTED;; - esac diff --git a/SOURCES/bz1751949-2-SAPInstance-improve-profile-detection.patch b/SOURCES/bz1751949-2-SAPInstance-improve-profile-detection.patch deleted file mode 100644 index e21e2da..0000000 --- a/SOURCES/bz1751949-2-SAPInstance-improve-profile-detection.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 8eda4725a946ca669df035ed0ffdf053a65e1258 Mon Sep 17 00:00:00 2001 -From: Fabian Herschel -Date: Thu, 2 Aug 2018 15:36:31 +0200 -Subject: [PATCH] SAPInstance: Improved SAP instance profile detection - ---- - heartbeat/SAPInstance | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git 
a/heartbeat/SAPInstance b/heartbeat/SAPInstance -index 174ea36ef..eb058cccf 100755 ---- a/heartbeat/SAPInstance -+++ b/heartbeat/SAPInstance -@@ -371,7 +371,11 @@ sapinstance_init() { - - if [ -z "$currentSTART_PROFILE" ] - then -- SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -+ if [ ! -r "$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -a -r "$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" ]; then -+ SAPSTARTPROFILE="$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" -+ else -+ SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -+ fi - else - SAPSTARTPROFILE="$currentSTART_PROFILE" - fi diff --git a/SOURCES/bz1751949-3-SAPInstance-metadata-improvements.patch b/SOURCES/bz1751949-3-SAPInstance-metadata-improvements.patch deleted file mode 100644 index 7421431..0000000 --- a/SOURCES/bz1751949-3-SAPInstance-metadata-improvements.patch +++ /dev/null @@ -1,37 +0,0 @@ ---- a/heartbeat/SAPInstance 2019-02-20 12:42:55.655819263 +0100 -+++ b/heartbeat/SAPInstance 2019-02-08 10:57:02.281048136 +0100 -@@ -159,14 +159,14 @@ - - - -- Usual a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the gracefull stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! -+ Usually a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the graceful stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !! - Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL) - - - - Only used in a Master/Slave resource configuration: - The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile. --The enqueue replication instance must be installed, before you want to configure a master-slave cluster recource. -+The enqueue replication instance must be installed, before you want to configure a master-slave cluster resource. - - The master-slave configuration in the cluster must use this properties: - clone_max = 2 -@@ -209,7 +209,7 @@ - Only used for ASCS/ERS SAP Netweaver installations without implementing a master/slave resource to - allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set - to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also -- systems for NetWeaver less than 7.40, if you like to impelemnt the NW-HA-CLU-740 scenario. -+ systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario. 
- - Mark SAPInstance as ERS instance - -@@ -225,7 +225,7 @@ - - - -- -+ - - - diff --git a/SOURCES/bz1759115-aws-vpc-route53-1-update.patch b/SOURCES/bz1759115-aws-vpc-route53-1-update.patch new file mode 100644 index 0000000..9c689b1 --- /dev/null +++ b/SOURCES/bz1759115-aws-vpc-route53-1-update.patch @@ -0,0 +1,273 @@ +--- ClusterLabs-resource-agents-e711383f/heartbeat/aws-vpc-route53.in 2018-06-29 14:05:02.000000000 +0200 ++++ /home/oalbrigt/src/resource-agents/heartbeat/aws-vpc-route53.in 2019-11-07 12:24:18.822111495 +0100 +@@ -152,9 +152,15 @@ + END + } + +-ec2ip_validate() { ++r53_validate() { + ocf_log debug "function: validate" + ++ # Check for required binaries ++ ocf_log debug "Checking for required binaries" ++ for command in curl dig; do ++ check_binary "$command" ++ done ++ + # Full name + [[ -z "$OCF_RESKEY_fullname" ]] && ocf_log error "Full name parameter not set $OCF_RESKEY_fullname!" && exit $OCF_ERR_CONFIGURED + +@@ -175,32 +181,111 @@ + ocf_log debug "ok" + + if [ -n "$OCF_RESKEY_profile" ]; then +- AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile" ++ AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" + else +- AWS_PROFILE_OPT="--profile default" ++ AWS_PROFILE_OPT="--profile default --cli-connect-timeout 10" + fi + + return $OCF_SUCCESS + } + +-ec2ip_monitor() { +- ec2ip_validate ++r53_monitor() { ++ # ++ # For every start action the agent will call Route53 API to check for DNS record ++ # otherwise it will try to get results directly bu querying the DNS using "dig". ++ # Due to complexity in some DNS architectures "dig" can fail, and if this happens ++ # the monitor will fallback to the Route53 API call. ++ # ++ # There will be no failure, failover or restart of the agent if the monitor operation fails ++ # hence we only return $OCF_SUCESS in this function ++ # ++ # In case of the monitor operation detects a wrong or non-existent Route53 DNS entry ++ # it will try to fix the existing one, or create it again ++ # ++ # ++ ARECORD="" ++ IPREGEX="^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$" ++ r53_validate + ocf_log debug "Checking Route53 record sets" +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- ARECORD="$(aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" | grep RESOURCERECORDS | /usr/bin/awk '{ print $2 }' )" +- ocf_log debug "Found IP address: $ARECORD ." +- if [ "${ARECORD}" == "${IPADDRESS}" ]; then +- ocf_log debug "ARECORD $ARECORD found" ++ # ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ # ++ if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then ++ # ++ cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" ++ ocf_log info "Route53 Agent Starting or probing - executing monitoring API call: $cmd" ++ CLIRES="$($cmd 2>&1)" ++ rc=$? 
++ ocf_log debug "awscli returned code: $rc" ++ if [ $rc -ne 0 ]; then ++ CLIRES=$(echo $CLIRES | grep -v '^$') ++ ocf_log warn "Route53 API returned an error: $CLIRES" ++ ocf_log warn "Skipping cluster action due to API call error" ++ return $OCF_ERR_GENERIC ++ fi ++ ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }') ++ # ++ if ocf_is_probe; then ++ # ++ # Prevent R53 record change during probe ++ # ++ if [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then ++ ocf_log debug "Route53 DNS record $ARECORD found at probing, disregarding" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ else ++ # ++ cmd="dig +retries=3 +time=5 +short $OCF_RESKEY_fullname 2>/dev/null" ++ ocf_log info "executing monitoring command : $cmd" ++ ARECORD="$($cmd)" ++ rc=$? ++ ocf_log debug "dig return code: $rc" ++ # ++ if [[ ! $ARECORD =~ $IPREGEX ]] || [ $rc -ne 0 ]; then ++ ocf_log info "Fallback to Route53 API query due to DNS resolution failure" ++ cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" ++ ocf_log debug "executing monitoring API call: $cmd" ++ CLIRES="$($cmd 2>&1)" ++ rc=$? ++ ocf_log debug "awscli return code: $rc" ++ if [ $rc -ne 0 ]; then ++ CLIRES=$(echo $CLIRES | grep -v '^$') ++ ocf_log warn "Route53 API returned an error: $CLIRES" ++ ocf_log warn "Monitor skipping cluster action due to API call error" ++ return $OCF_SUCCESS ++ fi ++ ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }') ++ fi ++ # ++ fi ++ ocf_log info "Route53 DNS record pointing $OCF_RESKEY_fullname to IP address $ARECORD" ++ # ++ if [ "$ARECORD" == "$IPADDRESS" ]; then ++ ocf_log info "Route53 DNS record $ARECORD found" ++ return $OCF_SUCCESS ++ elif [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then ++ ocf_log info "Route53 DNS record points to a different host, setting DNS record on Route53 to this host" ++ _update_record "UPSERT" "$IPADDRESS" + return $OCF_SUCCESS + else +- ocf_log debug "No ARECORD found" +- return $OCF_NOT_RUNNING ++ ocf_log info "No Route53 DNS record found, setting DNS record on Route53 to this host" ++ _update_record "UPSERT" "$IPADDRESS" ++ return $OCF_SUCCESS + fi + + return $OCF_SUCCESS + } + + _update_record() { ++ # ++ # This function is the one that will actually execute Route53's API call ++ # and configure the DNS record using the correct API calls and parameters ++ # ++ # It creates a temporary JSON file under /tmp with the required API payload ++ # ++ # Failures in this function are critical and will cause the agent to fail ++ # + update_action="$1" + IPADDRESS="$2" + ocf_log info "Updating Route53 $OCF_RESKEY_hostedzoneid with $IPADDRESS for $OCF_RESKEY_fullname" +@@ -209,19 +294,19 @@ + ocf_exit_reason "Failed to create temporary file for record update" + exit $OCF_ERR_GENERIC + fi +- cat >>"${ROUTE53RECORD}" <<-EOF ++ cat >>"$ROUTE53RECORD" <<-EOF + { + "Comment": "Update record to reflect new IP address for a system ", + "Changes": [ + { +- "Action": "${update_action}", ++ "Action": "$update_action", + "ResourceRecordSet": { +- "Name": "${OCF_RESKEY_fullname}", ++ "Name": "$OCF_RESKEY_fullname", + "Type": "A", +- "TTL": ${OCF_RESKEY_ttl}, ++ "TTL": $OCF_RESKEY_ttl, + "ResourceRecords": [ + { +- "Value": "${IPADDRESS}" ++ "Value": "$IPADDRESS" + } + ] + } +@@ -229,46 +314,53 @@ + ] + } + EOF +- cmd="aws --profile ${OCF_RESKEY_profile} route53 change-resource-record-sets --hosted-zone-id ${OCF_RESKEY_hostedzoneid} \ +- 
--change-batch file://${ROUTE53RECORD} " ++ cmd="aws --profile $OCF_RESKEY_profile route53 change-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --change-batch file://$ROUTE53RECORD " + ocf_log debug "Executing command: $cmd" +- CHANGEID=$($cmd | grep CHANGEINFO | /usr/bin/awk -F'\t' '{ print $3 }' ) +- ocf_log debug "Change id: ${CHANGEID}" +- rmtempfile ${ROUTE53RECORD} +- CHANGEID=$(echo $CHANGEID |cut -d'/' -f 3 |cut -d'"' -f 1 ) +- ocf_log debug "Change id: ${CHANGEID}" ++ CLIRES="$($cmd 2>&1)" ++ rc=$? ++ ocf_log debug "awscli returned code: $rc" ++ if [ $rc -ne 0 ]; then ++ CLIRES=$(echo $CLIRES | grep -v '^$') ++ ocf_log warn "Route53 API returned an error: $CLIRES" ++ ocf_log warn "Skipping cluster action due to API call error" ++ return $OCF_ERR_GENERIC ++ fi ++ CHANGEID=$(echo $CLIRES | awk '{ print $12 }') ++ ocf_log debug "Change id: $CHANGEID" ++ rmtempfile $ROUTE53RECORD ++ CHANGEID=$(echo $CHANGEID | cut -d'/' -f 3 | cut -d'"' -f 1 ) ++ ocf_log debug "Change id: $CHANGEID" + STATUS="PENDING" +- MYSECONDS=2 ++ MYSECONDS=20 + while [ "$STATUS" = 'PENDING' ]; do +- sleep ${MYSECONDS} +- STATUS="$(aws --profile ${OCF_RESKEY_profile} route53 get-change --id $CHANGEID | grep CHANGEINFO | /usr/bin/awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )" +- ocf_log debug "Waited for ${MYSECONDS} seconds and checked execution of Route 53 update status: ${STATUS} " ++ sleep $MYSECONDS ++ STATUS="$(aws --profile $OCF_RESKEY_profile route53 get-change --id $CHANGEID | grep CHANGEINFO | awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )" ++ ocf_log debug "Waited for $MYSECONDS seconds and checked execution of Route 53 update status: $STATUS " + done + } + +-ec2ip_stop() { +- ocf_log info "Bringing down Route53 agent. (Will remove ARECORD)" +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- ARECORD="$(aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" | grep RESOURCERECORDS | /usr/bin/awk '{ print $2 }' )" +- ocf_log debug "Found IP address: $ARECORD ." +- if [ ${ARECORD} != ${IPADDRESS} ]; then +- ocf_log debug "No ARECORD found" +- return $OCF_SUCCESS +- else +- # determine IP address +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- # Patch file +- ocf_log debug "Deleting IP address to ${IPADDRESS}" +- return $OCF_SUCCESS +- fi +- +- _update_record "DELETE" "$IPADDRESS" ++r53_stop() { ++ # ++ # Stop operation doesn't perform any API call or try to remove the DNS record ++ # this mostly because this is not necessarily mandatory or desired ++ # the start and monitor functions will take care of changing the DNS record ++ # if the agent starts in a different cluster node ++ # ++ ocf_log info "Bringing down Route53 agent. (Will NOT remove Route53 DNS record)" + return $OCF_SUCCESS + } + +-ec2ip_start() { +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- _update_record "UPSERT" "$IPADDRESS" ++r53_start() { ++ # ++ # Start agent and config DNS in Route53 ++ # ++ ocf_log info "Starting Route53 DNS update...." ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ r53_monitor ++ if [ $? 
!= $OCF_SUCCESS ]; then ++ ocf_log info "Could not start agent - check configurations" ++ return $OCF_ERR_GENERIC ++ fi + return $OCF_SUCCESS + } + +@@ -284,16 +376,16 @@ + exit $OCF_SUCCESS + ;; + monitor) +- ec2ip_monitor ++ r53_monitor + ;; + stop) +- ec2ip_stop ++ r53_stop + ;; + validate-all) +- ec2ip_validate ++ r53_validate + ;; + start) +- ec2ip_start ++ r53_start + ;; + *) + usage diff --git a/SOURCES/bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch b/SOURCES/bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch new file mode 100644 index 0000000..afb8bb6 --- /dev/null +++ b/SOURCES/bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch @@ -0,0 +1,220 @@ +From 9b77d06bfe3308692946b8ac08bc7ec3399a762b Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 2 Apr 2020 13:38:30 +0200 +Subject: [PATCH 1/2] aws-vpc-route53: cleanup and improvements + +--- + heartbeat/aws-vpc-route53.in | 73 ++++++++++++++++++++---------------- + 1 file changed, 41 insertions(+), 32 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index b276dfb3c..1cfc2b01f 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -43,8 +43,14 @@ + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + ++OCF_RESKEY_hostedzoneid_default="" ++OCF_RESKEY_fullname_default="" ++OCF_RESKEY_ip_default="local" + OCF_RESKEY_ttl_default=10 + ++: ${OCF_RESKEY_hostedzoneid:=${OCF_RESKEY_hostedzoneid_default}} ++: ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}} ++: ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}} + : ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}} + + ####################################################################### +@@ -104,7 +110,7 @@ Hosted zone ID of Route 53. This is the table of + the Route 53 record. + + AWS hosted zone ID +- ++ + + + +@@ -113,7 +119,7 @@ Example: service.cloud.example.corp. + Note: The trailing dot is important to Route53! + + Full service name +- ++ + + + +@@ -189,6 +195,31 @@ r53_validate() { + return $OCF_SUCCESS + } + ++r53_start() { ++ # ++ # Start agent and config DNS in Route53 ++ # ++ ocf_log info "Starting Route53 DNS update...." ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ r53_monitor ++ if [ $? != $OCF_SUCCESS ]; then ++ ocf_log info "Could not start agent - check configurations" ++ return $OCF_ERR_GENERIC ++ fi ++ return $OCF_SUCCESS ++} ++ ++r53_stop() { ++ # ++ # Stop operation doesn't perform any API call or try to remove the DNS record ++ # this mostly because this is not necessarily mandatory or desired ++ # the start and monitor functions will take care of changing the DNS record ++ # if the agent starts in a different cluster node ++ # ++ ocf_log info "Bringing down Route53 agent. (Will NOT remove Route53 DNS record)" ++ return $OCF_SUCCESS ++} ++ + r53_monitor() { + # + # For every start action the agent will call Route53 API to check for DNS record +@@ -339,31 +370,6 @@ _update_record() { + done + } + +-r53_stop() { +- # +- # Stop operation doesn't perform any API call or try to remove the DNS record +- # this mostly because this is not necessarily mandatory or desired +- # the start and monitor functions will take care of changing the DNS record +- # if the agent starts in a different cluster node +- # +- ocf_log info "Bringing down Route53 agent. 
(Will NOT remove Route53 DNS record)" +- return $OCF_SUCCESS +-} +- +-r53_start() { +- # +- # Start agent and config DNS in Route53 +- # +- ocf_log info "Starting Route53 DNS update...." +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" +- r53_monitor +- if [ $? != $OCF_SUCCESS ]; then +- ocf_log info "Could not start agent - check configurations" +- return $OCF_ERR_GENERIC +- fi +- return $OCF_SUCCESS +-} +- + ############################################################################### + + case $__OCF_ACTION in +@@ -375,20 +381,23 @@ case $__OCF_ACTION in + metadata + exit $OCF_SUCCESS + ;; +- monitor) +- r53_monitor ++ start) ++ r53_validate || exit $? ++ r53_start + ;; + stop) + r53_stop + ;; ++ monitor) ++ r53_monitor ++ ;; + validate-all) + r53_validate + ;; +- start) +- r53_start +- ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; + esac ++ ++exit $? + +From 745c6b9b3e331ed3705a641f1ec03a2604de3a1d Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 2 Apr 2020 13:40:33 +0200 +Subject: [PATCH 2/2] aws-vpc-route53: add support for public and secondary + private IPs + +--- + heartbeat/aws-vpc-route53.in | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index 1cfc2b01f..ca6556951 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -121,6 +121,15 @@ Note: The trailing dot is important to Route53! + Full service name + + ++ ++ ++IP (local (default), public or secondary private IP address (e.g. 10.0.0.1). ++ ++A secondary private IP can be setup with the awsvip agent. ++ ++Type of IP or secondary private IP address (local, public or e.g. 10.0.0.1) ++ ++ + + + Time to live for Route53 ARECORD +@@ -173,6 +182,15 @@ r53_validate() { + # Hosted Zone ID + [[ -z "$OCF_RESKEY_hostedzoneid" ]] && ocf_log error "Hosted Zone ID parameter not set $OCF_RESKEY_hostedzoneid!" && exit $OCF_ERR_CONFIGURED + ++ # Type of IP/secondary IP address ++ case $OCF_RESKEY_ip in ++ local|public|*.*.*.*) ++ ;; ++ *) ++ ocf_exit_reason "Invalid value for ip: ${OCF_RESKEY_ip}" ++ exit $OCF_ERR_CONFIGURED ++ esac ++ + # profile + [[ -z "$OCF_RESKEY_profile" ]] && ocf_log error "AWS CLI profile not set $OCF_RESKEY_profile!" && exit $OCF_ERR_CONFIGURED + +@@ -200,7 +218,7 @@ r53_start() { + # Start agent and config DNS in Route53 + # + ocf_log info "Starting Route53 DNS update...." +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ _get_ip + r53_monitor + if [ $? 
!= $OCF_SUCCESS ]; then + ocf_log info "Could not start agent - check configurations" +@@ -239,7 +257,7 @@ r53_monitor() { + r53_validate + ocf_log debug "Checking Route53 record sets" + # +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ _get_ip + # + if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then + # +@@ -308,6 +326,15 @@ r53_monitor() { + return $OCF_SUCCESS + } + ++_get_ip() { ++ case $OCF_RESKEY_ip in ++ local|public) ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4)";; ++ *.*.*.*) ++ IPADDRESS="${OCF_RESKEY_ip}";; ++ esac ++} ++ + _update_record() { + # + # This function is the one that will actually execute Route53's API call diff --git a/SOURCES/bz1759115-aws-vpc-route53-3-awscli-property.patch b/SOURCES/bz1759115-aws-vpc-route53-3-awscli-property.patch new file mode 100644 index 0000000..07a02c3 --- /dev/null +++ b/SOURCES/bz1759115-aws-vpc-route53-3-awscli-property.patch @@ -0,0 +1,302 @@ +From 01d3e07ec6c5240633633cb56d1bc915190f40a5 Mon Sep 17 00:00:00 2001 +From: Brandon Perkins +Date: Fri, 24 Apr 2020 18:19:19 -0400 +Subject: [PATCH 1/4] Replace aws command line with OCF_RESKEY_awscli property. + +--- + heartbeat/aws-vpc-move-ip | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip +index 26ca6007d..af697adbe 100755 +--- a/heartbeat/aws-vpc-move-ip ++++ b/heartbeat/aws-vpc-move-ip +@@ -159,14 +159,14 @@ END + execute_cmd_as_role(){ + cmd=$1 + role=$2 +- output="$(aws sts assume-role --role-arn $role --role-session-name AWSCLI-RouteTableUpdate --profile $OCF_RESKEY_profile --output=text)" ++ output="$($OCF_RESKEY_awscli sts assume-role --role-arn $role --role-session-name AWSCLI-RouteTableUpdate --profile $OCF_RESKEY_profile --output=text)" + export AWS_ACCESS_KEY_ID="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $5}')" + export AWS_SECRET_ACCESS_KEY="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $7}')" + export AWS_SESSION_TOKEN="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $8}')" + + #Execute command + ocf_log debug "Assumed Role ${role}" +- ocf_log debug "$(aws sts get-caller-identity)" ++ ocf_log debug "$($OCF_RESKEY_awscli sts get-caller-identity)" + ocf_log debug "executing command: $cmd" + response="$($cmd)" + unset output AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN +@@ -181,7 +181,7 @@ ec2ip_set_address_param_compat(){ + } + + ec2ip_validate() { +- for cmd in aws ip curl; do ++ for cmd in $OCF_RESKEY_awscli ip curl; do + check_binary "$cmd" + done + + +From 20466ba91c21a489303774ac9a1f5f5fd7b86f12 Mon Sep 17 00:00:00 2001 +From: Brandon Perkins +Date: Fri, 24 Apr 2020 18:20:17 -0400 +Subject: [PATCH 2/4] - Replace aws command line with OCF_RESKEY_awscli + property. - Add OCF_RESKEY_awscli and OCF_RESKEY_profile default variables. - + Add awscli (Path to AWS CLI tools) parameter. - Remove required attribute on + profile parameter. - Replace --profile $OCF_RESKEY_profile with + AWS_PROFILE_OPT. + +--- + heartbeat/aws-vpc-route53.in | 71 ++++++++++++++++++++++-------------- + 1 file changed, 43 insertions(+), 28 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index ca6556951..3042b345b 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -43,11 +43,16 @@ + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + ++# Defaults ++OCF_RESKEY_awscli_default="/usr/bin/aws" ++OCF_RESKEY_profile_default="default" + OCF_RESKEY_hostedzoneid_default="" + OCF_RESKEY_fullname_default="" + OCF_RESKEY_ip_default="local" + OCF_RESKEY_ttl_default=10 + ++: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}} ++: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}} + : ${OCF_RESKEY_hostedzoneid:=${OCF_RESKEY_hostedzoneid_default}} + : ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}} + : ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}} +@@ -103,7 +108,35 @@ primitive res_route53 ocf:heartbeat:aws-vpc-route53 \ + meta target-role=Started + + Update Route53 VPC record for AWS EC2 ++ + ++ ++ ++Path to command line tools for AWS ++ ++Path to AWS CLI tools ++ ++ ++ ++ ++ ++The name of the AWS CLI profile of the root account. This ++profile will have to use the "text" format for CLI output. ++The file /root/.aws/config should have an entry which looks ++like: ++ ++ [profile cluster] ++ region = us-east-1 ++ output = text ++ ++"cluster" is the name which has to be used in the cluster ++configuration. The region has to be the current one. The ++output has to be "text". ++ ++AWS Profile Name ++ ++ ++ + + + Hosted zone ID of Route 53. This is the table of +@@ -112,6 +145,7 @@ the Route 53 record. + AWS hosted zone ID + + ++ + + + The full name of the service which will host the IP address. +@@ -121,6 +155,7 @@ Note: The trailing dot is important to Route53! + Full service name + + ++ + + + IP (local (default), public or secondary private IP address (e.g. 10.0.0.1). +@@ -130,6 +165,7 @@ A secondary private IP can be setup with the awsvip agent. + Type of IP or secondary private IP address (local, public or e.g. 10.0.0.1) + + ++ + + + Time to live for Route53 ARECORD +@@ -137,25 +173,8 @@ Time to live for Route53 ARECORD + ARECORD TTL + + +- +- +-The name of the AWS CLI profile of the root account. This +-profile will have to use the "text" format for CLI output. +-The file /root/.aws/config should have an entry which looks +-like: +- +- [profile cluster] +- region = us-east-1 +- output = text +- +-"cluster" is the name which has to be used in the cluster +-configuration. The region has to be the current one. The +-output has to be "text". +- +-AWS Profile Name +- +- + ++ + + + +@@ -198,17 +217,13 @@ r53_validate() { + [[ -z "$OCF_RESKEY_ttl" ]] && ocf_log error "TTL not set $OCF_RESKEY_ttl!" && exit $OCF_ERR_CONFIGURED + + ocf_log debug "Testing aws command" +- aws --version 2>&1 ++ $OCF_RESKEY_awscli --version 2>&1 + if [ "$?" -gt 0 ]; then + ocf_log error "Error while executing aws command as user root! Please check if AWS CLI tools (Python flavor) are properly installed and configured." 
&& exit $OCF_ERR_INSTALLED + fi + ocf_log debug "ok" + +- if [ -n "$OCF_RESKEY_profile" ]; then +- AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" +- else +- AWS_PROFILE_OPT="--profile default --cli-connect-timeout 10" +- fi ++ AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" + + return $OCF_SUCCESS + } +@@ -261,7 +276,7 @@ r53_monitor() { + # + if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then + # +- cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" ++ cmd="$OCF_RESKEY_awscli $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" + ocf_log info "Route53 Agent Starting or probing - executing monitoring API call: $cmd" + CLIRES="$($cmd 2>&1)" + rc=$? +@@ -293,7 +308,7 @@ r53_monitor() { + # + if [[ ! $ARECORD =~ $IPREGEX ]] || [ $rc -ne 0 ]; then + ocf_log info "Fallback to Route53 API query due to DNS resolution failure" +- cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" ++ cmd="$OCF_RESKEY_awscli $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" + ocf_log debug "executing monitoring API call: $cmd" + CLIRES="$($cmd 2>&1)" + rc=$? +@@ -372,7 +387,7 @@ _update_record() { + ] + } + EOF +- cmd="aws --profile $OCF_RESKEY_profile route53 change-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --change-batch file://$ROUTE53RECORD " ++ cmd="$OCF_RESKEY_awscli $AWS_PROFILE_OPT route53 change-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --change-batch file://$ROUTE53RECORD " + ocf_log debug "Executing command: $cmd" + CLIRES="$($cmd 2>&1)" + rc=$? +@@ -392,7 +407,7 @@ _update_record() { + MYSECONDS=20 + while [ "$STATUS" = 'PENDING' ]; do + sleep $MYSECONDS +- STATUS="$(aws --profile $OCF_RESKEY_profile route53 get-change --id $CHANGEID | grep CHANGEINFO | awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )" ++ STATUS="$($OCF_RESKEY_awscli $AWS_PROFILE_OPT route53 get-change --id $CHANGEID | grep CHANGEINFO | awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )" + ocf_log debug "Waited for $MYSECONDS seconds and checked execution of Route 53 update status: $STATUS " + done + } + +From 113bee3ae17a8d610edc0e3879b56e96efbe8b31 Mon Sep 17 00:00:00 2001 +From: Brandon Perkins +Date: Mon, 27 Apr 2020 11:08:27 -0400 +Subject: [PATCH 3/4] Move AWS_PROFILE_OPT before the start/stop/etc and after + the usage/meta-data case statements. + +--- + heartbeat/aws-vpc-route53.in | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index 3042b345b..ee4f8afcb 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -223,8 +223,6 @@ r53_validate() { + fi + ocf_log debug "ok" + +- AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" +- + return $OCF_SUCCESS + } + +@@ -423,6 +421,11 @@ case $__OCF_ACTION in + metadata + exit $OCF_SUCCESS + ;; ++esac ++ ++AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" ++ ++case $__OCF_ACTION in + start) + r53_validate || exit $? 
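As a reading aid for the _get_ip helper added earlier in this patch set: the agent resolves the address it publishes either from the EC2 instance metadata service (for the local and public keywords) or takes the parameter verbatim as a secondary private IP. A standalone sketch of that selection logic, assuming the standard metadata endpoint is reachable from the instance:

    # Illustration only; mirrors the _get_ip case statement from the patch above.
    # OCF_RESKEY_ip is "local" (default), "public", or a literal IPv4 address.
    get_ip_sketch() {
        case "$OCF_RESKEY_ip" in
        local|public)
            # fetches .../local-ipv4 or .../public-ipv4 from EC2 instance metadata
            IPADDRESS="$(curl -s "http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4")"
            ;;
        *.*.*.*)
            # literal secondary private IP, e.g. one managed by the awsvip agent
            IPADDRESS="$OCF_RESKEY_ip"
            ;;
        esac
    }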
+ r53_start + +From 8f46c90a73731be0c8f99adcd718f7cfc2d52002 Mon Sep 17 00:00:00 2001 +From: Brandon Perkins +Date: Mon, 27 Apr 2020 11:54:22 -0400 +Subject: [PATCH 4/4] Move AWS_PROFILE_OPT before functions and after + initialization. + +--- + heartbeat/aws-vpc-route53.in | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index ee4f8afcb..b06b93726 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -37,6 +37,7 @@ + # + # Mar. 15, 2017, vers 1.0.2 + ++ + ####################################################################### + # Initialization: + +@@ -57,9 +58,13 @@ OCF_RESKEY_ttl_default=10 + : ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}} + : ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}} + : ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}} ++####################################################################### ++ + ++AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" + ####################################################################### + ++ + usage() { + cat <<-EOT + usage: $0 {start|stop|status|monitor|validate-all|meta-data} +@@ -421,11 +426,6 @@ case $__OCF_ACTION in + metadata + exit $OCF_SUCCESS + ;; +-esac +- +-AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" +- +-case $__OCF_ACTION in + start) + r53_validate || exit $? + r53_start diff --git a/SOURCES/bz1767916-IPaddr2-clusterip-not-supported.patch b/SOURCES/bz1767916-IPaddr2-clusterip-not-supported.patch new file mode 100644 index 0000000..6e8ccfe --- /dev/null +++ b/SOURCES/bz1767916-IPaddr2-clusterip-not-supported.patch @@ -0,0 +1,104 @@ +From 92c49b6f2847546f3f938b10a2a97021774f0be3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Wed, 4 Dec 2019 14:36:59 +0100 +Subject: [PATCH] IPaddr2: ipt_CLUSTERIP "iptables" extension not "nft" backend + compatible +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: +https://lists.clusterlabs.org/pipermail/users/2019-December/026674.html +(thread also sketches a future ambition for a [presumably, to revert +the habit of a functional overloading] separate agent to use +"xt_cluster" extension/cluster match). + +Signed-off-by: Jan Pokorný +--- + heartbeat/IPaddr2 | 29 ++++++++++++++++++++++------- + heartbeat/ocf-binaries.in | 2 ++ + 2 files changed, 24 insertions(+), 7 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index 6f8e8c734..db0b0e547 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -123,6 +123,8 @@ VLDIR=$HA_RSCTMP + SENDARPPIDDIR=$HA_RSCTMP + CIP_lockfile=$HA_RSCTMP/IPaddr2-CIP-${OCF_RESKEY_ip} + ++IPADDR2_CIP_IPTABLES=$IPTABLES ++ + ####################################################################### + + meta_data() { +@@ -138,11 +140,21 @@ It can add an IP alias, or remove one. + In addition, it can implement Cluster Alias IP functionality + if invoked as a clone resource. + +-If used as a clone, you should explicitly set clone-node-max >= 2, ++If used as a clone, "shared address with a trivial, stateless ++(autonomous) load-balancing/mutual exclusion on ingress" mode gets ++applied (as opposed to "assume resource uniqueness" mode otherwise). 
++For that, Linux firewall (kernel and userspace) is assumed, and since ++recent distributions are ambivalent in plain "iptables" command to ++particular back-end resolution, "iptables-legacy" (when present) gets ++prioritized so as to avoid incompatibilities (note that respective ++ipt_CLUSTERIP firewall extension in use here is, at the same time, ++marked deprecated, yet said "legacy" layer can make it workable, ++literally, to this day) with "netfilter" one (as in "iptables-nft"). ++In that case, you should explicitly set clone-node-max >= 2, + and/or clone-max < number of nodes. In case of node failure, + clone instances need to be re-allocated on surviving nodes. +-This would not be possible if there is already an instance on those nodes, +-and clone-node-max=1 (which is the default). ++This would not be possible if there is already an instance ++on those nodes, and clone-node-max=1 (which is the default). + + + Manages virtual IPv4 and IPv6 addresses (Linux specific version) +@@ -995,7 +1007,7 @@ ip_start() { + + if [ -n "$IP_CIP" ] && ([ $ip_status = "no" ] || [ $ip_status = "partial2" ]); then + $MODPROBE ip_conntrack +- $IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ ++ $IPADDR2_CIP_IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ + --new \ + --clustermac $IF_MAC \ + --total-nodes $IP_INC_GLOBAL \ +@@ -1089,7 +1101,7 @@ ip_stop() { + i=1 + while [ $i -le $IP_INC_GLOBAL ]; do + ocf_log info $i +- $IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ ++ $IPADDR2_CIP_IPTABLES -D INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \ + --new \ + --clustermac $IF_MAC \ + --total-nodes $IP_INC_GLOBAL \ +@@ -1186,8 +1198,11 @@ ip_validate() { + set_send_arp_program + + if [ -n "$IP_CIP" ]; then +- check_binary $IPTABLES +- check_binary $MODPROBE ++ if have_binary "$IPTABLES_LEGACY"; then ++ IPADDR2_CIP_IPTABLES="$IPTABLES_LEGACY" ++ fi ++ check_binary "$IPADDR2_CIP_IPTABLES" ++ check_binary $MODPROBE + fi + + # $BASEIP, $NETMASK, $NIC , $IP_INC_GLOBAL, and $BRDCAST have been checked within ip_init, +diff --git a/heartbeat/ocf-binaries.in b/heartbeat/ocf-binaries.in +index 9439ae170..e9bf95fc2 100644 +--- a/heartbeat/ocf-binaries.in ++++ b/heartbeat/ocf-binaries.in +@@ -26,6 +26,8 @@ export PATH + : ${GREP:=grep} + : ${IFCONFIG:=ifconfig} + : ${IPTABLES:=iptables} ++## for cases that are known not to be serviceable with iptables-nft impl. ++: ${IPTABLES_LEGACY:=iptables-legacy} + : ${IP2UTIL:=ip} + : ${MDADM:=mdadm} + : ${MODPROBE:=modprobe} diff --git a/SOURCES/bz1777381-Filesystem-1-refresh-UUID.patch b/SOURCES/bz1777381-Filesystem-1-refresh-UUID.patch new file mode 100644 index 0000000..bd84123 --- /dev/null +++ b/SOURCES/bz1777381-Filesystem-1-refresh-UUID.patch @@ -0,0 +1,33 @@ +From 18888da3ceef7a56388c89a616485fd8faa392cc Mon Sep 17 00:00:00 2001 +From: Roger Zhou +Date: Thu, 14 Nov 2019 17:52:13 +0800 +Subject: [PATCH] Filesystem: refresh UUID in the start phase + +In the case a fresh filesystem is just created from another node on the +shared storage, is not visible yet. Then try partprobe to refresh +/dev/disk/by-uuid/* up to date. + +Signed-off-by: Roger Zhou +--- + heartbeat/Filesystem | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index e66ddc77f..543986441 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -454,6 +454,14 @@ Filesystem_start() + # accordingly + + if [ $blockdevice = "yes" ]; then ++ if [ "$DEVICE" != "/dev/null" -a ! 
-b "$DEVICE" ] ; then ++ # In the case a fresh filesystem is just created ++ # from another node on the shared storage, and ++ # is not visible yet. Then try partprobe to ++ # refresh /dev/disk/by-uuid/* up to date. ++ have_binary partprobe && partprobe >/dev/null 2>&1 ++ fi ++ + if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then + ocf_exit_reason "Couldn't find device [$DEVICE]. Expected /dev/??? to exist" + exit $OCF_ERR_INSTALLED diff --git a/SOURCES/bz1777381-Filesystem-2-udev-settle.patch b/SOURCES/bz1777381-Filesystem-2-udev-settle.patch new file mode 100644 index 0000000..fde7f89 --- /dev/null +++ b/SOURCES/bz1777381-Filesystem-2-udev-settle.patch @@ -0,0 +1,124 @@ +From af39017b9333dcbadee2a15f3829667f2b18fb45 Mon Sep 17 00:00:00 2001 +From: Roger Zhou +Date: Fri, 20 Dec 2019 23:28:45 +0800 +Subject: [PATCH 1/2] Filesystem: respect udevd need time to create UUID + symlinks + +To refresh the filesystem UUID, there is a race condition. partprobe +might return before the UUID symlink get created. Particularly, when the +system has many devices, the udev daemon could need visible time to +process the udev event queue. Hence, wait udev for a moment. + +Signed-off-by: Roger Zhou +--- + heartbeat/Filesystem | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 543986441..c21ad5761 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -460,6 +460,10 @@ Filesystem_start() + # is not visible yet. Then try partprobe to + # refresh /dev/disk/by-uuid/* up to date. + have_binary partprobe && partprobe >/dev/null 2>&1 ++ local timeout ++ timeout=${OCF_RESKEY_CRM_meta_timeout:="60000"} ++ timeout=$((timeout/1000)) ++ have_binary udevadm && udevadm settle -t $timeout --exit-if-exists=$DEVICE + fi + + if [ "$DEVICE" != "/dev/null" -a ! -b "$DEVICE" ] ; then + +From a9fb8077c8201b287ee0486b2a34db4b7d4d8f5d Mon Sep 17 00:00:00 2001 +From: Roger Zhou +Date: Wed, 25 Dec 2019 15:45:03 +0800 +Subject: [PATCH 2/2] Filesystem: add trigger_udev_rules_if_need() for -U, -L, + or /dev/xxx device + +DEVICE parameter of this RA accepts "-U " and "-L + ++ ++ ++Role to use to query/update the route table ++ ++route table query/update role ++ ++ ++ + + + Name of the network interface, i.e. eth0 diff --git a/SOURCES/bz1814896-Filesystem-fast_stop-default-to-no-for-GFS2.patch b/SOURCES/bz1814896-Filesystem-fast_stop-default-to-no-for-GFS2.patch new file mode 100644 index 0000000..b0e8230 --- /dev/null +++ b/SOURCES/bz1814896-Filesystem-fast_stop-default-to-no-for-GFS2.patch @@ -0,0 +1,46 @@ +--- a/heartbeat/Filesystem 2020-06-11 15:49:54.111316780 +0200 ++++ b/heartbeat/Filesystem 2020-06-11 15:53:53.423821158 +0200 +@@ -60,6 +60,21 @@ + # Defaults + DFLT_STATUSDIR=".Filesystem_status/" + ++# Parameter defaults ++ ++OCF_RESKEY_fstype_default="" ++OCF_RESKEY_fast_stop_default="yes" ++ ++: ${OCF_RESKEY_fstype=${OCF_RESKEY_fstype_default}} ++if [ -z "${OCF_RESKEY_fast_stop}" ]; then ++ case "$OCF_RESKEY_fstype" in ++ gfs2) ++ OCF_RESKEY_fast_stop="no";; ++ *) ++ OCF_RESKEY_fast_stop=${OCF_RESKEY_fast_stop_default};; ++ esac ++fi ++ + # Variables used by multiple methods + HOSTOS=`uname` + +@@ -135,7 +150,7 @@ + The type of filesystem to be mounted. + + filesystem type +- ++ + + + +@@ -178,9 +193,11 @@ + users easily and want to prevent the stop action from failing, + then set this parameter to "no" and add an appropriate timeout + for the stop operation. ++ ++This defaults to "no" for GFS2 filesystems. 
+ + fast stop +- ++ + + + diff --git a/SOURCES/bz1817432-use-safe-temp-file-location.patch b/SOURCES/bz1817432-use-safe-temp-file-location.patch new file mode 100644 index 0000000..0149d72 --- /dev/null +++ b/SOURCES/bz1817432-use-safe-temp-file-location.patch @@ -0,0 +1,44 @@ +diff -uNr a/heartbeat/ClusterMon b/heartbeat/ClusterMon +--- a/heartbeat/ClusterMon 2018-06-29 14:05:02.000000000 +0200 ++++ b/heartbeat/ClusterMon 2020-03-27 12:09:23.636845893 +0100 +@@ -86,7 +86,7 @@ + PID file location to ensure only one instance is running + + PID file +- ++ + + + +@@ -94,7 +94,7 @@ + Location to write HTML output to. + + HTML output +- ++ + + + +@@ -233,8 +233,8 @@ + fi + + : ${OCF_RESKEY_update:="15000"} +-: ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} +-: ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} ++: ${OCF_RESKEY_pidfile:="${HA_RSCTMP}/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} ++: ${OCF_RESKEY_htmlfile:="${HA_RSCTMP}/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} + + OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000` + +diff -uNr a/heartbeat/sapdb-nosha.sh b/heartbeat/sapdb-nosha.sh +--- a/heartbeat/sapdb-nosha.sh 2018-06-29 14:05:02.000000000 +0200 ++++ b/heartbeat/sapdb-nosha.sh 2020-03-27 12:07:16.183958164 +0100 +@@ -740,5 +740,5 @@ + } + + # Set a tempfile and make sure to clean it up again +-TEMPFILE="/tmp/SAPDatabase.$$.tmp" +-trap trap_handler INT TERM +\ No newline at end of file ++TEMPFILE="${HA_RSCTMP}/SAPDatabase.$$.tmp" ++trap trap_handler INT TERM diff --git a/SOURCES/bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch b/SOURCES/bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch new file mode 100644 index 0000000..2b025c5 --- /dev/null +++ b/SOURCES/bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch @@ -0,0 +1,23 @@ +From bb9e54cdac71a1f26aa626d234e38c8ae8417e9f Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 26 Mar 2020 16:26:14 +0100 +Subject: [PATCH] ocf-shellfuncs: fix ocf_is_clone() (clone_max can be 0 with + cloned resources) + +--- + heartbeat/ocf-shellfuncs.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in +index 7a97558a5..e0eaae1d5 100644 +--- a/heartbeat/ocf-shellfuncs.in ++++ b/heartbeat/ocf-shellfuncs.in +@@ -557,7 +557,7 @@ ocf_is_probe() { + # defined as a resource where the clone-max meta attribute is present, + # and set to greater than zero. + ocf_is_clone() { +- [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] && [ "${OCF_RESKEY_CRM_meta_clone_max}" -gt 0 ] ++ [ ! 
-z "${OCF_RESKEY_CRM_meta_clone_max}" ] + } + + # returns true if the resource is configured as a multistate diff --git a/SOURCES/bz1817598-ocf_is_clone-2-update-comment.patch b/SOURCES/bz1817598-ocf_is_clone-2-update-comment.patch new file mode 100644 index 0000000..4b9be99 --- /dev/null +++ b/SOURCES/bz1817598-ocf_is_clone-2-update-comment.patch @@ -0,0 +1,24 @@ +From 420e55da2eb542b35fe8af5d05496b129cd190d5 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 27 Mar 2020 08:44:12 +0100 +Subject: [PATCH] ocf-shellfuncs: ocf_is_clone: update comment based on + clone-max fix in previous commit + +--- + heartbeat/ocf-shellfuncs.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in +index e0eaae1d5..c4d40e382 100644 +--- a/heartbeat/ocf-shellfuncs.in ++++ b/heartbeat/ocf-shellfuncs.in +@@ -554,8 +554,7 @@ ocf_is_probe() { + } + + # returns true if the resource is configured as a clone. This is +-# defined as a resource where the clone-max meta attribute is present, +-# and set to greater than zero. ++# defined as a resource where the clone-max meta attribute is present. + ocf_is_clone() { + [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] + } diff --git a/SOURCES/bz1818997-nfsserver-fix-nfsv4-only-support.patch b/SOURCES/bz1818997-nfsserver-fix-nfsv4-only-support.patch new file mode 100644 index 0000000..b3efdce --- /dev/null +++ b/SOURCES/bz1818997-nfsserver-fix-nfsv4-only-support.patch @@ -0,0 +1,43 @@ +From 47dd1d16f08de06d512f9e04c3966c35f0ac4d3f Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 27 May 2020 13:05:57 +0200 +Subject: [PATCH] nfsserver: fix NFSv4-only support + +When disabling NFSv2 and NFSv3 mountd doesnt register with rpcbind, but +it's still running. This patch checks that mountd is running instead of +basing its status on it being registered w/rpcbind. +--- + heartbeat/nfsserver | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver +index acef0147a..9e6e1fcb1 100755 +--- a/heartbeat/nfsserver ++++ b/heartbeat/nfsserver +@@ -316,7 +316,7 @@ nfsserver_systemd_monitor() + fi + + ocf_log debug "Status: nfs-mountd" +- rpcinfo -t localhost 100005 > /dev/null 2>&1 ++ ps axww | grep -q "[r]pc.mountd" + rc=$? + if [ "$rc" -ne "0" ]; then + ocf_exit_reason "nfs-mountd is not running" +@@ -683,7 +683,7 @@ nfsserver_start () + local i=1 + while : ; do + ocf_log info "Start: nfs-mountd i: $i" +- rpcinfo -t localhost 100005 > /dev/null 2>&1 ++ ps axww | grep -q "[r]pc.mountd" + rc=$? + if [ "$rc" -eq "0" ]; then + break; +@@ -800,7 +800,7 @@ nfsserver_stop () + + nfs_exec stop nfs-mountd > /dev/null 2>&1 + ocf_log info "Stop: nfs-mountd" +- rpcinfo -t localhost 100005 > /dev/null 2>&1 ++ ps axww | grep -q "[r]pc.mountd" + rc=$? 
+ if [ "$rc" -eq "0" ]; then + ocf_exit_reason "Failed to stop nfs-mountd" diff --git a/SOURCES/bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch b/SOURCES/bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch new file mode 100644 index 0000000..85355b3 --- /dev/null +++ b/SOURCES/bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch @@ -0,0 +1,24 @@ +From 390d1cb8b057ef0e6869fb57dc1e6b6997af49f0 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 3 Apr 2020 16:10:04 +0200 +Subject: [PATCH] aws-vpc-move-ip: delete remaining route entries + +--- + heartbeat/aws-vpc-move-ip | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip +index 97a467217..26ca6007d 100755 +--- a/heartbeat/aws-vpc-move-ip ++++ b/heartbeat/aws-vpc-move-ip +@@ -256,6 +256,10 @@ ec2ip_drop() { + return $OCF_ERR_GENERIC + fi + ++ # delete remaining route-entries if any ++ ip route show to exact ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface | xargs -r ip route delete ++ ip route show table local to exact ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface | xargs -r ip route delete ++ + return $OCF_SUCCESS + } + diff --git a/SOURCES/bz1819965-1-ocf.py-update.patch b/SOURCES/bz1819965-1-ocf.py-update.patch new file mode 100644 index 0000000..e94deb7 --- /dev/null +++ b/SOURCES/bz1819965-1-ocf.py-update.patch @@ -0,0 +1,357 @@ +--- a/heartbeat/ocf.py 2020-04-08 13:03:20.543477544 +0200 ++++ b/heartbeat/ocf.py 2020-04-06 10:23:45.950913519 +0200 +@@ -88,6 +88,10 @@ + + OCF_RESOURCE_INSTANCE = env.get("OCF_RESOURCE_INSTANCE") + ++OCF_ACTION = env.get("__OCF_ACTION") ++if OCF_ACTION is None and len(argv) == 2: ++ OCF_ACTION = argv[1] ++ + HA_DEBUG = env.get("HA_debug", 0) + HA_DATEFMT = env.get("HA_DATEFMT", "%b %d %T ") + HA_LOGFACILITY = env.get("HA_LOGFACILITY") +@@ -135,3 +139,343 @@ + log.addHandler(dfh) + + logger = logging.LoggerAdapter(log, {'OCF_RESOURCE_INSTANCE': OCF_RESOURCE_INSTANCE}) ++ ++ ++_exit_reason_set = False ++ ++def ocf_exit_reason(msg): ++ """ ++ Print exit error string to stderr. ++ ++ Allows the OCF agent to provide a string describing ++ why the exit code was returned. ++ """ ++ global _exit_reason_set ++ cookie = env.get("OCF_EXIT_REASON_PREFIX", "ocf-exit-reason:") ++ sys.stderr.write("{}{}\n".format(cookie, msg)) ++ sys.stderr.flush() ++ logger.error(msg) ++ _exit_reason_set = True ++ ++ ++def have_binary(name): ++ """ ++ True if binary exists, False otherwise. ++ """ ++ def _access_check(fn): ++ return (os.path.exists(fn) and ++ os.access(fn, os.F_OK | os.X_OK) and ++ not os.path.isdir(fn)) ++ if _access_check(name): ++ return True ++ path = env.get("PATH", os.defpath).split(os.pathsep) ++ seen = set() ++ for dir in path: ++ dir = os.path.normcase(dir) ++ if dir not in seen: ++ seen.add(dir) ++ name2 = os.path.join(dir, name) ++ if _access_check(name2): ++ return True ++ return False ++ ++ ++def is_true(val): ++ """ ++ Convert an OCF truth value to a ++ Python boolean. ++ """ ++ return val in ("yes", "true", "1", 1, "YES", "TRUE", "ja", "on", "ON", True) ++ ++ ++def is_probe(): ++ """ ++ A probe is defined as a monitor operation ++ with an interval of zero. This is called ++ by Pacemaker to check the status of a possibly ++ not running resource. 
++ """ ++ return (OCF_ACTION == "monitor" and ++ env.get("OCF_RESKEY_CRM_meta_interval", "") == "0") ++ ++ ++def get_parameter(name, default=None): ++ """ ++ Extract the parameter value from the environment ++ """ ++ return env.get("OCF_RESKEY_{}".format(name), default) ++ ++ ++def distro(): ++ """ ++ Return name of distribution/platform. ++ ++ If possible, returns "name/version", else ++ just "name". ++ """ ++ import subprocess ++ import platform ++ try: ++ ret = subprocess.check_output(["lsb_release", "-si"]) ++ if type(ret) != str: ++ ret = ret.decode() ++ distro = ret.strip() ++ ret = subprocess.check_output(["lsb_release", "-sr"]) ++ if type(ret) != str: ++ ret = ret.decode() ++ version = ret.strip() ++ return "{}/{}".format(distro, version) ++ except Exception: ++ if os.path.exists("/etc/debian_version"): ++ return "Debian" ++ if os.path.exists("/etc/SuSE-release"): ++ return "SUSE" ++ if os.path.exists("/etc/redhat-release"): ++ return "Redhat" ++ return platform.system() ++ ++ ++class Parameter(object): ++ def __init__(self, name, shortdesc, longdesc, content_type, unique, required, default): ++ self.name = name ++ self.shortdesc = shortdesc ++ self.longdesc = longdesc ++ self.content_type = content_type ++ self.unique = unique ++ self.required = required ++ self.default = default ++ ++ def __str__(self): ++ return self.to_xml() ++ ++ def to_xml(self): ++ ret = '' + "\n" ++ ret += '' + self.shortdesc + '' + "\n" ++ ret += ' ++ ++ ++1.0 ++ ++{longdesc} ++ ++{shortdesc} ++ ++ ++{parameters} ++ ++ ++ ++{actions} ++ ++ ++ ++""".format(name=self.name, ++ longdesc=self.longdesc, ++ shortdesc=self.shortdesc, ++ parameters="".join(p.to_xml() for p in self.parameters), ++ actions="".join(a.to_xml() for a in self.actions)) ++ ++ def run(self): ++ run(self) ++ ++ ++def run(agent, handlers=None): ++ """ ++ Main loop implementation for resource agents. ++ Does not return. ++ ++ Arguments: ++ ++ agent: Agent object. ++ ++ handlers: Dict of action name to handler function. ++ ++ Handler functions can take parameters as arguments, ++ the run loop will read parameter values from the ++ environment and pass to the handler. 
++ """ ++ import inspect ++ ++ agent._handlers.update(handlers or {}) ++ handlers = agent._handlers ++ ++ def check_required_params(): ++ for p in agent.parameters: ++ if p.required and get_parameter(p.name) is None: ++ ocf_exit_reason("{}: Required parameter not set".format(p.name)) ++ sys.exit(OCF_ERR_CONFIGURED) ++ ++ def call_handler(func): ++ if hasattr(inspect, 'signature'): ++ params = inspect.signature(func).parameters.keys() ++ else: ++ params = inspect.getargspec(func).args ++ def value_for_parameter(param): ++ val = get_parameter(param) ++ if val is not None: ++ return val ++ for p in agent.parameters: ++ if p.name == param: ++ return p.default ++ arglist = [value_for_parameter(p) for p in params] ++ try: ++ rc = func(*arglist) ++ if rc is None: ++ rc = OCF_SUCCESS ++ return rc ++ except Exception as err: ++ if not _exit_reason_set: ++ ocf_exit_reason(str(err)) ++ else: ++ logger.error(str(err)) ++ return OCF_ERR_GENERIC ++ ++ meta_data_action = False ++ for action in agent.actions: ++ if action.name == "meta-data": ++ meta_data_action = True ++ break ++ if not meta_data_action: ++ agent.add_action("meta-data", timeout=10) ++ ++ if len(sys.argv) == 2 and sys.argv[1] in ("-h", "--help"): ++ sys.stdout.write("usage: %s {%s}\n\n" % (sys.argv[0], "|".join(sorted(handlers.keys()))) + ++ "Expects to have a fully populated OCF RA compliant environment set.\n") ++ sys.exit(OCF_SUCCESS) ++ ++ if OCF_ACTION is None: ++ ocf_exit_reason("No action argument set") ++ sys.exit(OCF_ERR_UNIMPLEMENTED) ++ if OCF_ACTION in ('meta-data', 'usage', 'methods'): ++ sys.stdout.write(agent.to_xml() + "\n") ++ sys.exit(OCF_SUCCESS) ++ ++ check_required_params() ++ if OCF_ACTION in handlers: ++ rc = call_handler(handlers[OCF_ACTION]) ++ sys.exit(rc) ++ sys.exit(OCF_ERR_UNIMPLEMENTED) ++ ++ ++if __name__ == "__main__": ++ import unittest ++ ++ class TestMetadata(unittest.TestCase): ++ def test_noparams_noactions(self): ++ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc") ++ self.assertEqual(""" ++ ++ ++1.0 ++ ++longdesc ++ ++shortdesc ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++""", str(m)) ++ ++ def test_params_actions(self): ++ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc") ++ m.add_parameter("testparam") ++ m.add_action("start") ++ self.assertEqual(str(m.actions[0]), '\n') ++ ++ unittest.main() diff --git a/SOURCES/bz1819965-2-azure-events.patch b/SOURCES/bz1819965-2-azure-events.patch new file mode 100644 index 0000000..220d2ba --- /dev/null +++ b/SOURCES/bz1819965-2-azure-events.patch @@ -0,0 +1,1060 @@ +diff -uNr a/configure.ac b/configure.ac +--- a/configure.ac 2020-04-16 11:54:08.466619607 +0200 ++++ b/configure.ac 2020-04-16 12:05:17.241352586 +0200 +@@ -30,6 +30,8 @@ + PKG_FEATURES="" + + AC_CONFIG_AUX_DIR(.) ++AC_CONFIG_MACRO_DIR([m4]) ++ + AC_CANONICAL_HOST + + dnl Where #defines go (e.g. 
`AC_CHECK_HEADERS' below) +@@ -72,6 +74,11 @@ + [AC_MSG_ERROR([systemd support requested but pkg-config unable to query systemd package])]) + with_systemdsystemunitdir=no], + [with_systemdsystemunitdir="$def_systemdsystemunitdir"])]) ++if test "x$with_systemdsystemunitdir" != "xno" && \ ++ test "x${prefix}" != "xNONE" && \ ++ test "x${prefix}" != "x/usr"; then ++ with_systemdsystemunitdir="${prefix}/$with_systemdsystemunitdir" ++fi + AS_IF([test "x$with_systemdsystemunitdir" != "xno"], + [AC_SUBST([systemdsystemunitdir], [$with_systemdsystemunitdir])]) + AM_CONDITIONAL([HAVE_SYSTEMD], [test "x$with_systemdsystemunitdir" != "xno"]) +@@ -79,6 +86,11 @@ + AC_ARG_WITH([systemdtmpfilesdir], + AS_HELP_STRING([--with-systemdtmpfilesdir=DIR], [Directory for systemd tmp files]), + [], [with_systemdtmpfilesdir=$($PKGCONFIG --variable=tmpfilesdir systemd)]) ++ if test "x$with_systemdtmpfilesdir" != xno && \ ++ test "x${prefix}" != "xNONE" && \ ++ test "x${prefix}" != "x/usr"; then ++ with_systemdtmpfilesdir="${prefix}/$with_systemdtmpfilesdir" ++ fi + if test "x$with_systemdtmpfilesdir" != xno; then + AC_SUBST([systemdtmpfilesdir], [$with_systemdtmpfilesdir]) + fi +@@ -501,12 +513,35 @@ + AC_SUBST(RM) + AC_SUBST(TEST) + ++dnl Ensure PYTHON is an absolute path ++AC_PATH_PROG([PYTHON], [$PYTHON]) ++ + AM_PATH_PYTHON + if test -z "$PYTHON"; then + echo "*** Essential program python not found" 1>&2 +- exit 1 + fi + ++AC_PYTHON_MODULE(googleapiclient) ++AC_PYTHON_MODULE(pyroute2) ++ ++AS_VERSION_COMPARE([$PYTHON_VERSION], [2.7], [BUILD_OCF_PY=0], [BUILD_OCF_PY=1], [BUILD_OCF_PY=1]) ++ ++BUILD_AZURE_EVENTS=1 ++if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then ++ BUILD_AZURE_EVENTS=0 ++ AC_MSG_WARN("Not building azure-events") ++fi ++AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1) ++ ++BUILD_GCP_PD_MOVE=1 ++AM_CONDITIONAL(BUILD_GCP_PD_MOVE, test $BUILD_GCP_PD_MOVE -eq 1) ++ ++BUILD_GCP_VPC_MOVE_ROUTE=1 ++AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_ROUTE, test $BUILD_GCP_VPC_MOVE_ROUTE -eq 1) ++ ++BUILD_GCP_VPC_MOVE_VIP=1 ++AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_VIP, test $BUILD_GCP_VPC_MOVE_VIP -eq 1) ++ + AC_PATH_PROGS(ROUTE, route) + AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) + +@@ -541,6 +576,12 @@ + if test x"${STYLESHEET_PREFIX}" = x""; then + DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \ + -type d | LC_ALL=C sort) ++ if test x"${DIRS}" = x""; then ++ # when datadir is not standard OS path, we cannot find docbook.xsl ++ # use standard OS path as backup ++ DIRS=$(find "/usr/share" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \ ++ -type d | LC_ALL=C sort) ++ fi + XSLT=$(basename ${DOCBOOK_XSL_PATH}) + for d in ${DIRS}; do + if test -f "${d}/${XSLT}"; then +@@ -948,6 +989,7 @@ + ) + + dnl Files we output that need to be executable ++AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events]) + AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget]) + AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID]) + AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE]) +@@ -1021,7 +1063,7 @@ + AC_MSG_RESULT([]) + AC_MSG_RESULT([$PACKAGE configuration:]) + AC_MSG_RESULT([ Version = ${VERSION}]) +-AC_MSG_RESULT([ Build Version = e711383fd5c7bef9c24ff6bc85465e59f91080f9]) ++AC_MSG_RESULT([ Build Version = $Format:%H$]) + AC_MSG_RESULT([ Features =${PKG_FEATURES}]) + AC_MSG_RESULT([]) + AC_MSG_RESULT([ Prefix = ${prefix}]) +diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am +--- 
a/doc/man/Makefile.am 2020-04-16 11:54:08.466619607 +0200 ++++ b/doc/man/Makefile.am 2020-04-16 12:08:34.913726440 +0200 +@@ -55,7 +55,7 @@ + # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was + # first reported in 1995 and added to Savannah in in 2005... + if BUILD_DOC +-man_MANS = ocf_heartbeat_AoEtarget.7 \ ++man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_AudibleAlarm.7 \ + ocf_heartbeat_ClusterMon.7 \ + ocf_heartbeat_CTDB.7 \ +@@ -183,6 +183,22 @@ + man_MANS += ocf_heartbeat_IPv6addr.7 + endif + ++if BUILD_AZURE_EVENTS ++man_MANS += ocf_heartbeat_azure-events.7 ++endif ++ ++if BUILD_GCP_PD_MOVE ++man_MANS += ocf_heartbeat_gcp-pd-move.7 ++endif ++ ++if BUILD_GCP_VPC_MOVE_ROUTE ++man_MANS += ocf_heartbeat_gcp-vpc-move-route.7 ++endif ++ ++if BUILD_GCP_VPC_MOVE_VIP ++man_MANS += ocf_heartbeat_gcp-vpc-move-vip.7 ++endif ++ + xmlfiles = $(man_MANS:.7=.xml) + + %.1 %.5 %.7 %.8: %.xml +diff -uNr a/heartbeat/azure-events.in b/heartbeat/azure-events.in +--- a/heartbeat/azure-events.in 1970-01-01 01:00:00.000000000 +0100 ++++ b/heartbeat/azure-events.in 2020-04-16 12:02:15.114693551 +0200 +@@ -0,0 +1,824 @@ ++#!@PYTHON@ -tt ++# ++# Resource agent for monitoring Azure Scheduled Events ++# ++# License: GNU General Public License (GPL) ++# (c) 2018 Tobias Niekamp, Microsoft Corp. ++# and Linux-HA contributors ++ ++import os ++import sys ++import time ++import subprocess ++import json ++try: ++ import urllib2 ++except ImportError: ++ import urllib.request as urllib2 ++import socket ++from collections import defaultdict ++ ++OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) ++sys.path.append(OCF_FUNCTIONS_DIR) ++import ocf ++ ++############################################################################## ++ ++ ++VERSION = "0.10" ++USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro()) ++ ++attr_globalPullState = "azure-events_globalPullState" ++attr_lastDocVersion = "azure-events_lastDocVersion" ++attr_curNodeState = "azure-events_curNodeState" ++attr_pendingEventIDs = "azure-events_pendingEventIDs" ++ ++default_loglevel = ocf.logging.INFO ++default_relevantEventTypes = set(["Reboot", "Redeploy"]) ++ ++global_pullMaxAttempts = 3 ++global_pullDelaySecs = 1 ++ ++############################################################################## ++ ++class attrDict(defaultdict): ++ """ ++ A wrapper for accessing dict keys like an attribute ++ """ ++ def __init__(self, data): ++ super(attrDict, self).__init__(attrDict) ++ for d in data.keys(): ++ self.__setattr__(d, data[d]) ++ ++ def __getattr__(self, key): ++ try: ++ return self[key] ++ except KeyError: ++ raise AttributeError(key) ++ ++ def __setattr__(self, key, value): ++ self[key] = value ++ ++############################################################################## ++ ++class azHelper: ++ """ ++ Helper class for Azure's metadata API (including Scheduled Events) ++ """ ++ metadata_host = "http://169.254.169.254/metadata" ++ instance_api = "instance" ++ events_api = "scheduledevents" ++ api_version = "2017-08-01" ++ ++ @staticmethod ++ def _sendMetadataRequest(endpoint, postData=None): ++ """ ++ Send a request to Azure's Azure Metadata Service API ++ """ ++ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version) ++ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData)) ++ ocf.logger.debug("_sendMetadataRequest: url = %s" % url) ++ ++ req = urllib2.Request(url, postData) ++ 
req.add_header("Metadata", "true") ++ req.add_header("User-Agent", USER_AGENT) ++ resp = urllib2.urlopen(req) ++ data = resp.read() ++ ocf.logger.debug("_sendMetadataRequest: response = %s" % data) ++ if data: ++ data = json.loads(data) ++ ++ ocf.logger.debug("_sendMetadataRequest: finished") ++ return data ++ ++ @staticmethod ++ def getInstanceInfo(): ++ """ ++ Fetch details about the current VM from Azure's Azure Metadata Service API ++ """ ++ ocf.logger.debug("getInstanceInfo: begin") ++ ++ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api) ++ ocf.logger.debug("getInstanceInfo: json = %s" % jsondata) ++ ++ ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"])) ++ return attrDict(jsondata["compute"]) ++ ++ @staticmethod ++ def pullScheduledEvents(): ++ """ ++ Retrieve all currently scheduled events via Azure Metadata Service API ++ """ ++ ocf.logger.debug("pullScheduledEvents: begin") ++ ++ jsondata = azHelper._sendMetadataRequest(azHelper.events_api) ++ ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata) ++ ++ ocf.logger.debug("pullScheduledEvents: finished") ++ return attrDict(jsondata) ++ ++ @staticmethod ++ def forceEvents(eventIDs): ++ """ ++ Force a set of events to start immediately ++ """ ++ ocf.logger.debug("forceEvents: begin") ++ ++ events = [] ++ for e in eventIDs: ++ events.append({ ++ "EventId": e, ++ }) ++ postData = { ++ "StartRequests" : events ++ } ++ ocf.logger.info("forceEvents: postData = %s" % postData) ++ resp = azHelper._sendMetadataRequest(azHelper.events_api, postData=json.dumps(postData)) ++ ++ ocf.logger.debug("forceEvents: finished") ++ return ++ ++############################################################################## ++ ++class clusterHelper: ++ """ ++ Helper functions for Pacemaker control via crm ++ """ ++ @staticmethod ++ def _getLocation(node): ++ """ ++ Helper function to retrieve local/global attributes ++ """ ++ if node: ++ return ["--node", node] ++ else: ++ return ["--type", "crm_config"] ++ ++ @staticmethod ++ def _exec(command, *args): ++ """ ++ Helper function to execute a UNIX command ++ """ ++ args = list(args) ++ ocf.logger.debug("_exec: begin; command = %s, args = %s" % (command, str(args))) ++ ++ def flatten(*n): ++ return (str(e) for a in n ++ for e in (flatten(*a) if isinstance(a, (tuple, list)) else (str(a),))) ++ command = list(flatten([command] + args)) ++ ocf.logger.debug("_exec: cmd = %s" % " ".join(command)) ++ try: ++ ret = subprocess.check_output(command) ++ ocf.logger.debug("_exec: return = %s" % ret) ++ return ret.rstrip() ++ except Exception as err: ++ ocf.logger.exception(err) ++ return None ++ ++ @staticmethod ++ def setAttr(key, value, node=None): ++ """ ++ Set the value of a specific global/local attribute in the Pacemaker cluster ++ """ ++ ocf.logger.debug("setAttr: begin; key = %s, value = %s, node = %s" % (key, value, node)) ++ ++ if value: ++ ret = clusterHelper._exec("crm_attribute", ++ "--name", key, ++ "--update", value, ++ clusterHelper._getLocation(node)) ++ else: ++ ret = clusterHelper._exec("crm_attribute", ++ "--name", key, ++ "--delete", ++ clusterHelper._getLocation(node)) ++ ++ ocf.logger.debug("setAttr: finished") ++ return len(ret) == 0 ++ ++ @staticmethod ++ def getAttr(key, node=None): ++ """ ++ Retrieve a global/local attribute from the Pacemaker cluster ++ """ ++ ocf.logger.debug("getAttr: begin; key = %s, node = %s" % (key, node)) ++ ++ val = clusterHelper._exec("crm_attribute", ++ "--name", key, ++ "--query", "--quiet", ++ "--default", "", 
++ clusterHelper._getLocation(node)) ++ ocf.logger.debug("getAttr: finished") ++ if not val: ++ return None ++ return val if not val.isdigit() else int(val) ++ ++ @staticmethod ++ def getAllNodes(): ++ """ ++ Get a list of hostnames for all nodes in the Pacemaker cluster ++ """ ++ ocf.logger.debug("getAllNodes: begin") ++ ++ nodes = [] ++ nodeList = clusterHelper._exec("crm_node", "--list") ++ for n in nodeList.decode().split("\n"): ++ nodes.append(n.split()[1]) ++ ocf.logger.debug("getAllNodes: finished; return %s" % str(nodes)) ++ ++ return nodes ++ ++ @staticmethod ++ def getHostNameFromAzName(azName): ++ """ ++ Helper function to get the actual host name from an Azure node name ++ """ ++ return clusterHelper.getAttr("hostName_%s" % azName) ++ ++ @staticmethod ++ def removeHoldFromNodes(): ++ """ ++ Remove the ON_HOLD state from all nodes in the Pacemaker cluster ++ """ ++ ocf.logger.debug("removeHoldFromNodes: begin") ++ ++ for n in clusterHelper.getAllNodes(): ++ if clusterHelper.getAttr(attr_curNodeState, node=n) == "ON_HOLD": ++ clusterHelper.setAttr(attr_curNodeState, "AVAILABLE", node=n) ++ ocf.logger.info("removeHoldFromNodes: removed ON_HOLD from node %s" % n) ++ ++ ocf.logger.debug("removeHoldFromNodes: finished") ++ return False ++ ++ @staticmethod ++ def otherNodesAvailable(exceptNode): ++ """ ++ Check if there are any nodes (except a given node) in the Pacemaker cluster that have state AVAILABLE ++ """ ++ ocf.logger.debug("otherNodesAvailable: begin; exceptNode = %s" % exceptNode) ++ ++ for n in clusterHelper.getAllNodes(): ++ state = clusterHelper.getAttr(attr_curNodeState, node=n) ++ state = stringToNodeState(state) if state else AVAILABLE ++ if state == AVAILABLE and n != exceptNode.hostName: ++ ocf.logger.info("otherNodesAvailable: at least %s is available" % n) ++ ocf.logger.debug("otherNodesAvailable: finished") ++ return True ++ ocf.logger.info("otherNodesAvailable: no other nodes are available") ++ ocf.logger.debug("otherNodesAvailable: finished") ++ ++ return False ++ ++ @staticmethod ++ def transitionSummary(): ++ """ ++ Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby) ++ """ ++ # Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node? 
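For readers unfamiliar with this probe: crm_simulate -Ls prints the pending cluster transition, and an empty Transition Summary section means no actions are queued for any node. A rough shell equivalent of the emptiness check performed below (illustrative only; the agent implements it in Python):

    # Quiesced when nothing follows the "Transition Summary:" header:
    crm_simulate -Ls | sed -n '/Transition Summary:/,$p' | tail -n +2 \
        | grep -q . && echo "actions pending" || echo "no pending actions"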
++ # # crm_simulate -Ls ++ # Transition Summary: ++ # * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1) ++ # * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0) ++ # * Move rsc_ip_HN1_HDB03 (Started hsr3-db0 -> hsr3-db1) ++ # * Start rsc_nc_HN1_HDB03 (hsr3-db1) ++ # # Excepted result when there are no pending actions: ++ # Transition Summary: ++ ocf.logger.debug("transitionSummary: begin") ++ ++ summary = clusterHelper._exec("crm_simulate", "-Ls") ++ if not summary: ++ ocf.logger.warning("transitionSummary: could not load transition summary") ++ return False ++ if summary.find("Transition Summary:") < 0: ++ ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary) ++ return False ++ summary = summary.split("Transition Summary:")[1] ++ ret = summary.decode().split("\n").pop(0) ++ ++ ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret)) ++ return ret ++ ++ @staticmethod ++ def listOperationsOnNode(node): ++ """ ++ Get a list of all current operations for a given node (used to check if any resources are pending) ++ """ ++ # hsr3-db1:/home/tniek # crm_resource --list-operations -N hsr3-db0 ++ # rsc_azure-events (ocf::heartbeat:azure-events): Started: rsc_azure-events_start_0 (node=hsr3-db0, call=91, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=115ms): complete ++ # rsc_azure-events (ocf::heartbeat:azure-events): Started: rsc_azure-events_monitor_10000 (node=hsr3-db0, call=93, rc=0, last-rc-change=Fri Jun 8 22:37:47 2018, exec=197ms): complete ++ # rsc_SAPHana_HN1_HDB03 (ocf::suse:SAPHana): Master: rsc_SAPHana_HN1_HDB03_start_0 (node=hsr3-db0, call=-1, rc=193, last-rc-change=Fri Jun 8 22:37:46 2018, exec=0ms): pending ++ # rsc_SAPHanaTopology_HN1_HDB03 (ocf::suse:SAPHanaTopology): Started: rsc_SAPHanaTopology_HN1_HDB03_start_0 (node=hsr3-db0, call=90, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=3214ms): complete ++ ocf.logger.debug("listOperationsOnNode: begin; node = %s" % node) ++ ++ resources = clusterHelper._exec("crm_resource", "--list-operations", "-N", node) ++ if len(resources) == 0: ++ ret = [] ++ else: ++ ret = resources.decode().split("\n") ++ ++ ocf.logger.debug("listOperationsOnNode: finished; return = %s" % str(ret)) ++ return ret ++ ++ @staticmethod ++ def noPendingResourcesOnNode(node): ++ """ ++ Check that there are no pending resources on a given node ++ """ ++ ocf.logger.debug("noPendingResourcesOnNode: begin; node = %s" % node) ++ ++ for r in clusterHelper.listOperationsOnNode(node): ++ ocf.logger.debug("noPendingResourcesOnNode: * %s" % r) ++ resource = r.split()[-1] ++ if resource == "pending": ++ ocf.logger.info("noPendingResourcesOnNode: found resource %s that is still pending" % resource) ++ ocf.logger.debug("noPendingResourcesOnNode: finished; return = False") ++ return False ++ ocf.logger.info("noPendingResourcesOnNode: no pending resources on node %s" % node) ++ ocf.logger.debug("noPendingResourcesOnNode: finished; return = True") ++ ++ return True ++ ++ @staticmethod ++ def allResourcesStoppedOnNode(node): ++ """ ++ Check that all resources on a given node are stopped ++ """ ++ ocf.logger.debug("allResourcesStoppedOnNode: begin; node = %s" % node) ++ ++ if clusterHelper.noPendingResourcesOnNode(node): ++ if len(clusterHelper.transitionSummary()) == 0: ++ ocf.logger.info("allResourcesStoppedOnNode: no pending resources on node %s and empty transition summary" % node) ++ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = True") ++ return True ++ ocf.logger.info("allResourcesStoppedOnNode: 
transition summary is not empty") ++ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False") ++ return False ++ ++ ocf.logger.info("allResourcesStoppedOnNode: still pending resources on node %s" % node) ++ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False") ++ return False ++ ++############################################################################## ++ ++AVAILABLE = 0 # Node is online and ready to handle events ++STOPPING = 1 # Standby has been triggered, but some resources are still running ++IN_EVENT = 2 # All resources are stopped, and event has been initiated via Azure Metadata Service ++ON_HOLD = 3 # Node has a pending event that cannot be started there are no other nodes available ++ ++def stringToNodeState(name): ++ if type(name) == int: return name ++ if name == "STOPPING": return STOPPING ++ if name == "IN_EVENT": return IN_EVENT ++ if name == "ON_HOLD": return ON_HOLD ++ return AVAILABLE ++ ++def nodeStateToString(state): ++ if state == STOPPING: return "STOPPING" ++ if state == IN_EVENT: return "IN_EVENT" ++ if state == ON_HOLD: return "ON_HOLD" ++ return "AVAILABLE" ++ ++############################################################################## ++ ++class Node: ++ """ ++ Core class implementing logic for a cluster node ++ """ ++ def __init__(self, ra): ++ self.raOwner = ra ++ self.azInfo = azHelper.getInstanceInfo() ++ self.azName = self.azInfo.name ++ self.hostName = socket.gethostname() ++ self.setAttr("azName", self.azName) ++ clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName) ++ ++ def getAttr(self, key): ++ """ ++ Get a local attribute ++ """ ++ return clusterHelper.getAttr(key, node=self.hostName) ++ ++ def setAttr(self, key, value): ++ """ ++ Set a local attribute ++ """ ++ return clusterHelper.setAttr(key, value, node=self.hostName) ++ ++ def selfOrOtherNode(self, node): ++ """ ++ Helper function to distinguish self/other node ++ """ ++ return node if node else self.hostName ++ ++ def setState(self, state, node=None): ++ """ ++ Set the state for a given node (or self) ++ """ ++ node = self.selfOrOtherNode(node) ++ ocf.logger.debug("setState: begin; node = %s, state = %s" % (node, nodeStateToString(state))) ++ ++ clusterHelper.setAttr(attr_curNodeState, nodeStateToString(state), node=node) ++ ++ ocf.logger.debug("setState: finished") ++ ++ def getState(self, node=None): ++ """ ++ Get the state for a given node (or self) ++ """ ++ node = self.selfOrOtherNode(node) ++ ocf.logger.debug("getState: begin; node = %s" % node) ++ ++ state = clusterHelper.getAttr(attr_curNodeState, node=node) ++ ocf.logger.debug("getState: state = %s" % state) ++ ocf.logger.debug("getState: finished") ++ if not state: ++ return AVAILABLE ++ return stringToNodeState(state) ++ ++ def setEventIDs(self, eventIDs, node=None): ++ """ ++ Set pending EventIDs for a given node (or self) ++ """ ++ node = self.selfOrOtherNode(node) ++ ocf.logger.debug("setEventIDs: begin; node = %s, eventIDs = %s" % (node, str(eventIDs))) ++ ++ if eventIDs: ++ eventIDStr = ",".join(eventIDs) ++ else: ++ eventIDStr = None ++ clusterHelper.setAttr(attr_pendingEventIDs, eventIDStr, node=node) ++ ++ ocf.logger.debug("setEventIDs: finished") ++ return ++ ++ def getEventIDs(self, node=None): ++ """ ++ Get pending EventIDs for a given node (or self) ++ """ ++ node = self.selfOrOtherNode(node) ++ ocf.logger.debug("getEventIDs: begin; node = %s" % node) ++ ++ eventIDStr = clusterHelper.getAttr(attr_pendingEventIDs, node=node) ++ if eventIDStr: ++ eventIDs = 
eventIDStr.decode().split(",") ++ else: ++ eventIDs = None ++ ++ ocf.logger.debug("getEventIDs: finished; eventIDs = %s" % str(eventIDs)) ++ return eventIDs ++ ++ def updateNodeStateAndEvents(self, state, eventIDs, node=None): ++ """ ++ Set the state and pending EventIDs for a given node (or self) ++ """ ++ ocf.logger.debug("updateNodeStateAndEvents: begin; node = %s, state = %s, eventIDs = %s" % (node, nodeStateToString(state), str(eventIDs))) ++ ++ self.setState(state, node=node) ++ self.setEventIDs(eventIDs, node=node) ++ ++ ocf.logger.debug("updateNodeStateAndEvents: finished") ++ return state ++ ++ def putNodeStandby(self, node=None): ++ """ ++ Put self to standby ++ """ ++ node = self.selfOrOtherNode(node) ++ ocf.logger.debug("putNodeStandby: begin; node = %s" % node) ++ ++ clusterHelper._exec("crm_attribute", ++ "-t", "nodes", ++ "-N", node, ++ "-n", "standby", ++ "-v", "on", ++ "--lifetime=forever") ++ ++ ocf.logger.debug("putNodeStandby: finished") ++ ++ def putNodeOnline(self, node=None): ++ """ ++ Put self back online ++ """ ++ node = self.selfOrOtherNode(node) ++ ocf.logger.debug("putNodeOnline: begin; node = %s" % node) ++ ++ clusterHelper._exec("crm_attribute", ++ "-t", "nodes", ++ "-N", node, ++ "-n", "standby", ++ "-v", "off", ++ "--lifetime=forever") ++ ++ ocf.logger.debug("putNodeOnline: finished") ++ ++ def separateEvents(self, events): ++ """ ++ Split own/other nodes' events ++ """ ++ ocf.logger.debug("separateEvents: begin; events = %s" % str(events)) ++ ++ localEvents = [] ++ remoteEvents = [] ++ for e in events: ++ e = attrDict(e) ++ if e.EventType not in self.raOwner.relevantEventTypes: ++ continue ++ if self.azName in e.Resources: ++ localEvents.append(e) ++ else: ++ remoteEvents.append(e) ++ ocf.logger.debug("separateEvents: finished; localEvents = %s, remoteEvents = %s" % (str(localEvents), str(remoteEvents))) ++ return (localEvents, remoteEvents) ++ ++ def removeOrphanedEvents(self, azEvents): ++ """ ++ Remove remote events that are already finished ++ """ ++ ocf.logger.debug("removeOrphanedEvents: begin; azEvents = %s" % str(azEvents)) ++ ++ azEventIDs = set() ++ for e in azEvents: ++ azEventIDs.add(e.EventId) ++ # for all nodes except self ... ++ for n in clusterHelper.getAllNodes(): ++ if n == self.hostName: ++ continue ++ curState = self.getState(node=n) ++ # ... that still show in an event or shutting down resources ... ++ if curState in (STOPPING, IN_EVENT): ++ ocf.logger.info("removeOrphanedEvents: node %s has state %s" % (n, curState)) ++ clusterEventIDs = self.getEventIDs(node=n) ++ stillActive = False ++ # ... but don't have any more events running according to Azure, ... ++ for p in clusterEventIDs: ++ if p in azEventIDs: ++ ocf.logger.info("removeOrphanedEvents: (at least) event %s on node %s has not yet finished" % (str(p), n)) ++ stillActive = True ++ break ++ if not stillActive: ++ # ... put them back online. 
++ ocf.logger.info("removeOrphanedEvents: clusterEvents %s on node %s are not in azEvents %s -> bring node back online" % (str(clusterEventIDs), n, str(azEventIDs))) ++ self.putNodeOnline(node=n) ++ ++ ocf.logger.debug("removeOrphanedEvents: finished") ++ ++ def handleRemoteEvents(self, azEvents): ++ """ ++ Handle a list of events (as provided by Azure Metadata Service) for other nodes ++ """ ++ ocf.logger.debug("handleRemoteEvents: begin; hostName = %s, events = %s" % (self.hostName, str(azEvents))) ++ ++ if len(azEvents) == 0: ++ ocf.logger.debug("handleRemoteEvents: no remote events to handle") ++ ocf.logger.debug("handleRemoteEvents: finished") ++ return ++ eventIDsForNode = {} ++ ++ # iterate through all current events as per Azure ++ for e in azEvents: ++ ocf.logger.info("handleRemoteEvents: handling remote event %s (%s; nodes = %s)" % (e.EventId, e.EventType, str(e.Resources))) ++ # before we can force an event to start, we need to ensure all nodes involved have stopped their resources ++ if e.EventStatus == "Scheduled": ++ allNodesStopped = True ++ for azName in e.Resources: ++ hostName = clusterHelper.getHostNameFromAzName(azName) ++ state = self.getState(node=hostName) ++ if state == STOPPING: ++ # the only way we can continue is when node state is STOPPING, but all resources have been stopped ++ if not clusterHelper.allResourcesStoppedOnNode(hostName): ++ ocf.logger.info("handleRemoteEvents: (at least) node %s has still resources running -> wait" % hostName) ++ allNodesStopped = False ++ break ++ elif state in (AVAILABLE, IN_EVENT, ON_HOLD): ++ ocf.logger.info("handleRemoteEvents: node %s is still %s -> remote event needs to be picked up locally" % (hostName, nodeStateToString(state))) ++ allNodesStopped = False ++ break ++ if allNodesStopped: ++ ocf.logger.info("handleRemoteEvents: nodes %s are stopped -> add remote event %s to force list" % (str(e.Resources), e.EventId)) ++ for n in e.Resources: ++ hostName = clusterHelper.getHostNameFromAzName(n) ++ if hostName in eventIDsForNode: ++ eventIDsForNode[hostName].append(e.EventId) ++ else: ++ eventIDsForNode[hostName] = [e.EventId] ++ elif e.EventStatus == "Started": ++ ocf.logger.info("handleRemoteEvents: remote event already started") ++ ++ # force the start of all events whose nodes are ready (i.e. 
have no more resources running) ++ if len(eventIDsForNode.keys()) > 0: ++ eventIDsToForce = set([item for sublist in eventIDsForNode.values() for item in sublist]) ++ ocf.logger.info("handleRemoteEvents: set nodes %s to IN_EVENT; force remote events %s" % (str(eventIDsForNode.keys()), str(eventIDsToForce))) ++ for node, eventId in eventIDsForNode.items(): ++ self.updateNodeStateAndEvents(IN_EVENT, eventId, node=node) ++ azHelper.forceEvents(eventIDsToForce) ++ ++ ocf.logger.debug("handleRemoteEvents: finished") ++ ++ def handleLocalEvents(self, azEvents): ++ """ ++ Handle a list of own events (as provided by Azure Metadata Service) ++ """ ++ ocf.logger.debug("handleLocalEvents: begin; hostName = %s, azEvents = %s" % (self.hostName, str(azEvents))) ++ ++ azEventIDs = set() ++ for e in azEvents: ++ azEventIDs.add(e.EventId) ++ ++ curState = self.getState() ++ clusterEventIDs = self.getEventIDs() ++ mayUpdateDocVersion = False ++ ocf.logger.info("handleLocalEvents: current state = %s; pending local clusterEvents = %s" % (nodeStateToString(curState), str(clusterEventIDs))) ++ ++ # check if there are currently/still events set for the node ++ if clusterEventIDs: ++ # there are pending events set, so our state must be STOPPING or IN_EVENT ++ i = 0; touchedEventIDs = False ++ while i < len(clusterEventIDs): ++ # clean up pending events that are already finished according to AZ ++ if clusterEventIDs[i] not in azEventIDs: ++ ocf.logger.info("handleLocalEvents: remove finished local clusterEvent %s" % (clusterEventIDs[i])) ++ clusterEventIDs.pop(i) ++ touchedEventIDs = True ++ else: ++ i += 1 ++ if len(clusterEventIDs) > 0: ++ # there are still pending events (either because we're still stopping, or because the event is still in place) ++ # either way, we need to wait ++ if touchedEventIDs: ++ ocf.logger.info("handleLocalEvents: added new local clusterEvent %s" % str(clusterEventIDs)) ++ self.setEventIDs(clusterEventIDs) ++ else: ++ ocf.logger.info("handleLocalEvents: no local clusterEvents were updated") ++ else: ++ # there are no more pending events left after cleanup ++ if clusterHelper.noPendingResourcesOnNode(self.hostName): ++ # and no pending resources on the node -> set it back online ++ ocf.logger.info("handleLocalEvents: all local events finished -> clean up, put node online and AVAILABLE") ++ curState = self.updateNodeStateAndEvents(AVAILABLE, None) ++ self.putNodeOnline() ++ clusterHelper.removeHoldFromNodes() ++ # repeat handleLocalEvents() since we changed status to AVAILABLE ++ else: ++ ocf.logger.info("handleLocalEvents: all local events finished, but some resources have not completed startup yet -> wait") ++ else: ++ # there are no pending events set for us (yet) ++ if curState == AVAILABLE: ++ if len(azEventIDs) > 0: ++ if clusterHelper.otherNodesAvailable(self): ++ ocf.logger.info("handleLocalEvents: can handle local events %s -> set state STOPPING" % (str(azEventIDs))) ++ # this will also set mayUpdateDocVersion = True ++ curState = self.updateNodeStateAndEvents(STOPPING, azEventIDs) ++ else: ++ ocf.logger.info("handleLocalEvents: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(azEventIDs)) ++ self.setState(ON_HOLD) ++ else: ++ ocf.logger.debug("handleLocalEvents: no local azEvents to handle") ++ if curState == STOPPING: ++ if clusterHelper.noPendingResourcesOnNode(self.hostName): ++ ocf.logger.info("handleLocalEvents: all local resources are started properly -> put node standby") ++ self.putNodeStandby() ++ mayUpdateDocVersion = True ++ else: ++ 
ocf.logger.info("handleLocalEvents: some local resources are not clean yet -> wait") ++ ++ ocf.logger.debug("handleLocalEvents: finished; mayUpdateDocVersion = %s" % str(mayUpdateDocVersion)) ++ return mayUpdateDocVersion ++ ++############################################################################## ++ ++class raAzEvents: ++ """ ++ Main class for resource agent ++ """ ++ def __init__(self, relevantEventTypes): ++ self.node = Node(self) ++ self.relevantEventTypes = relevantEventTypes ++ ++ def monitor(self): ++ ocf.logger.debug("monitor: begin") ++ ++ pullFailedAttemps = 0 ++ while True: ++ # check if another node is pulling at the same time; ++ # this should only be a concern for the first pull, as setting up Scheduled Events may take up to 2 minutes. ++ if clusterHelper.getAttr(attr_globalPullState) == "PULLING": ++ pullFailedAttemps += 1 ++ if pullFailedAttemps == global_pullMaxAttempts: ++ ocf.logger.warning("monitor: exceeded maximum number of attempts (%d) to pull events" % global_pullMaxAttempts) ++ ocf.logger.debug("monitor: finished") ++ return ocf.OCF_SUCCESS ++ else: ++ ocf.logger.info("monitor: another node is pulling; retry in %d seconds" % global_pullDelaySecs) ++ time.sleep(global_pullDelaySecs) ++ continue ++ ++ # we can pull safely from Azure Metadata Service ++ clusterHelper.setAttr(attr_globalPullState, "PULLING") ++ events = azHelper.pullScheduledEvents() ++ clusterHelper.setAttr(attr_globalPullState, "IDLE") ++ ++ # get current document version ++ curDocVersion = events.DocumentIncarnation ++ lastDocVersion = self.node.getAttr(attr_lastDocVersion) ++ ocf.logger.debug("monitor: lastDocVersion = %s; curDocVersion = %s" % (lastDocVersion, curDocVersion)) ++ ++ # split events local/remote ++ (localEvents, remoteEvents) = self.node.separateEvents(events.Events) ++ ++ # ensure local events are only executing once ++ if curDocVersion != lastDocVersion: ++ ocf.logger.debug("monitor: curDocVersion has not been handled yet") ++ # handleLocalEvents() returns True if mayUpdateDocVersion is True; ++ # this is only the case if we can ensure there are no pending events ++ if self.node.handleLocalEvents(localEvents): ++ ocf.logger.info("monitor: handleLocalEvents completed successfully -> update curDocVersion") ++ self.node.setAttr(attr_lastDocVersion, curDocVersion) ++ else: ++ ocf.logger.debug("monitor: handleLocalEvents still waiting -> keep curDocVersion") ++ else: ++ ocf.logger.info("monitor: already handled curDocVersion, skip") ++ ++ # remove orphaned remote events and then handle the remaining remote events ++ self.node.removeOrphanedEvents(remoteEvents) ++ self.node.handleRemoteEvents(remoteEvents) ++ break ++ ++ ocf.logger.debug("monitor: finished") ++ return ocf.OCF_SUCCESS ++ ++############################################################################## ++ ++def setLoglevel(verbose): ++ # set up writing into syslog ++ loglevel = default_loglevel ++ if verbose: ++ opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=1)) ++ urllib2.install_opener(opener) ++ loglevel = ocf.logging.DEBUG ++ ocf.log.setLevel(loglevel) ++ ++description = ( ++ "Microsoft Azure Scheduled Events monitoring agent", ++ """This resource agent implements a monitor for scheduled ++(maintenance) events for a Microsoft Azure VM. ++ ++If any relevant events are found, it moves all Pacemaker resources ++away from the affected node to allow for a graceful shutdown. 
++ ++ Usage: ++ [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events ACTION ++ ++ action (required): Supported values: monitor, help, meta-data ++ eventTypes (optional): List of event types to be considered ++ relevant by the resource agent (comma-separated). ++ Supported values: Freeze,Reboot,Redeploy ++ Default = Reboot,Redeploy ++ verbose (optional): If set to true, displays debug info. ++ Default = false ++ ++ Deployment: ++ crm configure primitive rsc_azure-events ocf:heartbeat:azure-events \ ++ op monitor interval=10s ++ crm configure clone cln_azure-events rsc_azure-events ++ ++For further information on Microsoft Azure Scheduled Events, please ++refer to the following documentation: ++https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events ++""") ++ ++def monitor_action(eventTypes): ++ relevantEventTypes = set(eventTypes.split(",") if eventTypes else []) ++ ra = raAzEvents(relevantEventTypes) ++ return ra.monitor() ++ ++def validate_action(eventTypes): ++ if eventTypes: ++ for event in eventTypes.split(","): ++ if event not in ("Freeze", "Reboot", "Redeploy"): ++ ocf.ocf_exit_reason("Event type not one of Freeze, Reboot, Redeploy: " + eventTypes) ++ return ocf.OCF_ERR_CONFIGURED ++ return ocf.OCF_SUCCESS ++ ++def main(): ++ agent = ocf.Agent("azure-events", shortdesc=description[0], longdesc=description[1]) ++ agent.add_parameter( ++ "eventTypes", ++ shortdesc="List of resources to be considered", ++ longdesc="A comma-separated list of event types that will be handled by this resource agent. (Possible values: Freeze,Reboot,Redeploy)", ++ content_type="string", ++ default="Reboot,Redeploy") ++ agent.add_parameter( ++ "verbose", ++ shortdesc="Enable verbose agent logging", ++ longdesc="Set to true to enable verbose logging", ++ content_type="boolean", ++ default="false") ++ agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS) ++ agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS) ++ agent.add_action("validate-all", timeout=20, handler=validate_action) ++ agent.add_action("monitor", timeout=240, interval=10, handler=monitor_action) ++ setLoglevel(ocf.is_true(ocf.get_parameter("verbose", "false"))) ++ agent.run() ++ ++if __name__ == '__main__': ++ main() +diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am +--- a/heartbeat/Makefile.am 2020-04-16 11:54:08.467619588 +0200 ++++ b/heartbeat/Makefile.am 2020-04-16 12:08:07.788224036 +0200 +@@ -55,7 +55,7 @@ + osp_SCRIPTS = nova-compute-wait \ + NovaEvacuate + +-ocf_SCRIPTS = AoEtarget \ ++ocf_SCRIPTS = AoEtarget \ + AudibleAlarm \ + ClusterMon \ + CTDB \ +@@ -116,10 +116,7 @@ + fio \ + galera \ + garbd \ +- gcp-pd-move \ + gcp-vpc-move-ip \ +- gcp-vpc-move-vip \ +- gcp-vpc-move-route \ + iSCSILogicalUnit \ + iSCSITarget \ + ids \ +@@ -177,6 +174,22 @@ + vsftpd \ + zabbixserver + ++if BUILD_AZURE_EVENTS ++ocf_SCRIPTS += azure-events ++endif ++ ++if BUILD_GCP_PD_MOVE ++ocf_SCRIPTS += gcp-pd-move ++endif ++ ++if BUILD_GCP_VPC_MOVE_ROUTE ++ocf_SCRIPTS += gcp-vpc-move-route ++endif ++ ++if BUILD_GCP_VPC_MOVE_VIP ++ocf_SCRIPTS += gcp-vpc-move-vip ++endif ++ + ocfcommondir = $(OCF_LIB_DIR_PREFIX)/heartbeat + ocfcommon_DATA = ocf-shellfuncs \ + ocf-binaries \ +@@ -205,3 +218,13 @@ + + %.check: % + OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir) ./$< meta-data | xmllint --path $(abs_srcdir) --noout --relaxng $(abs_srcdir)/metadata.rng - ++ ++do_spellcheck = printf '[%s]\n' "$(agent)"; \ ++ OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir) \ ++ ./$(agent)
meta-data 2>/dev/null \ ++ | xsltproc $(top_srcdir)/make/extract_text.xsl - \ ++ | aspell pipe list -d en_US --ignore-case \ ++ --home-dir=$(top_srcdir)/make -p spellcheck-ignore \ ++ | sed -n 's|^&\([^:]*\):.*|\1|p'; ++spellcheck: ++ @$(foreach agent,$(ocf_SCRIPTS), $(do_spellcheck)) +diff -uNr a/m4/ac_python_module.m4 b/m4/ac_python_module.m4 +--- a/m4/ac_python_module.m4 1970-01-01 01:00:00.000000000 +0100 ++++ b/m4/ac_python_module.m4 2020-04-14 11:11:26.325806378 +0200 +@@ -0,0 +1,30 @@ ++dnl @synopsis AC_PYTHON_MODULE(modname[, fatal]) ++dnl ++dnl Checks for Python module. ++dnl ++dnl If fatal is non-empty then absence of a module will trigger an ++dnl error. ++dnl ++dnl @category InstalledPackages ++dnl @author Andrew Collier . ++dnl @version 2004-07-14 ++dnl @license AllPermissive ++ ++AC_DEFUN([AC_PYTHON_MODULE],[ ++ AC_MSG_CHECKING(python module: $1) ++ $PYTHON -c "import $1" 2>/dev/null ++ if test $? -eq 0; ++ then ++ AC_MSG_RESULT(yes) ++ eval AS_TR_CPP(HAVE_PYMOD_$1)=yes ++ else ++ AC_MSG_RESULT(no) ++ eval AS_TR_CPP(HAVE_PYMOD_$1)=no ++ # ++ if test -n "$2" ++ then ++ AC_MSG_ERROR(failed to find required module $1) ++ exit 1 ++ fi ++ fi ++]) diff --git a/SOURCES/bz1820523-exportfs-1-add-symlink-support.patch b/SOURCES/bz1820523-exportfs-1-add-symlink-support.patch new file mode 100644 index 0000000..b5c1d96 --- /dev/null +++ b/SOURCES/bz1820523-exportfs-1-add-symlink-support.patch @@ -0,0 +1,51 @@ +From 091a6d1f26140651b7314cfb618c80f9258fd1d4 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 5 May 2020 14:20:43 +0200 +Subject: [PATCH] exportfs: add symlink support + +based on implementation in Filesystem agent +--- + heartbeat/exportfs | 30 ++++++++++++++++++++++-------- + 1 file changed, 22 insertions(+), 8 deletions(-) + +diff --git a/heartbeat/exportfs b/heartbeat/exportfs +index 1cabdee70..294d7eec9 100755 +--- a/heartbeat/exportfs ++++ b/heartbeat/exportfs +@@ -450,14 +450,28 @@ exportfs_validate_all () + fi + } + +-# If someone puts a trailing slash at the end of the export directory, +-# this agent is going to fail in some unexpected ways due to how +-# export strings are matched. The simplest solution here is to strip off +-# a trailing '/' in the directory before processing anything. +-newdir=$(echo "$OCF_RESKEY_directory" | sed -n -e 's/^\(.*\)\/$/\1/p') +-if [ -n "$newdir" ]; then +- OCF_RESKEY_directory=$newdir +-fi ++for dir in $OCF_RESKEY_directory; do ++ # strip off trailing '/' from directory ++ dir=$(echo $dir | sed 's/\/*$//') ++ : ${dir:=/} ++ if [ -e "$dir" ] ; then ++ canonicalized_dir=$(readlink -f "$dir") ++ if [ $? 
-ne 0 ]; then ++ if [ "$__OCF_ACTION" != "stop" ]; then ++ ocf_exit_reason "Could not canonicalize $dir because readlink failed" ++ exit $OCF_ERR_GENERIC ++ fi ++ fi ++ else ++ if [ "$__OCF_ACTION" != "stop" ]; then ++ ocf_exit_reason "$dir does not exist" ++ exit $OCF_ERR_CONFIGURED ++ fi ++ fi ++ directories+="$canonicalized_dir " ++done ++ ++OCF_RESKEY_directory="${directories%% }" + + NUMDIRS=`echo "$OCF_RESKEY_directory" | wc -w` + OCF_REQUIRED_PARAMS="directory fsid clientspec" diff --git a/SOURCES/bz1820523-exportfs-2-fix-monitor-action.patch b/SOURCES/bz1820523-exportfs-2-fix-monitor-action.patch new file mode 100644 index 0000000..ec1dff4 --- /dev/null +++ b/SOURCES/bz1820523-exportfs-2-fix-monitor-action.patch @@ -0,0 +1,35 @@ +From fda12d3d6495e33e049ed3ac03d6bfb4d65aac3d Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 9 Jun 2020 10:27:13 +0200 +Subject: [PATCH] exportfs: fix monitor-action in symlink-logic for when + directory doesnt exist + +--- + heartbeat/exportfs | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/exportfs b/heartbeat/exportfs +index 294d7eec9..d7d3463d9 100755 +--- a/heartbeat/exportfs ++++ b/heartbeat/exportfs +@@ -463,10 +463,16 @@ for dir in $OCF_RESKEY_directory; do + fi + fi + else +- if [ "$__OCF_ACTION" != "stop" ]; then +- ocf_exit_reason "$dir does not exist" +- exit $OCF_ERR_CONFIGURED +- fi ++ case "$__OCF_ACTION" in ++ stop|monitor) ++ canonicalized_dir="$dir" ++ ocf_log debug "$dir does not exist" ++ ;; ++ *) ++ ocf_exit_reason "$dir does not exist" ++ exit $OCF_ERR_CONFIGURED ++ ;; ++ esac + fi + directories+="$canonicalized_dir " + done diff --git a/SOURCES/bz1830716-NovaEvacuate-suppress-expected-error.patch b/SOURCES/bz1830716-NovaEvacuate-suppress-expected-error.patch new file mode 100644 index 0000000..0a7144f --- /dev/null +++ b/SOURCES/bz1830716-NovaEvacuate-suppress-expected-error.patch @@ -0,0 +1,37 @@ +From 143864c694fb3f44a28b805a17ba7a2f6bf9931f Mon Sep 17 00:00:00 2001 +From: Vincent Untz +Date: Sun, 07 Feb 2016 10:30:00 +0100 +Subject: [PATCH] NovaEvacuate: Avoid initial useless message on stderr + +When no evacuation has been done yet, we're spamming syslog with: + + Could not query value of evacuate: attribute does not exist + +So let's just filter this out, since it's known to be expected on +initial setup. + +As this requires a bashism, also move the script to use bash. + +Change-Id: I3351919febc0ef0101e4a08ce6eb412e3c7cfc76 +--- + +diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate +index 319a747..f764bde 100644 +--- a/heartbeat/NovaEvacuate ++++ b/heartbeat/NovaEvacuate +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # + # + # NovaCompute agent manages compute daemons. 
+@@ -220,7 +220,8 @@ + fi + + handle_evacuations $( +- attrd_updater -n evacuate -A | ++ attrd_updater -n evacuate -A \ ++ 2> >(grep -v "attribute does not exist" 1>&2) | + sed 's/ value=""/ value="no"/' | + tr '="' ' ' | + awk '{print $4" "$6}' diff --git a/SOURCES/bz1832321-rabbitmq-cluster-increase-wait-timeout.patch b/SOURCES/bz1832321-rabbitmq-cluster-increase-wait-timeout.patch new file mode 100644 index 0000000..558ecc6 --- /dev/null +++ b/SOURCES/bz1832321-rabbitmq-cluster-increase-wait-timeout.patch @@ -0,0 +1,60 @@ +From cf1e7bfab984b5e9451a63c25b39c0932e0d9116 Mon Sep 17 00:00:00 2001 +From: Michele Baldessari +Date: Wed, 6 May 2020 16:11:36 +0200 +Subject: [PATCH] Increase the rabbitmqctl wait timeout during start() + +After we start the rabbitmq process we wait for the pid to show up +and then declare the server to be started successfully. +This wait is done via 'rabbitmqctl wait'. Now from +From https://www.rabbitmq.com/rabbitmqctl.8.html we have: + + If the specified pidfile is not created or erlang node is not started + within --timeout the command will fail. Default timeout is 10 seconds. + +This default of 10 seconds might not be enough in overloaded +environments. So what we want to do here is wait for as much time as +the start() operation allows us. So we wait for OCF_RESKEY_CRM_meta_timeout +minus 5 seconds. In the rare and non-sensical case that it is less than +10s we do not pass a timeout string at all to rabbitmqctl. + +Co-Authored-By: John Eckersberg +--- + heartbeat/rabbitmq-cluster | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index a9ebd37ad..f7d48120c 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -294,6 +294,8 @@ rmq_monitor() { + rmq_init_and_wait() + { + local rc ++ local wait_timeout ++ local timeout_string + + prepare_dir $RMQ_PID_DIR + prepare_dir $RMQ_LOG_DIR +@@ -305,11 +307,20 @@ rmq_init_and_wait() + setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" & + + ocf_log info "Waiting for server to start" +- $RMQ_CTL wait $RMQ_PID_FILE ++ # We want to give the wait command almost the full startup timeout we are given ++ # So we use the start operation timeout (in ms), convert it and subtract 5 seconds ++ # In the silly case that it is less than 10 seconds we just skip setting the timeout ++ wait_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / 1000 - 5` ++ if [ $wait_timeout -gt 10 ]; then ++ timeout_string="--timeout ${wait_timeout}" ++ else ++ timeout_string="" ++ fi ++ $RMQ_CTL $timeout_string wait $RMQ_PID_FILE + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + remove_pid +- ocf_log info "rabbitmq-server start failed: $rc" ++ ocf_log info "rabbitmq-server start failed with a timeout of ($timeout_string): $rc" + return $OCF_ERR_GENERIC + fi + diff --git a/SOURCES/bz1836186-pgsql-support-Pacemaker-v2.03-output.patch b/SOURCES/bz1836186-pgsql-support-Pacemaker-v2.03-output.patch new file mode 100644 index 0000000..b0d7ade --- /dev/null +++ b/SOURCES/bz1836186-pgsql-support-Pacemaker-v2.03-output.patch @@ -0,0 +1,52 @@ +--- a/heartbeat/ocf-shellfuncs.in 2020-06-16 10:47:54.462276461 +0200 ++++ b/heartbeat/ocf-shellfuncs.in 2020-06-16 10:43:36.880739016 +0200 +@@ -563,7 +563,7 @@ + # (master/slave) resource. This is defined as a resource where the + # master-max meta attribute is present, and set to greater than zero. + ocf_is_ms() { +- [ ! 
-z "${OCF_RESKEY_CRM_meta_master_max}" ] && [ "${OCF_RESKEY_CRM_meta_master_max}" -gt 0 ] ++ [ "${OCF_RESKEY_CRM_meta_promotable}" = "true" ] || { [ ! -z "${OCF_RESKEY_CRM_meta_master_max}" ] && [ "${OCF_RESKEY_CRM_meta_master_max}" -gt 0 ]; } + } + + # version check functions +--- a/heartbeat/pgsql 2020-06-16 10:47:54.474276441 +0200 ++++ b/heartbeat/pgsql 2020-06-16 10:49:02.835159083 +0200 +@@ -1021,7 +1021,7 @@ + + # I can't get master node name from $OCF_RESKEY_CRM_meta_notify_master_uname on monitor, + # so I will get master node name using crm_mon -n +- print_crm_mon | tr -d "\t" | tr -d " " | grep -q "^${RESOURCE_NAME}[(:].*[):].*Master" ++ print_crm_mon | grep -q -i -E " +Date: Mon, 18 May 2020 16:18:21 +0200 +Subject: [PATCH] db2: HADR add STANDBY/REMOTE_CATCHUP_PENDING/DISCONNECTED to + correctly promote standby node when master node disappears (e.g. via fencing) + +--- + heartbeat/db2 | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/db2 b/heartbeat/db2 +index 62b288d46..a57fd2bb6 100755 +--- a/heartbeat/db2 ++++ b/heartbeat/db2 +@@ -774,14 +774,19 @@ db2_promote() { + ;; + + STANDBY/PEER/CONNECTED|Standby/Peer) +- # must take over ++ # must take over + ;; + + STANDBY/*PEER/DISCONNECTED|Standby/DisconnectedPeer) +- # must take over forced ++ # must take over by force peer window only + force="by force peer window only" + ;; + ++ # must take over by force ++ STANDBY/REMOTE_CATCHUP_PENDING/DISCONNECTED) ++ force="by force" ++ ;; ++ + *) + return $OCF_ERR_GENERIC + esac diff --git a/SOURCES/bz1839721-podman-force-rm-container-if-rm-fails.patch b/SOURCES/bz1839721-podman-force-rm-container-if-rm-fails.patch new file mode 100644 index 0000000..89fbb06 --- /dev/null +++ b/SOURCES/bz1839721-podman-force-rm-container-if-rm-fails.patch @@ -0,0 +1,53 @@ +From 5a732511db2c49ff6afe0a20e738b565a35273ae Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Fri, 29 May 2020 11:57:29 +0200 +Subject: [PATCH] podman: make sure to remove containers with lingering exec + sessions + +It may happen that some "podman exec" commands don't finish +cleanly and leave lingering "Exec sessions" in the container's +state. In that case, a "podman rm" command will always fail. + +To overcome the podman bug, issue a "podman rm -f" command when +we detect a container is stopped but still has some lingering +"Exec sessions" associated with it. + +Related-Bug: rhbz#1839721 +--- + heartbeat/podman | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/heartbeat/podman b/heartbeat/podman +index f77d988fc..e2f6e981b 100755 +--- a/heartbeat/podman ++++ b/heartbeat/podman +@@ -232,6 +232,9 @@ container_exists() + + remove_container() + { ++ local rc ++ local execids ++ + if ocf_is_true "$OCF_RESKEY_reuse"; then + # never remove the container if we have reuse enabled. + return 0 +@@ -244,6 +247,19 @@ remove_container() + fi + ocf_log notice "Cleaning up inactive container, ${CONTAINER}." + ocf_run podman rm $CONTAINER ++ rc=$? ++ if [ $rc -ne 0 ]; then ++ # due to a podman bug (rhbz#1841485), sometimes a stopped ++ # container can still be associated with Exec sessions, in ++ # which case the "podman rm" has to be forced ++ execids=$(podman inspect $CONTAINER --format '{{len .ExecIDs}}') ++ if [ "$execids" -ne "0" ]; then ++ ocf_log warn "Inactive container ${CONTAINER} has lingering exec sessions. Force-remove it." ++ ocf_run podman rm -f $CONTAINER ++ rc=$? 
++ fi ++ fi ++ return $rc + } + + podman_simple_status() diff --git a/SOURCES/bz1843999-aliyun-vpc-move-ip-log-output-when-failing.patch b/SOURCES/bz1843999-aliyun-vpc-move-ip-log-output-when-failing.patch new file mode 100644 index 0000000..7a7185f --- /dev/null +++ b/SOURCES/bz1843999-aliyun-vpc-move-ip-log-output-when-failing.patch @@ -0,0 +1,265 @@ +--- a/heartbeat/aliyun-vpc-move-ip 2020-06-09 13:41:35.308379032 +0200 ++++ b/heartbeat/aliyun-vpc-move-ip 2020-06-05 10:48:45.555132686 +0200 +@@ -9,12 +9,46 @@ + # Initialization: + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++# Parameter defaults ++ ++OCF_RESKEY_address_default="" ++OCF_RESKEY_routing_table_default="" ++OCF_RESKEY_interface_default="eth0" ++OCF_RESKEY_profile_default="default" ++OCF_RESKEY_endpoint_default="vpc.aliyuncs.com" ++OCF_RESKEY_aliyuncli_default="detect" ++ ++ ++: ${OCF_RESKEY_address=${OCF_RESKEY_address_default}} ++: ${OCF_RESKEY_routing_table=${OCF_RESKEY_routing_table_default}} ++: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}} ++: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}} ++: ${OCF_RESKEY_endpoint=${OCF_RESKEY_endpoint_default}} ++: ${OCF_RESKEY_aliyuncli=${OCF_RESKEY_aliyuncli_default}} ++ + ####################################################################### + +-# aliyuncli doesnt work without HOME parameter ++# aliyun cli doesnt work without HOME parameter + export HOME="/root" + + USAGE="usage: $0 {start|stop|status|meta-data}"; ++ ++if [ "${OCF_RESKEY_aliyuncli}" = "detect" ]; then ++ OCF_RESKEY_aliyuncli="$(which aliyuncli 2> /dev/null || which aliyun 2> /dev/null)" ++fi ++ ++if [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli' ]]; then ++ OUTPUT="text" ++ EXECUTING='{ print $3 }' ++ IFS_=" " ++ ENDPOINT="" ++elif [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyun' ]]; then ++ OUTPUT="table cols=InstanceId,DestinationCidrBlock rows=RouteTables.RouteTable[].RouteEntrys.RouteEntry[]" ++ EXECUTING='{ gsub (" ", "", $0); print $1 }' ++ IFS_="|" ++ ENDPOINT="--endpoint $OCF_RESKEY_endpoint" ++fi + ############################################################################### + + +@@ -24,27 +58,61 @@ + # + ############################################################################### + ++request_create_route_entry() { ++ cmd="${OCF_RESKEY_aliyuncli} vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance ${ENDPOINT}" ++ ocf_log debug "executing command: $cmd" ++ res=$($cmd 2>&1) ++ rc=$? ++ if [[ $rc -eq 0 ]] ++ then ++ ocf_log debug "result: $res; rc: $rc" ++ else ++ ocf_log err "result: $res; cmd: $cmd; rc: $rc" ++ fi ++ return $rc ++} ++ ++request_delete_route_entry() { ++ cmd="${OCF_RESKEY_aliyuncli} vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE ${ENDPOINT}" ++ ocf_log debug "executing command: $cmd" ++ res=$($cmd) ++ rc=$? ++ if [[ $rc -eq 0 ]] ++ then ++ ocf_log debug "result: $res; rc: $rc" ++ else ++ ocf_log err "result: $res; cmd: $cmd; rc: $rc" ++ fi ++ return $rc ++} + ++request_describe_route_tables() { ++ cmd="${OCF_RESKEY_aliyuncli} vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output ${OUTPUT} ${ENDPOINT}" ++ ocf_log debug "executing command: $cmd" ++ res=$($cmd) ++ rc=$? 
++ if [[ $rc -eq 0 ]] ++ then ++ ROUTE_TO_INSTANCE=$(echo "$res" |grep "\s${OCF_RESKEY_address}/" | awk -F "${IFS_}" "${EXECUTING}") ++ ocf_log debug "ROUTE_TO_INSTANCE: $ROUTE_TO_INSTANCE" ++ else ++ ocf_log err "result: $res; cmd: $cmd; rc: $rc" ++ fi ++} + + ip_get_and_configure() { + ocf_log debug "function: ip_get_and_configure" + +- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" +- ++ request_describe_route_tables + if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then + if [ -n "$ROUTE_TO_INSTANCE" ]; then + ip_drop + fi +- +- cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" +- ocf_log debug "executing command: $cmd" +- $cmd ++ request_create_route_entry + rc=$? + while [ $rc -ne 0 ]; do + sleep 1 +- cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text" +- ocf_log debug "executing command: $cmd" +- $cmd ++ request_create_route_entry + rc=$? + done + wait_for_started +@@ -68,17 +136,15 @@ + ocf_log debug "function: ip_drop" + cmd="ip addr delete ${OCF_RESKEY_address}/32 dev $OCF_RESKEY_interface" + ocf_log debug "executing command: $cmd" +- $cmd ++ res=$($cmd) + rc=$? + if [ $rc -ne 0 ] && [ $rc -ne 2 ]; then +- ocf_log err "command failed, rc $rc" ++ ocf_log err "command failed, rc: $rc; cmd: $cmd; result: $res" + return $OCF_ERR_GENERIC + fi +- +- cmd="aliyuncli vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text" +- ocf_log debug "executing command: $cmd" +- $cmd +- if [ $? -ne 0 ]; then ++ request_delete_route_entry ++ rc=$? ++ if [ $rc -ne 0 ]; then + ocf_log err "command failed, rc: $rc" + return $OCF_ERR_GENERIC + fi +@@ -90,26 +156,18 @@ + } + + wait_for_started() { +- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" +- ocf_log debug "executing command: $cmd" +- ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')" +- ++ request_describe_route_tables + while [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; do + sleep 3 +- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" +- ocf_log debug "executing command: $cmd" +- ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')" ++ request_describe_route_tables + done + } + + wait_for_deleted() { +- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" +- ++ request_describe_route_tables + while [ ! -z "$ROUTE_TO_INSTANCE" ]; do + sleep 1 +- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" +- ocf_log debug "executing command: $cmd" +- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" ++ request_describe_route_tables + done + } + +@@ -124,38 +182,58 @@ + by changing an entry in an specific routing table + + Move IP within a VPC of the Aliyun ECS ++ + ++ ++ ++Path to command line tools for Aliyun ++ ++Path to Aliyun CLI tools ++ ++ ++ + + + VPC private IP address + + vpc ip +- ++ + ++ + + + Name of the routing table, where the route for the IP address should be changed, i.e. vtb-... + + routing table name +- ++ + ++ + + + Name of the network interface, i.e. 
eth0 + + network interface name +- ++ + +- ++ ++ + +-Valid Aliyun CLI profile name (see 'aliyuncli-ra configure'). ++An endpoint is the service entry of an Alibaba Cloud service, i.e. vpc.cn-beijing.aliyuncs.com ++ ++service endpoint ++ ++ + +-See https://www.alibabacloud.com/help/doc-detail/43039.htm?spm=a2c63.p38356.b99.16.38a914abRZtOU3 for more information about aliyuncli-ra. ++ ++ ++Valid Aliyun CLI profile name (see 'aliyun cli configure'). ++See https://www.alibabacloud.com/help/zh/product/29991.htm for more information about aliyun cli. + + profile name +- ++ + + ++ + + + +@@ -170,6 +248,11 @@ + ecs_ip_validate() { + ocf_log debug "function: validate" + ++ if [ -z "${OCF_RESKEY_aliyuncli}" ]; then ++ ocf_exit_reason "unable to detect aliyuncli binary" ++ exit $OCF_ERR_INSTALLED ++ fi ++ + # IP address + if [ -z "$OCF_RESKEY_address" ]; then + ocf_log err "IP address parameter not set $OCF_RESKEY_ADDRESS!" +@@ -250,10 +333,7 @@ + + ecs_ip_monitor() { + ocf_log debug "function: ecsip_monitor: check routing table" +- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text" +- ocf_log debug "executing command: $cmd" +- +- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')" ++ request_describe_route_tables + + if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then + ocf_log debug "not routed to this instance ($ECS_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE" diff --git a/SOURCES/bz1845574-azure-events-handle-exceptions-in-urlopen.patch b/SOURCES/bz1845574-azure-events-handle-exceptions-in-urlopen.patch new file mode 100644 index 0000000..fa194c9 --- /dev/null +++ b/SOURCES/bz1845574-azure-events-handle-exceptions-in-urlopen.patch @@ -0,0 +1,70 @@ +From 194909ff08cfe75cd5da9f704d8ed4cc9ab40341 Mon Sep 17 00:00:00 2001 +From: Gustavo Figueira +Date: Tue, 19 May 2020 10:58:34 +0200 +Subject: [PATCH 1/2] azure-events: handle exceptions in urlopen The locking in + azure-events does not correctly handle some failures. + +If the metadata server is not recheable or has an error +handling the request, attr_globalPullState will never go +back to IDLE unless the administrator manually changes it. + +> azure-events: ERROR: [Errno 104] Connection reset by peer +> lrmd[2734]: notice: rsc_azure-events_monitor_10000:113088:stderr [ ocf-exit-reason:[Errno 104] Connection reset by peer ] +--- + heartbeat/azure-events.in | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in +index 8709d97e3..bd812f4b2 100644 +--- a/heartbeat/azure-events.in ++++ b/heartbeat/azure-events.in +@@ -82,9 +82,19 @@ class azHelper: + req = urllib2.Request(url, postData) + req.add_header("Metadata", "true") + req.add_header("User-Agent", USER_AGENT) +- resp = urllib2.urlopen(req) +- data = resp.read() +- ocf.logger.debug("_sendMetadataRequest: response = %s" % data) ++ try: ++ resp = urllib2.urlopen(req) ++ except URLError as e: ++ if hasattr(e, 'reason'): ++ print('We failed to reach a server. Reason: '), e.reason ++ clusterHelper.setAttr(attr_globalPullState, "IDLE") ++ elif hasattr(e, 'code'): ++ print('The server couldn\'t fulfill the request. 
Error code: '), e.code ++ clusterHelper.setAttr(attr_globalPullState, "IDLE") ++ else: ++ data = resp.read() ++ ocf.logger.debug("_sendMetadataRequest: response = %s" % data) ++ + if data: + data = json.loads(data) + + +From c4071ec4a82fcb831f170f341e0790633e4b904f Mon Sep 17 00:00:00 2001 +From: Gustavo Figueira +Date: Tue, 19 May 2020 12:53:22 +0200 +Subject: [PATCH 2/2] azure-events: use ocf.logger.warning instead of print + +--- + heartbeat/azure-events.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in +index bd812f4b2..a48a86309 100644 +--- a/heartbeat/azure-events.in ++++ b/heartbeat/azure-events.in +@@ -86,10 +86,10 @@ class azHelper: + resp = urllib2.urlopen(req) + except URLError as e: + if hasattr(e, 'reason'): +- print('We failed to reach a server. Reason: '), e.reason ++ ocf.logger.warning("Failed to reach the server: %s" % e.reason) + clusterHelper.setAttr(attr_globalPullState, "IDLE") + elif hasattr(e, 'code'): +- print('The server couldn\'t fulfill the request. Error code: '), e.code ++ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code) + clusterHelper.setAttr(attr_globalPullState, "IDLE") + else: + data = resp.read() diff --git a/SOURCES/bz1845581-nfsserver-dont-log-error-message-file-doesnt-exist.patch b/SOURCES/bz1845581-nfsserver-dont-log-error-message-file-doesnt-exist.patch new file mode 100644 index 0000000..06da3b4 --- /dev/null +++ b/SOURCES/bz1845581-nfsserver-dont-log-error-message-file-doesnt-exist.patch @@ -0,0 +1,31 @@ +From 314eadcd683551bd79b644de05cbf0e425c84f83 Mon Sep 17 00:00:00 2001 +From: Kazunori INOUE +Date: Tue, 9 Jun 2020 13:30:32 +0000 +Subject: [PATCH] nfsserver: prevent error messages when /etc/sysconfig/nfs + does not exist + +--- + heartbeat/nfsserver-redhat.sh | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/nfsserver-redhat.sh b/heartbeat/nfsserver-redhat.sh +index cef0862ee..73a70c186 100644 +--- a/heartbeat/nfsserver-redhat.sh ++++ b/heartbeat/nfsserver-redhat.sh +@@ -150,10 +150,12 @@ set_env_args() + + # override local nfs config. preserve previous local config though. + if [ -s $tmpconfig ]; then +- cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 +- if [ $? -ne 0 ]; then +- # backup local nfs config if it doesn't have our HA autogen tag in it. +- mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP ++ if [ -f "$NFS_SYSCONFIG" ]; then ++ cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 ++ if [ $? -ne 0 ]; then ++ # backup local nfs config if it doesn't have our HA autogen tag in it. 
++ mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP ++ fi + fi + + cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 diff --git a/SOURCES/bz1845583-exportfs-1-describe-clientspec-format-in-metadata.patch b/SOURCES/bz1845583-exportfs-1-describe-clientspec-format-in-metadata.patch new file mode 100644 index 0000000..fd685b1 --- /dev/null +++ b/SOURCES/bz1845583-exportfs-1-describe-clientspec-format-in-metadata.patch @@ -0,0 +1,27 @@ +From f572186ec9bd26c791771a18d38804cfde602578 Mon Sep 17 00:00:00 2001 +From: zzhou1 <10611019+zzhou1@users.noreply.github.com> +Date: Tue, 3 Sep 2019 09:24:23 +0000 +Subject: [PATCH] exportfs: doc clarification for clientspec format + +Point out the standard of the format is aligned with `man exportfs`, and also point out the correct way to deal with the use case to export the same directory(-ies) to multiple subnets. +--- + heartbeat/exportfs | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/heartbeat/exportfs b/heartbeat/exportfs +index 80ed057f2..dc609a0b4 100755 +--- a/heartbeat/exportfs ++++ b/heartbeat/exportfs +@@ -47,6 +47,12 @@ Manages NFS exports + + The client specification allowing remote machines to mount the directory + (or directories) over NFS. ++ ++Note: it follows the format defined in "man exportfs". For example, in ++the use case to export the directory(-ies) for multiple subnets, please ++do config a dedicated primitive for each subnet CIDR ip address, ++and do not attempt to use multiple CIDR ip addresses in a space ++seperated list, like in /etc/exports. + + + Client ACL. diff --git a/SOURCES/bz1845583-exportfs-2-fix-typo.patch b/SOURCES/bz1845583-exportfs-2-fix-typo.patch new file mode 100644 index 0000000..0406f31 --- /dev/null +++ b/SOURCES/bz1845583-exportfs-2-fix-typo.patch @@ -0,0 +1,23 @@ +From 0f8189161ef5c09c6a6df96cb15937d430f75353 Mon Sep 17 00:00:00 2001 +From: Valentin Vidic +Date: Wed, 23 Oct 2019 22:11:14 +0200 +Subject: [PATCH] Low: exportfs: Fix spelling error + +Replace seperated -> separated. +--- + heartbeat/exportfs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/exportfs b/heartbeat/exportfs +index dc609a0b4..d79aced88 100755 +--- a/heartbeat/exportfs ++++ b/heartbeat/exportfs +@@ -52,7 +52,7 @@ Note: it follows the format defined in "man exportfs". For example, in + the use case to export the directory(-ies) for multiple subnets, please + do config a dedicated primitive for each subnet CIDR ip address, + and do not attempt to use multiple CIDR ip addresses in a space +-seperated list, like in /etc/exports. ++separated list, like in /etc/exports. + + + Client ACL. 
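Editor's note: the clientspec guidance added above (one dedicated exportfs primitive per client subnet) can be sketched as a minimal crmsh configuration. This is a hedged example, not part of any patch in this update; the resource names, CIDRs, directory, and fsid values are illustrative assumptions:

    # hypothetical example: export the same directory to two subnets via two primitives
    crm configure primitive exportfs_net1 ocf:heartbeat:exportfs \
        params clientspec="10.0.1.0/24" directory="/srv/nfs" fsid=1 \
        op monitor interval=30s
    crm configure primitive exportfs_net2 ocf:heartbeat:exportfs \
        params clientspec="10.0.2.0/24" directory="/srv/nfs" fsid=1 \
        op monitor interval=30s

Reusing the same fsid for the same exported directory relies on the allow-same-fsid behavior this package already carries (bz1764888-exportfs-allow-same-fsid.patch).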
diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec index e141af3..8738405 100644 --- a/SPECS/resource-agents.spec +++ b/SPECS/resource-agents.spec @@ -29,6 +29,10 @@ %global googlecloudsdk google-cloud-sdk %global googlecloudsdk_version 241.0.0 %global googlecloudsdk_dir %{bundled_lib_dir}/gcp/%{googlecloudsdk} +# python-httplib2 bundle +%global httplib2 httplib2 +%global httplib2_version 0.18.1 +%global httplib2_dir %{bundled_lib_dir}/gcp/%{httplib2} # python-pyroute2 bundle %global pyroute2 pyroute2 %global pyroute2_version 0.4.13 @@ -66,7 +70,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.1.1 -Release: 39%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 60%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} @@ -76,13 +80,14 @@ Group: Productivity/Clustering/HA %endif Source0: %{upstream_prefix}-%{upstream_version}.tar.gz Source1: %{googlecloudsdk}-%{googlecloudsdk_version}-linux-x86_64.tar.gz -Source2: %{pyroute2}-%{pyroute2_version}.tar.gz -Source3: %{colorama}-%{colorama_version}.tar.gz -Source4: %{pycryptodome}-%{pycryptodome_version}.tar.gz -Source5: %{aliyunsdkcore}-%{aliyunsdkcore_version}.tar.gz -Source6: %{aliyunsdkecs}-%{aliyunsdkecs_version}.tar.gz -Source7: %{aliyunsdkvpc}-%{aliyunsdkvpc_version}.tar.gz -Source8: %{aliyuncli}-%{aliyuncli_version}.tar.gz +Source2: %{httplib2}-%{httplib2_version}.tar.gz +Source3: %{pyroute2}-%{pyroute2_version}.tar.gz +Source4: %{colorama}-%{colorama_version}.tar.gz +Source5: %{pycryptodome}-%{pycryptodome_version}.tar.gz +Source6: %{aliyunsdkcore}-%{aliyunsdkcore_version}.tar.gz +Source7: %{aliyunsdkecs}-%{aliyunsdkecs_version}.tar.gz +Source8: %{aliyunsdkvpc}-%{aliyunsdkvpc_version}.tar.gz +Source9: %{aliyuncli}-%{aliyuncli_version}.tar.gz Patch0: nova-compute-wait-NovaEvacuate.patch Patch1: LVM-volume_group_check_only.patch Patch2: bz1552330-vdo-vol.patch @@ -170,18 +175,60 @@ Patch83: bz1750261-Route-1-dont-fence-when-parameters-not-set.patch Patch84: bz1750352-rabbitmq-cluster-restore-users-single-node-mode.patch Patch85: bz1751700-IPaddr2-1-sanitize-IPv6-IPs.patch Patch86: bz1751700-IPaddr2-2-return-empty-when-sanitation-fails.patch -Patch87: bz1751949-1-SAPInstance-add-reload-action.patch -Patch88: bz1751949-2-SAPInstance-improve-profile-detection.patch -Patch89: bz1751949-3-SAPInstance-metadata-improvements.patch -Patch90: bz1751962-nfsserver-1-systemd-perf-improvements.patch -Patch91: bz1751962-nfsserver-2-systemd-use-no-legend.patch -Patch92: bz1755760-NovaEvacuate-evacuate_delay.patch -Patch93: bz1750261-Route-2-validate-start-validate-all.patch -Patch94: bz1741843-LVM-activate-partial-activation.patch -Patch95: bz1764888-exportfs-allow-same-fsid.patch -Patch96: bz1765128-mysql-galera-fix-incorrect-rc.patch -Patch97: bz1741042-IPaddr2-add-noprefixroute-parameter.patch -Patch98: bz1744224-IPsrcaddr-2-local-rule-destination-fixes.patch +Patch87: bz1751962-nfsserver-1-systemd-perf-improvements.patch +Patch88: bz1751962-nfsserver-2-systemd-use-no-legend.patch +Patch89: bz1755760-NovaEvacuate-evacuate_delay.patch +Patch90: bz1750261-Route-2-validate-start-validate-all.patch +Patch91: bz1741843-LVM-activate-partial-activation.patch +Patch92: bz1764888-exportfs-allow-same-fsid.patch +Patch93: bz1765128-mysql-galera-fix-incorrect-rc.patch +Patch94: 
bz1741042-IPaddr2-add-noprefixroute-parameter.patch +Patch95: bz1744224-IPsrcaddr-2-local-rule-destination-fixes.patch +Patch96: bz1788889-podman-improve-image-exist-check.patch +Patch97: bz1744224-IPsrcaddr-3-fix-probe-issues.patch +Patch98: bz1767916-IPaddr2-clusterip-not-supported.patch +Patch99: bz1777381-Filesystem-1-refresh-UUID.patch +Patch100: bz1777381-Filesystem-2-udev-settle.patch +Patch101: bz1744224-IPsrcaddr-4-fix-hardcoded-device.patch +Patch102: bz1792196-rabbitmq-cluster-delete-nodename-when-stop-fails.patch +Patch103: bz1808468-1-lvmlockd-fix-conditionals.patch +Patch104: bz1808468-2-remove-locking_type.patch +Patch105: bz1759115-aws-vpc-route53-1-update.patch +Patch106: bz1804658-azure-lb-1-remove-status-metadata.patch +Patch107: bz1804658-azure-lb-2-add-socat-support.patch +Patch108: bz1810466-aws-vpc-move-ip-1-add-routing_table_role.patch +Patch109: bz1810466-aws-vpc-move-ip-2-update-metadata.patch +Patch110: bz1792237-redis-1-fix-validate-all.patch +Patch111: bz1792237-redis-2-run-validate-during-start.patch +Patch112: bz1817432-use-safe-temp-file-location.patch +Patch113: bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch +Patch114: bz1817598-ocf_is_clone-2-update-comment.patch +Patch115: bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch +Patch116: bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch +Patch117: bz1633251-gcp-pd-move-1.patch +Patch118: bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch +Patch119: bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch +Patch120: bz1819965-1-ocf.py-update.patch +Patch121: bz1819965-2-azure-events.patch +Patch122: bz1759115-aws-vpc-route53-3-awscli-property.patch +Patch123: bz1744190-pgsql-1-set-primary-standby-initial-score.patch +Patch124: bz1744190-pgsql-2-improve-start-checks.patch +Patch125: bz1820523-exportfs-1-add-symlink-support.patch +Patch126: bz1832321-rabbitmq-cluster-increase-wait-timeout.patch +Patch127: bz1818997-nfsserver-fix-nfsv4-only-support.patch +Patch128: bz1830716-NovaEvacuate-suppress-expected-error.patch +Patch129: bz1836945-db2-hadr-promote-standby-node.patch +Patch130: bz1633251-gcp-pd-move-4-fixes-and-improvements.patch +Patch131: bz1633251-gcp-pd-move-5-bundle.patch +Patch132: bz1839721-podman-force-rm-container-if-rm-fails.patch +Patch133: bz1820523-exportfs-2-fix-monitor-action.patch +Patch134: bz1843999-aliyun-vpc-move-ip-log-output-when-failing.patch +Patch135: bz1845574-azure-events-handle-exceptions-in-urlopen.patch +Patch136: bz1845581-nfsserver-dont-log-error-message-file-doesnt-exist.patch +Patch137: bz1845583-exportfs-1-describe-clientspec-format-in-metadata.patch +Patch138: bz1845583-exportfs-2-fix-typo.patch +Patch139: bz1814896-Filesystem-fast_stop-default-to-no-for-GFS2.patch +Patch140: bz1836186-pgsql-support-Pacemaker-v2.03-output.patch # bundle patches Patch1000: 7-gcp-bundled.patch @@ -424,11 +471,53 @@ exit 1 %patch91 -p1 %patch92 -p1 %patch93 -p1 -%patch94 -p1 +%patch94 -p1 -F2 %patch95 -p1 %patch96 -p1 -%patch97 -p1 -F2 +%patch97 -p1 %patch98 -p1 +%patch99 -p1 +%patch100 -p1 +%patch101 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 +%patch106 -p1 +%patch107 -p1 +%patch108 -p1 +%patch109 -p1 +%patch110 -p1 +%patch111 -p1 +%patch112 -p1 +%patch113 -p1 +%patch114 -p1 +%patch115 -p1 +%patch116 -p1 +%patch117 -p1 +%patch118 -p1 +%patch119 -p1 +%patch120 -p1 +%patch121 -p1 +%patch122 -p1 +%patch123 -p1 +%patch124 -p1 +%patch125 -p1 +%patch126 -p1 +%patch127 -p1 +%patch128 -p1 -F2 +%patch129 -p1 +%patch130 -p1 
+%patch131 -p1 +%patch132 -p1 +%patch133 -p1 +%patch134 -p1 +%patch135 -p1 +%patch136 -p1 +%patch137 -p1 +%patch138 -p1 +%patch139 -p1 +%patch140 -p1 chmod 755 heartbeat/nova-compute-wait chmod 755 heartbeat/NovaEvacuate @@ -440,6 +529,15 @@ mkdir -p %{bundled_lib_dir}/aliyun # google-cloud-sdk bundle %ifarch x86_64 tar -xzf %SOURCE1 -C %{bundled_lib_dir}/gcp +## upgrade httplib2 to fix CVE-2020-11078 +pushd %{googlecloudsdk_dir} +rm -rf lib/third_party/httplib2 +popd + +# python-httplib2 bundle +tar -xzf %SOURCE2 -C %{bundled_lib_dir} +mv %{bundled_lib_dir}/%{httplib2}-%{httplib2_version} %{httplib2_dir} + # gcp*: append bundled-directory to search path, gcloud-ra %patch1000 -p1 # google-cloud-sdk fixes @@ -468,7 +566,7 @@ rm -rf %{googlecloudsdk_dir}/lib/third_party/dateutil cp %{googlecloudsdk_dir}/README %{googlecloudsdk}_README cp %{googlecloudsdk_dir}/lib/third_party/argparse/README.txt %{googlecloudsdk}_argparse_README.txt cp %{googlecloudsdk_dir}/LICENSE %{googlecloudsdk}_LICENSE -cp %{googlecloudsdk_dir}/lib/third_party/httplib2/LICENSE %{googlecloudsdk}_httplib2_LICENSE +cp %{httplib2_dir}/LICENSE %{googlecloudsdk}_httplib2_LICENSE cp %{googlecloudsdk_dir}/lib/third_party/contextlib2/LICENSE %{googlecloudsdk}_contextlib2_LICENSE cp %{googlecloudsdk_dir}/lib/third_party/concurrent/LICENSE %{googlecloudsdk}_concurrent_LICENSE cp %{googlecloudsdk_dir}/lib/third_party/yaml/LICENSE %{googlecloudsdk}_yaml_LICENSE @@ -511,7 +609,7 @@ cp %{googlecloudsdk_dir}/lib/third_party/apitools/LICENSE %{googlecloudsdk}_apit cp %{googlecloudsdk_dir}/lib/third_party/containerregistry/LICENSE %{googlecloudsdk}_containerregistry_LICENSE # python-pyroute2 bundle -tar -xzf %SOURCE2 -C %{bundled_lib_dir}/gcp +tar -xzf %SOURCE3 -C %{bundled_lib_dir}/gcp mv %{bundled_lib_dir}/gcp/%{pyroute2}-%{pyroute2_version} %{pyroute2_dir} cp %{pyroute2_dir}/README.md %{pyroute2}_README.md cp %{pyroute2_dir}/README.license.md %{pyroute2}_README.license.md @@ -519,7 +617,7 @@ cp %{pyroute2_dir}/LICENSE.Apache.v2 %{pyroute2}_LICENSE.Apache.v2 cp %{pyroute2_dir}/LICENSE.GPL.v2 %{pyroute2}_LICENSE.GPL.v2 # python-colorama bundle -tar -xzf %SOURCE3 -C %{bundled_lib_dir}/aliyun +tar -xzf %SOURCE4 -C %{bundled_lib_dir}/aliyun mv %{bundled_lib_dir}/aliyun/%{colorama}-%{colorama_version} %{colorama_dir} cp %{colorama_dir}/LICENSE.txt %{colorama}_LICENSE.txt cp %{colorama_dir}/README.rst %{colorama}_README.rst @@ -530,28 +628,28 @@ rm -rf *.egg-info popd # python-pycryptodome bundle -tar -xzf %SOURCE4 -C %{bundled_lib_dir}/aliyun +tar -xzf %SOURCE5 -C %{bundled_lib_dir}/aliyun mv %{bundled_lib_dir}/aliyun/%{pycryptodome}-%{pycryptodome_version} %{pycryptodome_dir} cp %{pycryptodome_dir}/README.rst %{pycryptodome}_README.rst cp %{pycryptodome_dir}/LICENSE.rst %{pycryptodome}_LICENSE.rst # python-aliyun-sdk-core bundle -tar -xzf %SOURCE5 -C %{bundled_lib_dir}/aliyun +tar -xzf %SOURCE6 -C %{bundled_lib_dir}/aliyun mv %{bundled_lib_dir}/aliyun/%{aliyunsdkcore}-%{aliyunsdkcore_version} %{aliyunsdkcore_dir} cp %{aliyunsdkcore_dir}/README.rst %{aliyunsdkcore}_README.rst # python-aliyun-sdk-ecs bundle -tar -xzf %SOURCE6 -C %{bundled_lib_dir}/aliyun +tar -xzf %SOURCE7 -C %{bundled_lib_dir}/aliyun mv %{bundled_lib_dir}/aliyun/%{aliyunsdkecs}-%{aliyunsdkecs_version} %{aliyunsdkecs_dir} cp %{aliyunsdkecs_dir}/README.rst %{aliyunsdkecs}_README.rst # python-aliyun-sdk-vpc bundle -tar -xzf %SOURCE7 -C %{bundled_lib_dir}/aliyun +tar -xzf %SOURCE8 -C %{bundled_lib_dir}/aliyun mv %{bundled_lib_dir}/aliyun/%{aliyunsdkvpc}-%{aliyunsdkvpc_version} 
%{aliyunsdkvpc_dir} cp %{aliyunsdkvpc_dir}/README.rst %{aliyunsdkvpc}_README.rst # aliyuncli bundle -tar -xzf %SOURCE8 -C %{bundled_lib_dir}/aliyun +tar -xzf %SOURCE9 -C %{bundled_lib_dir}/aliyun mv %{bundled_lib_dir}/aliyun/%{aliyuncli}-%{aliyuncli_version} %{aliyuncli_dir} cp %{aliyuncli_dir}/README.rst %{aliyuncli}_README.rst cp %{aliyuncli_dir}/LICENSE %{aliyuncli}_LICENSE @@ -609,8 +707,13 @@ JFLAGS="$(echo '%{_smp_mflags}')" make $JFLAGS -# python-pyroute2 bundle +# python-httplib2 bundle %ifarch x86_64 +pushd %{httplib2_dir} +%{__python3} setup.py build +popd + +# python-pyroute2 bundle pushd %{pyroute2_dir} %{__python3} setup.py build popd @@ -662,6 +765,11 @@ mkdir %{buildroot}/%{_bindir} ln -s /usr/lib/%{name}/%{googlecloudsdk_dir}/bin/gcloud-ra %{buildroot}/%{_bindir} popd +# python-httplib2 bundle +pushd %{httplib2_dir} +%{__python3} setup.py install -O1 --skip-build --root %{buildroot} --install-lib /usr/lib/%{name}/%{googlecloudsdk_dir}/lib/third_party +popd + # python-pyroute2 bundle pushd %{pyroute2_dir} %{__python3} setup.py install -O1 --skip-build --root %{buildroot} --install-lib /usr/lib/%{name}/%{bundled_lib_dir}/gcp @@ -815,7 +923,6 @@ rm -rf %{buildroot} %exclude /usr/lib/ocf/resource.d/heartbeat/Xen %exclude /usr/lib/ocf/resource.d/heartbeat/anything %exclude /usr/lib/ocf/resource.d/heartbeat/asterisk -%exclude /usr/lib/ocf/resource.d/heartbeat/aws-vpc-route53 %exclude /usr/lib/ocf/resource.d/heartbeat/dnsupdate %exclude /usr/lib/ocf/resource.d/heartbeat/eDir88 %exclude /usr/lib/ocf/resource.d/heartbeat/fio @@ -878,7 +985,6 @@ rm -rf %{buildroot} %exclude %{_mandir}/man7/ocf_heartbeat_Xen.7.gz %exclude %{_mandir}/man7/ocf_heartbeat_anything.7.gz %exclude %{_mandir}/man7/ocf_heartbeat_asterisk.7.gz -%exclude %{_mandir}/man7/ocf_heartbeat_aws-vpc-route53.7.gz %exclude %{_mandir}/man7/ocf_heartbeat_dnsupdate.7.gz %exclude %{_mandir}/man7/ocf_heartbeat_eDir88.7.gz %exclude %{_mandir}/man7/ocf_heartbeat_fio.7.gz @@ -966,6 +1072,8 @@ ccs_update_schema > /dev/null 2>&1 ||: %{_mandir}/man7/*gcp-vpc-move-vip* /usr/lib/ocf/resource.d/heartbeat/gcp-vpc-move-route* %{_mandir}/man7/*gcp-vpc-move-route* +/usr/lib/ocf/resource.d/heartbeat/gcp-pd-move* +%{_mandir}/man7/*gcp-pd-move* # bundle %{_bindir}/gcloud-ra %dir /usr/lib/%{name} @@ -973,11 +1081,122 @@ ccs_update_schema > /dev/null 2>&1 ||: %endif %changelog -* Fri Nov 29 2019 Oyvind Albrigtsen - 4.1.1-39 +* Thu Jun 25 2020 Oyvind Albrigtsen - 4.1.1-60 +- Upgrade bundled python-httplib2 to fix CVE-2020-11078 + + Resolves: rhbz#1850990 + +* Wed Jun 17 2020 Oyvind Albrigtsen - 4.1.1-59 +- pgsql: support Pacemaker v2.03+ output + + Resolves: rhbz#1836186 + +* Thu Jun 11 2020 Oyvind Albrigtsen - 4.1.1-56 +- Filesystem: set "fast_stop" default to "no" for GFS2 filesystems + + Resolves: rhbz#1814896 + +* Wed Jun 10 2020 Oyvind Albrigtsen - 4.1.1-55 +- azure-events: handle exceptions in urlopen +- nfsserver: dont log error message when /etc/sysconfig/nfs does not exist +- exportfs: describe clientspec format in metadata + + Resolves: rhbz#1845574 + Resolves: rhbz#1845581 + Resolves: rhbz#1845583 + +* Tue Jun 9 2020 Oyvind Albrigtsen - 4.1.1-54 +- exportfs: add symlink support +- aliyun-vpc-move-ip: log output when failing + + Resolves: rhbz#1820523 + Resolves: rhbz#1843999 + +* Tue Jun 2 2020 Oyvind Albrigtsen - 4.1.1-53 +- podman: force remove container if remove fails + + Resolves: rhbz#1839721 + +* Thu May 28 2020 Oyvind Albrigtsen - 4.1.1-52 +- gcp-pd-move: new resource agent for Google Cloud + + Resolves: rhbz#1633251 + +* Wed 
May 27 2020 Oyvind Albrigtsen - 4.1.1-51 +- nfsserver: fix NFSv4-only support +- NovaEvacuate: suppress expected initial error message +- db2 (HADR): promote standby node when master node disappears + + Resolves: rhbz#1818997 + Resolves: rhbz#1830716 + Resolves: rhbz#1836945 + +* Thu May 7 2020 Oyvind Albrigtsen - 4.1.1-50 +- rabbitmq-cluster: increase rabbitmqctl wait timeout during start + + Resolves: rhbz#1832321 + +* Tue Apr 28 2020 Oyvind Albrigtsen - 4.1.1-49 +- aws-vpc-route53: new resource agent for AWS +- pgsql: improve checks to prevent incorrect status, and set initial + score for primary and hot standby + + Resolves: rhbz#1759115 + Resolves: rhbz#1744190 + +* Thu Apr 16 2020 Oyvind Albrigtsen - 4.1.1-48 +- azure-events: new resource agent for Azure + + Resolves: rhbz#1633251 + Resolves: rhbz#1819965 + +* Mon Apr 6 2020 Oyvind Albrigtsen - 4.1.1-47 +- aws-vpc-move-ip: delete remaining route entries + + Resolves: rhbz#1819021 + +* Fri Mar 27 2020 Oyvind Albrigtsen - 4.1.1-46 +- use safe temp file location +- ocf-shellfuncs: ocf_is_clone(): fix to return true when clone-max + is set to 0 + + Resolves: rhbz#1817432 + Resolves: rhbz#1817598 + +* Wed Mar 18 2020 Oyvind Albrigtsen - 4.1.1-45 +- azure-lb: support using socat instead of nc +- aws-vpc-move-ip: add "routing_table_role" parameter +- redis: fix validate-all action and run it during start + + Resolves: rhbz#1804658 + Resolves: rhbz#1810466 + Resolves: rhbz#1792237 + +* Fri Mar 6 2020 Oyvind Albrigtsen - 4.1.1-44 +- lvmlockd: automatically remove locking_type from lvm.conf for LVM + v2.03+ + + Resolves: rhbz#1808468 + +* Tue Jan 28 2020 Oyvind Albrigtsen - 4.1.1-43 +- rabbitmq-cluster: delete nodename when stop fails + + Resolves: rhbz#1792196 + +* Thu Jan 23 2020 Oyvind Albrigtsen - 4.1.1-42 - IPsrcaddr: add destination and table parameters Resolves: rhbz#1744224 +* Mon Jan 13 2020 Oyvind Albrigtsen - 4.1.1-40 +- podman: improve image exist check +- IPaddr2: add CLUSTERIP not supported info to metadata/manpage +- Filesystem: refresh UUID if block device doesnt exist + + Resolves: rhbz#1788889 + Resolves: rhbz#1767916 + Resolves: rhbz#1777381 + * Wed Nov 27 2019 Oyvind Albrigtsen - 4.1.1-38 - IPaddr2: add noprefixroute parameter @@ -1009,7 +1228,6 @@ ccs_update_schema > /dev/null 2>&1 ||: - docker: fix stop issues - rabbitmq-cluster: also restore users in single node mode - IPaddr2: sanitize compressed IPv6 IPs -- SAPInstance: add reload-action - nfsserver: systemd performance improvements - NovaEvacuate: add "evacuate_delay" parameter @@ -1022,7 +1240,6 @@ ccs_update_schema > /dev/null 2>&1 ||: Resolves: rhbz#1748768 Resolves: rhbz#1750352 Resolves: rhbz#1751700 - Resolves: rhbz#1751949 Resolves: rhbz#1751962 Resolves: rhbz#1755760
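
Editor's note: as a worked example of the rabbitmqctl wait-timeout derivation introduced by bz1832321-rabbitmq-cluster-increase-wait-timeout.patch above (the input value is an illustrative assumption): a start operation configured with a 120s timeout reaches the agent as OCF_RESKEY_CRM_meta_timeout=120000, i.e. milliseconds, so:

    # sketch of the timeout math from bz1832321, under an assumed 120s start timeout
    OCF_RESKEY_CRM_meta_timeout=120000                            # 120s start timeout, in ms
    wait_timeout=`expr $OCF_RESKEY_CRM_meta_timeout / 1000 - 5`   # 120 - 5 = 115 seconds
    if [ $wait_timeout -gt 10 ]; then
        timeout_string="--timeout ${wait_timeout}"                # rabbitmqctl waits up to 115s
    else
        timeout_string=""                                         # keep rabbitmqctl's 10s default
    fi

This leaves roughly five seconds of the start timeout for the agent's own cleanup if "rabbitmqctl wait" does not return in time.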