import resource-agents-4.9.0-16.el8

This commit is contained in:
CentOS Sources 2022-05-10 03:08:37 -04:00 committed by Stepan Oksanichenko
parent 559165f198
commit f0715bb168
217 changed files with 3153 additions and 19604 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
SOURCES/ClusterLabs-resource-agents-e711383f.tar.gz
SOURCES/ClusterLabs-resource-agents-55a4e2c9.tar.gz
SOURCES/aliyun-cli-2.1.10.tar.gz
SOURCES/aliyun-python-sdk-core-2.13.1.tar.gz
SOURCES/aliyun-python-sdk-ecs-4.9.3.tar.gz

View File

@ -1,4 +1,4 @@
0358e1cb7fe86b2105bd2646cbe86f3c0273844a SOURCES/ClusterLabs-resource-agents-e711383f.tar.gz
dfc65f4cac3f95026b2f5674019814a527333004 SOURCES/ClusterLabs-resource-agents-55a4e2c9.tar.gz
306e131d8908ca794276bfe3a0b55ccc3bbd482f SOURCES/aliyun-cli-2.1.10.tar.gz
0a56f6d9ed2014a363486d33b63eca094379be06 SOURCES/aliyun-python-sdk-core-2.13.1.tar.gz
c2a98b9a1562d223a76514f05028488ca000c395 SOURCES/aliyun-python-sdk-ecs-4.9.3.tar.gz

View File

@ -1,29 +0,0 @@
From 266e10a719a396a3a522e4b0ce4271a372e4f6f1 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 13 Jul 2018 08:59:45 +0200
Subject: [PATCH 1/3] configure: add Python path detection
---
configure.ac | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/configure.ac b/configure.ac
index 90ed2453..bdf057d3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -501,6 +501,12 @@ AC_SUBST(PING)
AC_SUBST(RM)
AC_SUBST(TEST)
+AM_PATH_PYTHON
+if test -z "$PYTHON"; then
+ echo "*** Essential program python not found" 1>&2
+ exit 1
+fi
+
AC_PATH_PROGS(ROUTE, route)
AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command)
--
2.17.1

View File

@ -1,24 +0,0 @@
From 059effc058758c1294d80f03741bf5c078f1498d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 13 Jul 2018 13:22:56 +0200
Subject: [PATCH 2/3] CI: skip Python agents in shellcheck
---
ci/build.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/ci/build.sh b/ci/build.sh
index 608387ad..c331e9ab 100755
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -58,6 +58,7 @@ check_all_executables() {
echo "Checking executables and .sh files..."
while read -r script; do
file --mime "$script" | grep 'charset=binary' >/dev/null 2>&1 && continue
+ file --mime "$script" | grep 'text/x-python' >/dev/null 2>&1 && continue
head=$(head -n1 "$script")
[[ "$head" =~ .*ruby.* ]] && continue
[[ "$head" =~ .*zsh.* ]] && continue
--
2.17.1

View File

@ -1,646 +0,0 @@
From 92da4155d881e9ac2dce3a51c6953817349d164a Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Mon, 25 Jun 2018 11:03:51 -0300
Subject: [PATCH 1/4] gcp-vpc-move-vip.in: manage ip alias
Add a resource agent to manage ip alias in the cluster.
start:
Check if any machine in hostlist has the alias_ip assigned and
disassociate it.
Assign alias_ip to the current machine.
stop:
Disassociate the alias_ip from the current machine.
status/monitor:
Check if alias_ip is assigned with the current machine.
---
This is a port to the following bash script to python:
https://storage.googleapis.com/sapdeploy/pacemaker-gcp/alias
The problem with the bash script is the use of gcloud whose command line
API is not stable.
ocf-tester.in results:
> sudo ./tools/ocf-tester.in -o alias_ip='10.128.1.0/32' -o stackdriver_logging=yes -n gcp-vpc-move-vip.in heartbeat/gcp-vpc-move-vip.in
Beginning tests for heartbeat/gcp-vpc-move-vip.in...
./tools/ocf-tester.in: line 226: cd: @datadir@/resource-agents: No such file or directory
close failed in file object destructor:
sys.excepthook is missing
lost sys.stderr
* rc=1: Your agent produces meta-data which does not conform to ra-api-1.dtd
Tests failed: heartbeat/gcp-vpc-move-vip.in failed 1 tests
The only test faillig is the meta-data, but all the agents that I tried
also fails on this. If this is a concern, could you please point me out
to a test which succeeds so I can check what I am doing differently?
This commit can also be viewed at:
https://github.com/collabora-gce/resource-agents/tree/alias
Thanks
---
configure.ac | 1 +
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/gcp-vpc-move-vip.in | 299 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 302 insertions(+)
create mode 100755 heartbeat/gcp-vpc-move-vip.in
diff --git a/configure.ac b/configure.ac
index bdf057d33..3d8f9ca74 100644
--- a/configure.ac
+++ b/configure.ac
@@ -959,6 +959,7 @@ AC_CONFIG_FILES([heartbeat/dnsupdate], [chmod +x heartbeat/dnsupdate])
AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88])
AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip])
+AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip])
AC_CONFIG_FILES([heartbeat/iSCSILogicalUnit], [chmod +x heartbeat/iSCSILogicalUnit])
AC_CONFIG_FILES([heartbeat/iSCSITarget], [chmod +x heartbeat/iSCSITarget])
AC_CONFIG_FILES([heartbeat/jira], [chmod +x heartbeat/jira])
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index c59126d13..e9eaf369f 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -114,6 +114,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_galera.7 \
ocf_heartbeat_garbd.7 \
ocf_heartbeat_gcp-vpc-move-ip.7 \
+ ocf_heartbeat_gcp-vpc-move-vip.7 \
ocf_heartbeat_iSCSILogicalUnit.7 \
ocf_heartbeat_iSCSITarget.7 \
ocf_heartbeat_iface-bridge.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 4f5059e27..36b271956 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -111,6 +111,7 @@ ocf_SCRIPTS = AoEtarget \
galera \
garbd \
gcp-vpc-move-ip \
+ gcp-vpc-move-vip \
iSCSILogicalUnit \
iSCSITarget \
ids \
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
new file mode 100755
index 000000000..4954e11df
--- /dev/null
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# ---------------------------------------------------------------------
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ---------------------------------------------------------------------
+# Description: Google Cloud Platform - Floating IP Address (Alias)
+# ---------------------------------------------------------------------
+
+import json
+import logging
+import os
+import sys
+import time
+
+import googleapiclient.discovery
+
+if sys.version_info >= (3, 0):
+ # Python 3 imports.
+ import urllib.parse as urlparse
+ import urllib.request as urlrequest
+else:
+ # Python 2 imports.
+ import urllib as urlparse
+ import urllib2 as urlrequest
+
+
+CONN = None
+THIS_VM = None
+OCF_SUCCESS = 0
+OCF_ERR_GENERIC = 1
+OCF_ERR_CONFIGURED = 6
+OCF_NOT_RUNNING = 7
+METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
+METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
+METADATA = \
+'''<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="gcp-vpc-move-vip">
+ <version>1.0</version>
+ <longdesc lang="en">Floating IP Address on Google Cloud Platform - Using Alias IP address functionality to attach a secondary IP address to a running instance</longdesc>
+ <shortdesc lang="en">Floating IP Address on Google Cloud Platform</shortdesc>
+ <parameters>
+ <parameter name="hostlist" unique="1" required="1">
+ <longdesc lang="en">List of hosts in the cluster</longdesc>
+ <shortdesc lang="en">Host list</shortdesc>
+ <content type="string" default="" />
+ </parameter>
+ <parameter name="stackdriver-logging" unique="0" required="0">
+ <longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
+ <shortdesc lang="en">Stackdriver-logging support</shortdesc>
+ <content type="boolean" default="" />
+ </parameter>
+ <parameter name="alias_ip" unique="1" required="1">
+ <longdesc lang="en">IP Address to be added including CIDR. E.g 192.168.0.1/32</longdesc>
+ <shortdesc lang="en">IP Address to be added including CIDR. E.g 192.168.0.1/32</shortdesc>
+ <content type="string" default="" />
+ </parameter>
+ <parameter name="alias_range_name" unique="1" required="0">
+ <longdesc lang="en">Subnet name for the Alias IP2</longdesc>
+ <shortdesc lang="en">Subnet name for the Alias IP</shortdesc>
+ <content type="string" default="" />
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="300" />
+ <action name="stop" timeout="15" />
+ <action name="monitor" timeout="15" interval="60" depth="0" />
+ <action name="meta-data" timeout="15" />
+ </actions>
+</resource-agent>'''
+
+
+def get_metadata(metadata_key, params=None, timeout=None):
+ """Performs a GET request with the metadata headers.
+
+ Args:
+ metadata_key: string, the metadata to perform a GET request on.
+ params: dictionary, the query parameters in the GET request.
+ timeout: int, timeout in seconds for metadata requests.
+
+ Returns:
+ HTTP response from the GET request.
+
+ Raises:
+ urlerror.HTTPError: raises when the GET request fails.
+ """
+ timeout = timeout or 60
+ metadata_url = os.path.join(METADATA_SERVER, metadata_key)
+ params = urlparse.urlencode(params or {})
+ url = '%s?%s' % (metadata_url, params)
+ request = urlrequest.Request(url, headers=METADATA_HEADERS)
+ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
+ return request_opener.open(request, timeout=timeout * 1.1).read()
+
+
+def get_instance(project, zone, instance):
+ request = CONN.instances().get(
+ project=project, zone=zone, instance=instance)
+ return request.execute()
+
+
+def get_network_ifaces(project, zone, instance):
+ return get_instance(project, zone, instance)['networkInterfaces']
+
+
+def wait_for_operation(project, zone, operation):
+ while True:
+ result = CONN.zoneOperations().get(
+ project=project,
+ zone=zone,
+ operation=operation['name']).execute()
+
+ if result['status'] == 'DONE':
+ if 'error' in result:
+ raise Exception(result['error'])
+ return
+ time.sleep(1)
+
+
+def set_alias(project, zone, instance, alias, alias_range_name=None):
+ fingerprint = get_network_ifaces(project, zone, instance)[0]['fingerprint']
+ body = {
+ 'aliasIpRanges': [],
+ 'fingerprint': fingerprint
+ }
+ if alias:
+ obj = {'ipCidrRange': alias}
+ if alias_range_name:
+ obj['subnetworkRangeName'] = alias_range_name
+ body['aliasIpRanges'].append(obj)
+
+ request = CONN.instances().updateNetworkInterface(
+ instance=instance, networkInterface='nic0', project=project, zone=zone,
+ body=body)
+ operation = request.execute()
+ wait_for_operation(project, zone, operation)
+
+
+def get_alias(project, zone, instance):
+ iface = get_network_ifaces(project, zone, instance)
+ try:
+ return iface[0]['aliasIpRanges'][0]['ipCidrRange']
+ except KeyError:
+ return ''
+
+
+def get_localhost_alias():
+ net_iface = get_metadata('instance/network-interfaces', {'recursive': True})
+ net_iface = json.loads(net_iface.decode('utf-8'))
+ try:
+ return net_iface[0]['ipAliases'][0]
+ except (KeyError, IndexError):
+ return ''
+
+
+def get_zone(project, instance):
+ request = CONN.instances().aggregatedList(project=project)
+ while request is not None:
+ response = request.execute()
+ zones = response.get('items', {})
+ for zone in zones.values():
+ for inst in zone.get('instances', []):
+ if inst['name'] == instance:
+ return inst['zone'].split("/")[-1]
+ request = CONN.instances().aggregatedList_next(
+ previous_request=request, previous_response=response)
+ raise Exception("Unable to find instance %s" % (instance))
+
+
+def gcp_alias_start(alias):
+ if not alias:
+ sys.exit(OCF_ERR_CONFIGURED)
+ my_alias = get_localhost_alias()
+ my_zone = get_metadata('instance/zone').split('/')[-1]
+ project = get_metadata('project/project-id')
+
+ # If I already have the IP, exit. If it has an alias IP that isn't the VIP,
+ # then remove it
+ if my_alias == alias:
+ logging.info(
+ '%s already has %s attached. No action required' % (THIS_VM, alias))
+ sys.exit(OCF_SUCCESS)
+ elif my_alias:
+ logging.info('Removing %s from %s' % (my_alias, THIS_VM))
+ set_alias(project, my_zone, THIS_VM, '')
+
+ # Loops through all hosts & remove the alias IP from the host that has it
+ hostlist = os.environ.get('OCF_RESKEY_hostlist', '')
+ hostlist.replace(THIS_VM, '')
+ for host in hostlist.split():
+ host_zone = get_zone(project, host)
+ host_alias = get_alias(project, host_zone, host)
+ if alias == host_alias:
+ logging.info(
+ '%s is attached to %s - Removing all alias IP addresses from %s' %
+ (alias, host, host))
+ set_alias(project, host_zone, host, '')
+ break
+
+ # add alias IP to localhost
+ set_alias(
+ project, my_zone, THIS_VM, alias,
+ os.environ.get('OCF_RESKEY_alias_range_name'))
+
+ # Check the IP has been added
+ my_alias = get_localhost_alias()
+ if alias == my_alias:
+ logging.info('Finished adding %s to %s' % (alias, THIS_VM))
+ elif my_alias:
+ logging.error(
+ 'Failed to add IP. %s has an IP attached but it isn\'t %s' %
+ (THIS_VM, alias))
+ sys.exit(OCF_ERR_GENERIC)
+ else:
+ logging.error('Failed to add IP address %s to %s' % (alias, THIS_VM))
+ sys.exit(OCF_ERR_GENERIC)
+
+
+def gcp_alias_stop(alias):
+ if not alias:
+ sys.exit(OCF_ERR_CONFIGURED)
+ my_alias = get_localhost_alias()
+ my_zone = get_metadata('instance/zone').split('/')[-1]
+ project = get_metadata('project/project-id')
+
+ if my_alias == alias:
+ logging.info('Removing %s from %s' % (my_alias, THIS_VM))
+ set_alias(project, my_zone, THIS_VM, '')
+
+
+def gcp_alias_status(alias):
+ if not alias:
+ sys.exit(OCF_ERR_CONFIGURED)
+ my_alias = get_localhost_alias()
+ if alias == my_alias:
+ logging.info('%s has the correct IP address attached' % THIS_VM)
+ else:
+ sys.exit(OCF_NOT_RUNNING)
+
+
+def configure():
+ global CONN
+ global THIS_VM
+
+ # Populate global vars
+ CONN = googleapiclient.discovery.build('compute', 'v1')
+ THIS_VM = get_metadata('instance/name')
+
+ # Prepare logging
+ logging.basicConfig(
+ format='gcp:alias - %(levelname)s - %(message)s', level=logging.INFO)
+ logging.getLogger('googleapiclient').setLevel(logging.WARN)
+ logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging')
+ if logging_env:
+ logging_env = logging_env.lower()
+ if any(x in logging_env for x in ['yes', 'true', 'enabled']):
+ try:
+ import google.cloud.logging.handlers
+ client = google.cloud.logging.Client()
+ handler = google.cloud.logging.handlers.CloudLoggingHandler(
+ client, name=THIS_VM)
+ handler.setLevel(logging.INFO)
+ formatter = logging.Formatter('gcp:alias "%(message)s"')
+ handler.setFormatter(formatter)
+ root_logger = logging.getLogger()
+ root_logger.addHandler(handler)
+ except ImportError:
+ logging.error('Couldn\'t import google.cloud.logging, '
+ 'disabling Stackdriver-logging support')
+
+
+def main():
+ configure()
+
+ alias = os.environ.get('OCF_RESKEY_alias_ip')
+ if 'start' in sys.argv[1]:
+ gcp_alias_start(alias)
+ elif 'stop' in sys.argv[1]:
+ gcp_alias_stop(alias)
+ elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]:
+ gcp_alias_status(alias)
+ elif 'meta-data' in sys.argv[1]:
+ print(METADATA)
+ else:
+ logging.error('gcp:alias - no such function %s' % str(sys.argv[1]))
+
+
+if __name__ == "__main__":
+ main()
From 0e6ba4894a748664ac1d8ff5b9e8c271f0b04d93 Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Thu, 12 Jul 2018 09:01:22 -0300
Subject: [PATCH 2/4] gcp-vpc-move-vip.in: minor fixes
- Get hostlist from the project if the parameter is not given
- Verify if alias is present out of each action function
- Don't call configure if 'meta-data' action is given
---
heartbeat/gcp-vpc-move-vip.in | 40 ++++++++++++++++++++++++++++------------
1 file changed, 28 insertions(+), 12 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index 4954e11df..f3d117bda 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -50,7 +50,7 @@ METADATA = \
<longdesc lang="en">Floating IP Address on Google Cloud Platform - Using Alias IP address functionality to attach a secondary IP address to a running instance</longdesc>
<shortdesc lang="en">Floating IP Address on Google Cloud Platform</shortdesc>
<parameters>
- <parameter name="hostlist" unique="1" required="1">
+ <parameter name="hostlist" unique="1" required="0">
<longdesc lang="en">List of hosts in the cluster</longdesc>
<shortdesc lang="en">Host list</shortdesc>
<content type="string" default="" />
@@ -177,9 +177,22 @@ def get_zone(project, instance):
raise Exception("Unable to find instance %s" % (instance))
+def get_instances_list(project, exclude):
+ hostlist = []
+ request = CONN.instances().aggregatedList(project=project)
+ while request is not None:
+ response = request.execute()
+ zones = response.get('items', {})
+ for zone in zones.values():
+ for inst in zone.get('instances', []):
+ if inst['name'] != exclude:
+ hostlist.append(inst['name'])
+ request = CONN.instances().aggregatedList_next(
+ previous_request=request, previous_response=response)
+ return hostlist
+
+
def gcp_alias_start(alias):
- if not alias:
- sys.exit(OCF_ERR_CONFIGURED)
my_alias = get_localhost_alias()
my_zone = get_metadata('instance/zone').split('/')[-1]
project = get_metadata('project/project-id')
@@ -196,8 +209,11 @@ def gcp_alias_start(alias):
# Loops through all hosts & remove the alias IP from the host that has it
hostlist = os.environ.get('OCF_RESKEY_hostlist', '')
- hostlist.replace(THIS_VM, '')
- for host in hostlist.split():
+ if hostlist:
+ hostlist.replace(THIS_VM, '').split()
+ else:
+ hostlist = get_instances_list(project, THIS_VM)
+ for host in hostlist:
host_zone = get_zone(project, host)
host_alias = get_alias(project, host_zone, host)
if alias == host_alias:
@@ -227,8 +243,6 @@ def gcp_alias_start(alias):
def gcp_alias_stop(alias):
- if not alias:
- sys.exit(OCF_ERR_CONFIGURED)
my_alias = get_localhost_alias()
my_zone = get_metadata('instance/zone').split('/')[-1]
project = get_metadata('project/project-id')
@@ -239,8 +253,6 @@ def gcp_alias_stop(alias):
def gcp_alias_status(alias):
- if not alias:
- sys.exit(OCF_ERR_CONFIGURED)
my_alias = get_localhost_alias()
if alias == my_alias:
logging.info('%s has the correct IP address attached' % THIS_VM)
@@ -280,17 +292,21 @@ def configure():
def main():
- configure()
+ if 'meta-data' in sys.argv[1]:
+ print(METADATA)
+ return
alias = os.environ.get('OCF_RESKEY_alias_ip')
+ if not alias:
+ sys.exit(OCF_ERR_CONFIGURED)
+
+ configure()
if 'start' in sys.argv[1]:
gcp_alias_start(alias)
elif 'stop' in sys.argv[1]:
gcp_alias_stop(alias)
elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]:
gcp_alias_status(alias)
- elif 'meta-data' in sys.argv[1]:
- print(METADATA)
else:
logging.error('gcp:alias - no such function %s' % str(sys.argv[1]))
From 1f50c4bc80f23f561a8630c12076707366525899 Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Thu, 12 Jul 2018 13:02:16 -0300
Subject: [PATCH 3/4] gcp-vcp-move-vip.in: implement validate-all
Also fix some return errors
---
heartbeat/gcp-vpc-move-vip.in | 47 +++++++++++++++++++++++++++++++------------
1 file changed, 34 insertions(+), 13 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index f3d117bda..a90c2de8d 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -22,7 +22,10 @@ import os
import sys
import time
-import googleapiclient.discovery
+try:
+ import googleapiclient.discovery
+except ImportError:
+ pass
if sys.version_info >= (3, 0):
# Python 3 imports.
@@ -36,6 +39,7 @@ else:
CONN = None
THIS_VM = None
+ALIAS = None
OCF_SUCCESS = 0
OCF_ERR_GENERIC = 1
OCF_ERR_CONFIGURED = 6
@@ -210,7 +214,7 @@ def gcp_alias_start(alias):
# Loops through all hosts & remove the alias IP from the host that has it
hostlist = os.environ.get('OCF_RESKEY_hostlist', '')
if hostlist:
- hostlist.replace(THIS_VM, '').split()
+ hostlist = hostlist.replace(THIS_VM, '').split()
else:
hostlist = get_instances_list(project, THIS_VM)
for host in hostlist:
@@ -260,14 +264,31 @@ def gcp_alias_status(alias):
sys.exit(OCF_NOT_RUNNING)
-def configure():
+def validate():
+ global ALIAS
global CONN
global THIS_VM
# Populate global vars
- CONN = googleapiclient.discovery.build('compute', 'v1')
- THIS_VM = get_metadata('instance/name')
+ try:
+ CONN = googleapiclient.discovery.build('compute', 'v1')
+ except Exception as e:
+ logging.error('Couldn\'t connect with google api: ' + str(e))
+ sys.exit(OCF_ERR_CONFIGURED)
+
+ try:
+ THIS_VM = get_metadata('instance/name')
+ except Exception as e:
+ logging.error('Couldn\'t get instance name, is this running inside GCE?: ' + str(e))
+ sys.exit(OCF_ERR_CONFIGURED)
+ ALIAS = os.environ.get('OCF_RESKEY_alias_ip')
+ if not ALIAS:
+ logging.error('Missing alias_ip parameter')
+ sys.exit(OCF_ERR_CONFIGURED)
+
+
+def configure_logs():
# Prepare logging
logging.basicConfig(
format='gcp:alias - %(levelname)s - %(message)s', level=logging.INFO)
@@ -296,19 +317,19 @@ def main():
print(METADATA)
return
- alias = os.environ.get('OCF_RESKEY_alias_ip')
- if not alias:
- sys.exit(OCF_ERR_CONFIGURED)
+ validate()
+ if 'validate-all' in sys.argv[1]:
+ return
- configure()
+ configure_logs()
if 'start' in sys.argv[1]:
- gcp_alias_start(alias)
+ gcp_alias_start(ALIAS)
elif 'stop' in sys.argv[1]:
- gcp_alias_stop(alias)
+ gcp_alias_stop(ALIAS)
elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]:
- gcp_alias_status(alias)
+ gcp_alias_status(ALIAS)
else:
- logging.error('gcp:alias - no such function %s' % str(sys.argv[1]))
+ logging.error('no such function %s' % str(sys.argv[1]))
if __name__ == "__main__":
From f11cb236bb348ebee74e962d0ded1cb2fc97bd5f Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Fri, 13 Jul 2018 08:01:02 -0300
Subject: [PATCH 4/4] gcp-vpc-move-vip.in: minor fixes
---
heartbeat/gcp-vpc-move-vip.in | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index a90c2de8d..9fc87242f 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!@PYTHON@ -tt
# ---------------------------------------------------------------------
# Copyright 2016 Google Inc.
#
@@ -59,7 +59,7 @@ METADATA = \
<shortdesc lang="en">Host list</shortdesc>
<content type="string" default="" />
</parameter>
- <parameter name="stackdriver-logging" unique="0" required="0">
+ <parameter name="stackdriver_logging" unique="0" required="0">
<longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
<shortdesc lang="en">Stackdriver-logging support</shortdesc>
<content type="boolean" default="" />
@@ -80,6 +80,7 @@ METADATA = \
<action name="stop" timeout="15" />
<action name="monitor" timeout="15" interval="60" depth="0" />
<action name="meta-data" timeout="15" />
+ <action name="validate-all" timeout="15" />
</actions>
</resource-agent>'''

View File

@ -1,632 +0,0 @@
From 0ee4c62105ee8f90a43fe0bf8a65bc9b9da2e7e0 Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Wed, 18 Jul 2018 11:54:40 -0300
Subject: [PATCH 1/4] gcp-vpc-move-route.in: python implementation of
gcp-vpc-move-ip.in
gcloud api is not reliable and it is slow, add a python version of
gcp-vpc-move-ip.in
---
configure.ac | 1 +
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/gcp-vpc-move-route.in | 441 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 444 insertions(+)
create mode 100644 heartbeat/gcp-vpc-move-route.in
diff --git a/configure.ac b/configure.ac
index 3d8f9ca74..039b4942c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -960,6 +960,7 @@ AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88])
AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip])
+AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route])
AC_CONFIG_FILES([heartbeat/iSCSILogicalUnit], [chmod +x heartbeat/iSCSILogicalUnit])
AC_CONFIG_FILES([heartbeat/iSCSITarget], [chmod +x heartbeat/iSCSITarget])
AC_CONFIG_FILES([heartbeat/jira], [chmod +x heartbeat/jira])
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index e9eaf369f..3ac0569de 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -115,6 +115,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_garbd.7 \
ocf_heartbeat_gcp-vpc-move-ip.7 \
ocf_heartbeat_gcp-vpc-move-vip.7 \
+ ocf_heartbeat_gcp-vpc-move-route.7 \
ocf_heartbeat_iSCSILogicalUnit.7 \
ocf_heartbeat_iSCSITarget.7 \
ocf_heartbeat_iface-bridge.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 36b271956..d4750bf09 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -112,6 +112,7 @@ ocf_SCRIPTS = AoEtarget \
garbd \
gcp-vpc-move-ip \
gcp-vpc-move-vip \
+ gcp-vpc-move-route \
iSCSILogicalUnit \
iSCSITarget \
ids \
diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
new file mode 100644
index 000000000..5f4569baa
--- /dev/null
+++ b/heartbeat/gcp-vpc-move-route.in
@@ -0,0 +1,441 @@
+#!@PYTHON@ -tt
+# - *- coding: utf- 8 - *-
+#
+#
+# OCF resource agent to move an IP address within a VPC in GCP
+#
+# License: GNU General Public License (GPL)
+# Copyright (c) 2018 Hervé Werner (MFG Labs)
+# Copyright 2018 Google Inc.
+# Based on code from Markus Guertler (aws-vpc-move-ip)
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+
+#######################################################################
+
+import atexit
+import logging
+import os
+import sys
+import time
+
+try:
+ import googleapiclient.discovery
+ import pyroute2
+except ImportError:
+ pass
+
+if sys.version_info >= (3, 0):
+ # Python 3 imports.
+ import urllib.parse as urlparse
+ import urllib.request as urlrequest
+else:
+ # Python 2 imports.
+ import urllib as urlparse
+ import urllib2 as urlrequest
+
+
+OCF_SUCCESS = 0
+OCF_ERR_GENERIC = 1
+OCF_ERR_UNIMPLEMENTED = 3
+OCF_ERR_PERM = 4
+OCF_ERR_CONFIGURED = 6
+OCF_NOT_RUNNING = 7
+GCP_API_URL_PREFIX = 'https://www.googleapis.com/compute/v1'
+METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
+METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
+METADATA = \
+'''<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="gcp-vpc-move-route">
+<version>1.0</version>
+<longdesc lang="en">
+Resource Agent that can move a floating IP addresse within a GCP VPC by changing an
+entry in the routing table. This agent also configures the floating IP locally
+on the instance OS.
+Requirements :
+- IP forwarding must be enabled on all instances in order to be able to
+terminate the route
+- The floating IP address must be choosen so that it is outside all existing
+subnets in the VPC network
+- IAM permissions
+(see https://cloud.google.com/compute/docs/access/iam-permissions) :
+1) compute.routes.delete, compute.routes.get and compute.routes.update on the
+route
+2) compute.networks.updatePolicy on the network (to add a new route)
+3) compute.networks.get on the network (to check the VPC network existence)
+4) compute.routes.list on the project (to check conflicting routes)
+</longdesc>
+<shortdesc lang="en">Move IP within a GCP VPC</shortdesc>
+
+<parameters>
+
+<parameter name="stackdriver_logging" unique="0" required="0">
+<longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
+<shortdesc lang="en">Stackdriver-logging support</shortdesc>
+<content type="boolean" default="" />
+</parameter>
+
+<parameter name="ip" unique="1" required="1">
+<longdesc lang="en">
+Floating IP address. Note that this IP must be chosen outside of all existing
+subnet ranges
+</longdesc>
+<shortdesc lang="en">Floating IP</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="vpc_network" required="1">
+<longdesc lang="en">
+Name of the VPC network
+</longdesc>
+<shortdesc lang="en">VPC network</shortdesc>
+<content type="string" default="${OCF_RESKEY_vpc_network_default}" />
+</parameter>
+
+<parameter name="interface">
+<longdesc lang="en">
+Name of the network interface
+</longdesc>
+<shortdesc lang="en">Network interface name</shortdesc>
+<content type="string" default="${OCF_RESKEY_interface_default}" />
+</parameter>
+
+<parameter name="route_name" unique="1">
+<longdesc lang="en">
+Route name
+</longdesc>
+<shortdesc lang="en">Route name</shortdesc>
+<content type="string" default="${OCF_RESKEY_route_name_default}" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="180s" />
+<action name="stop" timeout="180s" />
+<action name="monitor" depth="0" timeout="30s" interval="60s" />
+<action name="validate-all" timeout="5s" />
+<action name="meta-data" timeout="5s" />
+</actions>
+</resource-agent>
+'''
+
+
+class Context(object):
+ __slots__ = 'conn', 'iface_idx', 'instance', 'instance_url', 'interface', \
+ 'ip', 'iproute', 'project', 'route_name', 'vpc_network', \
+ 'vpc_network_url', 'zone'
+
+
+def wait_for_operation(ctx, response):
+ """Blocks until operation completes.
+ Code from GitHub's GoogleCloudPlatform/python-docs-samples
+
+ Args:
+ response: dict, a request's response
+ """
+ def _OperationGetter(response):
+ operation = response[u'name']
+ if response.get(u'zone'):
+ return ctx.conn.zoneOperations().get(
+ project=ctx.project, zone=ctx.zone, operation=operation)
+ else:
+ return ctx.conn.globalOperations().get(
+ project=ctx.project, operation=operation)
+
+ while True:
+ result = _OperationGetter(response).execute()
+
+ if result['status'] == 'DONE':
+ if 'error' in result:
+ raise Exception(result['error'])
+ return result
+
+ time.sleep(1)
+
+
+def get_metadata(metadata_key, params=None, timeout=None):
+ """Performs a GET request with the metadata headers.
+
+ Args:
+ metadata_key: string, the metadata to perform a GET request on.
+ params: dictionary, the query parameters in the GET request.
+ timeout: int, timeout in seconds for metadata requests.
+
+ Returns:
+ HTTP response from the GET request.
+
+ Raises:
+ urlerror.HTTPError: raises when the GET request fails.
+ """
+ timeout = timeout or 60
+ metadata_url = os.path.join(METADATA_SERVER, metadata_key)
+ params = urlparse.urlencode(params or {})
+ url = '%s?%s' % (metadata_url, params)
+ request = urlrequest.Request(url, headers=METADATA_HEADERS)
+ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
+ return request_opener.open(request, timeout=timeout * 1.1).read()
+
+
+def validate(ctx):
+ if os.geteuid() != 0:
+ logging.error('You must run this agent as root')
+ sys.exit(OCF_ERR_PERM)
+
+ try:
+ ctx.conn = googleapiclient.discovery.build('compute', 'v1')
+ except Exception as e:
+ logging.error('Couldn\'t connect with google api: ' + str(e))
+ sys.exit(OCF_ERR_CONFIGURED)
+
+ ctx.ip = os.environ.get('OCF_RESKEY_ip')
+ if not ctx.ip:
+ logging.error('Missing ip parameter')
+ sys.exit(OCF_ERR_CONFIGURED)
+
+ try:
+ ctx.instance = get_metadata('instance/name')
+ ctx.zone = get_metadata('instance/zone').split('/')[-1]
+ ctx.project = get_metadata('project/project-id')
+ except Exception as e:
+ logging.error(
+ 'Instance information not found. Is this a GCE instance ?: %s', str(e))
+ sys.exit(OCF_ERR_CONFIGURED)
+
+ ctx.instance_url = '%s/projects/%s/zones/%s/instances/%s' % (
+ GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance)
+ ctx.vpc_network = os.environ.get('OCF_RESKEY_vpc_network', 'default')
+ ctx.vpc_network_url = '%s/projects/%s/global/networks/%s' % (
+ GCP_API_URL_PREFIX, ctx.project, ctx.vpc_network)
+ ctx.interface = os.environ.get('OCF_RESKEY_interface', 'eth0')
+ ctx.route_name = os.environ.get(
+ 'OCF_RESKEY_route_name', 'ra-%s' % os.environ['__SCRIPT_NAME'])
+ ctx.iproute = pyroute2.IPRoute()
+ atexit.register(ctx.iproute.close)
+ idxs = ctx.iproute.link_lookup(ifname=ctx.interface)
+ if not idxs:
+ logging.error('Network interface not found')
+ sys.exit(OCF_ERR_CONFIGURED)
+ ctx.iface_idx = idxs[0]
+
+
+def check_conflicting_routes(ctx):
+ fl = '(destRange = "%s*") AND (network = "%s") AND (name != "%s")' % (
+ ctx.ip, ctx.vpc_network_url, ctx.route_name)
+ request = ctx.conn.routes().list(project=ctx.project, filter=fl)
+ response = request.execute()
+ route_list = response.get('items', None)
+ if route_list:
+ logging.error(
+ 'Conflicting unnmanaged routes for destination %s/32 in VPC %s found : %s',
+ ctx.ip, ctx.vpc_network, str(route_list))
+ sys.exit(OCF_ERR_CONFIGURED)
+
+
+def route_release(ctx):
+ request = ctx.conn.routes().delete(project=ctx.project, route=ctx.route_name)
+ wait_for_operation(ctx, request.execute())
+
+
+def ip_monitor(ctx):
+ logging.info('IP monitor: checking local network configuration')
+
+ def address_filter(addr):
+ for attr in addr['attrs']:
+ if attr[0] == 'IFA_LOCAL':
+ if attr[1] == ctx.ip:
+ return True
+ else:
+ return False
+
+ route = ctx.iproute.get_addr(
+ index=ctx.iface_idx, match=address_filter)
+ if not route:
+ logging.warn(
+ 'The floating IP %s is not locally configured on this instance (%s)',
+ ctx.ip, ctx.instance)
+ return OCF_NOT_RUNNING
+
+ logging.debug(
+ 'The floating IP %s is correctly configured on this instance (%s)',
+ ctx.ip, ctx.instance)
+ return OCF_SUCCESS
+
+
+def ip_release(ctx):
+ ctx.iproute.addr('del', index=ctx.iface_idx, address=ctx.ip, mask=32)
+
+
+def ip_and_route_start(ctx):
+ logging.info('Bringing up the floating IP %s', ctx.ip)
+
+ # Add a new entry in the routing table
+ # If the route entry exists and is pointing to another instance, take it over
+
+ # Ensure that there is no route that we are not aware of that is also handling our IP
+ check_conflicting_routes(ctx)
+
+ # There is no replace API, We need to first delete the existing route if any
+ try:
+ request = ctx.conn.routes().get(project=ctx.project, route=ctx.route_name)
+ request.execute()
+ # TODO: check specific exception for 404
+ except googleapiclient.errors.HttpError as e:
+ if e.resp.status != 404:
+ raise
+ else:
+ route_release(ctx)
+
+ route_body = {
+ 'name': ctx.route_name,
+ 'network': ctx.vpc_network_url,
+ 'destRange': '%s/32' % ctx.ip,
+ 'nextHopInstance': ctx.instance_url,
+ }
+ try:
+ request = ctx.conn.routes().insert(project=ctx.project, body=route_body)
+ wait_for_operation(ctx, request.execute())
+ except googleapiclient.errors.HttpError:
+ try:
+ request = ctx.conn.networks().get(
+ project=ctx.project, network=ctx.vpc_network)
+ request.execute()
+ except googleapiclient.errors.HttpError as e:
+ if e.resp.status == 404:
+ logging.error('VPC network not found')
+ sys.exit(OCF_ERR_CONFIGURED)
+ else:
+ raise
+ else:
+ raise
+
+ # Configure the IP address locally
+ # We need to release the IP first
+ if ip_monitor(ctx) == OCF_SUCCESS:
+ ip_release(ctx)
+
+ ctx.iproute.addr('add', index=ctx.iface_idx, address=ctx.ip, mask=32)
+ ctx.iproute.link('set', index=ctx.iface_idx, state='up')
+ logging.info('Successfully brought up the floating IP %s', ctx.ip)
+
+
+def route_monitor(ctx):
+ logging.info('GCP route monitor: checking route table')
+
+ # Ensure that there is no route that we are not aware of that is also handling our IP
+ check_conflicting_routes
+
+ try:
+ request = ctx.conn.routes().get(project=ctx.project, route=ctx.route_name)
+ response = request.execute()
+ except googleapiclient.errors.HttpError as e:
+ if 'Insufficient Permission' in e.content:
+ return OCF_ERR_PERM
+ elif e.resp.status == 404:
+ return OCF_NOT_RUNNING
+ else:
+ raise
+
+ routed_to_instance = response.get('nextHopInstance', '<unknown>')
+ instance_url = '%s/projects/%s/zones/%s/instances/%s' % (
+ GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance)
+ if routed_to_instance != instance_url:
+ logging.warn(
+ 'The floating IP %s is not routed to this instance (%s) but to instance %s',
+ ctx.ip, ctx.instance, routed_to_instance.split('/')[-1])
+ return OCF_NOT_RUNNING
+
+ logging.debug(
+ 'The floating IP %s is correctly routed to this instance (%s)',
+ ctx.ip, ctx.instance)
+ return OCF_SUCCESS
+
+
+def ip_and_route_stop(ctx):
+ logging.info('Bringing down the floating IP %s', ctx.ip)
+
+ # Delete the route entry
+ # If the route entry exists and is pointing to another instance, don't touch it
+ if route_monitor(ctx) == OCF_NOT_RUNNING:
+ logging.info(
+ 'The floating IP %s is already not routed to this instance (%s)',
+ ctx.ip, ctx.instance)
+ else:
+ route_release(ctx)
+
+ if ip_monitor(ctx) == OCF_NOT_RUNNING:
+ logging.info('The floating IP %s is already down', ctx.ip)
+ else:
+ ip_release(ctx)
+
+
+def configure_logs(ctx):
+ # Prepare logging
+ logging.basicConfig(
+ format='gcp:route - %(levelname)s - %(message)s', level=logging.INFO)
+ logging.getLogger('googleapiclient').setLevel(logging.WARN)
+ logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging')
+ if logging_env:
+ logging_env = logging_env.lower()
+ if any(x in logging_env for x in ['yes', 'true', 'enabled']):
+ try:
+ import google.cloud.logging.handlers
+ client = google.cloud.logging.Client()
+ handler = google.cloud.logging.handlers.CloudLoggingHandler(
+ client, name=ctx.instance)
+ handler.setLevel(logging.INFO)
+ formatter = logging.Formatter('gcp:route "%(message)s"')
+ handler.setFormatter(formatter)
+ root_logger = logging.getLogger()
+ root_logger.addHandler(handler)
+ except ImportError:
+ logging.error('Couldn\'t import google.cloud.logging, '
+ 'disabling Stackdriver-logging support')
+
+
+def main():
+ if 'meta-data' in sys.argv[1]:
+ print(METADATA)
+ return
+
+ ctx = Context()
+
+ validate(ctx)
+ if 'validate-all' in sys.argv[1]:
+ return
+
+ configure_logs(ctx)
+ if 'start' in sys.argv[1]:
+ ip_and_route_start(ctx)
+ elif 'stop' in sys.argv[1]:
+ ip_and_route_stop(ctx)
+ elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]:
+ sys.exit(ip_monitor(ctx))
+ else:
+ usage = 'usage: $0 {start|stop|monitor|status|meta-data|validate-all}'
+ logging.error(usage)
+ sys.exit(OCF_ERR_UNIMPLEMENTED)
+
+
+if __name__ == "__main__":
+ main()
From 6590c99f462403808854114ec1031755e5ce6b36 Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Thu, 19 Jul 2018 12:33:44 -0300
Subject: [PATCH 2/4] gcp-vpc-move-ip.in: add deprecation message
---
heartbeat/gcp-vpc-move-ip.in | 2 ++
1 file changed, 2 insertions(+)
diff --git a/heartbeat/gcp-vpc-move-ip.in b/heartbeat/gcp-vpc-move-ip.in
index 4a6c343a8..3b8d998b3 100755
--- a/heartbeat/gcp-vpc-move-ip.in
+++ b/heartbeat/gcp-vpc-move-ip.in
@@ -348,6 +348,8 @@ ip_and_route_stop() {
#
###############################################################################
+ocf_log warn "gcp-vpc-move-ip is deprecated, prefer to use gcp-vpc-move-route instead"
+
case $__OCF_ACTION in
meta-data) metadata
exit $OCF_SUCCESS
From 73608196d21068c6c2d5fb9f77e3d40179c85fee Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Fri, 20 Jul 2018 08:26:17 -0300
Subject: [PATCH 3/4] gcp-vpc-move-route.in: move stackdriver parameter
Move stackdriver parameter to the bottom of metadata list
---
heartbeat/gcp-vpc-move-route.in | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
index 5f4569baa..8d5bfff36 100644
--- a/heartbeat/gcp-vpc-move-route.in
+++ b/heartbeat/gcp-vpc-move-route.in
@@ -90,12 +90,6 @@ route
<parameters>
-<parameter name="stackdriver_logging" unique="0" required="0">
-<longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
-<shortdesc lang="en">Stackdriver-logging support</shortdesc>
-<content type="boolean" default="" />
-</parameter>
-
<parameter name="ip" unique="1" required="1">
<longdesc lang="en">
Floating IP address. Note that this IP must be chosen outside of all existing
@@ -128,6 +122,12 @@ Route name
<shortdesc lang="en">Route name</shortdesc>
<content type="string" default="${OCF_RESKEY_route_name_default}" />
</parameter>
+
+<parameter name="stackdriver_logging" unique="0" required="0">
+<longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
+<shortdesc lang="en">Stackdriver-logging support</shortdesc>
+<content type="boolean" default="" />
+</parameter>
</parameters>
<actions>
From e54565ec69f809b28337c0471ad0a9b26a64f8bf Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Fri, 20 Jul 2018 08:45:53 -0300
Subject: [PATCH 4/4] gcp-vpc-move-route.in: minor fixes
---
heartbeat/gcp-vpc-move-route.in | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
index 8d5bfff36..566a70f86 100644
--- a/heartbeat/gcp-vpc-move-route.in
+++ b/heartbeat/gcp-vpc-move-route.in
@@ -104,7 +104,7 @@ subnet ranges
Name of the VPC network
</longdesc>
<shortdesc lang="en">VPC network</shortdesc>
-<content type="string" default="${OCF_RESKEY_vpc_network_default}" />
+<content type="string" default="default" />
</parameter>
<parameter name="interface">
@@ -112,7 +112,7 @@ Name of the VPC network
Name of the network interface
</longdesc>
<shortdesc lang="en">Network interface name</shortdesc>
-<content type="string" default="${OCF_RESKEY_interface_default}" />
+<content type="string" default="eth0" />
</parameter>
<parameter name="route_name" unique="1">
@@ -120,7 +120,7 @@ Name of the network interface
Route name
</longdesc>
<shortdesc lang="en">Route name</shortdesc>
-<content type="string" default="${OCF_RESKEY_route_name_default}" />
+<content type="string" default="ra-%s" />
</parameter>
<parameter name="stackdriver_logging" unique="0" required="0">
@@ -138,7 +138,7 @@ Route name
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
-'''
+''' % os.path.basename(sys.argv[0])
class Context(object):
@@ -229,7 +229,7 @@ def validate(ctx):
GCP_API_URL_PREFIX, ctx.project, ctx.vpc_network)
ctx.interface = os.environ.get('OCF_RESKEY_interface', 'eth0')
ctx.route_name = os.environ.get(
- 'OCF_RESKEY_route_name', 'ra-%s' % os.environ['__SCRIPT_NAME'])
+ 'OCF_RESKEY_route_name', 'ra-%s' % os.path.basename(sys.argv[0]))
ctx.iproute = pyroute2.IPRoute()
atexit.register(ctx.iproute.close)
idxs = ctx.iproute.link_lookup(ifname=ctx.interface)
@@ -432,7 +432,8 @@ def main():
elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]:
sys.exit(ip_monitor(ctx))
else:
- usage = 'usage: $0 {start|stop|monitor|status|meta-data|validate-all}'
+ usage = 'usage: %s {start|stop|monitor|status|meta-data|validate-all}' % \
+ os.path.basename(sys.argv[0])
logging.error(usage)
sys.exit(OCF_ERR_UNIMPLEMENTED)

View File

@ -1,600 +0,0 @@
From 13ae97dec5754642af4d0d0edc03d9290e792e7f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 19 Jul 2018 16:12:35 +0200
Subject: [PATCH 1/5] Add Python library
---
heartbeat/Makefile.am | 3 +-
heartbeat/ocf.py | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 138 insertions(+), 1 deletion(-)
create mode 100644 heartbeat/ocf.py
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index d4750bf09..1333f8feb 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -185,7 +185,8 @@ ocfcommon_DATA = ocf-shellfuncs \
ora-common.sh \
mysql-common.sh \
nfsserver-redhat.sh \
- findif.sh
+ findif.sh \
+ ocf.py
# Legacy locations
hbdir = $(sysconfdir)/ha.d
diff --git a/heartbeat/ocf.py b/heartbeat/ocf.py
new file mode 100644
index 000000000..12be7a2a4
--- /dev/null
+++ b/heartbeat/ocf.py
@@ -0,0 +1,136 @@
+#
+# Copyright (c) 2016 Red Hat, Inc, Oyvind Albrigtsen
+# All Rights Reserved.
+#
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import sys, os, logging, syslog
+
+argv=sys.argv
+env=os.environ
+
+#
+# Common variables for the OCF Resource Agents supplied by
+# heartbeat.
+#
+
+OCF_SUCCESS=0
+OCF_ERR_GENERIC=1
+OCF_ERR_ARGS=2
+OCF_ERR_UNIMPLEMENTED=3
+OCF_ERR_PERM=4
+OCF_ERR_INSTALLED=5
+OCF_ERR_CONFIGURED=6
+OCF_NOT_RUNNING=7
+
+# Non-standard values.
+#
+# OCF does not include the concept of master/slave resources so we
+# need to extend it so we can discover a resource's complete state.
+#
+# OCF_RUNNING_MASTER:
+# The resource is in "master" mode and fully operational
+# OCF_FAILED_MASTER:
+# The resource is in "master" mode but in a failed state
+#
+# The extra two values should only be used during a probe.
+#
+# Probes are used to discover resources that were started outside of
+# the CRM and/or left behind if the LRM fails.
+#
+# They can be identified in RA scripts by checking for:
+# [ "${__OCF_ACTION}" = "monitor" -a "${OCF_RESKEY_CRM_meta_interval}" = "0" ]
+#
+# Failed "slaves" should continue to use: OCF_ERR_GENERIC
+# Fully operational "slaves" should continue to use: OCF_SUCCESS
+#
+OCF_RUNNING_MASTER=8
+OCF_FAILED_MASTER=9
+
+
+## Own logger handler that uses old-style syslog handler as otherwise
+## everything is sourced from /dev/syslog
+class SyslogLibHandler(logging.StreamHandler):
+ """
+ A handler class that correctly push messages into syslog
+ """
+ def emit(self, record):
+ syslog_level = {
+ logging.CRITICAL:syslog.LOG_CRIT,
+ logging.ERROR:syslog.LOG_ERR,
+ logging.WARNING:syslog.LOG_WARNING,
+ logging.INFO:syslog.LOG_INFO,
+ logging.DEBUG:syslog.LOG_DEBUG,
+ logging.NOTSET:syslog.LOG_DEBUG,
+ }[record.levelno]
+
+ msg = self.format(record)
+
+ # syslog.syslog can not have 0x00 character inside or exception
+ # is thrown
+ syslog.syslog(syslog_level, msg.replace("\x00","\n"))
+ return
+
+
+OCF_RESOURCE_INSTANCE = env.get("OCF_RESOURCE_INSTANCE")
+
+HA_DEBUG = env.get("HA_debug", 0)
+HA_DATEFMT = env.get("HA_DATEFMT", "%b %d %T ")
+HA_LOGFACILITY = env.get("HA_LOGFACILITY")
+HA_LOGFILE = env.get("HA_LOGFILE")
+HA_DEBUGLOG = env.get("HA_DEBUGLOG")
+
+log = logging.getLogger(os.path.basename(argv[0]))
+log.setLevel(logging.DEBUG)
+
+## add logging to stderr
+if sys.stdout.isatty():
+ seh = logging.StreamHandler(stream=sys.stderr)
+ if HA_DEBUG == 0:
+ seh.setLevel(logging.WARNING)
+ sehformatter = logging.Formatter('%(filename)s(%(OCF_RESOURCE_INSTANCE)s)[%(process)s]:\t%(asctime)s%(levelname)s: %(message)s', datefmt=HA_DATEFMT)
+ seh.setFormatter(sehformatter)
+ log.addHandler(seh)
+
+## add logging to syslog
+if HA_LOGFACILITY:
+ slh = SyslogLibHandler()
+ if HA_DEBUG == 0:
+ slh.setLevel(logging.WARNING)
+ slhformatter = logging.Formatter('%(levelname)s: %(message)s')
+ slh.setFormatter(slhformatter)
+ log.addHandler(slh)
+
+## add logging to file
+if HA_LOGFILE:
+ lfh = logging.FileHandler(HA_LOGFILE)
+ if HA_DEBUG == 0:
+ lfh.setLevel(logging.WARNING)
+ lfhformatter = logging.Formatter('%(filename)s(%(OCF_RESOURCE_INSTANCE)s)[%(process)s]:\t%(asctime)s%(levelname)s: %(message)s', datefmt=HA_DATEFMT)
+ lfh.setFormatter(lfhformatter)
+ log.addHandler(lfh)
+
+## add debug logging to file
+if HA_DEBUGLOG and HA_LOGFILE != HA_DEBUGLOG:
+ dfh = logging.FileHandler(HA_DEBUGLOG)
+ if HA_DEBUG == 0:
+ dfh.setLevel(logging.WARNING)
+ dfhformatter = logging.Formatter('%(filename)s(%(OCF_RESOURCE_INSTANCE)s)[%(process)s]:\t%(asctime)s%(levelname)s: %(message)s', datefmt=HA_DATEFMT)
+ dfh.setFormatter(dfhformatter)
+ log.addHandler(dfh)
+
+logger = logging.LoggerAdapter(log, {'OCF_RESOURCE_INSTANCE': OCF_RESOURCE_INSTANCE})
From 2ade8dbf1f6f6d3889dd1ddbf40858edf10fbdc7 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 19 Jul 2018 16:20:39 +0200
Subject: [PATCH 2/5] gcp-vpc-move-vip: use Python library
---
heartbeat/gcp-vpc-move-vip.in | 42 +++++++++++++++++++++---------------------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index af2080502..eb5bce6a8 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -22,6 +22,11 @@ import os
import sys
import time
+OCF_FUNCTIONS_DIR="%s/lib/heartbeat" % os.environ.get("OCF_ROOT")
+sys.path.append(OCF_FUNCTIONS_DIR)
+
+from ocf import *
+
try:
import googleapiclient.discovery
except ImportError:
@@ -40,10 +45,6 @@ else:
CONN = None
THIS_VM = None
ALIAS = None
-OCF_SUCCESS = 0
-OCF_ERR_GENERIC = 1
-OCF_ERR_CONFIGURED = 6
-OCF_NOT_RUNNING = 7
METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
METADATA = \
@@ -206,11 +207,11 @@ def gcp_alias_start(alias):
# If I already have the IP, exit. If it has an alias IP that isn't the VIP,
# then remove it
if my_alias == alias:
- logging.info(
+ logger.info(
'%s already has %s attached. No action required' % (THIS_VM, alias))
sys.exit(OCF_SUCCESS)
elif my_alias:
- logging.info('Removing %s from %s' % (my_alias, THIS_VM))
+ logger.info('Removing %s from %s' % (my_alias, THIS_VM))
set_alias(project, my_zone, THIS_VM, '')
# Loops through all hosts & remove the alias IP from the host that has it
@@ -223,7 +224,7 @@ def gcp_alias_start(alias):
host_zone = get_zone(project, host)
host_alias = get_alias(project, host_zone, host)
if alias == host_alias:
- logging.info(
+ logger.info(
'%s is attached to %s - Removing all alias IP addresses from %s' %
(alias, host, host))
set_alias(project, host_zone, host, '')
@@ -237,14 +238,14 @@ def gcp_alias_start(alias):
# Check the IP has been added
my_alias = get_localhost_alias()
if alias == my_alias:
- logging.info('Finished adding %s to %s' % (alias, THIS_VM))
+ logger.info('Finished adding %s to %s' % (alias, THIS_VM))
elif my_alias:
- logging.error(
+ logger.error(
'Failed to add IP. %s has an IP attached but it isn\'t %s' %
(THIS_VM, alias))
sys.exit(OCF_ERR_GENERIC)
else:
- logging.error('Failed to add IP address %s to %s' % (alias, THIS_VM))
+ logger.error('Failed to add IP address %s to %s' % (alias, THIS_VM))
sys.exit(OCF_ERR_GENERIC)
@@ -254,14 +255,14 @@ def gcp_alias_stop(alias):
project = get_metadata('project/project-id')
if my_alias == alias:
- logging.info('Removing %s from %s' % (my_alias, THIS_VM))
+ logger.info('Removing %s from %s' % (my_alias, THIS_VM))
set_alias(project, my_zone, THIS_VM, '')
def gcp_alias_status(alias):
my_alias = get_localhost_alias()
if alias == my_alias:
- logging.info('%s has the correct IP address attached' % THIS_VM)
+ logger.info('%s has the correct IP address attached' % THIS_VM)
else:
sys.exit(OCF_NOT_RUNNING)
@@ -275,25 +276,24 @@ def validate():
try:
CONN = googleapiclient.discovery.build('compute', 'v1')
except Exception as e:
- logging.error('Couldn\'t connect with google api: ' + str(e))
+ logger.error('Couldn\'t connect with google api: ' + str(e))
sys.exit(OCF_ERR_CONFIGURED)
try:
THIS_VM = get_metadata('instance/name')
except Exception as e:
- logging.error('Couldn\'t get instance name, is this running inside GCE?: ' + str(e))
+ logger.error('Couldn\'t get instance name, is this running inside GCE?: ' + str(e))
sys.exit(OCF_ERR_CONFIGURED)
ALIAS = os.environ.get('OCF_RESKEY_alias_ip')
if not ALIAS:
- logging.error('Missing alias_ip parameter')
+ logger.error('Missing alias_ip parameter')
sys.exit(OCF_ERR_CONFIGURED)
def configure_logs():
# Prepare logging
- logging.basicConfig(
- format='gcp:alias - %(levelname)s - %(message)s', level=logging.INFO)
+ global logger
logging.getLogger('googleapiclient').setLevel(logging.WARN)
logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging')
if logging_env:
@@ -307,10 +307,10 @@ def configure_logs():
handler.setLevel(logging.INFO)
formatter = logging.Formatter('gcp:alias "%(message)s"')
handler.setFormatter(formatter)
- root_logger = logging.getLogger()
- root_logger.addHandler(handler)
+ log.addHandler(handler)
+ logger = logging.LoggerAdapter(log, {'OCF_RESOURCE_INSTANCE': OCF_RESOURCE_INSTANCE})
except ImportError:
- logging.error('Couldn\'t import google.cloud.logging, '
+ logger.error('Couldn\'t import google.cloud.logging, '
'disabling Stackdriver-logging support')
@@ -331,7 +331,7 @@ def main():
elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]:
gcp_alias_status(ALIAS)
else:
- logging.error('no such function %s' % str(sys.argv[1]))
+ logger.error('no such function %s' % str(sys.argv[1]))
if __name__ == "__main__":
From 9e9ea17c42df27d4c13fed9badba295df48437f2 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 20 Jul 2018 13:27:42 +0200
Subject: [PATCH 3/5] gcp-vpc-move-vip: moved alias-parameters to top of
metadata
---
heartbeat/gcp-vpc-move-vip.in | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index eb5bce6a8..ba61193b6 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -55,6 +55,16 @@ METADATA = \
<longdesc lang="en">Floating IP Address on Google Cloud Platform - Using Alias IP address functionality to attach a secondary IP address to a running instance</longdesc>
<shortdesc lang="en">Floating IP Address on Google Cloud Platform</shortdesc>
<parameters>
+ <parameter name="alias_ip" unique="1" required="1">
+ <longdesc lang="en">IP Address to be added including CIDR. E.g 192.168.0.1/32</longdesc>
+ <shortdesc lang="en">IP Address to be added including CIDR. E.g 192.168.0.1/32</shortdesc>
+ <content type="string" default="" />
+ </parameter>
+ <parameter name="alias_range_name" unique="1" required="0">
+ <longdesc lang="en">Subnet name for the Alias IP</longdesc>
+ <shortdesc lang="en">Subnet name for the Alias IP</shortdesc>
+ <content type="string" default="" />
+ </parameter>
<parameter name="hostlist" unique="1" required="0">
<longdesc lang="en">List of hosts in the cluster</longdesc>
<shortdesc lang="en">Host list</shortdesc>
@@ -65,16 +75,6 @@ METADATA = \
<shortdesc lang="en">Stackdriver-logging support</shortdesc>
<content type="boolean" default="" />
</parameter>
- <parameter name="alias_ip" unique="1" required="1">
- <longdesc lang="en">IP Address to be added including CIDR. E.g 192.168.0.1/32</longdesc>
- <shortdesc lang="en">IP Address to be added including CIDR. E.g 192.168.0.1/32</shortdesc>
- <content type="string" default="" />
- </parameter>
- <parameter name="alias_range_name" unique="1" required="0">
- <longdesc lang="en">Subnet name for the Alias IP2</longdesc>
- <shortdesc lang="en">Subnet name for the Alias IP</shortdesc>
- <content type="string" default="" />
- </parameter>
</parameters>
<actions>
<action name="start" timeout="300" />
From 716d69040dba7a769efb5a60eca934fdd65585f2 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 23 Jul 2018 11:17:00 +0200
Subject: [PATCH 4/5] gcp-vpc-move-route: use Python library
---
heartbeat/gcp-vpc-move-route.in | 58 ++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 30 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
index 566a70f86..125289d86 100644
--- a/heartbeat/gcp-vpc-move-route.in
+++ b/heartbeat/gcp-vpc-move-route.in
@@ -39,6 +39,11 @@ import os
import sys
import time
+OCF_FUNCTIONS_DIR="%s/lib/heartbeat" % os.environ.get("OCF_ROOT")
+sys.path.append(OCF_FUNCTIONS_DIR)
+
+from ocf import *
+
try:
import googleapiclient.discovery
import pyroute2
@@ -55,12 +60,6 @@ else:
import urllib2 as urlrequest
-OCF_SUCCESS = 0
-OCF_ERR_GENERIC = 1
-OCF_ERR_UNIMPLEMENTED = 3
-OCF_ERR_PERM = 4
-OCF_ERR_CONFIGURED = 6
-OCF_NOT_RUNNING = 7
GCP_API_URL_PREFIX = 'https://www.googleapis.com/compute/v1'
METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
@@ -199,18 +198,18 @@ def get_metadata(metadata_key, params=None, timeout=None):
def validate(ctx):
if os.geteuid() != 0:
- logging.error('You must run this agent as root')
+ logger.error('You must run this agent as root')
sys.exit(OCF_ERR_PERM)
try:
ctx.conn = googleapiclient.discovery.build('compute', 'v1')
except Exception as e:
- logging.error('Couldn\'t connect with google api: ' + str(e))
+ logger.error('Couldn\'t connect with google api: ' + str(e))
sys.exit(OCF_ERR_CONFIGURED)
ctx.ip = os.environ.get('OCF_RESKEY_ip')
if not ctx.ip:
- logging.error('Missing ip parameter')
+ logger.error('Missing ip parameter')
sys.exit(OCF_ERR_CONFIGURED)
try:
@@ -218,7 +217,7 @@ def validate(ctx):
ctx.zone = get_metadata('instance/zone').split('/')[-1]
ctx.project = get_metadata('project/project-id')
except Exception as e:
- logging.error(
+ logger.error(
'Instance information not found. Is this a GCE instance ?: %s', str(e))
sys.exit(OCF_ERR_CONFIGURED)
@@ -234,7 +233,7 @@ def validate(ctx):
atexit.register(ctx.iproute.close)
idxs = ctx.iproute.link_lookup(ifname=ctx.interface)
if not idxs:
- logging.error('Network interface not found')
+ logger.error('Network interface not found')
sys.exit(OCF_ERR_CONFIGURED)
ctx.iface_idx = idxs[0]
@@ -246,7 +245,7 @@ def check_conflicting_routes(ctx):
response = request.execute()
route_list = response.get('items', None)
if route_list:
- logging.error(
+ logger.error(
'Conflicting unnmanaged routes for destination %s/32 in VPC %s found : %s',
ctx.ip, ctx.vpc_network, str(route_list))
sys.exit(OCF_ERR_CONFIGURED)
@@ -258,7 +257,7 @@ def route_release(ctx):
def ip_monitor(ctx):
- logging.info('IP monitor: checking local network configuration')
+ logger.info('IP monitor: checking local network configuration')
def address_filter(addr):
for attr in addr['attrs']:
@@ -271,12 +270,12 @@ def ip_monitor(ctx):
route = ctx.iproute.get_addr(
index=ctx.iface_idx, match=address_filter)
if not route:
- logging.warn(
+ logger.warning(
'The floating IP %s is not locally configured on this instance (%s)',
ctx.ip, ctx.instance)
return OCF_NOT_RUNNING
- logging.debug(
+ logger.debug(
'The floating IP %s is correctly configured on this instance (%s)',
ctx.ip, ctx.instance)
return OCF_SUCCESS
@@ -287,7 +286,7 @@ def ip_release(ctx):
def ip_and_route_start(ctx):
- logging.info('Bringing up the floating IP %s', ctx.ip)
+ logger.info('Bringing up the floating IP %s', ctx.ip)
# Add a new entry in the routing table
# If the route entry exists and is pointing to another instance, take it over
@@ -322,7 +321,7 @@ def ip_and_route_start(ctx):
request.execute()
except googleapiclient.errors.HttpError as e:
if e.resp.status == 404:
- logging.error('VPC network not found')
+ logger.error('VPC network not found')
sys.exit(OCF_ERR_CONFIGURED)
else:
raise
@@ -336,11 +335,11 @@ def ip_and_route_start(ctx):
ctx.iproute.addr('add', index=ctx.iface_idx, address=ctx.ip, mask=32)
ctx.iproute.link('set', index=ctx.iface_idx, state='up')
- logging.info('Successfully brought up the floating IP %s', ctx.ip)
+ logger.info('Successfully brought up the floating IP %s', ctx.ip)
def route_monitor(ctx):
- logging.info('GCP route monitor: checking route table')
+ logger.info('GCP route monitor: checking route table')
# Ensure that there is no route that we are not aware of that is also handling our IP
check_conflicting_routes
@@ -360,39 +359,38 @@ def route_monitor(ctx):
instance_url = '%s/projects/%s/zones/%s/instances/%s' % (
GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance)
if routed_to_instance != instance_url:
- logging.warn(
+ logger.warning(
'The floating IP %s is not routed to this instance (%s) but to instance %s',
ctx.ip, ctx.instance, routed_to_instance.split('/')[-1])
return OCF_NOT_RUNNING
- logging.debug(
+ logger.debug(
'The floating IP %s is correctly routed to this instance (%s)',
ctx.ip, ctx.instance)
return OCF_SUCCESS
def ip_and_route_stop(ctx):
- logging.info('Bringing down the floating IP %s', ctx.ip)
+ logger.info('Bringing down the floating IP %s', ctx.ip)
# Delete the route entry
# If the route entry exists and is pointing to another instance, don't touch it
if route_monitor(ctx) == OCF_NOT_RUNNING:
- logging.info(
+ logger.info(
'The floating IP %s is already not routed to this instance (%s)',
ctx.ip, ctx.instance)
else:
route_release(ctx)
if ip_monitor(ctx) == OCF_NOT_RUNNING:
- logging.info('The floating IP %s is already down', ctx.ip)
+ logger.info('The floating IP %s is already down', ctx.ip)
else:
ip_release(ctx)
def configure_logs(ctx):
# Prepare logging
- logging.basicConfig(
- format='gcp:route - %(levelname)s - %(message)s', level=logging.INFO)
+ global logger
logging.getLogger('googleapiclient').setLevel(logging.WARN)
logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging')
if logging_env:
@@ -406,10 +404,10 @@ def configure_logs(ctx):
handler.setLevel(logging.INFO)
formatter = logging.Formatter('gcp:route "%(message)s"')
handler.setFormatter(formatter)
- root_logger = logging.getLogger()
- root_logger.addHandler(handler)
+ log.addHandler(handler)
+ logger = logging.LoggerAdapter(log, {'OCF_RESOURCE_INSTANCE': OCF_RESOURCE_INSTANCE})
except ImportError:
- logging.error('Couldn\'t import google.cloud.logging, '
+ logger.error('Couldn\'t import google.cloud.logging, '
'disabling Stackdriver-logging support')
@@ -434,7 +432,7 @@ def main():
else:
usage = 'usage: %s {start|stop|monitor|status|meta-data|validate-all}' % \
os.path.basename(sys.argv[0])
- logging.error(usage)
+ logger.error(usage)
sys.exit(OCF_ERR_UNIMPLEMENTED)
From 6ec7e87693a51cbb16a1822e6d15f1dbfc11f8e6 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 23 Jul 2018 15:55:48 +0200
Subject: [PATCH 5/5] Python: add logging.basicConfig() to support background
logging
---
heartbeat/ocf.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/heartbeat/ocf.py b/heartbeat/ocf.py
index 12be7a2a4..36e7ccccd 100644
--- a/heartbeat/ocf.py
+++ b/heartbeat/ocf.py
@@ -94,6 +94,7 @@ def emit(self, record):
HA_LOGFILE = env.get("HA_LOGFILE")
HA_DEBUGLOG = env.get("HA_DEBUGLOG")
+logging.basicConfig()
log = logging.getLogger(os.path.basename(argv[0]))
log.setLevel(logging.DEBUG)

View File

@ -1,25 +0,0 @@
From 6bd66e337922403cb2dd3a8715ae401def8c0437 Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Thu, 19 Jul 2018 13:00:58 -0300
Subject: [PATCH] gcp-vpc-move-vip.in: filter call to aggregatedList
Don't list all the instances in the project, filter only the one we are
interested in.
---
heartbeat/gcp-vpc-move-vip.in | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index 9fc87242f..af2080502 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -169,7 +169,8 @@ def get_localhost_alias():
def get_zone(project, instance):
- request = CONN.instances().aggregatedList(project=project)
+ fl = 'name="%s"' % instance
+ request = CONN.instances().aggregatedList(project=project, filter=fl)
while request is not None:
response = request.execute()
zones = response.get('items', {})

View File

@ -20,4 +20,4 @@ diff -uNr a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
+ sys.path.insert(0, '/usr/lib/resource-agents/bundled/gcp')
import googleapiclient.discovery
import pyroute2
try:
except ImportError:

View File

@ -1,28 +0,0 @@
diff -uNr a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
--- a/heartbeat/gcp-vpc-move-route.in 2018-07-30 16:56:23.486368292 +0200
+++ b/heartbeat/gcp-vpc-move-route.in 2018-07-30 17:11:54.189715666 +0200
@@ -125,8 +125,8 @@
</parameter>
<parameter name="stackdriver_logging" unique="0" required="0">
-<longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
-<shortdesc lang="en">Stackdriver-logging support</shortdesc>
+<longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging. Using stackdriver logging requires additional libraries (google-cloud-logging).</longdesc>
+<shortdesc lang="en">Stackdriver-logging support. Requires additional libraries (google-cloud-logging).</shortdesc>
<content type="boolean" default="" />
</parameter>
</parameters>
diff -uNr a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
--- a/heartbeat/gcp-vpc-move-vip.in 2018-07-30 16:56:23.486368292 +0200
+++ b/heartbeat/gcp-vpc-move-vip.in 2018-07-30 17:06:17.260686483 +0200
@@ -72,8 +72,8 @@
<content type="string" default="" />
</parameter>
<parameter name="stackdriver_logging" unique="0" required="0">
- <longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging</longdesc>
- <shortdesc lang="en">Stackdriver-logging support</shortdesc>
+ <longdesc lang="en">If enabled (set to true), IP failover logs will be posted to stackdriver logging. Using stackdriver logging requires additional libraries (google-cloud-logging).</longdesc>
+ <shortdesc lang="en">Stackdriver-logging support. Requires additional libraries (google-cloud-logging).</shortdesc>
<content type="boolean" default="" />
</parameter>
</parameters>

View File

@ -1,66 +0,0 @@
diff -uNr a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
--- a/heartbeat/IPaddr2 2018-06-27 10:29:08.000000000 +0200
+++ b/heartbeat/IPaddr2 2018-06-29 16:01:50.538797379 +0200
@@ -80,6 +80,7 @@
OCF_RESKEY_arp_bg_default=true
OCF_RESKEY_run_arping_default=false
OCF_RESKEY_preferred_lft_default="forever"
+OCF_RESKEY_monitor_retries="1"
: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}}
@@ -92,6 +93,7 @@
: ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}}
: ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}}
: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}}
+: ${OCF_RESKEY_monitor_retries=${OCF_RESKEY_monitor_retries_default}}
#######################################################################
SENDARP=$HA_BIN/send_arp
@@ -368,6 +370,18 @@
<content type="string" default="${OCF_RESKEY_preferred_lft_default}"/>
</parameter>
+<parameter name="monitor_retries">
+<longdesc lang="en">
+Set number of retries to find interface in monitor-action.
+
+ONLY INCREASE IF THE AGENT HAS ISSUES FINDING YOUR NIC DURING THE
+MONITOR-ACTION. A HIGHER SETTING MAY LEAD TO DELAYS IN DETECTING
+A FAILURE.
+</longdesc>
+<shortdesc lang="en">Number of retries to find interface in monitor-action</shortdesc>
+<content type="string" default="${OCF_RESKEY_monitor_retries_default}"/>
+</parameter>
+
</parameters>
<actions>
<action name="start" timeout="20s" />
@@ -536,15 +550,26 @@
find_interface() {
local ipaddr="$1"
local netmask="$2"
+ local iface=""
#
# List interfaces but exclude FreeS/WAN ipsecN virtual interfaces
#
- local iface="`$IP2UTIL -o -f $FAMILY addr show \
+ for i in $(seq 1 $OCF_RESKEY_monitor_retries); do
+ iface="`$IP2UTIL -o -f $FAMILY addr show \
| grep "\ $ipaddr/$netmask" \
| cut -d ' ' -f2 \
| grep -v '^ipsec[0-9][0-9]*$'`"
+ if [ -n "$iface" ]; then
+ break
+ fi
+
+ if [ $OCF_RESKEY_monitor_retries -gt 1 ]; then
+ sleep 1
+ fi
+ done
+
echo "$iface"
return 0
}

View File

@ -1,42 +0,0 @@
From 12ef5a343158bbfaa5233468a0506074fceaac81 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 21 Aug 2018 12:14:49 +0200
Subject: [PATCH] LVM-activate: return OCF_ERR_CONFIGURED for incorrect
vg_access_mode
---
heartbeat/LVM-activate | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index fbd058288..55e36a2d2 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -448,7 +448,7 @@ lvm_validate() {
;;
*)
ocf_exit_reason "You specified an invalid value for vg_access_mode: $VG_access_mode"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
;;
esac
@@ -771,7 +771,6 @@ lvm_stop() {
return $OCF_SUCCESS
fi
- lvm_validate
ocf_log info "Deactivating ${vol}"
case ${VG_access_mode} in
@@ -788,8 +787,8 @@ lvm_stop() {
tagging_deactivate
;;
*)
- ocf_exit_reason "VG [${VG}] is not properly configured in cluster. It's unsafe!"
- exit $OCF_ERR_CONFIGURED
+ ocf_log err "VG [${VG}] is not properly configured in cluster. It's unsafe!"
+ exit $OCF_SUCCESS
;;
esac

View File

@ -1,137 +0,0 @@
From 792077bf2994e2e582ccfb0768f3186517de9025 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 21 Sep 2018 12:00:07 +0200
Subject: [PATCH] LVM-activate: fixes
- read parameters for start/stop/monitor-actions
- fail during monitor-action when run with incorrect access_mode
---
heartbeat/LVM-activate | 44 ++++++++++++++++++++++++++----------------
1 file changed, 27 insertions(+), 17 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 55e36a2d2..f46932c1c 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -56,6 +56,7 @@ LV=${OCF_RESKEY_lvname}
# 3: vg has system_id (new)
# 4: vg has tagging (old)
VG_access_mode=${OCF_RESKEY_vg_access_mode}
+VG_access_mode_num=0
# Activate LV(s) with "shared" lock for cluster fs
# or "exclusive" lock for local fs
@@ -176,7 +177,9 @@ END
# 2: vg is clustered - clvmd (old)
# 3: vg has system_id (new)
# 4: vg has tagging (old)
-get_VG_access_mode() {
+
+get_VG_access_mode_num()
+{
local access_mode
local vg_locktype
local vg_clustered
@@ -415,11 +418,8 @@ tagging_check()
return $OCF_SUCCESS
}
-lvm_validate() {
- local lv_count
- local mode
-
- # Parameters checking
+read_parameters()
+{
if [ -z "$VG" ]
then
ocf_exit_reason "You must identify the volume group name!"
@@ -435,22 +435,30 @@ lvm_validate() {
# Convert VG_access_mode from string to index
case ${VG_access_mode} in
lvmlockd)
- VG_access_mode=1
+ VG_access_mode_num=1
;;
clvmd)
- VG_access_mode=2
+ VG_access_mode_num=2
;;
system_id)
- VG_access_mode=3
+ VG_access_mode_num=3
;;
tagging)
- VG_access_mode=4
+ VG_access_mode_num=4
;;
*)
+ # dont exit with error-code here or nodes will get fenced on
+ # e.g. "pcs resource create"
ocf_exit_reason "You specified an invalid value for vg_access_mode: $VG_access_mode"
- exit $OCF_ERR_CONFIGURED
;;
esac
+}
+
+lvm_validate() {
+ local lv_count
+ local mode
+
+ read_parameters
check_binary pgrep
# Every LVM command is just symlink to lvm binary
@@ -471,9 +479,9 @@ lvm_validate() {
# Get the access mode from VG metadata and check if it matches the input
# value. Skip to check "tagging" mode because there's no reliable way to
# automatically check if "tagging" mode is being used.
- get_VG_access_mode
+ get_VG_access_mode_num
mode=$?
- if [ $VG_access_mode -ne 4 ] && [ $mode -ne $VG_access_mode ]; then
+ if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
exit $OCF_ERR_ARGS
fi
@@ -495,8 +503,8 @@ lvm_validate() {
fi
fi
- # VG_access_mode specific checking goes here
- case ${VG_access_mode} in
+ # VG_access_mode_num specific checking goes here
+ case ${VG_access_mode_num} in
1)
lvmlockd_check
;;
@@ -731,7 +739,7 @@ lvm_start() {
[ -z ${LV} ] && vol=${VG} || vol=${VG}/${LV}
ocf_log info "Activating ${vol}"
- case ${VG_access_mode} in
+ case ${VG_access_mode_num} in
1)
lvmlockd_activate
;;
@@ -773,7 +781,7 @@ lvm_stop() {
ocf_log info "Deactivating ${vol}"
- case ${VG_access_mode} in
+ case ${VG_access_mode_num} in
1)
lvmlockd_deactivate
;;
@@ -811,9 +819,11 @@ start)
lvm_start
;;
stop)
+ read_parameters
lvm_stop
;;
monitor)
+ lvm_validate
lvm_status
;;
validate-all)

View File

@ -1,54 +0,0 @@
From 2c219dd000d7f5edd3765a1c6bc5f3e6efb7208b Mon Sep 17 00:00:00 2001
From: Paul Mezzanini <pfmeec@rit.edu>
Date: Fri, 1 Jun 2018 11:58:06 -0400
Subject: [PATCH] Volume groups and logical volumes "-" in their name get
mangled with double dashes in dmsetup. Switching to wc and just counting
lines while depending on the vgname + lvname filter in the dmsetup call gets
around the issue with dmsetup outputting correctly but grep failing due to
the name mangle.
Logic for both test cases and dmsetup calls changed so they match too. No reason
to not have matching tests.
This is AllBad but there isn't a better way that I'm aware of yet.
---
heartbeat/LVM-activate | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 60e656178..fbd058288 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -692,20 +692,27 @@ tagging_deactivate() {
# lvs/vgs when the metadata is somehow inconsistent.
#
# So, we have to make compromise that the VG is assumably active if any LV of the VG is active.
+#
+# Paul:
+# VGS + LVS with "-" in their name get mangled with double dashes in dmsetup.
+# Switching to wc and just counting lines while depending on the vgname + lvname filter
+# in dmsetup gets around the issue with dmsetup reporting correctly but grep failing.
+#
+# Logic for both test cases and dmsetup calls changed so they match too.
+#
+# This is AllBad but there isn't a better way that I'm aware of yet.
lvm_status() {
local dm_count
if [ -n "${LV}" ]; then
# dmsetup ls? It cannot accept device name. It's
# too heavy to list all DM devices.
- dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" \
- | grep -Eq "${VG}-+${LV}"
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" | wc -l )
else
- dm_count=$(dmsetup --noheadings info -c -S "vgname=${VG}" 2>/dev/null | grep -c "${VG}-")
- test $dm_count -gt 0
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG}" 2>/dev/null | wc -l )
fi
- if [ $? -ne 0 ]; then
+ if [ $dm_count -eq 0 ]; then
return $OCF_NOT_RUNNING
fi

View File

@ -1,22 +0,0 @@
From 5a664525a20d3d5094912322be4faac668e4920e Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 13 Aug 2018 14:30:50 +0200
Subject: [PATCH] LVM: fix missing dash
---
heartbeat/lvm-tag.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/lvm-tag.sh b/heartbeat/lvm-tag.sh
index 71f53b20b..fe17e0f21 100644
--- a/heartbeat/lvm-tag.sh
+++ b/heartbeat/lvm-tag.sh
@@ -147,7 +147,7 @@ lvm_init() {
if [ -n "$OCF_RESKEY_tag" ]; then
OUR_TAG=$OCF_RESKEY_tag
fi
- vgchange_activate_options="aly --config activation{volume_list=[\"@${OUR_TAG}\"]}"
+ vgchange_activate_options="-aly --config activation{volume_list=[\"@${OUR_TAG}\"]}"
vgchange_deactivate_options="-aln"
}

View File

@ -1,72 +0,0 @@
From c414259728610f95243d9e34289fefd596b0ac8b Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 22 Jun 2018 15:37:36 +0200
Subject: [PATCH] LVM: add "volume_group_check_only" parameter to avoid
timeouts in some cases
---
heartbeat/LVM | 10 ++++++++++
heartbeat/lvm-tag.sh | 24 +++++++++++++-----------
2 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/heartbeat/LVM b/heartbeat/LVM
index d3cd1a14..10f7186d 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -103,6 +103,16 @@ logical volumes.
<content type="string" default="false" />
</parameter>
+<parameter name="volume_group_check_only" unique="0" required="0">
+<longdesc lang="en">
+If set, only the volume group will be checked during monitoring.
+
+WARNING: ONLY USE IF YOU HAVE ISSUES WITH TIMEOUTS!
+</longdesc>
+<shortdesc lang="en">Only check volume group during monitoring</shortdesc>
+<content type="string" default="false" />
+</parameter>
+
</parameters>
<actions>
diff --git a/heartbeat/lvm-tag.sh b/heartbeat/lvm-tag.sh
index 71f53b20..170426e8 100644
--- a/heartbeat/lvm-tag.sh
+++ b/heartbeat/lvm-tag.sh
@@ -160,19 +160,21 @@ lvm_validate_all() {
lvm_status() {
local rc=0
- # If vg is running, make sure the correct tag is present. Otherwise we
- # can not guarantee exclusive activation.
- if ! check_tags; then
- ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
- rc=$OCF_ERR_GENERIC
- fi
+ if ! ocf_is_true "$OCF_RESKEY_volume_group_check_only"; then
+ # If vg is running, make sure the correct tag is present. Otherwise we
+ # can not guarantee exclusive activation.
+ if ! check_tags; then
+ ocf_exit_reason "WARNING: $OCF_RESKEY_volgrpname is active without the cluster tag, \"$OUR_TAG\""
+ rc=$OCF_ERR_GENERIC
+ fi
- # make sure the environment for tags activation is still valid
- if ! verify_tags_environment; then
- rc=$OCF_ERR_GENERIC
+ # make sure the environment for tags activation is still valid
+ if ! verify_tags_environment; then
+ rc=$OCF_ERR_GENERIC
+ fi
+ # let the user know if their initrd is older than lvm.conf.
+ check_initrd_warning
fi
- # let the user know if their initrd is older than lvm.conf.
- check_initrd_warning
return $rc
}
--
2.17.1

View File

@ -1,126 +0,0 @@
diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
--- a/heartbeat/VirtualDomain 2018-06-29 14:05:02.000000000 +0200
+++ b/heartbeat/VirtualDomain 2018-07-03 14:01:25.892705351 +0200
@@ -26,6 +26,9 @@
OCF_RESKEY_CRM_meta_timeout_default=90000
OCF_RESKEY_save_config_on_stop_default=false
OCF_RESKEY_sync_config_on_stop_default=false
+OCF_RESKEY_backingfile_default=""
+OCF_RESKEY_stateless_default="false"
+OCF_RESKEY_copyindirs_default=""
: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}}
: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}}
@@ -36,6 +39,9 @@
: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}}
: ${OCF_RESKEY_save_config_on_stop=${OCF_RESKEY_save_config_on_stop_default}}
: ${OCF_RESKEY_sync_config_on_stop=${OCF_RESKEY_sync_config_on_stop_default}}
+: ${OCF_RESKEY_backingfile=${OCF_RESKEY_backingfile_default}}
+: ${OCF_RESKEY_stateless=${OCF_RESKEY_stateless_default}}
+: ${OCF_RESKEY_copyindirs=${OCF_RESKEY_copyindirs_default}}
if ocf_is_true ${OCF_RESKEY_sync_config_on_stop}; then
OCF_RESKEY_save_config_on_stop="true"
@@ -271,6 +277,35 @@
<content type="string" default=""/>
</parameter>
+<parameter name="backingfile" unique="0" required="0">
+<longdesc lang="en">
+When the VM is used in Copy-On-Write mode, this is the backing file to use (with its full path).
+The VMs image will be created based on this backing file.
+This backing file will never be changed during the life of the VM.
+</longdesc>
+<shortdesc lang="en">If the VM is wanted to work with Copy-On-Write mode, this is the backing file to use (with its full path)</shortdesc>
+<content type="string" default="${OCF_RESKEY_backingfile_default}" />
+</parameter>
+
+<parameter name="stateless" unique="0" required="0">
+<longdesc lang="en">
+If set to true and backingfile is defined, the start of the VM will systematically create a new qcow2 based on
+the backing file, therefore the VM will always be stateless. If set to false, the start of the VM will use the
+COW (&lt;vmname&gt;.qcow2) file if it exists, otherwise the first start will create a new qcow2 based on the backing
+file given as backingfile.
+</longdesc>
+<shortdesc lang="en">If set to true, the (&lt;vmname&gt;.qcow2) file will be re-created at each start, based on the backing file (if defined)</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_stateless_default}" />
+</parameter>
+
+<parameter name="copyindirs" unique="0" required="0">
+<longdesc lang="en">
+List of directories for the virt-copy-in before booting the VM. Used only in stateless mode.
+</longdesc>
+<shortdesc lang="en">List of directories for the virt-copy-in before booting the VM stateless mode.</shortdesc>
+<content type="string" default="${OCF_RESKEY_copyindirs_default}" />
+</parameter>
+
<parameter name="shutdown_mode">
<longdesc lang="en">
virsh shutdown method to use. Please verify that it is supported by your virsh toolsed with 'virsh help shutdown'
@@ -545,11 +580,49 @@
# is restored to an 'undefined' state before creating.
verify_undefined
- virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
- rc=$?
- if [ $rc -ne 0 ]; then
- ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
- return $OCF_ERR_GENERIC
+ if [ -z "${OCF_RESKEY_backingfile}" ]; then
+ virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
+ return $OCF_ERR_GENERIC
+ fi
+ else
+ if ocf_is_true "${OCF_RESKEY_stateless}" || [ ! -s "${OCF_RESKEY_config%%.*}.qcow2" ]; then
+ # Create the Stateless image
+ dirconfig=`dirname ${OCF_RESKEY_config}`
+ qemu-img create -f qcow2 -b ${OCF_RESKEY_backingfile} ${OCF_RESKEY_config%%.*}.qcow2
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed qemu-img create ${DOMAIN_NAME} with backing file ${OCF_RESKEY_backingfile}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ virsh define ${OCF_RESKEY_config}
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ if [ -n "${OCF_RESKEY_copyindirs}" ]; then
+ # Inject copyindirs directories and files
+ virt-copy-in -d ${DOMAIN_NAME} ${OCF_RESKEY_copyindirs} /
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed on virt-copy-in command ${DOMAIN_NAME}."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+ else
+ virsh define ${OCF_RESKEY_config}
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to define virtual domain ${DOMAIN_NAME}."
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ virsh $VIRSH_OPTIONS start ${DOMAIN_NAME}
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
+ return $OCF_ERR_GENERIC
+ fi
fi
while ! VirtualDomain_monitor; do
@@ -926,6 +999,11 @@
ocf_exit_reason "migration_downtime has to be a decimal value"
return $OCF_ERR_CONFIGURED
fi
+
+ if ocf_is_true "${OCF_RESKEY_stateless}" && [ -z "${OCF_RESKEY_backingfile}" ]; then
+ ocf_exit_reason "Stateless functionality can't be achieved without a backing file."
+ return $OCF_ERR_CONFIGURED
+ fi
}
VirtualDomain_getconfig() {

View File

@ -1,275 +0,0 @@
From e45d0ca9ccc3d5fbe94372f40bedb7559dc9530a Mon Sep 17 00:00:00 2001
From: "feng.changf1" <feng.changf1@alibaba-inc.com>
Date: Tue, 24 Jul 2018 15:08:45 +0800
Subject: [PATCH] Add Aliyun vpc-move-ip agent.
---
heartbeat/aliyun-vpc-move-ip | 258 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 258 insertions(+)
create mode 100644 heartbeat/aliyun-vpc-move-ip
diff --git a/heartbeat/aliyun-vpc-move-ip b/heartbeat/aliyun-vpc-move-ip
new file mode 100644
index 000000000..bc97822a8
--- /dev/null
+++ b/heartbeat/aliyun-vpc-move-ip
@@ -0,0 +1,258 @@
+#!/bin/bash
+#
+# OCF resource agent to move an IP address within a VPC in the Aliyun
+# Based on code of Markus Guertler (GitHub AWS-VPC-move-IP)
+# Based on code of Adam Gandelman (GitHub ec2-resource-agents/elasticip)
+#
+
+###############################################################################
+# For testing purposes delete OCF_ROOT after testing
+OCF_ROOT=/usr/lib/ocf/
+#
+# INIT
+#: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
+#if [ -f ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs ]; then
+# . ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs
+#fi
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
+. ${OCF_FUNCTIONS}
+: ${__OCF_ACTION=$1}
+export HOME=/root
+#######################################################################
+
+
+USAGE="usage: $0 {start|stop|status|meta-data}";
+###############################################################################
+
+
+###############################################################################
+#
+# Functions
+#
+###############################################################################
+
+
+metadata() {
+cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="vpc-move-ip">
+<version>2.0</version>
+<longdesc lang="en">
+Resource Agent to move IP addresses within a VPC of the Aliyun Webservices ECS
+by changing an entry in an specific routing table
+</longdesc>
+<shortdesc lang="en">Move IP within a APC of the Aliyun ECS</shortdesc>
+<parameters>
+<parameter name="address" required="1">
+<longdesc lang="en">
+VPC private IP address
+</longdesc>
+<shortdesc lang="en">vpc ip</shortdesc>
+<content type="string" default="" />
+</parameter>
+<parameter name="routing_table" required="1">
+<longdesc lang="en">
+Name of the routing table, where the route for the IP address should be changed, i.e. rtb-...
+</longdesc>
+<shortdesc lang="en">routing table name</shortdesc>
+<content type="string" default="" />
+</parameter>
+<parameter name="interface" required="1">
+<longdesc lang="en">
+Name of the network interfacen, i.e. eth0
+</longdesc>
+<shortdesc lang="en">network interface name</shortdesc>
+<content type="string" default="eth0" />
+</parameter>
+<parameter name="profile" required="0">
+<longdesc lang="en">
+Valid Aliyun CLI profile name
+</longdesc>
+<shortdesc lang="en">profile name</shortdesc>
+<content type="string" default="default" />
+</parameter>
+</parameters>
+<actions>
+<action name="start" timeout="180" />
+<action name="stop" timeout="180" />
+<action name="monitor" depth="0" timeout="30" interval="30" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+debugger() {
+ ocf_log info "DEBUG: $1"
+}
+
+ecs_ip_validate() {
+ debugger "function: validate"
+
+ # IP address
+ [[ -z "$OCF_RESKEY_address" ]] && ocf_log error "IP address parameter not set $OCF_RESKEY_ADDRESS!" && exit $OCF_ERR_CONFIGURED
+
+ # Network Interface
+ [[ -z "$OCF_RESKEY_interface" ]] && ocf_log error "Network interface parameter not set $OCF_RESKEY_INTERFACE!" && exit $OCF_ERR_CONFIGURED
+
+ # Routing Table
+ [[ -z "$OCF_RESKEY_routing_table" ]] && ocf_log error "Routing table parameter not set $OCF_RESKEY_ROUTING_TABLE!" && exit $OCF_ERR_CONFIGURED
+
+ ECS_INSTANCE_ID="$(curl -s http://100.100.100.200/latest/meta-data/instance-id)"
+
+ if [ -z "${ECS_INSTANCE_ID}" ]; then
+ ocf_exit_reason "Instance ID not found. Is this a ECS instance?"
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+ecs_ip_monitor() {
+ ecs_ip_validate
+ debugger "function: ecsip_monitor: check routing table"
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ debugger "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+ if [ -z "$ROUTE_TO_INSTANCE" ]; then
+ ROUTE_TO_INSTANCE="<unknown>"
+ fi
+
+ [[ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]] && debugger "not routed to this instance ($ECS_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE" && return $OCF_NOT_RUNNING
+ cmd="ping -W 1 -c 1 $OCF_RESKEY_address"
+ debugger "executing command: $cmd"
+ $cmd > /dev/null
+ [[ $? -gt 0 ]] && debugger "IP $OCF_RESKEY_address not locally reachable via ping on this system" && return $OCF_NOT_RUNNING
+ debugger "routed in VPC and locally reachable"
+ return $OCF_SUCCESS
+}
+
+
+ecs_ip_drop() {
+ debugger "function: ecsip_drop"
+ cmd="ip addr delete ${OCF_RESKEY_address}/32 dev $OCF_RESKEY_interface"
+ debugger "executing command: $cmd"
+ $cmd
+ rc=$?
+ [[ $rc -gt 2 ]] && debugger "command failed, rc $rc" && return $OCF_ERR_GENERIC
+ debugger "command succeeded"
+ return $OCF_SUCCESS
+}
+
+wait_for_deleted() {
+ while [ ! -z "$ROUTE_TO_INSTANCE" ]; do
+ sleep 1
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ debugger "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+ done
+ sleep 5
+}
+
+wait_for_started() {
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ debugger "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+
+ while [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; do
+ sleep 1
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ debugger "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+ done
+ sleep 5
+}
+
+ecs_ip_get_and_configure() {
+ debugger "function: ecsip_get_and_configure"
+
+ if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then
+
+ if [ $ROUTE_TO_INSTANCE != "<unknown>" ]; then
+ # Adjusting the routing table
+ cmd="aliyuncli vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text"
+ debugger "executing command: $cmd"
+ $cmd
+ rc=$?
+ [[ $rc != 0 ]] && debugger "command failed, rc: $rc" && return $OCF_ERR_GENERIC
+ #wait_for_deleted
+ sleep 3
+ fi
+
+ cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text"
+ debugger "executing command: $cmd"
+ $cmd
+ rc=$?
+ #[[ $rc != 0 ]] && debugger "command failed, rc: $rc" && return $OCF_ERR_GENERIC
+ while [ $rc != 0 ]; do
+ sleep 2
+ cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text"
+ debugger "executing command: $cmd"
+ $cmd
+ rc=$?
+ done
+ wait_for_started
+ fi
+
+
+ # Reconfigure the local ip address
+ ecs_ip_drop
+ ip addr add "${OCF_RESKEY_address}/32" dev $OCF_RESKEY_interface
+ rc=$?
+ [[ $rc != 0 ]] && debugger "command failed, rc: $rc" && return $OCF_ERR_GENERIC
+ debugger "-success"
+ return $OCF_SUCCESS
+}
+
+ecs_ip_stop() {
+ ocf_log info "ECS: Bringing down IP address $OCF_RESKEY_address"
+ ecs_ip_validate
+ ecs_ip_monitor
+ [[ $? == $OCF_NOT_RUNNING ]] && ocf_log info "ECS: Address $OCF_RESKEY_address already down" && return $OCF_SUCCESS
+ ecs_ip_drop
+ [[ $? != $OCF_SUCCESS ]] && return $OCF_ERR_GENERIC
+ ecs_ip_monitor
+ [[ $? == $OCF_NOT_RUNNING ]] && ocf_log info "ECS: Successfully brought down $OCF_RESKEY_address" && return $OCF_SUCCESS
+ ocf_log error "ECS: Couldn't bring down IP address $OCF_RESKEY_address on interface $OCF_RESKEY_interface."
+ return $OCF_ERR_GENERIC
+}
+
+ecs_ip_start() {
+ ocf_log info "ECS: Moving IP address $OCF_RESKEY_address to this host by adjusting routing table $OCF_RESKEY_routing_table"
+ ecs_ip_validate
+ ecs_ip_monitor
+ [[ $? == $OCF_SUCCESS ]] && ocf_log info "ECS: $OCF_RESKEY_address already started" && return $OCF_SUCCESS
+ ocf_log info "ECS: Adjusting routing table and locally configuring IP address"
+ ecs_ip_get_and_configure
+ [[ $? != 0 ]] && ocf_log error "Received $? from 'aliyun cli'" && return $OCF_ERR_GENERIC
+ return $OCF_SUCCESS
+ ecs_ip_monitor
+ [[ $? == $OCF_SUCCESS ]] && return $?
+ ocf_log error "ECS: IP address couldn't be configured on this host (IP: $OCF_RESKEY_address, Interface: $OCF_RESKEY_interface)"
+ return $OCF_ERR_GENERIC
+}
+
+###############################################################################
+#
+# MAIN
+#
+###############################################################################
+
+case $__OCF_ACTION in
+ meta-data) metadata
+ exit $OCF_SUCCESS;;
+ monitor)
+ ecs_ip_monitor;;
+ stop)
+ ecs_ip_stop;;
+ validate-all) ecs_ip_validate;;
+ start)
+ ecs_ip_start;;
+ *) exit $OCF_ERR_UNIMPLEMENTED;;
+esac
\ No newline at end of file

View File

@ -1,451 +0,0 @@
From db3df55a6f7097e1da7d77eb361e9e7560f13353 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 24 Jul 2018 13:57:08 +0200
Subject: [PATCH] aliyun-vpc-move-ip: fixes
---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/aliyun-vpc-move-ip | 336 ++++++++++++++++++++++++-------------------
3 files changed, 189 insertions(+), 149 deletions(-)
mode change 100644 => 100755 heartbeat/aliyun-vpc-move-ip
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 3ac0569de..fc9a67161 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -93,6 +93,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_WinPopup.7 \
ocf_heartbeat_Xen.7 \
ocf_heartbeat_Xinetd.7 \
+ ocf_heartbeat_aliyun-vpc-move-ip.7 \
ocf_heartbeat_anything.7 \
ocf_heartbeat_apache.7 \
ocf_heartbeat_asterisk.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index d4750bf09..6adc6bc3c 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -90,6 +90,7 @@ ocf_SCRIPTS = AoEtarget \
Xen \
Xinetd \
ZFS \
+ aliyun-vpc-move-ip \
anything \
apache \
asterisk \
diff --git a/heartbeat/aliyun-vpc-move-ip b/heartbeat/aliyun-vpc-move-ip
old mode 100644
new mode 100755
index bc97822a8..108feb247
--- a/heartbeat/aliyun-vpc-move-ip
+++ b/heartbeat/aliyun-vpc-move-ip
@@ -1,30 +1,19 @@
-#!/bin/bash
+#!/bin/sh
#
# OCF resource agent to move an IP address within a VPC in the Aliyun
# Based on code of Markus Guertler (GitHub AWS-VPC-move-IP)
# Based on code of Adam Gandelman (GitHub ec2-resource-agents/elasticip)
#
-###############################################################################
-# For testing purposes delete OCF_ROOT after testing
-OCF_ROOT=/usr/lib/ocf/
-#
-# INIT
-#: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
-#if [ -f ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs ]; then
-# . ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs
-#fi
-
#######################################################################
# Initialization:
-
-: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
-. ${OCF_FUNCTIONS}
-: ${__OCF_ACTION=$1}
-export HOME=/root
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
-
+# aliyuncli doesnt work without HOME parameter
+export HOME="/root"
+
USAGE="usage: $0 {start|stop|status|meta-data}";
###############################################################################
@@ -36,8 +25,96 @@ USAGE="usage: $0 {start|stop|status|meta-data}";
###############################################################################
-metadata() {
-cat <<END
+
+ip_get_and_configure() {
+ ocf_log debug "function: ip_get_and_configure"
+
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+
+ if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then
+ if [ -n "$ROUTE_TO_INSTANCE" ]; then
+ ip_drop
+ fi
+
+ cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text"
+ ocf_log debug "executing command: $cmd"
+ $cmd
+ rc=$?
+ while [ $rc -ne 0 ]; do
+ sleep 1
+ cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text"
+ ocf_log debug "executing command: $cmd"
+ $cmd
+ rc=$?
+ done
+ wait_for_started
+ fi
+
+
+ # Reconfigure the local ip address
+ ip addr add "${OCF_RESKEY_address}/32" dev $OCF_RESKEY_interface
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+
+ ocf_log debug "IP added"
+
+ return $OCF_SUCCESS
+}
+
+ip_drop() {
+ ocf_log debug "function: ip_drop"
+ cmd="ip addr delete ${OCF_RESKEY_address}/32 dev $OCF_RESKEY_interface"
+ ocf_log debug "executing command: $cmd"
+ $cmd
+ rc=$?
+ if [ $rc -ne 0 ] && [ $rc -ne 2 ]; then
+ ocf_log err "command failed, rc $rc"
+ return $OCF_ERR_GENERIC
+ fi
+
+ cmd="aliyuncli vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text"
+ ocf_log debug "executing command: $cmd"
+ $cmd
+ if [ $? -ne 0 ]; then
+ ocf_log err "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ wait_for_deleted
+
+ ocf_log debug "IP dropped"
+
+ return $OCF_SUCCESS
+}
+
+wait_for_started() {
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ ocf_log debug "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')"
+
+ while [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; do
+ sleep 3
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ ocf_log debug "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_address | awk '{ print $3 }')"
+ done
+}
+
+wait_for_deleted() {
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+
+ while [ ! -z "$ROUTE_TO_INSTANCE" ]; do
+ sleep 1
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ ocf_log debug "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+ done
+}
+
+ecs_ip_metadata() {
+ cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="vpc-move-ip">
@@ -74,8 +151,8 @@ Name of the network interfacen, i.e. eth0
Valid Aliyun CLI profile name
</longdesc>
<shortdesc lang="en">profile name</shortdesc>
-<content type="string" default="default" />
-</parameter>
+<content type="string" default="default" />
+</parameter>
</parameters>
<actions>
<action name="start" timeout="180" />
@@ -88,171 +165,132 @@ Valid Aliyun CLI profile name
END
}
-debugger() {
- ocf_log info "DEBUG: $1"
-}
-
ecs_ip_validate() {
- debugger "function: validate"
-
+ ocf_log debug "function: validate"
+
# IP address
- [[ -z "$OCF_RESKEY_address" ]] && ocf_log error "IP address parameter not set $OCF_RESKEY_ADDRESS!" && exit $OCF_ERR_CONFIGURED
-
+ if [ -z "$OCF_RESKEY_address" ]; then
+ ocf_log err "IP address parameter not set $OCF_RESKEY_ADDRESS!"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
# Network Interface
- [[ -z "$OCF_RESKEY_interface" ]] && ocf_log error "Network interface parameter not set $OCF_RESKEY_INTERFACE!" && exit $OCF_ERR_CONFIGURED
-
+ if [ -z "$OCF_RESKEY_interface" ]; then
+ ocf_log err "Network interface parameter not set $OCF_RESKEY_INTERFACE!"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
# Routing Table
- [[ -z "$OCF_RESKEY_routing_table" ]] && ocf_log error "Routing table parameter not set $OCF_RESKEY_ROUTING_TABLE!" && exit $OCF_ERR_CONFIGURED
-
- ECS_INSTANCE_ID="$(curl -s http://100.100.100.200/latest/meta-data/instance-id)"
+ if [ -z "$OCF_RESKEY_routing_table" ]; then
+ ocf_log err "Routing table parameter not set $OCF_RESKEY_ROUTING_TABLE!"
+ exit $OCF_ERR_CONFIGURED
+ fi
if [ -z "${ECS_INSTANCE_ID}" ]; then
ocf_exit_reason "Instance ID not found. Is this a ECS instance?"
return $OCF_ERR_GENERIC
fi
-
- return $OCF_SUCCESS
-}
-ecs_ip_monitor() {
- ecs_ip_validate
- debugger "function: ecsip_monitor: check routing table"
- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
- debugger "executing command: $cmd"
- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
- if [ -z "$ROUTE_TO_INSTANCE" ]; then
- ROUTE_TO_INSTANCE="<unknown>"
- fi
-
- [[ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]] && debugger "not routed to this instance ($ECS_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE" && return $OCF_NOT_RUNNING
- cmd="ping -W 1 -c 1 $OCF_RESKEY_address"
- debugger "executing command: $cmd"
- $cmd > /dev/null
- [[ $? -gt 0 ]] && debugger "IP $OCF_RESKEY_address not locally reachable via ping on this system" && return $OCF_NOT_RUNNING
- debugger "routed in VPC and locally reachable"
- return $OCF_SUCCESS
-}
-
-
-ecs_ip_drop() {
- debugger "function: ecsip_drop"
- cmd="ip addr delete ${OCF_RESKEY_address}/32 dev $OCF_RESKEY_interface"
- debugger "executing command: $cmd"
- $cmd
- rc=$?
- [[ $rc -gt 2 ]] && debugger "command failed, rc $rc" && return $OCF_ERR_GENERIC
- debugger "command succeeded"
return $OCF_SUCCESS
}
-wait_for_deleted() {
- while [ ! -z "$ROUTE_TO_INSTANCE" ]; do
- sleep 1
- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
- debugger "executing command: $cmd"
- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
- done
- sleep 5
-}
+ecs_ip_start() {
+ ocf_log info "ECS: Moving IP address $OCF_RESKEY_address to this host by adjusting routing table $OCF_RESKEY_routing_table"
-wait_for_started() {
- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
- debugger "executing command: $cmd"
- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
-
- while [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; do
- sleep 1
- cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
- debugger "executing command: $cmd"
- ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
- done
- sleep 5
-}
+ ecs_ip_monitor
+ if [ $? = $OCF_SUCCESS ]; then
+ ocf_log info "ECS: $OCF_RESKEY_address already started"
+ return $OCF_SUCCESS
+ fi
-ecs_ip_get_and_configure() {
- debugger "function: ecsip_get_and_configure"
-
- if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then
-
- if [ $ROUTE_TO_INSTANCE != "<unknown>" ]; then
- # Adjusting the routing table
- cmd="aliyuncli vpc DeleteRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ROUTE_TO_INSTANCE --output text"
- debugger "executing command: $cmd"
- $cmd
- rc=$?
- [[ $rc != 0 ]] && debugger "command failed, rc: $rc" && return $OCF_ERR_GENERIC
- #wait_for_deleted
- sleep 3
- fi
-
- cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text"
- debugger "executing command: $cmd"
- $cmd
- rc=$?
- #[[ $rc != 0 ]] && debugger "command failed, rc: $rc" && return $OCF_ERR_GENERIC
- while [ $rc != 0 ]; do
- sleep 2
- cmd="aliyuncli vpc CreateRouteEntry --RouteTableId $OCF_RESKEY_routing_table --DestinationCidrBlock ${OCF_RESKEY_address}/32 --NextHopId $ECS_INSTANCE_ID --NextHopType Instance --output text"
- debugger "executing command: $cmd"
- $cmd
- rc=$?
- done
- wait_for_started
+ ocf_log info "ECS: Adjusting routing table and locally configuring IP address"
+ ip_get_and_configure
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log err "Received $rc from 'aliyun cli'"
+ return $OCF_ERR_GENERIC
fi
-
-
- # Reconfigure the local ip address
- ecs_ip_drop
- ip addr add "${OCF_RESKEY_address}/32" dev $OCF_RESKEY_interface
+
+ ecs_ip_monitor
rc=$?
- [[ $rc != 0 ]] && debugger "command failed, rc: $rc" && return $OCF_ERR_GENERIC
- debugger "-success"
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log err "IP address couldn't be configured on this host (IP: $OCF_RESKEY_address, Interface: $OCF_RESKEY_interface)"
+ return $rc
+ fi
+
return $OCF_SUCCESS
}
ecs_ip_stop() {
ocf_log info "ECS: Bringing down IP address $OCF_RESKEY_address"
- ecs_ip_validate
+
ecs_ip_monitor
- [[ $? == $OCF_NOT_RUNNING ]] && ocf_log info "ECS: Address $OCF_RESKEY_address already down" && return $OCF_SUCCESS
- ecs_ip_drop
- [[ $? != $OCF_SUCCESS ]] && return $OCF_ERR_GENERIC
+ if [ $? = $OCF_NOT_RUNNING ]; then
+ ocf_log info "ECS: Address $OCF_RESKEY_address already down"
+ return $OCF_SUCCESS
+ fi
+
+ ip_drop
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_log err "ECS: Couldn't drop IP address $OCF_RESKEY_address on interface $OCF_RESKEY_interface."
+ return $OCF_ERR_GENERIC
+ fi
+
ecs_ip_monitor
- [[ $? == $OCF_NOT_RUNNING ]] && ocf_log info "ECS: Successfully brought down $OCF_RESKEY_address" && return $OCF_SUCCESS
- ocf_log error "ECS: Couldn't bring down IP address $OCF_RESKEY_address on interface $OCF_RESKEY_interface."
+ if [ $? = $OCF_NOT_RUNNING ]; then
+ ocf_log info "ECS: Successfully brought down $OCF_RESKEY_address"
+ return $OCF_SUCCESS
+ fi
+
+ ocf_log err "ECS: Couldn't bring down IP address $OCF_RESKEY_address on interface $OCF_RESKEY_interface."
return $OCF_ERR_GENERIC
}
-ecs_ip_start() {
- ocf_log info "ECS: Moving IP address $OCF_RESKEY_address to this host by adjusting routing table $OCF_RESKEY_routing_table"
- ecs_ip_validate
- ecs_ip_monitor
- [[ $? == $OCF_SUCCESS ]] && ocf_log info "ECS: $OCF_RESKEY_address already started" && return $OCF_SUCCESS
- ocf_log info "ECS: Adjusting routing table and locally configuring IP address"
- ecs_ip_get_and_configure
- [[ $? != 0 ]] && ocf_log error "Received $? from 'aliyun cli'" && return $OCF_ERR_GENERIC
- return $OCF_SUCCESS
- ecs_ip_monitor
- [[ $? == $OCF_SUCCESS ]] && return $?
- ocf_log error "ECS: IP address couldn't be configured on this host (IP: $OCF_RESKEY_address, Interface: $OCF_RESKEY_interface)"
- return $OCF_ERR_GENERIC
+ecs_ip_monitor() {
+ ocf_log debug "function: ecsip_monitor: check routing table"
+ cmd="aliyuncli vpc DescribeRouteTables --RouteTableId $OCF_RESKEY_routing_table --output text"
+ ocf_log debug "executing command: $cmd"
+
+ ROUTE_TO_INSTANCE="$($cmd |grep $OCF_RESKEY_address | awk '{ print $3 }')"
+
+ if [ "$ECS_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then
+ ocf_log debug "not routed to this instance ($ECS_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE"
+ return $OCF_NOT_RUNNING
+ fi
+
+ cmd="ping -W 1 -c 1 $OCF_RESKEY_address"
+ ocf_log debug "executing command: $cmd"
+ $cmd > /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log debug "IP $OCF_RESKEY_address not locally reachable via ping on this system"
+ return $OCF_NOT_RUNNING
+ fi
+ ocf_log debug "routed in VPC and locally reachable"
+ return $OCF_SUCCESS
}
+
###############################################################################
#
# MAIN
#
###############################################################################
-case $__OCF_ACTION in
- meta-data) metadata
+case $__OCF_ACTION in
+ meta-data) ecs_ip_metadata
exit $OCF_SUCCESS;;
- monitor)
- ecs_ip_monitor;;
- stop)
- ecs_ip_stop;;
validate-all) ecs_ip_validate;;
+esac
+
+ECS_INSTANCE_ID="$(curl -s http://100.100.100.200/latest/meta-data/instance-id)"
+
+case $__OCF_ACTION in
start)
+ ecs_ip_validate
ecs_ip_start;;
+ stop)
+ ecs_ip_stop;;
+ monitor)
+ ecs_ip_monitor;;
*) exit $OCF_ERR_UNIMPLEMENTED;;
-esac
\ No newline at end of file
+esac

View File

@ -1,22 +0,0 @@
From ee081df601f914079f111eec10cb81ab212130a9 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 25 Jul 2018 11:22:39 +0200
Subject: [PATCH] aliyun-vpc-move-ip: fix manpage
---
heartbeat/aliyun-vpc-move-ip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/aliyun-vpc-move-ip b/heartbeat/aliyun-vpc-move-ip
index 108feb247..e27952adb 100755
--- a/heartbeat/aliyun-vpc-move-ip
+++ b/heartbeat/aliyun-vpc-move-ip
@@ -117,7 +117,7 @@ ecs_ip_metadata() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="vpc-move-ip">
+<resource-agent name="aliyun-vpc-move-ip">
<version>2.0</version>
<longdesc lang="en">
Resource Agent to move IP addresses within a VPC of the Aliyun Webservices ECS

View File

@ -8,8 +8,8 @@
+ OCF_RESKEY_aliyuncli="$(which aliyuncli-ra 2> /dev/null || which aliyuncli 2> /dev/null || which aliyun 2> /dev/null)"
fi
-if [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli' ]]; then
+if [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli-ra' ]] || [[ "${OCF_RESKEY_aliyuncli##*/}" == 'aliyuncli' ]]; then
-if [ "${OCF_RESKEY_aliyuncli##*/}" = 'aliyuncli' ]; then
+if [ "${OCF_RESKEY_aliyuncli##*/}" = 'aliyuncli-ra' ] || [ "${OCF_RESKEY_aliyuncli##*/}" = 'aliyuncli' ]; then
OUTPUT="text"
EXECUTING='{ print $3 }'
IFS_=" "

View File

@ -1,49 +0,0 @@
From fc497e888afcb88babbc21a59883556335c070fa Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 31 Aug 2018 11:41:32 +0200
Subject: [PATCH] aliyun-vpc-move-ip: improve metadata and manpage
---
heartbeat/aliyun-vpc-move-ip | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/heartbeat/aliyun-vpc-move-ip b/heartbeat/aliyun-vpc-move-ip
index e27952adb..c004d26fc 100755
--- a/heartbeat/aliyun-vpc-move-ip
+++ b/heartbeat/aliyun-vpc-move-ip
@@ -123,7 +123,7 @@ ecs_ip_metadata() {
Resource Agent to move IP addresses within a VPC of the Aliyun Webservices ECS
by changing an entry in an specific routing table
</longdesc>
-<shortdesc lang="en">Move IP within a APC of the Aliyun ECS</shortdesc>
+<shortdesc lang="en">Move IP within a VPC of the Aliyun ECS</shortdesc>
<parameters>
<parameter name="address" required="1">
<longdesc lang="en">
@@ -134,21 +134,23 @@ VPC private IP address
</parameter>
<parameter name="routing_table" required="1">
<longdesc lang="en">
-Name of the routing table, where the route for the IP address should be changed, i.e. rtb-...
+Name of the routing table, where the route for the IP address should be changed, i.e. vtb-...
</longdesc>
<shortdesc lang="en">routing table name</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="interface" required="1">
<longdesc lang="en">
-Name of the network interfacen, i.e. eth0
+Name of the network interface, i.e. eth0
</longdesc>
<shortdesc lang="en">network interface name</shortdesc>
<content type="string" default="eth0" />
</parameter>
<parameter name="profile" required="0">
<longdesc lang="en">
-Valid Aliyun CLI profile name
+Valid Aliyun CLI profile name (see 'aliyuncli-ra configure').
+
+See https://www.alibabacloud.com/help/doc-detail/43039.htm?spm=a2c63.p38356.b99.16.38a914abRZtOU3 for more information about aliyuncli-ra.
</longdesc>
<shortdesc lang="en">profile name</shortdesc>
<content type="string" default="default" />

View File

@ -1,39 +0,0 @@
From 7632a85bcf642b484df52a25dbffbfa0031421bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Koutn=C3=BD?= <mkoutny@suse.com>
Date: Mon, 6 Aug 2018 18:04:52 +0200
Subject: [PATCH] aws-vpc-move-ip: Use ip utility to check address
When pinging the assigned address during initial monitor (probe) on one
node we may actually ping the reachable address when the resource is
running on another node. This yields false positive monitor result on
the pinging node. Avoid this by merely checking the assignment of the
address to an interface.
---
heartbeat/aws-vpc-move-ip | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index cefa38e03..3bbbed474 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -167,15 +167,15 @@ ec2ip_monitor() {
ocf_log debug "monitor: Enhanced Monitoring disabled - omitting API call"
fi
- cmd="ping -W 1 -c 1 $OCF_RESKEY_ip"
+ cmd="ip addr show to '$OCF_RESKEY_ip' up"
ocf_log debug "executing command: $cmd"
- $cmd > /dev/null
- if [ "$?" -gt 0 ]; then
- ocf_log warn "IP $OCF_RESKEY_ip not locally reachable via ping on this system"
+ RESULT=$($cmd | grep '$OCF_RESKEY_ip')
+ if [ -z "$RESULT" ]; then
+ ocf_log warn "IP $OCF_RESKEY_ip not assigned to running interface"
return $OCF_NOT_RUNNING
fi
- ocf_log debug "route in VPC and locally reachable"
+ ocf_log debug "route in VPC and address assigned"
return $OCF_SUCCESS
}

View File

@ -1,31 +0,0 @@
From 42dccdd20aff3ebf134c8041f79ab0a658975e69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Koutn=C3=BD?= <mkoutny@suse.com>
Date: Thu, 30 Aug 2018 18:02:51 +0200
Subject: [PATCH] aws-vpc-move-ip: Fix broken shell quoting
The argument 4th to `ip` is passed with single quotes around which
cannot be parsed as valid IP address. Furthermore, we need to expand the
$OCF_RESKEY_ip for grep. This breaks correct detection of the assigned
address.
Fixes 7632a85bcf642b484df52a25dbffbfa0031421bc.
---
heartbeat/aws-vpc-move-ip | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 3bbbed474..ce3fc6b9a 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -167,9 +167,9 @@ ec2ip_monitor() {
ocf_log debug "monitor: Enhanced Monitoring disabled - omitting API call"
fi
- cmd="ip addr show to '$OCF_RESKEY_ip' up"
+ cmd="ip addr show to $OCF_RESKEY_ip up"
ocf_log debug "executing command: $cmd"
- RESULT=$($cmd | grep '$OCF_RESKEY_ip')
+ RESULT=$($cmd | grep "$OCF_RESKEY_ip")
if [ -z "$RESULT" ]; then
ocf_log warn "IP $OCF_RESKEY_ip not assigned to running interface"
return $OCF_NOT_RUNNING

View File

@ -1,43 +0,0 @@
diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am
--- a/doc/man/Makefile.am 2018-07-27 13:05:09.611188363 +0200
+++ b/doc/man/Makefile.am 2018-07-27 13:05:43.981806873 +0200
@@ -97,6 +97,7 @@
ocf_heartbeat_WinPopup.7 \
ocf_heartbeat_Xen.7 \
ocf_heartbeat_Xinetd.7 \
+ ocf_heartbeat_ZFS.7 \
ocf_heartbeat_aliyun-vpc-move-ip.7 \
ocf_heartbeat_anything.7 \
ocf_heartbeat_apache.7 \
@@ -136,6 +137,7 @@
ocf_heartbeat_lxd-info.7 \
ocf_heartbeat_machine-info.7 \
ocf_heartbeat_mariadb.7 \
+ ocf_heartbeat_minio.7 \
ocf_heartbeat_mysql.7 \
ocf_heartbeat_mysql-proxy.7 \
ocf_heartbeat_nagios.7 \
@@ -150,6 +152,7 @@
ocf_heartbeat_oracle.7 \
ocf_heartbeat_oralsnr.7 \
ocf_heartbeat_ovsmonitor.7 \
+ ocf_heartbeat_pgagent.7 \
ocf_heartbeat_pgsql.7 \
ocf_heartbeat_pingd.7 \
ocf_heartbeat_portblock.7 \
@@ -158,6 +161,7 @@
ocf_heartbeat_proftpd.7 \
ocf_heartbeat_rabbitmq-cluster.7 \
ocf_heartbeat_redis.7 \
+ ocf_heartbeat_rkt.7 \
ocf_heartbeat_rsyncd.7 \
ocf_heartbeat_rsyslog.7 \
ocf_heartbeat_scsi2reservation.7 \
@@ -172,6 +176,7 @@
ocf_heartbeat_varnish.7 \
ocf_heartbeat_vdo-vol.7 \
ocf_heartbeat_vmware.7 \
+ ocf_heartbeat_vsftpd.7 \
ocf_heartbeat_zabbixserver.7
if USE_IPV6ADDR_AGENT

View File

@ -1,415 +0,0 @@
From 019c3108feff48d8ad496cd0759349c46170dc2d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 6 Apr 2020 10:23:51 +0200
Subject: [PATCH 1/2] crypt: new resource agent
---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/crypt | 337 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 339 insertions(+)
create mode 100755 heartbeat/crypt
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 478fbe4f8..53c9975ec 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -105,6 +105,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_azure-lb.7 \
ocf_heartbeat_clvm.7 \
ocf_heartbeat_conntrackd.7 \
+ ocf_heartbeat_crypt.7 \
ocf_heartbeat_db2.7 \
ocf_heartbeat_dhcpd.7 \
ocf_heartbeat_docker.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 893115810..bbc9590ac 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -101,6 +101,7 @@ ocf_SCRIPTS = AoEtarget \
azure-lb \
clvm \
conntrackd \
+ crypt \
db2 \
dhcpd \
dnsupdate \
diff --git a/heartbeat/crypt b/heartbeat/crypt
new file mode 100755
index 000000000..6bffdff89
--- /dev/null
+++ b/heartbeat/crypt
@@ -0,0 +1,337 @@
+#!/bin/sh
+#
+# crypt/LUKS OCF RA. Manages cryptsetup devices.
+#
+# Copyright (c) 2020 Red Hat GmbH, Heinz Mauelshagen
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Parameter defaults
+OCF_RESKEY_encrypted_dev_default=""
+OCF_RESKEY_crypt_dev_default=""
+OCF_RESKEY_key_file_default=""
+OCF_RESKEY_crypt_type_default=""
+OCF_RESKEY_force_stop_default="false"
+
+: ${OCF_RESKEY_encrypted_dev=${OCF_RESKEY_encrypted_dev_default}}
+: ${OCF_RESKEY_crypt_dev=${OCF_RESKEY_crypt_dev_default}}
+: ${OCF_RESKEY_key_file=${OCF_RESKEY_key_file_default}}
+: ${OCF_RESKEY_crypt_type=${OCF_RESKEY_crypt_type_default}}
+: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="crypt">
+<version>1.0</version>
+
+<longdesc lang="en">
+This is a LUKS/crypt Resource Agent managing encrypted devices via cryptsetup(8).
+The agent imposes limitations on device types supported: luks, luks[1..N].
+</longdesc>
+<shortdesc lang="en">LUKS/crypt resource agent</shortdesc>
+
+<parameters>
+
+<parameter name="encrypted_dev" unique="1" required="1">
+<longdesc lang="en">
+Encrypted backing device, which should be defined by UUID,
+36 characters including '-'s as reported by blkid(8).
+
+Although it can be defined as a block device path (e.g. /dev/sdh),
+the UUID should be preferred over the block device path to allow for the
+unique discovery of the crypt backing device given the volatile nature of
+/dev entries (e.g. /dev/sdh on one node may be /dev/sdg on another).
+
+Only define as block device path if you know what you are doing.
+</longdesc>
+<shortdesc lang="en">Encrypted device</shortdesc>
+<content type="string" default="${OCF_RESKEY_encrypted_dev_default}" />
+</parameter>
+
+<parameter name="crypt_dev" unique="1" required="1">
+<longdesc lang="en">
+Encrypted device name, no path. I.e. the one given in "cryptsetup open name ...".
+The resulting block device path is /dev/mapper/name.
+</longdesc>
+<shortdesc lang="en">Encrypted device</shortdesc>
+<content type="string" default="${OCF_RESKEY_crypt_dev_default}" />
+</parameter>
+
+<parameter name="key_file" unique="1" required="1">
+<longdesc lang="en">
+Key file path containing the encryption passphrase
+(aka key; see cryptsetup(8)). For LUKS, the passphrase as of the key_file
+parameter is used to decrypt a randomly selected key when the device was created.
+</longdesc>
+<shortdesc lang="en">Key file</shortdesc>
+<content type="string" default="${OCF_RESKEY_key_file_default}" />
+</parameter>
+
+<parameter name="crypt_type" unique="1" required="1">
+<longdesc lang="en">
+Encryption (device) type (e.g. "luks" or "luks2").
+
+This parameter affirms the encryption format as of the crypt metadata
+thus allowing for safety measures when starting the encrypted resource.
+</longdesc>
+<shortdesc lang="en">Encryption type</shortdesc>
+<content type="string" default="${OCF_RESKEY_crypt_type_default}" />
+</parameter>
+
+<parameter name="force_stop" unique="0" required="0">
+<longdesc lang="en">
+If processes or kernel threads are using the crypt device, it cannot
+be stopped. We will try to stop processes, first by sending TERM and
+then, if that doesn't help in $PROC_CLEANUP_TIME seconds, using KILL.
+The lsof(8) program is required to get the list of array users.
+Of course, the kernel threads cannot be stopped this way.
+If the processes are critical for data integrity, then set this
+parameter to false. Note that in that case the stop operation
+will fail and the node will be fenced.
+</longdesc>
+<shortdesc lang="en">force stop processes using the crpyt device</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20s" />
+<action name="stop" timeout="20s" />
+<action name="monitor" timeout="20s" interval="10s" depth="0" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="10s" />
+</actions>
+</resource-agent>
+END
+}
+
+# Disable cryptsetup auto-recovery if cloned.
+disable_locks=""
+ocf_is_clone && disable_locks="--disable-locks"
+
+crypt_usage() {
+ cat <<END
+usage: $0 {start|stop|monitor|usage|meta-data|validate-all}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+encrypted_dev="${OCF_RESKEY_encrypted_dev}"
+crypt_dev="${OCF_RESKEY_crypt_dev}"
+crypt_dev_path="/dev/mapper/$crypt_dev"
+key_file="${OCF_RESKEY_key_file}"
+crypt_type="${OCF_RESKEY_crypt_type}"
+force_stop="${OCF_RESKEY_force_stop}"
+
+crypt_validate_all() {
+ if ! have_binary cryptsetup; then
+ ocf_exit_reason "Please install cryptsetup(8)"
+ return $OCF_ERR_INSTALLED
+ fi
+ if [ -z "$encrypted_dev" ]; then
+ ocf_exit_reason "Undefined OCF_RESKEY_encrypted_dev"
+ return $OCF_ERR_CONFIGURED
+ fi
+ if [ -n "$encrypted_dev" ]; then
+ case "$encrypted_dev" in
+ *-*-*-*) if [ `echo "$encrypted_dev" | wc -c` -ne 37 ]; then
+ ocf_exit_reason "Bogus encrypted device UUID \"$encrypted_dev\""
+ return $OCF_ERR_ARGS
+ fi
+ encrypted_dev=/dev/disk/by-uuid/"$encrypted_dev";;
+ *) case "$encrypted_dev" in
+ /dev/*) ;;
+ *) ocf_exit_reason "Bogus encrypted device path"
+ return $OCF_ERR_ARGS;;
+ esac
+ esac
+ fi
+ if [ ! -b "$encrypted_dev" ]; then
+ ocf_exit_reason "Encrypted device $encrypted_dev not accessible"
+ return $OCF_ERR_ARGS
+ fi
+ echo "$crypt_dev" | grep "/" >/dev/null
+ if [ $? -eq 0 ] && [ -z "$crypt_dev" ]; then
+ ocf_exit_reason "Crypt device \"$crypt_dev\" name has to at least 1 character long and without path"
+ return $OCF_ERR_ARGS
+ fi
+ if [ ! -r "$key_file" ]; then
+ ocf_exit_reason "Hash key file $key_file not accessible"
+ return $OCF_ERR_ARGS
+ fi
+ if ! ocf_is_true "$force_stop" && "$force_stop" != "false" ]]; then
+ ocf_exit_reason "Bogus force_stop=\"$force_stop\" attribute"
+ return $OCF_ERR_CONFIGURED
+ fi
+ if "$force_stop" = "true" && ! have_binary lsof; then
+ ocf_exit_reason "Force stop requested, please install lsof(8)"
+ return $OCF_ERR_INSTALLED
+ fi
+ cryptsetup isLuks $encrypted_dev 2>/dev/null
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "$encrypted_dev is not a Luks formatted device"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+get_users_pids() {
+ ocf_log debug "running lsof to list \"$crypt_dev\" users..."
+ ocf_run -warn 'lsof $crypt_dev_path | tail -n +2 | awk "{print $2}" | sort -u'
+}
+
+stop_crypt_users() {
+ local pids=`get_users_pids`
+
+ if [ -z "$pids" ]; then
+ ocf_log warn "lsof reported no users holding arrays"
+ return 2
+ fi
+
+ ocf_stop_processes TERM $PROC_CLEANUP_TIME $pids
+}
+
+show_users() {
+ local dm_dev
+
+ ocf_log info "running lsof to list \"$crypt_dev\" users..."
+ ocf_run -warn lsof $crypt_dev_path
+
+ dm_dev=$(basename $(realpath $crypt_dev_path))
+ if [ -d /sys/block/$dm_dev/holders ]; then
+ ocf_log debug "ls -l /sys/block/$dm_dev/holders"
+ ocf_run -warn ls -l /sys/block/$dm_dev/holders
+ fi
+}
+
+crypt_stop_one() {
+ cryptsetup close $crypt_dev $disable_locks
+}
+
+#######################################################################
+#
+# Action: START an encrypted resource
+#
+crypt_start() {
+ local rc
+
+ cryptsetup open $encrypted_dev $crypt_dev --type $crypt_type $disable_locks --key-file=$key_file
+ rc=$?
+ if [ $rc -eq 0 ];then
+ crypt_monitor
+ rc=$?
+ else
+ rc=$OCF_ERR_GERNERIC
+ fi
+ [ $rc -ne $OCF_SUCCESS ] ocf_exit_reason "Failed to start encrypted device \"$crypt_dev\""
+
+ return $rc
+}
+
+#
+# Action: STOP an encrypted resource
+#
+crypt_stop() {
+ local rc
+
+ crypt_monitor
+ rc=$?
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
+ crypt_stop_one
+ crypt_monitor
+ rc=$?
+ fi
+ if [ $rc -ne $OCF_NOT_RUNNING ] && ocf_is_true $FORCESTOP; then
+ stop_crypt_users
+ case $? in
+ 2) rc=$OCF_SUCCESS;;
+ *) crypt_stop_one
+ crypt_monitor
+ rc=$?;;
+ esac
+ fi
+ if [ $rc -ne $OCF_NOT_RUNNING ]; then
+ ocf_log warn "Couldn't stop crypt device \"$crypt_dev\" (rc=$rc)"
+ show_users
+ ocf_exit_reason "Failed to stop crypt device \"$crypt_dev\"!"
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+#
+# Action: MONITOR an encrypted resource
+#
+crypt_monitor() {
+ cryptsetup status $crypt_dev $disable_locks &>/dev/null
+ if [ $? -eq 0 ]; then
+ [ -L $crypt_dev_path ] && return $OCF_SUCCESS
+ return $OCF_ERR_GENERIC
+ fi
+
+ [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe && ocf_exit_reason "Crypt resource not running"
+ return $OCF_NOT_RUNNING
+}
+
+# Check for stange argument count.
+if [ $# -ne 1 ]; then
+ usage
+ exit $OCF_ERR_ARGS
+fi
+
+case "$__OCF_ACTION" in
+meta-data) meta_data
+ exit $OCF_SUCCESS;;
+usage|help) crypt_usage
+ exit $OCF_SUCCESS;;
+esac
+
+# XME: remove once pacemaker is fixed and calls this action
+crypt_validate_all
+rc=$?
+[ $rc -ne $OCF_SUCCESS ] && exit $rc
+
+case "$__OCF_ACTION" in
+start) crypt_start; rc=$?;;
+stop) crypt_stop; rc=$?;;
+monitor) crypt_monitor; rc=$?;;
+validate-all) rc=$OCF_SUCCESS;; # crypt_validate_all would have errored out above already.
+*) crypt_usage
+ exit $OCF_ERR_UNIMPLEMENTED;;
+esac
+
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
From 5e0d35f8db967419ea9f1234ab621b88babcf3ea Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 7 Apr 2020 12:39:24 +0200
Subject: [PATCH 2/2] crypt: force_stop check fixes
---
heartbeat/crypt | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/heartbeat/crypt b/heartbeat/crypt
index 6bffdff89..8bfa1094d 100755
--- a/heartbeat/crypt
+++ b/heartbeat/crypt
@@ -190,11 +190,7 @@ crypt_validate_all() {
ocf_exit_reason "Hash key file $key_file not accessible"
return $OCF_ERR_ARGS
fi
- if ! ocf_is_true "$force_stop" && "$force_stop" != "false" ]]; then
- ocf_exit_reason "Bogus force_stop=\"$force_stop\" attribute"
- return $OCF_ERR_CONFIGURED
- fi
- if "$force_stop" = "true" && ! have_binary lsof; then
+ if ocf_is_true "$force_stop" && ! have_binary lsof; then
ocf_exit_reason "Force stop requested, please install lsof(8)"
return $OCF_ERR_INSTALLED
fi
@@ -273,7 +269,7 @@ crypt_stop() {
crypt_monitor
rc=$?
fi
- if [ $rc -ne $OCF_NOT_RUNNING ] && ocf_is_true $FORCESTOP; then
+ if [ $rc -ne $OCF_NOT_RUNNING ] && ocf_is_true $force_stop; then
stop_crypt_users
case $? in
2) rc=$OCF_SUCCESS;;

View File

@ -1,22 +0,0 @@
From 2915fa336e95b609d3d738d335799f015022c493 Mon Sep 17 00:00:00 2001
From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
Date: Sat, 13 Jun 2020 08:47:36 +0200
Subject: [PATCH] crypt: fix bashism
---
heartbeat/crypt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/crypt b/heartbeat/crypt
index 8bfa1094d..2727b5b23 100755
--- a/heartbeat/crypt
+++ b/heartbeat/crypt
@@ -292,7 +292,7 @@ crypt_stop() {
# Action: MONITOR an encrypted resource
#
crypt_monitor() {
- cryptsetup status $crypt_dev $disable_locks &>/dev/null
+ cryptsetup status $crypt_dev $disable_locks >/dev/null 2>&1
if [ $? -eq 0 ]; then
[ -L $crypt_dev_path ] && return $OCF_SUCCESS
return $OCF_ERR_GENERIC

View File

@ -1,22 +0,0 @@
From 635c344fb85ef225b8a0c094687d2838b0b0cd2c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 26 Oct 2020 16:36:06 +0100
Subject: [PATCH] crypt: fix missing && to set exit_reason
---
heartbeat/crypt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/crypt b/heartbeat/crypt
index 2727b5b23..0e49b6c2d 100755
--- a/heartbeat/crypt
+++ b/heartbeat/crypt
@@ -251,7 +251,7 @@ crypt_start() {
else
rc=$OCF_ERR_GERNERIC
fi
- [ $rc -ne $OCF_SUCCESS ] ocf_exit_reason "Failed to start encrypted device \"$crypt_dev\""
+ [ $rc -ne $OCF_SUCCESS ] && ocf_exit_reason "Failed to start encrypted device \"$crypt_dev\""
return $rc
}

View File

@ -1,714 +0,0 @@
From 90b595650d7d8a6f6a69a9f7060c6406aa731c18 Mon Sep 17 00:00:00 2001
From: "Fabio M. Di Nitto" <fdinitto@redhat.com>
Date: Wed, 28 Jul 2021 10:08:10 +0200
Subject: [PATCH] Add storage-mon pacemaker health check
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
.gitignore | 41 ++++++
configure.ac | 1 +
doc/man/Makefile.am | 3 +-
heartbeat/Makefile.am | 17 +--
heartbeat/storage-mon.in | 263 +++++++++++++++++++++++++++++++++++++++
tools/Makefile.am | 5 +-
tools/storage_mon.c | 263 +++++++++++++++++++++++++++++++++++++++
7 files changed, 583 insertions(+), 10 deletions(-)
create mode 100644 heartbeat/storage-mon.in
create mode 100644 tools/storage_mon.c
diff --git a/.gitignore b/.gitignore
index 38d3566205..f7277bf04e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,46 @@ heartbeat/ocf-shellfuncs
heartbeat/send_ua
heartbeat/shellfuncs
heartbeat/*.pyc
+heartbeat/AoEtarget
+heartbeat/CTDB
+heartbeat/ManageRAID
+heartbeat/ManageVE
+heartbeat/Squid
+heartbeat/SysInfo
+heartbeat/aws-vpc-route53
+heartbeat/azure-events
+heartbeat/clvm
+heartbeat/conntrackd
+heartbeat/dnsupdate
+heartbeat/dummypy
+heartbeat/eDir88
+heartbeat/fio
+heartbeat/galera
+heartbeat/gcp-pd-move
+heartbeat/gcp-vpc-move-ip
+heartbeat/gcp-vpc-move-route
+heartbeat/gcp-vpc-move-vip
+heartbeat/iSCSILogicalUnit
+heartbeat/iSCSITarget
+heartbeat/jira
+heartbeat/kamailio
+heartbeat/lxc
+heartbeat/lxd-info
+heartbeat/machine-info
+heartbeat/mariadb
+heartbeat/mpathpersist
+heartbeat/nfsnotify
+heartbeat/openstack-info
+heartbeat/rabbitmq-cluster
+heartbeat/redis
+heartbeat/rsyslog
+heartbeat/sg_persist
+heartbeat/slapd
+heartbeat/smb-share
+heartbeat/storage-mon
+heartbeat/sybaseASE
+heartbeat/syslog-ng
+heartbeat/vsftpd
include/agent_config.h
include/config.h
include/config.h.in
@@ -61,6 +101,7 @@ systemd/resource-agents.conf
tools/findif
tools/ocf-tester
tools/send_arp
+tools/storage_mon
tools/tickle_tcp
tools/ocft/README
tools/ocft/README.zh_CN
diff --git a/configure.ac b/configure.ac
index 717fb95432..c125df98f6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1002,6 +1002,7 @@ AC_CONFIG_FILES([heartbeat/rsyslog], [chmod +x heartbeat/rsyslog])
AC_CONFIG_FILES([heartbeat/smb-share], [chmod +x heartbeat/smb-share])
AC_CONFIG_FILES([heartbeat/sg_persist], [chmod +x heartbeat/sg_persist])
AC_CONFIG_FILES([heartbeat/slapd], [chmod +x heartbeat/slapd])
+AC_CONFIG_FILES([heartbeat/storage-mon], [chmod +x heartbeat/storage-mon])
AC_CONFIG_FILES([heartbeat/sybaseASE], [chmod +x heartbeat/sybaseASE])
AC_CONFIG_FILES([heartbeat/syslog-ng], [chmod +x heartbeat/syslog-ng])
AC_CONFIG_FILES([heartbeat/vsftpd], [chmod +x heartbeat/vsftpd])
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 947d83cb2b..97904ccb16 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -138,6 +138,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_mariadb.7 \
ocf_heartbeat_mdraid.7 \
ocf_heartbeat_minio.7 \
+ ocf_heartbeat_mpathpersist.7 \
ocf_heartbeat_mysql.7 \
ocf_heartbeat_mysql-proxy.7 \
ocf_heartbeat_nagios.7 \
@@ -175,7 +176,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_smb-share.7 \
ocf_heartbeat_sybaseASE.7 \
ocf_heartbeat_sg_persist.7 \
- ocf_heartbeat_mpathpersist.7 \
+ ocf_heartbeat_storage-mon.7 \
ocf_heartbeat_symlink.7 \
ocf_heartbeat_syslog-ng.7 \
ocf_heartbeat_tomcat.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 9af44cc127..5d52d211f2 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -32,22 +32,22 @@ ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat
dtddir = $(datadir)/$(PACKAGE_NAME)
dtd_DATA = ra-api-1.dtd metadata.rng
+ocf_PROGRAMS =
+
if USE_IPV6ADDR_AGENT
-ocf_PROGRAMS = IPv6addr
-else
-ocf_PROGRAMS =
+ocf_PROGRAMS += IPv6addr
endif
+halib_PROGRAMS =
+
if IPV6ADDR_COMPATIBLE
-halib_PROGRAMS = send_ua
-else
-halib_PROGRAMS =
+halib_PROGRAMS += send_ua
endif
IPv6addr_SOURCES = IPv6addr.c IPv6addr_utils.c
-send_ua_SOURCES = send_ua.c IPv6addr_utils.c
-
IPv6addr_LDADD = -lplumb $(LIBNETLIBS)
+
+send_ua_SOURCES = send_ua.c IPv6addr_utils.c
send_ua_LDADD = $(LIBNETLIBS)
osp_SCRIPTS = nova-compute-wait \
@@ -170,6 +170,7 @@ ocf_SCRIPTS = AoEtarget \
mpathpersist \
slapd \
+ storage-mon \
sybaseASE \
symlink \
syslog-ng \
tomcat \
diff --git a/heartbeat/storage-mon.in b/heartbeat/storage-mon.in
new file mode 100644
index 0000000000..5b289fe554
--- /dev/null
+++ b/heartbeat/storage-mon.in
@@ -0,0 +1,263 @@
+#!@BASH_SHELL@
+#
+# Copyright (C) 2021 Red Hat, Inc. All rights reserved.
+#
+# Authors: Christine Caulfield <ccaulfie@redhat.com>
+# Fabio M. Di Nitto <fdinitto@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#
+# Checks storage I/O status of all given drives and writes the #health-storage
+# status into the CIB
+# Implementation is heavily based on ocf:pacemaker:HealtSMART
+#
+# It sends a single block on IO to a radom location on the device and reports any errors returned.
+# If the IO hangs, that will also be returned. (bear in mind tha tmay also hang the C app in some
+# instances).
+#
+# It's worth making a note in the RA description that the smartmon RA is also recommended (this
+# does not replace it), and that Pacemaker health checking should be configued.
+#
+# https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Explained/singlehtml/index.html#tracking-node-health
+
+#######################################################################
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#
+STORAGEMON=$HA_BIN/storage_mon
+ATTRDUP=/usr/sbin/attrd_updater
+
+OCF_RESKEY_CRM_meta_interval_default="0"
+OCF_RESKEY_io_timeout_default="10"
+OCF_RESKEY_inject_errors_default=""
+OCF_RESKEY_state_file_default="${HA_RSCTMP%%/}/storage-mon-${OCF_RESOURCE_INSTANCE}.state"
+
+# Explicitly list all environment variables used, to make static analysis happy
+: ${OCF_RESKEY_CRM_meta_interval:=${OCF_RESKEY_CRM_meta_interval_default}}
+: ${OCF_RESKEY_drives:=""}
+: ${OCF_RESKEY_io_timeout:=${OCF_RESKEY_io_timeout_default}}
+: ${OCF_RESKEY_inject_errors:=${OCF_RESKEY_inject_errors_default}}
+: ${OCF_RESKEY_state_file:=${OCF_RESKEY_state_file_default}}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="storage-mon">
+<version>1.0</version>
+
+<longdesc lang="en">
+System health agent that checks the storage I/O status of the given drives and
+updates the #health-storage attribute. Usage is highly recommended in combination
+with storage-mon monitoring agent. The agent currently support a maximum of 25
+devices per instance.
+</longdesc>
+<shortdesc lang="en">storage I/O health status</shortdesc>
+
+<parameters>
+
+<parameter name="state_file" unique="1">
+<longdesc lang="en">
+Location to store the resource state in.
+</longdesc>
+<shortdesc lang="en">State file</shortdesc>
+<content type="string" default="${OCF_RESKEY_state_file_default}" />
+</parameter>
+
+<parameter name="drives" unique="1" required="1">
+<longdesc lang="en">
+The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda".
+</longdesc>
+<shortdesc lang="en">Drives to check</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="io_timeout" unique="0">
+<longdesc lang="en">
+Specify disk I/O timeout in seconds. Minimum 1, recommeded 10 (default).
+</longdesc>
+<shortdesc lang="en">Disk I/O timeout</shortdesc>
+<content type="integer" default="${OCF_RESKEY_io_timeout_default}" />
+</parameter>
+
+<parameter name="inject_errors" unique="0">
+<longdesc lang="en">
+Used only for testing! Specify % of I/O errors to simulate drives failures.
+</longdesc>
+<shortdesc lang="en">Specify % of I/O errors to simulate drives failures</shortdesc>
+<content type="integer" default="${OCF_RESKEY_inject_errors_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="10s" />
+<action name="stop" timeout="120s" />
+<action name="monitor" timeout="120s" interval="30s" start-delay="0s" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="10s" />
+</actions>
+</resource-agent>
+END
+ return $OCF_SUCCESS
+}
+
+#######################################################################
+
+storage-mon_usage() {
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+ return $1
+}
+
+storage-mon_init() {
+ #Test for presence of storage_mon helper
+ if [ ! -x "$STORAGEMON" ] ; then
+ ocf_log err "${STORAGEMON} not installed."
+ exit $OCF_ERR_INSTALLED
+ fi
+
+ i=0
+ for DRIVE in ${OCF_RESKEY_drives}; do
+ if [ ! -e "$DRIVE" ] ; then
+ ocf_log err "${DRIVE} not found on the system"
+ exit $OCF_ERR_INSTALLED
+ fi
+ i=$((i + 1))
+ done
+
+ if [ "$i" -gt "25" ]; then
+ ocf_log err "Too many drives ($i) configured for this agent. Max 25."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if [ "${OCF_RESKEY_io_timeout}" -lt "1" ]; then
+ ocf_log err "Minimum timeout is 1. Recommended 10 (default)."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if [ -n "${OCF_RESKEY_inject_errors}" ]; then
+ if [ "${OCF_RESKEY_inject_errors}" -lt "1" ] || [ "${OCF_RESKEY_inject_errors}" -gt "100" ]; then
+ ocf_log err "Inject errors % has to be a value between 1 and 100."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+}
+
+storage-mon_validate() {
+ storage-mon_init
+
+ # Is the state directory writable?
+ state_dir=$(dirname "$OCF_RESKEY_state_file")
+ touch "$state_dir/$$"
+ if [ $? -ne 0 ]; then
+ return $OCF_ERR_CONFIGURED
+ fi
+ rm "$state_dir/$$"
+
+ return $OCF_SUCCESS
+}
+
+storage-mon_monitor() {
+ storage-mon_init
+
+ # Monitor _MUST!_ differentiate correctly between running
+ # (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
+ # That is THREE states, not just yes/no.
+
+ if [ ! -f "${OCF_RESKEY_state_file}" ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ # generate command line
+ cmdline=""
+ for DRIVE in ${OCF_RESKEY_drives}; do
+ cmdline="$cmdline --device $DRIVE --score 1"
+ done
+ cmdline="$cmdline --timeout ${OCF_RESKEY_io_timeout}"
+ if [ -n "${OCF_RESKEY_inject_errors}" ]; then
+ cmdline="$cmdline --inject-errors-percent ${OCF_RESKEY_inject_errors}"
+ fi
+ $STORAGEMON $cmdline
+ if [ $? -ne 0 ]; then
+ status="red"
+ else
+ status="green"
+ fi
+
+ "$ATTRDUP" -n "#health-${OCF_RESOURCE_INSTANCE}" -U "$status" -d "5s"
+ return $OCF_SUCCESS
+}
+
+storage-mon_start() {
+ storage-mon_monitor
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+ touch "${OCF_RESKEY_state_file}"
+}
+
+storage-mon_stop() {
+ storage-mon_monitor
+ if [ $? -eq $OCF_SUCCESS ]; then
+ rm "${OCF_RESKEY_state_file}"
+ fi
+ return $OCF_SUCCESS
+}
+
+storage-mon_validate() {
+ storage-mon_init
+
+ # Is the state directory writable?
+ state_dir=$(dirname "${OCF_RESKEY_state_file}")
+ touch "$state_dir/$$"
+ if [ $? -ne 0 ]; then
+ return $OCF_ERR_CONFIGURED
+ fi
+ rm "$state_dir/$$"
+
+ return $OCF_SUCCESS
+}
+
+case "$__OCF_ACTION" in
+ start) storage-mon_start;;
+ stop) storage-mon_stop;;
+ monitor) storage-mon_monitor;;
+ validate-all) storage-mon_validate;;
+ meta-data) meta_data;;
+ usage|help) storage-mon_usage $OCF_SUCCESS;;
+ *) storage-mon_usage $OCF_ERR_UNIMPLEMENTED;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
+# vim: set filetype=sh:
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 1186967cfb..83ff43651d 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -29,7 +29,8 @@ EXTRA_DIST = ocf-tester.8 sfex_init.8
sbin_PROGRAMS =
sbin_SCRIPTS = ocf-tester
-halib_PROGRAMS = findif
+halib_PROGRAMS = findif \
+ storage_mon
man8_MANS = ocf-tester.8
@@ -67,6 +68,8 @@ sfex_stat_LDADD = $(GLIBLIB) -lplumb -lplumbgpl
findif_SOURCES = findif.c
+storage_mon_SOURCES = storage_mon.c
+
if BUILD_TICKLE
halib_PROGRAMS += tickle_tcp
tickle_tcp_SOURCES = tickle_tcp.c
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
new file mode 100644
index 0000000000..7b65bb4191
--- /dev/null
+++ b/tools/storage_mon.c
@@ -0,0 +1,263 @@
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#ifdef __FreeBSD__
+#include <sys/disk.h>
+#endif
+
+#define MAX_DEVICES 25
+#define DEFAULT_TIMEOUT 10
+
+static void usage(char *name, FILE *f)
+{
+ fprintf(f, "usage: %s [-hv] [-d <device>]... [-s <score>]... [-t <secs>]\n", name);
+ fprintf(f, " --device <dev> device to test, up to %d instances\n", MAX_DEVICES);
+ fprintf(f, " --score <n> score if device fails the test. Must match --device count\n");
+ fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
+ fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
+ fprintf(f, " --verbose emit extra output to stdout\n");
+ fprintf(f, " --help print this messages\n");
+}
+
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
+{
+ uint64_t devsize;
+ int device_fd;
+ int res;
+ off_t seek_spot;
+ char buffer[512];
+
+ if (verbose) {
+ printf("Testing device %s\n", device);
+ }
+
+ device_fd = open(device, O_RDONLY);
+ if (device_fd < 0) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
+#ifdef __FreeBSD__
+ res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
+#else
+ res = ioctl(device_fd, BLKGETSIZE64, &devsize);
+#endif
+ if (res != 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ close(device_fd);
+ exit(-1);
+ }
+ if (verbose) {
+ fprintf(stderr, "%s: size=%zu\n", device, devsize);
+ }
+ /* Don't fret about real randomness */
+ srand(time(NULL) + getpid());
+ /* Pick a random place on the device - sector aligned */
+ seek_spot = (rand() % (devsize-1024)) & 0xFFFFFFFFFFFFFE00;
+ res = lseek(device_fd, seek_spot, SEEK_SET);
+ if (res < 0) {
+ fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
+ close(device_fd);
+ exit(-1);
+ }
+
+ if (verbose) {
+ printf("%s: reading from pos %ld\n", device, seek_spot);
+ }
+
+ res = read(device_fd, buffer, sizeof(buffer));
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ close(device_fd);
+ exit(-1);
+ }
+ if (res < (int)sizeof(buffer)) {
+ fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
+ close(device_fd);
+ exit(-1);
+ }
+
+ /* Fake an error */
+ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ close(device_fd);
+ exit(-1);
+ }
+ res = close(device_fd);
+ if (res != 0) {
+ fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno));
+ close(device_fd);
+ exit(-1);
+ }
+
+ if (verbose) {
+ printf("%s: done\n", device);
+ }
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ char *devices[MAX_DEVICES];
+ int scores[MAX_DEVICES];
+ pid_t test_forks[MAX_DEVICES];
+ size_t device_count = 0;
+ size_t score_count = 0;
+ size_t finished_count = 0;
+ int timeout = DEFAULT_TIMEOUT;
+ struct timespec ts;
+ time_t start_time;
+ size_t i;
+ int final_score = 0;
+ int opt, option_index;
+ int verbose = 0;
+ int inject_error_percent = 0;
+ struct option long_options[] = {
+ {"timeout", required_argument, 0, 't' },
+ {"device", required_argument, 0, 'd' },
+ {"score", required_argument, 0, 's' },
+ {"inject-errors-percent", required_argument, 0, 0 },
+ {"verbose", no_argument, 0, 'v' },
+ {"help", no_argument, 0, 'h' },
+ {0, 0, 0, 0 }
+ };
+ while ( (opt = getopt_long(argc, argv, "hvt:d:s:",
+ long_options, &option_index)) != -1 ) {
+ switch (opt) {
+ case 0: /* Long-only options */
+ if (strcmp(long_options[option_index].name, "inject-errors-percent") == 0) {
+ inject_error_percent = atoi(optarg);
+ if (inject_error_percent < 1 || inject_error_percent > 100) {
+ fprintf(stderr, "inject_error_percent should be between 1 and 100\n");
+ return -1;
+ }
+ }
+ break;
+ case 'd':
+ if (device_count < MAX_DEVICES) {
+ devices[device_count++] = strdup(optarg);
+ } else {
+ fprintf(stderr, "too many devices, max is %d\n", MAX_DEVICES);
+ return -1;
+ }
+ break;
+ case 's':
+ if (device_count < MAX_DEVICES) {
+ int score = atoi(optarg);
+ if (score < 1 || score > 10) {
+ fprintf(stderr, "Score must be between 1 and 10 inclusive\n");
+ return -1;
+ }
+ scores[score_count++] = score;
+ } else {
+ fprintf(stderr, "too many scores, max is %d\n", MAX_DEVICES);
+ return -1;
+ }
+ break;
+ case 'v':
+ verbose++;
+ break;
+ case 't':
+ timeout = atoi(optarg);
+ if (timeout < 1) {
+ fprintf(stderr, "invalid timeout %d. Min 1, recommended %d (default)\n", timeout, DEFAULT_TIMEOUT);
+ return -1;
+ }
+ break;
+ case 'h':
+ usage(argv[0], stdout);
+ break;
+ default:
+ usage(argv[0], stderr);
+ break;
+ }
+
+ }
+ if (device_count == 0) {
+ fprintf(stderr, "No devices to test, use the -d or --device argument\n");
+ return -1;
+ }
+
+ if (device_count != score_count) {
+ fprintf(stderr, "There must be the same number of devices and scores\n");
+ return -1;
+ }
+
+ openlog("storage_mon", 0, LOG_DAEMON);
+
+ memset(test_forks, 0, sizeof(test_forks));
+ for (i=0; i<device_count; i++) {
+ test_forks[i] = fork();
+ if (test_forks[i] < 0) {
+ fprintf(stderr, "Error spawning fork for %s: %s\n", devices[i], strerror(errno));
+ syslog(LOG_ERR, "Error spawning fork for %s: %s\n", devices[i], strerror(errno));
+ /* Just test the devices we have */
+ break;
+ }
+ /* child */
+ if (test_forks[i] == 0) {
+ test_device(devices[i], verbose, inject_error_percent);
+ }
+ }
+
+ /* See if they have finished */
+ clock_gettime(CLOCK_REALTIME, &ts);
+ start_time = ts.tv_sec;
+
+ while ((finished_count < device_count) && ((start_time + timeout) > ts.tv_sec)) {
+ for (i=0; i<device_count; i++) {
+ int wstatus;
+ pid_t w;
+
+ if (test_forks[i] > 0) {
+ w = waitpid(test_forks[i], &wstatus, WUNTRACED | WNOHANG | WCONTINUED);
+ if (w < 0) {
+ fprintf(stderr, "waitpid on %s failed: %s\n", devices[i], strerror(errno));
+ return -1;
+ }
+
+ if (w == test_forks[i]) {
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == 0) {
+ finished_count++;
+ test_forks[i] = 0;
+ } else {
+ syslog(LOG_ERR, "Error reading from device %s", devices[i]);
+ final_score += scores[i];
+ }
+ }
+ }
+ }
+ }
+
+ usleep(100000);
+
+ clock_gettime(CLOCK_REALTIME, &ts);
+ }
+
+ /* See which threads have not finished */
+ for (i=0; i<device_count; i++) {
+ if (test_forks[i] != 0) {
+ syslog(LOG_ERR, "Reading from device %s did not complete in %d seconds timeout", devices[i], timeout);
+ fprintf(stderr, "Thread for device %s did not complete in time\n", devices[i]);
+ final_score += scores[i];
+ }
+ }
+
+ if (verbose) {
+ printf("Final score is %d\n", final_score);
+ }
+ return final_score;
+}

View File

@ -1,285 +0,0 @@
From 8b07d095acbbb1069c1fb44142ccfdd0aeed075f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 16 May 2018 14:10:49 +0200
Subject: [PATCH] vdo-vol: new resource agent
---
doc/man/Makefile.am | 3 +-
heartbeat/Makefile.am | 1 +
heartbeat/vdo-vol | 234 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 237 insertions(+), 1 deletion(-)
create mode 100755 heartbeat/vdo-vol
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index c59126d1..8d94c10c 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -158,11 +158,12 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_slapd.7 \
ocf_heartbeat_sybaseASE.7 \
ocf_heartbeat_sg_persist.7 \
- ocf_heartbeat_mpathpersist.7 \
+ ocf_heartbeat_mpathpersist.7 \
ocf_heartbeat_symlink.7 \
ocf_heartbeat_syslog-ng.7 \
ocf_heartbeat_tomcat.7 \
ocf_heartbeat_varnish.7 \
+ ocf_heartbeat_vdo-vol.7 \
ocf_heartbeat_vmware.7 \
ocf_heartbeat_zabbixserver.7
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 4f5059e2..a68fa978 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -162,6 +162,7 @@ ocf_SCRIPTS = AoEtarget \
syslog-ng \
tomcat \
varnish \
+ vdo-vol \
vmware \
vsftpd \
zabbixserver
diff --git a/heartbeat/vdo-vol b/heartbeat/vdo-vol
new file mode 100755
index 00000000..074339db
--- /dev/null
+++ b/heartbeat/vdo-vol
@@ -0,0 +1,234 @@
+#!/bin/sh
+#
+# License: GNU General Public License (GPL)
+# (c) 2018 O. Albrigtsen
+# and Linux-HA contributors
+#
+# -----------------------------------------------------------------------------
+# O C F R E S O U R C E S C R I P T S P E C I F I C A T I O N
+# -----------------------------------------------------------------------------
+#
+# NAME
+# vdo-vol : OCF resource agent script for VDO (Virtual Data Optimizer)
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults
+OCF_RESKEY_volume_default=""
+
+: ${OCF_RESKEY_volume=${OCF_RESKEY_volume_default}}
+
+
+vdo_usage() {
+ cat <<END
+ usage: $0 (start|stop|validate-all|meta-data|help|usage|monitor)
+ $0 manages VDO (Virtual Data Optimizer) volume(s) as an OCF HA resource.
+ The 'start' operation starts the instance.
+ The 'stop' operation stops the instance.
+ The 'status' operation reports whether the instance is running
+ The 'monitor' operation reports whether the instance seems to be working
+ The 'validate-all' operation reports whether the parameters are valid
+END
+}
+
+vdo_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="vdo-vol">
+<version>0.75</version>
+
+<longdesc lang="en">
+OCF Resource script for VDO (Virtual Data Optimizer) volume(s). It manages VDO volume(s) as a HA resource.
+
+The configuration file needs to be synced to all nodes, and the systemd vdo service must be disabled when
+using this agent.
+</longdesc>
+<shortdesc lang="en">VDO resource agent</shortdesc>
+
+<parameters>
+
+<parameter name="config">
+ <longdesc lang="en">Configuration file</longdesc>
+ <shortdesc lang="en">Config file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="volume">
+ <longdesc lang="en">VDO Volume (leave empty for all)</longdesc>
+ <shortdesc lang="en">Volume (empty for all)</shortdesc>
+ <content type="string" default="${OCF_RESKEY_volume_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="60s" />
+<action name="stop" timeout="20s" />
+<action name="status" timeout="20s" />
+<action name="monitor" depth="0" timeout="20s" interval="10s" start-delay="10s" />
+<action name="validate-all" timeout="20s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+}
+
+
+rebuild() {
+ ocf_log warn "${OCF_RESKEY_volume} is in $MODE mode, starting in rebuild mode"
+
+ vdo stop $OPTIONS
+
+ while vdo_monitor skiprocheck; do
+ sleep 1
+ done
+
+ vdo start $OPTIONS --forceRebuild
+
+ while ! vdo_monitor; do
+ sleep 1
+ done
+
+ return $?
+}
+
+vdo_start() {
+ # if resource is already running,no need to continue code after this.
+ if vdo_monitor; then
+ ocf_log info "VDO volume(s): ${OCF_RESKEY_volume} is already active"
+ return $OCF_SUCCESS
+ fi
+
+ vdo activate $OPTIONS
+ vdo start $OPTIONS
+
+ while ! vdo_monitor skiprocheck; do
+ sleep 1
+ done
+
+ MODE=$(vdostats --verbose ${OCF_RESKEY_volume} | grep "operating mode" | awk '{print $NF}')
+ if [ $(echo "$MODE" | grep -v "normal" | wc -l) -gt 0 ]; then
+ rebuild
+ fi
+
+ if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_log info "VDO volume(s): ${OCF_RESKEY_volume} activated"
+ return ${OCF_SUCCESS}
+ fi
+
+ return $?
+}
+
+vdo_stop() {
+ vdo_monitor skiprocheck
+ if [ $? -ne $OCF_SUCCESS ]; then
+ # Currently not running. Nothing to do.
+ ocf_log info "VDO volume(s): ${OCF_RESKEY_volume} already deactivated"
+
+ return $OCF_SUCCESS
+ fi
+
+ vdo stop $OPTIONS
+ vdo deactivate $OPTIONS
+
+ # Wait for process to stop
+ while vdo_monitor skiprocheck; do
+ sleep 1
+ done
+
+ return $OCF_SUCCESS
+}
+
+vdo_monitor(){
+ status=$(vdo status $OPTIONS 2>&1)
+ MODE=$(vdostats vdo_vol --verbose | grep "operating mode" | awk '{print $NF}')
+
+ case "$status" in
+ *"Device mapper status: not available"*)
+ return $OCF_NOT_RUNNING
+ ;;
+ *"Device mapper status: "*online*)
+ if [ "$MODE" = "read-only" ] && [ "$1" != "skiprocheck" ]; then
+ ocf_log err "VDO volume(s): ${OCF_RESKEY_volume} is in $MODE mode."
+ return $OCF_ERR_GENERIC
+ else
+ return $OCF_SUCCESS
+ fi
+ ;;
+ *)
+ ocf_log err "VDO volume(s): ${OCF_RESKEY_volume} failed\n$status"
+ return $OCF_ERR_GENERIC;;
+ esac
+}
+
+vdo_validate_all(){
+ check_binary "vdo"
+
+ if systemctl is-enabled vdo > /dev/null 2>&1; then
+ ocf_exit_reason "systemd service vdo needs to be disabled"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if [ -n "${OCF_RESKEY_config}" ] && [ ! -f "${OCF_RESKEY_config}" ]; then
+ ocf_exit_reason "Configuration file: ${OCF_RESKEY_config} not found"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+
+# **************************** MAIN SCRIPT ************************************
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data)
+ vdo_meta_data
+ exit $OCF_SUCCESS
+ ;;
+ usage|help)
+ vdo_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# This OCF agent script need to be run as root user.
+if ! ocf_is_root; then
+ echo "$0 agent script need to be run as root user."
+ ocf_log debug "$0 agent script need to be run as root user."
+ exit $OCF_ERR_GENERIC
+fi
+
+if [ -z "${OCF_RESKEY_volume}" ]; then
+ OPTIONS="-a"
+else
+ OPTIONS="-n ${OCF_RESKEY_volume}"
+fi
+
+if [ -n "${OCF_RESKEY_config}" ]; then
+ OPTIONS="$OPTIONS -f ${OCF_RESKEY_config}"
+fi
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+ start)
+ vdo_validate_all
+ vdo_start;;
+ stop)
+ vdo_stop;;
+ status|monitor)
+ vdo_monitor;;
+ validate-all)
+ ;;
+ *)
+ vdo_usage
+ exit $OCF_ERR_UNIMPLEMENTED;;
+esac
+
+exit $?
+
+# End of this script
--
2.17.1

View File

@ -1,538 +0,0 @@
From 07d283a6e20b8e559257c9694f7e36e155075014 Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Sun, 22 Jul 2018 17:54:29 +0200
Subject: [PATCH] Initial podman support
Tested with the following container:
podman container set: test_bundle [docker.io/sdelrio/docker-minimal-nginx]
test_bundle-podman-0 (ocf::heartbeat:podman): Started nodea
test_bundle-podman-1 (ocf::heartbeat:podman): Started nodeb
test_bundle-podman-2 (ocf::heartbeat:podman): Started nodec
Tested a couple of stop/start cycles successfully. Needs the
corresponding pacemaker support https://github.com/ClusterLabs/pacemaker/pull/1564
---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/podman | 488 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 490 insertions(+)
create mode 100755 heartbeat/podman
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 145e5fd50..0bef88740 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -151,6 +151,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_pgagent.7 \
ocf_heartbeat_pgsql.7 \
ocf_heartbeat_pingd.7 \
+ ocf_heartbeat_podman.7 \
ocf_heartbeat_portblock.7 \
ocf_heartbeat_postfix.7 \
ocf_heartbeat_pound.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index e7a3a4fac..993bff042 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -146,6 +146,7 @@ ocf_SCRIPTS = AoEtarget \
pgagent \
pgsql \
pingd \
+ podman \
portblock \
postfix \
pound \
diff --git a/heartbeat/podman b/heartbeat/podman
new file mode 100755
index 000000000..88475f1df
--- /dev/null
+++ b/heartbeat/podman
@@ -0,0 +1,488 @@
+#!/bin/sh
+#
+# The podman HA resource agent creates and launches a podman container
+# based off a supplied podman image. Containers managed by this agent
+# are both created and removed upon the agent's start and stop actions.
+#
+# Copyright (c) 2014 David Vossel <davidvossel@gmail.com>
+# Michele Baldessari <michele@acksyn.org>
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+meta_data()
+{
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="podman">
+<version>1.0</version>
+
+<longdesc lang="en">
+The podman HA resource agent creates and launches a podman container
+based off a supplied podman image. Containers managed by this agent
+are both created and removed upon the agent's start and stop actions.
+</longdesc>
+<shortdesc lang="en">Podman container resource agent.</shortdesc>
+
+<parameters>
+<parameter name="image" required="1" unique="0">
+<longdesc lang="en">
+The podman image to base this container off of.
+</longdesc>
+<shortdesc lang="en">podman image</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="name" required="0" unique="0">
+<longdesc lang="en">
+The name to give the created container. By default this will
+be that resource's instance name.
+</longdesc>
+<shortdesc lang="en">podman container name</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="allow_pull" unique="0">
+<longdesc lang="en">
+Allow the image to be pulled from the configured podman registry when
+the image does not exist locally. NOTE, this can drastically increase
+the time required to start the container if the image repository is
+pulled over the network.
+</longdesc>
+<shortdesc lang="en">Allow pulling non-local images</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+<parameter name="run_opts" required="0" unique="0">
+<longdesc lang="en">
+Add options to be appended to the 'podman run' command which is used
+when creating the container during the start action. This option allows
+users to do things such as setting a custom entry point and injecting
+environment variables into the newly created container. Note the '-d'
+option is supplied regardless of this value to force containers to run
+in the background.
+
+NOTE: Do not explicitly specify the --name argument in the run_opts. This
+agent will set --name using either the resource's instance or the name
+provided in the 'name' argument of this agent.
+
+</longdesc>
+<shortdesc lang="en">run options</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="run_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specify a command to launch within the container once
+it has initialized.
+</longdesc>
+<shortdesc lang="en">run command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="mount_points" required="0" unique="0">
+<longdesc lang="en">
+A comma separated list of directories that the container is expecting to use.
+The agent will ensure they exist by running 'mkdir -p'
+</longdesc>
+<shortdesc lang="en">Required mount points</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="monitor_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specify the full path of a command to launch within the container to check
+the health of the container. This command must return 0 to indicate that
+the container is healthy. A non-zero return code will indicate that the
+container has failed and should be recovered.
+
+If 'podman exec' is supported, it is used to execute the command. If not,
+nsenter is used.
+
+Note: Using this method for monitoring processes inside a container
+is not recommended, as containerd tries to track processes running
+inside the container and does not deal well with many short-lived
+processes being spawned. Ensure that your container monitors its
+own processes and terminates on fatal error rather than invoking
+a command from the outside.
+</longdesc>
+<shortdesc lang="en">monitor command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="force_kill" required="0" unique="0">
+<longdesc lang="en">
+Kill a container immediately rather than waiting for it to gracefully
+shutdown
+</longdesc>
+<shortdesc lang="en">force kill</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+<parameter name="reuse" required="0" unique="0">
+<longdesc lang="en">
+Allow the container to be reused after stopping the container. By default
+containers are removed after stop. With the reuse option containers
+will persist after the container stops.
+</longdesc>
+<shortdesc lang="en">reuse container</shortdesc>
+<content type="boolean"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="90s" />
+<action name="stop" timeout="90s" />
+<action name="monitor" timeout="30s" interval="30s" depth="0" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="30s" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+REQUIRE_IMAGE_PULL=0
+
+podman_usage()
+{
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+
+monitor_cmd_exec()
+{
+ local rc=$OCF_SUCCESS
+ local out
+
+ if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
+ return $rc
+ fi
+
+ if podman exec --help >/dev/null 2>&1; then
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
+ else
+ out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(podman inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
+ rc=$?
+ fi
+
+ if [ $rc -eq 127 ]; then
+ ocf_log err "monitor cmd failed (rc=$rc), output: $out"
+ ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
+ # there is no recovering from this, exit immediately
+ exit $OCF_ERR_ARGS
+ elif [ $rc -ne 0 ]; then
+ ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out"
+ rc=$OCF_ERR_GENERIC
+ else
+ ocf_log debug "monitor cmd passed: exit code = $rc"
+ fi
+
+ return $rc
+}
+
+container_exists()
+{
+ podman inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
+}
+
+remove_container()
+{
+ if ocf_is_true "$OCF_RESKEY_reuse"; then
+ # never remove the container if we have reuse enabled.
+ return 0
+ fi
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ # don't attempt to remove a container that doesn't exist
+ return 0
+ fi
+ ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
+ ocf_run podman rm $CONTAINER
+}
+
+podman_simple_status()
+{
+ local val
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ # retrieve the 'Running' attribute for the container
+ val=$(podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
+ if [ $? -ne 0 ]; then
+ #not running as a result of container not being found
+ return $OCF_NOT_RUNNING
+ fi
+
+ if ocf_is_true "$val"; then
+ # container exists and is running
+ return $OCF_SUCCESS
+ fi
+
+ return $OCF_NOT_RUNNING
+}
+
+podman_monitor()
+{
+ local rc=0
+
+ podman_simple_status
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+ monitor_cmd_exec
+}
+
+podman_create_mounts() {
+ oldIFS="$IFS"
+ IFS=","
+ for directory in $OCF_RESKEY_mount_points; do
+ mkdir -p "$directory"
+ done
+ IFS="$oldIFS"
+}
+
+podman_start()
+{
+ podman_create_mounts
+ local run_opts="-d --name=${CONTAINER}"
+ # check to see if the container has already started
+ podman_simple_status
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+
+ if [ -n "$OCF_RESKEY_run_opts" ]; then
+ run_opts="$run_opts $OCF_RESKEY_run_opts"
+ fi
+
+ if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
+ ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
+ podman pull "${OCF_RESKEY_image}"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
+ ocf_log info "starting existing container $CONTAINER."
+ ocf_run podman start $CONTAINER
+ else
+ # make sure any previous container matching our container name is cleaned up first.
+ # we already know at this point it wouldn't be running
+ remove_container
+ ocf_log info "running container $CONTAINER for the first time"
+ ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
+ fi
+
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "podman failed to launch container"
+ return $OCF_ERR_GENERIC
+ fi
+
+
+ # wait for monitor to pass before declaring that the container is started
+ while true; do
+ podman_simple_status
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_exit_reason "Newly created podman container exited after start"
+ return $OCF_ERR_GENERIC
+ fi
+
+ monitor_cmd_exec
+ if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_log notice "Container $CONTAINER started successfully"
+ return $OCF_SUCCESS
+ fi
+
+ ocf_exit_reason "waiting on monitor_cmd to pass after start"
+ sleep 1
+ done
+}
+
+podman_stop()
+{
+ local timeout=60
+ podman_simple_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ remove_container
+ return $OCF_SUCCESS
+ fi
+
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+ timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 ))
+ if [ $timeout -lt 10 ]; then
+ timeout=10
+ fi
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_force_kill"; then
+ ocf_run podman kill $CONTAINER
+ else
+ ocf_log debug "waiting $timeout second[s] before killing container"
+ ocf_run podman stop -t=$timeout $CONTAINER
+ fi
+
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ remove_container
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+image_exists()
+{
+ # if no tag was specified, use default "latest"
+ local COLON_FOUND=0
+ local SLASH_FOUND=0
+ local SERVER_NAME=""
+ local IMAGE_NAME="${OCF_RESKEY_image}"
+ local IMAGE_TAG="latest"
+
+ SLASH_FOUND="$(echo "${OCF_RESKEY_image}" | grep -o '/' | grep -c .)"
+
+ if [ ${SLASH_FOUND} -ge 1 ]; then
+ SERVER_NAME="$(echo ${IMAGE_NAME} | cut -d / -f 1-${SLASH_FOUND})"
+ IMAGE_NAME="$(echo ${IMAGE_NAME} | awk -F'/' '{print $NF}')"
+ fi
+
+ COLON_FOUND="$(echo "${IMAGE_NAME}" | grep -o ':' | grep -c .)"
+ if [ ${COLON_FOUND} -ge 1 ]; then
+ IMAGE_TAG="$(echo ${IMAGE_NAME} | awk -F':' '{print $NF}')"
+ IMAGE_NAME="$(echo ${IMAGE_NAME} | cut -d : -f 1-${COLON_FOUND})"
+ fi
+
+ # IMAGE_NAME might be following formats:
+ # - image
+ # - repository:port/image
+ # - docker.io/image (some distro will display "docker.io/" as prefix)
+ podman images | awk '{print $1 ":" $2}' | egrep -q -s "^(docker.io\/|${SERVER_NAME}\/)?${IMAGE_NAME}:${IMAGE_TAG}\$"
+ if [ $? -eq 0 ]; then
+ # image found
+ return 0
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_allow_pull"; then
+ REQUIRE_IMAGE_PULL=1
+ ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
+ return 0
+ fi
+ # image not found.
+ return 1
+}
+
+podman_validate()
+{
+ check_binary podman
+ if [ -z "$OCF_RESKEY_image" ]; then
+ ocf_exit_reason "'image' option is required"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
+ podman exec --help >/dev/null 2>&1
+ if [ ! $? ]; then
+ ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
+ check_binary nsenter
+ fi
+ fi
+
+ image_exists
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# TODO :
+# When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters.
+# When a user appoints reuse, the resource agent cannot connect plural clones with a container.
+
+if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
+ if [ -n "$OCF_RESKEY_name" ]; then
+ if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural clones from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural master from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+ : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`}
+else
+ : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
+fi
+
+CONTAINER=$OCF_RESKEY_name
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS;;
+start)
+ podman_validate
+ podman_start;;
+stop) podman_stop;;
+monitor) podman_monitor;;
+validate-all) podman_validate;;
+usage|help) podman_usage
+ exit $OCF_SUCCESS
+ ;;
+*) podman_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc

View File

@ -1,48 +0,0 @@
From c70924b69af760ec3762b01594afb6ff82c3820c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 19 Sep 2018 16:13:43 +0200
Subject: [PATCH] systemd-tmpfiles: configure path with --with-rsctmpdir
---
configure.ac | 3 ++-
systemd/resource-agents.conf | 1 -
systemd/resource-agents.conf.in | 1 +
3 files changed, 3 insertions(+), 2 deletions(-)
delete mode 100644 systemd/resource-agents.conf
create mode 100644 systemd/resource-agents.conf.in
diff --git a/configure.ac b/configure.ac
index b7ffb99f3..e34d125e9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -431,7 +431,7 @@ AC_SUBST(HA_VARRUNDIR)
# Expand $prefix
eval HA_RSCTMPDIR="`eval echo ${HA_RSCTMPDIR}`"
-AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resouce agents keep state files)
+AC_DEFINE_UNQUOTED(HA_RSCTMPDIR,"$HA_RSCTMPDIR", Where Resource agents keep state files)
AC_SUBST(HA_RSCTMPDIR)
dnl Eventually move out of the heartbeat dir tree and create symlinks when needed
@@ -911,6 +911,7 @@ heartbeat/Makefile \
heartbeat/ocf-shellfuncs \
heartbeat/shellfuncs \
systemd/Makefile \
+ systemd/resource-agents.conf \
tools/Makefile \
tools/ocf-tester \
tools/ocft/Makefile \
diff --git a/systemd/resource-agents.conf b/systemd/resource-agents.conf
deleted file mode 100644
index 1cb129c18..000000000
--- a/systemd/resource-agents.conf
+++ /dev/null
@@ -1 +0,0 @@
-d /var/run/resource-agents/ 1755 root root
diff --git a/systemd/resource-agents.conf.in b/systemd/resource-agents.conf.in
new file mode 100644
index 000000000..7bd157ec0
--- /dev/null
+++ b/systemd/resource-agents.conf.in
@@ -0,0 +1 @@
+d @HA_RSCTMPDIR@ 1755 root root

View File

@ -1,425 +0,0 @@
From dedf420b8aa7e7e64fa56eeda2d7aeb5b2a5fcd9 Mon Sep 17 00:00:00 2001
From: Gustavo Serra Scalet <gustavo.scalet@collabora.com>
Date: Mon, 17 Sep 2018 12:29:51 -0300
Subject: [PATCH] Add gcp-pd-move python script
---
configure.ac | 1 +
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/gcp-pd-move.in | 370 +++++++++++++++++++++++++++++++++++++++
4 files changed, 373 insertions(+)
create mode 100755 heartbeat/gcp-pd-move.in
diff --git a/configure.ac b/configure.ac
index 10f5314da..b7ffb99f3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -958,6 +958,7 @@ AC_CONFIG_FILES([heartbeat/conntrackd], [chmod +x heartbeat/conntrackd])
AC_CONFIG_FILES([heartbeat/dnsupdate], [chmod +x heartbeat/dnsupdate])
AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88])
AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio])
+AC_CONFIG_FILES([heartbeat/gcp-pd-move], [chmod +x heartbeat/gcp-pd-move])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip])
AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route])
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 0bef88740..0235c9af6 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -115,6 +115,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_fio.7 \
ocf_heartbeat_galera.7 \
ocf_heartbeat_garbd.7 \
+ ocf_heartbeat_gcp-pd-move.7 \
ocf_heartbeat_gcp-vpc-move-ip.7 \
ocf_heartbeat_gcp-vpc-move-vip.7 \
ocf_heartbeat_gcp-vpc-move-route.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 993bff042..843186c98 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -111,6 +111,7 @@ ocf_SCRIPTS = AoEtarget \
fio \
galera \
garbd \
+ gcp-pd-move \
gcp-vpc-move-ip \
gcp-vpc-move-vip \
gcp-vpc-move-route \
diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in
new file mode 100755
index 000000000..f9f6c3163
--- /dev/null
+++ b/heartbeat/gcp-pd-move.in
@@ -0,0 +1,370 @@
+#!@PYTHON@ -tt
+# - *- coding: utf- 8 - *-
+#
+# ---------------------------------------------------------------------
+# Copyright 2018 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ---------------------------------------------------------------------
+# Description: Google Cloud Platform - Disk attach
+# ---------------------------------------------------------------------
+
+import json
+import logging
+import os
+import re
+import sys
+import time
+
+OCF_FUNCTIONS_DIR = "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")
+sys.path.append(OCF_FUNCTIONS_DIR)
+
+import ocf
+
+try:
+ import googleapiclient.discovery
+except ImportError:
+ pass
+
+if sys.version_info >= (3, 0):
+ # Python 3 imports.
+ import urllib.parse as urlparse
+ import urllib.request as urlrequest
+else:
+ # Python 2 imports.
+ import urllib as urlparse
+ import urllib2 as urlrequest
+
+
+CONN = None
+PROJECT = None
+ZONE = None
+REGION = None
+LIST_DISK_ATTACHED_INSTANCES = None
+INSTANCE_NAME = None
+
+PARAMETERS = {
+ 'disk_name': None,
+ 'disk_scope': None,
+ 'disk_csek_file': None,
+ 'mode': None,
+ 'device_name': None,
+}
+
+MANDATORY_PARAMETERS = ['disk_name', 'disk_scope']
+
+METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
+METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
+METADATA = '''<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="gcp-pd-move">
+<version>1.0</version>
+<longdesc lang="en">
+Resource Agent that can attach or detach a regional/zonal disk on current GCP
+instance.
+Requirements :
+- Disk has to be properly created as regional/zonal in order to be used
+correctly.
+</longdesc>
+<shortdesc lang="en">Attach/Detach a persistent disk on current GCP instance</shortdesc>
+<parameters>
+<parameter name="disk_name" unique="1" required="1">
+<longdesc lang="en">The name of the GCP disk.</longdesc>
+<shortdesc lang="en">Disk name</shortdesc>
+<content type="string" default="" />
+</parameter>
+<parameter name="disk_scope" unique="1" required="1">
+<longdesc lang="en">Disk scope </longdesc>
+<shortdesc lang="en">Network name</shortdesc>
+<content type="string" default="regional" />
+</parameter>
+<parameter name="disk_csek_file" unique="1" required="0">
+<longdesc lang="en">Path to a Customer-Supplied Encryption Key (CSEK) key file</longdesc>
+<shortdesc lang="en">Customer-Supplied Encryption Key file</shortdesc>
+<content type="string" default="" />
+</parameter>
+<parameter name="mode" unique="1" required="0">
+<longdesc lang="en">Attachment mode (rw, ro)</longdesc>
+<shortdesc lang="en">Attachment mode</shortdesc>
+<content type="string" default="rw" />
+</parameter>
+<parameter name="device_name" unique="0" required="0">
+<longdesc lang="en">An optional name that indicates the disk name the guest operating system will see.</longdesc>
+<shortdesc lang="en">Optional device name</shortdesc>
+<content type="boolean" default="" />
+</parameter>
+</parameters>
+<actions>
+<action name="start" timeout="300s" />
+<action name="stop" timeout="15s" />
+<action name="monitor" timeout="15s" interval="10s" depth="0" />
+<action name="meta-data" timeout="5s" />
+</actions>
+</resource-agent>'''
+
+
+def get_metadata(metadata_key, params=None, timeout=None):
+ """Performs a GET request with the metadata headers.
+
+ Args:
+ metadata_key: string, the metadata to perform a GET request on.
+ params: dictionary, the query parameters in the GET request.
+ timeout: int, timeout in seconds for metadata requests.
+
+ Returns:
+ HTTP response from the GET request.
+
+ Raises:
+ urlerror.HTTPError: raises when the GET request fails.
+ """
+ timeout = timeout or 60
+ metadata_url = os.path.join(METADATA_SERVER, metadata_key)
+ params = urlparse.urlencode(params or {})
+ url = '%s?%s' % (metadata_url, params)
+ request = urlrequest.Request(url, headers=METADATA_HEADERS)
+ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
+ return request_opener.open(request, timeout=timeout * 1.1).read()
+
+
+def populate_vars():
+ global CONN
+ global INSTANCE_NAME
+ global PROJECT
+ global ZONE
+ global REGION
+ global LIST_DISK_ATTACHED_INSTANCES
+
+ global PARAMETERS
+
+ # Populate global vars
+ try:
+ CONN = googleapiclient.discovery.build('compute', 'v1')
+ except Exception as e:
+ logger.error('Couldn\'t connect with google api: ' + str(e))
+ sys.exit(ocf.OCF_ERR_CONFIGURED)
+
+ for param in PARAMETERS:
+ value = os.environ.get('OCF_RESKEY_%s' % param, None)
+ if not value and param in MANDATORY_PARAMETERS:
+ logger.error('Missing %s mandatory parameter' % param)
+ sys.exit(ocf.OCF_ERR_CONFIGURED)
+ PARAMETERS[param] = value
+
+ try:
+ INSTANCE_NAME = get_metadata('instance/name')
+ except Exception as e:
+ logger.error(
+ 'Couldn\'t get instance name, is this running inside GCE?: ' + str(e))
+ sys.exit(ocf.OCF_ERR_CONFIGURED)
+
+ PROJECT = get_metadata('project/project-id')
+ ZONE = get_metadata('instance/zone').split('/')[-1]
+ REGION = ZONE[:-2]
+ LIST_DISK_ATTACHED_INSTANCES = get_disk_attached_instances(
+ PARAMETERS['disk_name'])
+
+
+def configure_logs():
+ # Prepare logging
+ global logger
+ logging.getLogger('googleapiclient').setLevel(logging.WARN)
+ logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging')
+ if logging_env:
+ logging_env = logging_env.lower()
+ if any(x in logging_env for x in ['yes', 'true', 'enabled']):
+ try:
+ import google.cloud.logging.handlers
+ client = google.cloud.logging.Client()
+ handler = google.cloud.logging.handlers.CloudLoggingHandler(
+ client, name=INSTANCE_NAME)
+ handler.setLevel(logging.INFO)
+ formatter = logging.Formatter('gcp:alias "%(message)s"')
+ handler.setFormatter(formatter)
+ ocf.log.addHandler(handler)
+ logger = logging.LoggerAdapter(
+ ocf.log, {'OCF_RESOURCE_INSTANCE': ocf.OCF_RESOURCE_INSTANCE})
+ except ImportError:
+ logger.error('Couldn\'t import google.cloud.logging, '
+ 'disabling Stackdriver-logging support')
+
+
+def wait_for_operation(operation):
+ while True:
+ result = CONN.zoneOperations().get(
+ project=PROJECT,
+ zone=ZONE,
+ operation=operation['name']).execute()
+
+ if result['status'] == 'DONE':
+ if 'error' in result:
+ raise Exception(result['error'])
+ return
+ time.sleep(1)
+
+
+def get_disk_attached_instances(disk):
+ def get_users_list():
+ fl = 'name="%s"' % disk
+ request = CONN.disks().aggregatedList(project=PROJECT, filter=fl)
+ while request is not None:
+ response = request.execute()
+ locations = response.get('items', {})
+ for location in locations.values():
+ for d in location.get('disks', []):
+ if d['name'] == disk:
+ return d.get('users', [])
+ request = CONN.instances().aggregatedList_next(
+ previous_request=request, previous_response=response)
+ raise Exception("Unable to find disk %s" % disk)
+
+ def get_only_instance_name(user):
+ return re.sub('.*/instances/', '', user)
+
+ return map(get_only_instance_name, get_users_list())
+
+
+def is_disk_attached(instance):
+ return instance in LIST_DISK_ATTACHED_INSTANCES
+
+
+def detach_disk(instance, disk_name):
+ # Python API misses disk-scope argument.
+
+ # Detaching a disk is only possible by using deviceName, which is retrieved
+ # as a disk parameter when listing the instance information
+ request = CONN.instances().get(
+ project=PROJECT, zone=ZONE, instance=instance)
+ response = request.execute()
+
+ device_name = None
+ for disk in response['disks']:
+ if disk_name in disk['source']:
+ device_name = disk['deviceName']
+ break
+
+ if not device_name:
+ logger.error("Didn't find %(d)s deviceName attached to %(i)s" % {
+ 'd': disk_name,
+ 'i': instance,
+ })
+ return
+
+ request = CONN.instances().detachDisk(
+ project=PROJECT, zone=ZONE, instance=instance, deviceName=device_name)
+ wait_for_operation(request.execute())
+
+
+def attach_disk(instance, disk_name):
+ location = 'zones/%s' % ZONE
+ if PARAMETERS['disk_scope'] == 'regional':
+ location = 'regions/%s' % REGION
+ prefix = 'https://www.googleapis.com/compute/v1'
+ body = {
+ 'source': '%(prefix)s/projects/%(project)s/%(location)s/disks/%(disk)s' % {
+ 'prefix': prefix,
+ 'project': PROJECT,
+ 'location': location,
+ 'disk': disk_name,
+ },
+ }
+
+ # Customer-Supplied Encryption Key (CSEK)
+ if PARAMETERS['disk_csek_file']:
+ with open(PARAMETERS['disk_csek_file']) as csek_file:
+ body['diskEncryptionKey'] = {
+ 'rawKey': csek_file.read(),
+ }
+
+ if PARAMETERS['device_name']:
+ body['deviceName'] = PARAMETERS['device_name']
+
+ if PARAMETERS['mode']:
+ body['mode'] = PARAMETERS['mode']
+
+ force_attach = None
+ if PARAMETERS['disk_scope'] == 'regional':
+ # Python API misses disk-scope argument.
+ force_attach = True
+ else:
+ # If this disk is attached to some instance, detach it first.
+ for other_instance in LIST_DISK_ATTACHED_INSTANCES:
+ logger.info("Detaching disk %(disk_name)s from other instance %(i)s" % {
+ 'disk_name': PARAMETERS['disk_name'],
+ 'i': other_instance,
+ })
+ detach_disk(other_instance, PARAMETERS['disk_name'])
+
+ request = CONN.instances().attachDisk(
+ project=PROJECT, zone=ZONE, instance=instance, body=body,
+ forceAttach=force_attach)
+ wait_for_operation(request.execute())
+
+
+def fetch_data():
+ configure_logs()
+ populate_vars()
+
+
+def gcp_pd_move_start():
+ fetch_data()
+ if not is_disk_attached(INSTANCE_NAME):
+ logger.info("Attaching disk %(disk_name)s to %(instance)s" % {
+ 'disk_name': PARAMETERS['disk_name'],
+ 'instance': INSTANCE_NAME,
+ })
+ attach_disk(INSTANCE_NAME, PARAMETERS['disk_name'])
+
+
+def gcp_pd_move_stop():
+ fetch_data()
+ if is_disk_attached(INSTANCE_NAME):
+ logger.info("Detaching disk %(disk_name)s to %(instance)s" % {
+ 'disk_name': PARAMETERS['disk_name'],
+ 'instance': INSTANCE_NAME,
+ })
+ detach_disk(INSTANCE_NAME, PARAMETERS['disk_name'])
+
+
+def gcp_pd_move_status():
+ fetch_data()
+ if is_disk_attached(INSTANCE_NAME):
+ logger.info("Disk %(disk_name)s is correctly attached to %(instance)s" % {
+ 'disk_name': PARAMETERS['disk_name'],
+ 'instance': INSTANCE_NAME,
+ })
+ else:
+ sys.exit(ocf.OCF_NOT_RUNNING)
+
+
+def main():
+ if len(sys.argv) < 2:
+ logger.error('Missing argument')
+ return
+
+ command = sys.argv[1]
+ if 'meta-data' in command:
+ print(METADATA)
+ return
+
+ if command in 'start':
+ gcp_pd_move_start()
+ elif command in 'stop':
+ gcp_pd_move_stop()
+ elif command in ('monitor', 'status'):
+ gcp_pd_move_status()
+ else:
+ configure_logs()
+ logger.error('no such function %s' % str(command))
+
+
+if __name__ == "__main__":
+ main()

View File

@ -1,18 +0,0 @@
commit cbe0e6507992b50afbaebc46dfaf8955cc02e5ec
Author: Oyvind Albrigtsen <oalbrigt@redhat.com>
Python agents: use OCF_FUNCTIONS_DIR env variable when available
diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in
index f9f6c316..c5007a43 100755
--- a/heartbeat/gcp-pd-move.in
+++ b/heartbeat/gcp-pd-move.in
@@ -25,7 +25,7 @@ import re
import sys
import time
-OCF_FUNCTIONS_DIR = "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")
+OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT"))
sys.path.append(OCF_FUNCTIONS_DIR)
import ocf

View File

@ -1,48 +0,0 @@
From 4fa41a1d7b4bee31526649c40cc4c58bc6333917 Mon Sep 17 00:00:00 2001
From: masaki-tamura <masaki-tamura@kccs.co.jp>
Date: Wed, 2 Oct 2019 17:12:42 +0900
Subject: [PATCH 1/2] add parameter stackdriver_logging
---
heartbeat/gcp-pd-move.in | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in
index c5007a43c..fac5c9744 100755
--- a/heartbeat/gcp-pd-move.in
+++ b/heartbeat/gcp-pd-move.in
@@ -102,6 +102,11 @@ correctly.
<shortdesc lang="en">Optional device name</shortdesc>
<content type="boolean" default="" />
</parameter>
+<parameter name="stackdriver_logging" unique="1" required="0">
+<longdesc lang="en">Use stackdriver_logging output to global resource (yes, true, enabled)</longdesc>
+<shortdesc lang="en">Use stackdriver_logging</shortdesc>
+<content type="string" default="yes" />
+</parameter>
</parameters>
<actions>
<action name="start" timeout="300s" />
From f762ce3da00e1775587a04751a8828ba004fb534 Mon Sep 17 00:00:00 2001
From: masaki-tamura <masaki-tamura@kccs.co.jp>
Date: Wed, 2 Oct 2019 17:44:30 +0900
Subject: [PATCH 2/2] defautl no
---
heartbeat/gcp-pd-move.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in
index fac5c9744..7fabc80dc 100755
--- a/heartbeat/gcp-pd-move.in
+++ b/heartbeat/gcp-pd-move.in
@@ -105,7 +105,7 @@ correctly.
<parameter name="stackdriver_logging" unique="1" required="0">
<longdesc lang="en">Use stackdriver_logging output to global resource (yes, true, enabled)</longdesc>
<shortdesc lang="en">Use stackdriver_logging</shortdesc>
-<content type="string" default="yes" />
+<content type="string" default="no" />
</parameter>
</parameters>
<actions>

View File

@ -1,176 +0,0 @@
From 9dedf4d4ad3a94e4ce75e0f29ffdd018e3709ae3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 28 May 2020 11:39:20 +0200
Subject: [PATCH] gcp-pd-move: fixes and improvements
- Fixed Python 3 encoding issue
- Improved metadata
- Change monitor loglevel to debug
- Removed "regional" functionality that doesnt work with attachDisk()
- Updated rw/ro to READ_WRITE/READ_ONLY in metadata/default value
---
heartbeat/gcp-pd-move.in | 63 ++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 31 deletions(-)
mode change 100755 => 100644 heartbeat/gcp-pd-move.in
diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in
old mode 100755
new mode 100644
index 7fabc80dc..f82bd25e5
--- a/heartbeat/gcp-pd-move.in
+++ b/heartbeat/gcp-pd-move.in
@@ -29,6 +29,7 @@ OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.
sys.path.append(OCF_FUNCTIONS_DIR)
import ocf
+from ocf import logger
try:
import googleapiclient.discovery
@@ -48,16 +49,16 @@ else:
CONN = None
PROJECT = None
ZONE = None
-REGION = None
LIST_DISK_ATTACHED_INSTANCES = None
INSTANCE_NAME = None
PARAMETERS = {
- 'disk_name': None,
- 'disk_scope': None,
- 'disk_csek_file': None,
- 'mode': None,
- 'device_name': None,
+ 'disk_name': '',
+ 'disk_scope': 'detect',
+ 'disk_csek_file': '',
+ 'mode': "READ_WRITE",
+ 'device_name': '',
+ 'stackdriver_logging': 'no',
}
MANDATORY_PARAMETERS = ['disk_name', 'disk_scope']
@@ -80,32 +81,32 @@ correctly.
<parameter name="disk_name" unique="1" required="1">
<longdesc lang="en">The name of the GCP disk.</longdesc>
<shortdesc lang="en">Disk name</shortdesc>
-<content type="string" default="" />
+<content type="string" default="{}" />
</parameter>
-<parameter name="disk_scope" unique="1" required="1">
-<longdesc lang="en">Disk scope </longdesc>
+<parameter name="disk_scope">
+<longdesc lang="en">Disk scope</longdesc>
<shortdesc lang="en">Network name</shortdesc>
-<content type="string" default="regional" />
+<content type="string" default="{}" />
</parameter>
-<parameter name="disk_csek_file" unique="1" required="0">
+<parameter name="disk_csek_file">
<longdesc lang="en">Path to a Customer-Supplied Encryption Key (CSEK) key file</longdesc>
<shortdesc lang="en">Customer-Supplied Encryption Key file</shortdesc>
-<content type="string" default="" />
+<content type="string" default="{}" />
</parameter>
-<parameter name="mode" unique="1" required="0">
-<longdesc lang="en">Attachment mode (rw, ro)</longdesc>
+<parameter name="mode">
+<longdesc lang="en">Attachment mode (READ_WRITE, READ_ONLY)</longdesc>
<shortdesc lang="en">Attachment mode</shortdesc>
-<content type="string" default="rw" />
+<content type="string" default="{}" />
</parameter>
-<parameter name="device_name" unique="0" required="0">
+<parameter name="device_name">
<longdesc lang="en">An optional name that indicates the disk name the guest operating system will see.</longdesc>
<shortdesc lang="en">Optional device name</shortdesc>
-<content type="boolean" default="" />
+<content type="boolean" default="{}" />
</parameter>
-<parameter name="stackdriver_logging" unique="1" required="0">
+<parameter name="stackdriver_logging">
<longdesc lang="en">Use stackdriver_logging output to global resource (yes, true, enabled)</longdesc>
<shortdesc lang="en">Use stackdriver_logging</shortdesc>
-<content type="string" default="no" />
+<content type="string" default="{}" />
</parameter>
</parameters>
<actions>
@@ -114,7 +115,9 @@ correctly.
<action name="monitor" timeout="15s" interval="10s" depth="0" />
<action name="meta-data" timeout="5s" />
</actions>
-</resource-agent>'''
+</resource-agent>'''.format(PARAMETERS['disk_name'], PARAMETERS['disk_scope'],
+ PARAMETERS['disk_csek_file'], PARAMETERS['mode'], PARAMETERS['device_name'],
+ PARAMETERS['stackdriver_logging'])
def get_metadata(metadata_key, params=None, timeout=None):
@@ -137,7 +140,7 @@ def get_metadata(metadata_key, params=None, timeout=None):
url = '%s?%s' % (metadata_url, params)
request = urlrequest.Request(url, headers=METADATA_HEADERS)
request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
- return request_opener.open(request, timeout=timeout * 1.1).read()
+ return request_opener.open(request, timeout=timeout * 1.1).read().decode("utf-8")
def populate_vars():
@@ -145,11 +148,8 @@ def populate_vars():
global INSTANCE_NAME
global PROJECT
global ZONE
- global REGION
global LIST_DISK_ATTACHED_INSTANCES
- global PARAMETERS
-
# Populate global vars
try:
CONN = googleapiclient.discovery.build('compute', 'v1')
@@ -158,11 +158,12 @@ def populate_vars():
sys.exit(ocf.OCF_ERR_CONFIGURED)
for param in PARAMETERS:
- value = os.environ.get('OCF_RESKEY_%s' % param, None)
+ value = os.environ.get('OCF_RESKEY_%s' % param, PARAMETERS[param])
if not value and param in MANDATORY_PARAMETERS:
logger.error('Missing %s mandatory parameter' % param)
sys.exit(ocf.OCF_ERR_CONFIGURED)
- PARAMETERS[param] = value
+ elif value:
+ PARAMETERS[param] = value
try:
INSTANCE_NAME = get_metadata('instance/name')
@@ -172,8 +173,10 @@ def populate_vars():
sys.exit(ocf.OCF_ERR_CONFIGURED)
PROJECT = get_metadata('project/project-id')
- ZONE = get_metadata('instance/zone').split('/')[-1]
- REGION = ZONE[:-2]
+ if PARAMETERS['disk_scope'] in ['detect', 'regional']:
+ ZONE = get_metadata('instance/zone').split('/')[-1]
+ else:
+ ZONE = PARAMETERS['disk_scope']
LIST_DISK_ATTACHED_INSTANCES = get_disk_attached_instances(
PARAMETERS['disk_name'])
@@ -270,8 +273,6 @@ def detach_disk(instance, disk_name):
def attach_disk(instance, disk_name):
location = 'zones/%s' % ZONE
- if PARAMETERS['disk_scope'] == 'regional':
- location = 'regions/%s' % REGION
prefix = 'https://www.googleapis.com/compute/v1'
body = {
'source': '%(prefix)s/projects/%(project)s/%(location)s/disks/%(disk)s' % {
@@ -342,7 +343,7 @@ def gcp_pd_move_stop():
def gcp_pd_move_status():
fetch_data()
if is_disk_attached(INSTANCE_NAME):
- logger.info("Disk %(disk_name)s is correctly attached to %(instance)s" % {
+ logger.debug("Disk %(disk_name)s is correctly attached to %(instance)s" % {
'disk_name': PARAMETERS['disk_name'],
'instance': INSTANCE_NAME,
})

View File

@ -1,10 +0,0 @@
--- ClusterLabs-resource-agents-e711383f/heartbeat/gcp-pd-move.in 2020-05-28 14:46:28.396220588 +0200
+++ /home/oalbrigt/src/resource-agents/gcp-pd-move.rhel8 2020-05-28 14:16:25.845308597 +0200
@@ -32,6 +32,7 @@
from ocf import logger
try:
+ sys.path.insert(0, '/usr/lib/resource-agents/bundled/gcp/google-cloud-sdk/lib/third_party')
import googleapiclient.discovery
except ImportError:
pass

View File

@ -1,61 +0,0 @@
From 2462caf264c487810805c40a546a4dc3f953c340 Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 3 Oct 2018 18:07:31 +0200
Subject: [PATCH] Do not use the absolute path in redis' pidof calls
The reason for this is that newer kernels (we saw this on a 4.18 kernel)
can limit access to /proc/<pid>/{cwd,exe,root} and so pidof will fail to
identify the process when using the full path names.
This access limitation happens even with the root user:
()[root@ra1 /]$ ls -l /proc/32/ |grep redis-server
ls: cannot read symbolic link '/proc/32/cwd': Permission denied
ls: cannot read symbolic link '/proc/32/root': Permission denied
ls: cannot read symbolic link '/proc/32/exe': Permission denied
For this reason the 'pidof /usr/bin/redis-server' calls will fail
when running inside containers that have this kernel protection
mechanism.
We tested this change and successfuly obtained a running redis cluster:
podman container set: redis-bundle [192.168.222.1:5000/redis:latest]
Replica[0]
redis-bundle-podman-0 (ocf::heartbeat:podman): Started ra1
redis-bundle-0 (ocf::pacemaker:remote): Started ra1
redis (ocf::heartbeat:redis): Master redis-bundle-0
Replica[1]
redis-bundle-podman-1 (ocf::heartbeat:podman): Started ra2
redis-bundle-1 (ocf::pacemaker:remote): Started ra2
redis (ocf::heartbeat:redis): Slave redis-bundle-1
Replica[2]
redis-bundle-podman-2 (ocf::heartbeat:podman): Started ra3
redis-bundle-2 (ocf::pacemaker:remote): Started ra3
redis (ocf::heartbeat:redis): Slave redis-bundle-2
Signed-off-By: Damien Ciabrini <dciabrin@redhat.com>
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/redis.in | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/redis.in b/heartbeat/redis.in
index ddc62d8a7..1dff067e9 100644
--- a/heartbeat/redis.in
+++ b/heartbeat/redis.in
@@ -316,7 +316,7 @@ simple_status() {
fi
pid="$(<"$REDIS_PIDFILE")"
- pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING
+ pidof $(basename "$REDIS_SERVER") | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING
ocf_log debug "monitor: redis-server running under pid $pid"
@@ -465,7 +465,7 @@ redis_start() {
break
elif (( info[loading] == 1 )); then
sleep "${info[loading_eta_seconds]}"
- elif pidof "$REDIS_SERVER" >/dev/null; then
+ elif pidof $(basename "$REDIS_SERVER") >/dev/null; then
# unknown error, but the process still exists.
# This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
# See https://github.com/antirez/redis/issues/2368

View File

@ -1,40 +0,0 @@
From 355cd29f2dee828bfe0a4ab64f425827aba7dd3b Mon Sep 17 00:00:00 2001
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
Date: Wed, 17 Oct 2018 09:54:37 +0900
Subject: [PATCH] Mid: pgsql: Fix to ignore Master's re-promote.
---
heartbeat/pgsql | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 380866da1..38f6ceeb7 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -680,6 +680,7 @@ pgsql_start() {
#pgsql_promote: Promote PostgreSQL
pgsql_promote() {
+ local output
local target
local rc
@@ -687,6 +688,18 @@ pgsql_promote() {
ocf_exit_reason "Not in a replication mode."
return $OCF_ERR_CONFIGURED
fi
+
+ output=`exec_sql "${CHECK_MS_SQL}"`
+ if [ $? -ne 0 ]; then
+ report_psql_error $rc $loglevel "Can't get PostgreSQL recovery status on promote."
+ return $OCF_ERR_GENERIC
+ fi
+
+ if [ "$output" = "f" ]; then
+ ocf_log info "PostgreSQL is alredy Master. Don't execute promote."
+ return $OCF_SUCCESS
+ fi
+
rm -f ${XLOG_NOTE_FILE}.*
for target in $NODE_LIST; do

View File

@ -1,43 +0,0 @@
diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver
--- a/heartbeat/nfsserver 2018-10-10 17:02:47.873199077 +0200
+++ b/heartbeat/nfsserver 2018-10-11 15:24:41.782048475 +0200
@@ -402,7 +402,6 @@
return
fi
- [ -d "$fp" ] || mkdir -p $fp
[ -d "$OCF_RESKEY_rpcpipefs_dir" ] || mkdir -p $OCF_RESKEY_rpcpipefs_dir
[ -d "$fp/v4recovery" ] || mkdir -p $fp/v4recovery
@@ -437,10 +436,21 @@
return
fi
+ [ -d "$fp" ] || mkdir -p $fp
+
if is_bound /var/lib/nfs; then
ocf_log debug "$fp is already bound to /var/lib/nfs"
return 0
fi
+
+ case $EXEC_MODE in
+ [23]) if nfs_exec status var-lib-nfs-rpc_pipefs.mount > /dev/null 2>&1; then
+ ocf_log debug "/var/lib/nfs/rpc_pipefs already mounted. Unmounting in preparation to bind mount nfs dir"
+ systemctl stop var-lib-nfs-rpc_pipefs.mount
+ fi
+ ;;
+ esac
+
mount --bind $fp /var/lib/nfs
[ $SELINUX_ENABLED -eq 0 ] && restorecon /var/lib/nfs
}
@@ -612,8 +622,8 @@
fi
is_redhat_based && set_env_args
- prepare_directory
bind_tree
+ prepare_directory
if ! `mount | grep -q " on $OCF_RESKEY_rpcpipefs_dir "`; then
mount -t rpc_pipefs sunrpc $OCF_RESKEY_rpcpipefs_dir

View File

@ -1,24 +0,0 @@
From 848d62c32b355a03c2ad8d246eb3e34b04af07ca Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 9 Jan 2019 16:49:41 +0100
Subject: [PATCH] LVM-activate: dont fail initial probe
---
heartbeat/LVM-activate | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index f46932c1c..49ab717a3 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -323,6 +323,10 @@ lvmlockd_check()
# Good: lvmlockd is running, and clvmd is not running
if ! pgrep lvmlockd >/dev/null 2>&1 ; then
+ if ocf_is_probe; then
+ exit $OCF_NOT_RUNNING
+ fi
+
ocf_exit_reason "lvmlockd daemon is not running!"
exit $OCF_ERR_CONFIGURED
fi

View File

@ -0,0 +1,41 @@
From 6d2ed7615614ede093f097189876d0f08553a43e Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Mon, 14 Feb 2022 22:23:39 -0800
Subject: [PATCH] IPsrcaddr: Add warning about DHCP
If DHCP is enabled for the interface that serves OCF_RESKEY_ipaddress,
then NetworkManager (and possibly dhclient in systems without NM;
unsure) may later re-add a route that the IPsrcaddr resource replaced.
This may cause the resource to fail or cause other unexpected behavior.
So far this has been observed with a default route, albeit with an edge
case of a configuration (OCF_RESKEY_ipaddress on a different subnet)
that may not be totally valid. There are likely to be other situations
as well where DHCP can cause conflicts with IPsrcaddr's manual updates
via iproute. The safest option is to use only static configuration for
the involved interface.
Resolves: RHBZ#1654862
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/IPsrcaddr | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index ec868409f..fd7b6f68d 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -99,6 +99,12 @@ meta_data() {
<longdesc lang="en">
Resource script for IPsrcaddr. It manages the preferred source address
modification.
+
+Note: DHCP should not be enabled for the interface serving the preferred
+source address. Enabling DHCP may result in unexpected behavior, such as
+the automatic addition of duplicate or conflicting routes. This may
+cause the IPsrcaddr resource to fail, or it may produce undesired
+behavior while the resource continues to run.
</longdesc>
<shortdesc lang="en">Manages the preferred source address for outgoing IP packets</shortdesc>

View File

@ -0,0 +1,49 @@
From 5a65f66ff803ad7ed15af958cc1efdde4d53dcb7 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Thu, 17 Feb 2022 03:53:21 -0800
Subject: [PATCH] IPsrcaddr: Better error message when no matching route found
If OCF_RESKEY_destination is not explicitly set and `ip route list`
can't find a route matching the specifications, the NETWORK variable
doesn't get set. This causes a certain failure of the start operation,
because there is no PREFIX argument to `ip route replace` (syntax
error). It may also cause unexpected behavior for stop operations (but
not in all cases). During a monitor, this event can only happen if
something has changed outside the cluster's control, and so is cause
for warning there.
Exit OCF_ERR_ARGS for start, log debug for probe, log warning for all
other ops.
Resolves: RHBZ#1654862
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/IPsrcaddr | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index fd7b6f68d..f0216722d 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -549,6 +549,20 @@ rc=$?
INTERFACE=`echo $findif_out | awk '{print $1}'`
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+
+ if [ -z "$NETWORK" ]; then
+ err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
+ err_str="$err_str match $ipaddress' failed to find a matching route"
+
+ if [ "$__OCF_ACTION" = "start" ]; then
+ ocf_exit_reason "$err_str"
+ exit $OCF_ERR_ARGS
+ elif ! ocf_is_probe; then
+ ocf_log warn "$err_str"
+ else
+ ocf_log debug "$err_str"
+ fi
+ fi
else
NETWORK="$OCF_RESKEY_destination"
fi

View File

@ -0,0 +1,56 @@
From 0a197f1cd227e768837dff778a0c56fc1085d434 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 21 Feb 2022 13:54:04 +0100
Subject: [PATCH] IPsrcaddr: fix indentation in better error message code
---
heartbeat/IPsrcaddr | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index f0216722d..c82adc0e9 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -542,27 +542,27 @@ fi
findif_out=`$FINDIF -C`
rc=$?
[ $rc -ne 0 ] && {
- ocf_exit_reason "[$FINDIF -C] failed"
- exit $rc
+ ocf_exit_reason "[$FINDIF -C] failed"
+ exit $rc
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
- if [ -z "$NETWORK" ]; then
- err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
- err_str="$err_str match $ipaddress' failed to find a matching route"
-
- if [ "$__OCF_ACTION" = "start" ]; then
- ocf_exit_reason "$err_str"
- exit $OCF_ERR_ARGS
- elif ! ocf_is_probe; then
- ocf_log warn "$err_str"
- else
- ocf_log debug "$err_str"
- fi
- fi
+ if [ -z "$NETWORK" ]; then
+ err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
+ err_str="$err_str match $ipaddress' failed to find a matching route"
+
+ if [ "$__OCF_ACTION" = "start" ]; then
+ ocf_exit_reason "$err_str"
+ exit $OCF_ERR_ARGS
+ elif ! ocf_is_probe; then
+ ocf_log warn "$err_str"
+ else
+ ocf_log debug "$err_str"
+ fi
+ fi
else
NETWORK="$OCF_RESKEY_destination"
fi

View File

@ -0,0 +1,117 @@
From 50a596bfb977b18902dc62b99145bbd1a087690a Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 1 Mar 2022 11:06:07 +0100
Subject: [PATCH] IPsrcaddr: fixes
- use findif.sh to detect secondary interfaces
- get metric and proto to update the correct route/update it correctly
- match route using interface to fail when trying to update secondary
interfaces without specifying destination (would update default route
before)
- also use PRIMARY_IP/OPTS during stop-action for default routes (to get
back to the exact routes we started with)
- dont fail during stop-action if route doesnt exist
- use [[:blank:]] for WS to follow POSIX standard (suggested by nrwahl)
---
heartbeat/IPsrcaddr | 35 +++++++++++++++++++----------------
1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index c82adc0e9..7dbf65ff5 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -52,6 +52,7 @@
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/findif.sh
# Defaults
OCF_RESKEY_ipaddress_default=""
@@ -181,19 +182,21 @@ errorexit() {
#
# where the src clause "src Y.Y.Y.Y" may or may not be present
-WS="[`echo -en ' \t'`]"
+WS="[[:blank:]]"
OCTET="[0-9]\{1,3\}"
IPADDR="\($OCTET\.\)\{3\}$OCTET"
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
-FINDIF=$HA_BIN/findif
+METRICCLAUSE=".*\(metric$WS[^ ]\+\)"
+PROTOCLAUSE=".*\(proto$WS[^ ]\+\)"
+FINDIF=findif
# findif needs that to be set
export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
srca_read() {
# Capture matching route - doublequotes prevent word splitting...
- ROUTE="`$CMDSHOW 2> /dev/null`" || errorexit "command '$CMDSHOW' failed"
+ ROUTE="`$CMDSHOW dev $INTERFACE 2> /dev/null`" || errorexit "command '$CMDSHOW' failed"
# ... so we can make sure there is only 1 matching route
[ 1 -eq `echo "$ROUTE" | wc -l` ] || \
@@ -201,7 +204,7 @@ srca_read() {
# But there might still be no matching route
[ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && [ -z "$ROUTE" ] && \
- ! ocf_is_probe && errorexit "no matching route exists"
+ ! ocf_is_probe && [ "$__OCF_ACTION" != stop ] && errorexit "no matching route exists"
# Sed out the source ip address if it exists
SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\3/p"`
@@ -232,8 +235,8 @@ srca_start() {
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE src $1 || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE src $1' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
$CMDCHANGE $ROUTE_WO_SRC src $1 || \
@@ -266,14 +269,11 @@ srca_stop() {
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- OPTS=""
- if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] ;then
- PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
- OPTS="proto kernel scope host src $PRIMARY_IP"
- fi
+ PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
+ OPTS="proto kernel scope link src $PRIMARY_IP"
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
$CMDCHANGE $ROUTE_WO_SRC || \
@@ -539,16 +539,19 @@ if [ $rc -ne $OCF_SUCCESS ]; then
esac
fi
-findif_out=`$FINDIF -C`
+findif_out=`$FINDIF`
rc=$?
[ $rc -ne 0 ] && {
- ocf_exit_reason "[$FINDIF -C] failed"
+ ocf_exit_reason "[$FINDIF] failed"
exit $rc
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
+LISTROUTE=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress`
+METRIC=`echo $LISTROUTE | sed -n "s/$METRICCLAUSE/\1/p"`
+[ -z "$PROTO" ] && PROTO=`echo $LISTROUTE | sed -n "s/$PROTOCLAUSE/\1/p"`
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
- NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+ NETWORK=`echo $LISTROUTE | grep -m 1 -o '^[^ ]*'`
if [ -z "$NETWORK" ]; then
err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"

View File

@ -1,27 +0,0 @@
From 4f122cd0cf46c1fdc1badb22049607a6abf0c885 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 4 Feb 2019 17:04:59 +0100
Subject: [PATCH] LVM-activate: only check locking_type when LVM < v2.03
---
heartbeat/LVM-activate | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c2239d881..3c462c75c 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -311,7 +311,12 @@ config_verify()
lvmlockd_check()
{
config_verify "global/use_lvmlockd" "1"
- config_verify "global/locking_type" "1"
+
+ # locking_type was removed from config in v2.03
+ ocf_version_cmp "$(lvmconfig --version | awk '/LVM ver/ {sub(/\(.*/, "", $3); print $3}')" "2.03"
+ if [ "$?" -eq 0 ]; then
+ config_verify "global/locking_type" "1"
+ fi
# We recommend to activate one LV at a time so that this specific volume
# binds to a proper filesystem to protect the data

View File

@ -1,12 +0,0 @@
diff -uNr a/heartbeat/vdo-vol b/heartbeat/vdo-vol
--- a/heartbeat/vdo-vol 2018-11-07 09:11:23.037835110 +0100
+++ b/heartbeat/vdo-vol 2018-11-07 09:12:41.322373901 +0100
@@ -145,7 +145,7 @@
vdo_monitor(){
status=$(vdo status $OPTIONS 2>&1)
- MODE=$(vdostats vdo_vol --verbose | grep "operating mode" | awk '{print $NF}')
+ MODE=$(vdostats --verbose ${OCF_RESKEY_volume} | grep "operating mode" | awk '{print $NF}')
case "$status" in
*"Device mapper status: not available"*)

View File

@ -1,59 +0,0 @@
From b42ef7555de86cc29d165ae17682c223bfb23b6e Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 5 Nov 2018 16:38:01 +0100
Subject: [PATCH 1/2] tomcat: use systemd on RHEL when catalina.sh is
unavailable
---
heartbeat/tomcat | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/heartbeat/tomcat b/heartbeat/tomcat
index 4812a0133..833870038 100755
--- a/heartbeat/tomcat
+++ b/heartbeat/tomcat
@@ -613,7 +613,6 @@ TOMCAT_NAME="${OCF_RESKEY_tomcat_name-tomcat}"
TOMCAT_CONSOLE="${OCF_RESKEY_script_log-/var/log/$TOMCAT_NAME.log}"
RESOURCE_TOMCAT_USER="${OCF_RESKEY_tomcat_user-root}"
RESOURCE_STATUSURL="${OCF_RESKEY_statusurl-http://127.0.0.1:8080}"
-OCF_RESKEY_force_systemd_default=0
JAVA_HOME="${OCF_RESKEY_java_home}"
JAVA_OPTS="${OCF_RESKEY_java_opts}"
@@ -630,6 +629,13 @@ if [ -z "$CATALINA_PID" ]; then
CATALINA_PID="${HA_RSCTMP}/${TOMCAT_NAME}_tomcatstate/catalina.pid"
fi
+# Only default to true for RedHat systems without catalina.sh
+if [ -e "$CATALINA_HOME/bin/catalina.sh" ] || ! is_redhat_based; then
+ OCF_RESKEY_force_systemd_default=0
+else
+ OCF_RESKEY_force_systemd_default=1
+fi
+
MAX_STOP_TIME="${OCF_RESKEY_max_stop_time}"
: ${OCF_RESKEY_force_systemd=${OCF_RESKEY_force_systemd_default}}
From 9cb2b142a9ecb3a2d5a51cdd51b4005f08b9a97b Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 5 Nov 2018 17:09:43 +0100
Subject: [PATCH 2/2] ocf-distro: add regex for RedHat version
---
heartbeat/ocf-distro | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/ocf-distro b/heartbeat/ocf-distro
index 530ee57ed..f69910c98 100644
--- a/heartbeat/ocf-distro
+++ b/heartbeat/ocf-distro
@@ -39,7 +39,7 @@ get_os_ver() {
VER=$(cat $_DEBIAN_VERSION_FILE)
elif [ -f $_REDHAT_RELEASE_FILE ]; then
OS=RedHat # redhat or similar
- VER= # here some complex sed script
+ VER=$(sed "s/.* release \([^ ]\+\).*/\1/" $_REDHAT_RELEASE_FILE)
else
OS=$(uname -s)
VER=$(uname -r)

View File

@ -1,23 +0,0 @@
From 13511f843b2b0fa1b8b306beac041e0855be05a6 Mon Sep 17 00:00:00 2001
From: Valentin Vidic <Valentin.Vidic@CARNet.hr>
Date: Tue, 15 Jan 2019 15:45:03 +0100
Subject: [PATCH] LVM-activate: make vgname not uniqe
If activating one lvname at a time, vgname will not be unique.
---
heartbeat/LVM-activate | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index f46932c1c..bc448c9c1 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -102,7 +102,7 @@ because some DLM lockspaces might be in use and cannot be closed automatically.
<shortdesc lang="en">This agent activates/deactivates logical volumes.</shortdesc>
<parameters>
-<parameter name="vgname" unique="1" required="1">
+<parameter name="vgname" unique="0" required="1">
<longdesc lang="en">
The volume group name.
</longdesc>

View File

@ -1,29 +0,0 @@
From ee9a47f97dd8b0cb51033db7879a79588aab409c Mon Sep 17 00:00:00 2001
From: Valentin Vidic <Valentin.Vidic@CARNet.hr>
Date: Tue, 15 Jan 2019 15:40:01 +0100
Subject: [PATCH] LVM-activate: fix dmsetup check
When there are no devices in the system dmsetup outputs one line:
# dmsetup info -c
No devices found
---
heartbeat/LVM-activate | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index f46932c1c..c3225e1cb 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -715,9 +715,9 @@ lvm_status() {
if [ -n "${LV}" ]; then
# dmsetup ls? It cannot accept device name. It's
# too heavy to list all DM devices.
- dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" | wc -l )
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" | grep -c -v '^No devices found')
else
- dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG}" 2>/dev/null | wc -l )
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG}" | grep -c -v '^No devices found')
fi
if [ $dm_count -eq 0 ]; then

View File

@ -1,31 +0,0 @@
From d95765aba205ea59dcb99378bed4c6d0593ebdb4 Mon Sep 17 00:00:00 2001
From: fpicot <francois.picot@homesend.com>
Date: Fri, 11 Jan 2019 11:38:18 -0500
Subject: [PATCH] Route: make family parameter optional
---
heartbeat/Route | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/Route b/heartbeat/Route
index 67bdf6bfc..2da58bce1 100755
--- a/heartbeat/Route
+++ b/heartbeat/Route
@@ -124,7 +124,7 @@ The routing table to be configured for the route.
<content type="string" default="" />
</parameter>
-<parameter name="family" unique="0" required="1">
+<parameter name="family" unique="0">
<longdesc lang="en">
The address family to be used for the route
ip4 IP version 4
@@ -132,7 +132,7 @@ ip6 IP version 6
detect Detect from 'destination' address.
</longdesc>
<shortdesc lang="en">Address Family</shortdesc>
-<content type="string" default="${OCF_RESKEY_family}" />
+<content type="string" default="${OCF_RESKEY_family_default}" />
</parameter>
</parameters>

View File

@ -1,62 +0,0 @@
From 6303448af77d2ed64c7436a84b30cf7fa4941e19 Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 30 Jan 2019 21:36:17 +0100
Subject: [PATCH] redis: Filter warning from stderr when calling 'redis-cli -a'
In some versions of redis (starting with 4.0.10) we have commits [1] and
[2] which add a warning on stderr which will be printed out every single
time a monitor operation takes place:
foo pacemaker-remoted[57563]: notice: redis_monitor_20000:1930:stderr
[ Warning: Using a password with '-a' option on the command line interface may not be safe. ]
Later on commit [3] (merged with 5.0rc4) was merged which added the option
'--no-auth-warning' to disable said warning since it broke a bunch of
scripts [4]. I tried to forcibly either try the command twice (first
with --no-auth-warning and then without in case of errors) but it is
impossible to distinguish between error due to missing param and other
errors.
So instead of inspecting the version of the redis-cli tool and do the following:
- >= 5.0.0 use --no-auth-warning all the time
- >= 4.0.10 & < 5.0.0 filter the problematic line from stderr only
- else do it like before
We simply filter out from stderr the 'Using a password' message
unconditionally while making sure we keep stdout just the same.
Tested on a redis 4.0.10 cluster and confirmed that it is working as
intended.
All this horror and pain is due to the fact that redis does not support
any other means to pass a password (we could in theory first connect to
the server and then issue an AUTH command, but that seems even more
complex and error prone). See [5] for more info (or [6] for extra fun)
[1] https://github.com/antirez/redis/commit/c082221aefbb2a472c7193dbdbb90900256ce1a2
[2] https://github.com/antirez/redis/commit/ef931ef93e909b4f504e8c6fbed350ed70c1c67c
[3] https://github.com/antirez/redis/commit/a4ef94d2f71a32f73ce4ebf154580307a144b48f
[4] https://github.com/antirez/redis/issues/5073
[5] https://github.com/antirez/redis/issues/3483
[6] https://github.com/antirez/redis/pull/2413
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/redis.in | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/heartbeat/redis.in b/heartbeat/redis.in
index 1dff067e9..e257bcc5e 100644
--- a/heartbeat/redis.in
+++ b/heartbeat/redis.in
@@ -302,7 +302,9 @@ set_score()
redis_client() {
ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*"
if [ -n "$clientpasswd" ]; then
- "$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" | sed 's/\r//'
+ # Starting with 4.0.10 there is a warning on stderr when using a pass
+ # Once we stop supporting versions < 5.0.0 we can add --no-auth-warning here
+ ("$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" 2>&1 >&3 3>&- | grep -v "Using a password" >&2 3>&-) 3>&1 | sed 's/\r//'
else
"$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
fi

View File

@ -1,70 +0,0 @@
From d228d41c61f57f2576dd87aa7be86f9ca26e3059 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 18 Mar 2019 16:03:14 +0100
Subject: [PATCH] Squid: fix pid file issue due to new Squid version saving the
PID of the parent process instead of the listener child process
---
heartbeat/Squid.in | 21 +++++----------------
1 file changed, 5 insertions(+), 16 deletions(-)
diff --git a/heartbeat/Squid.in b/heartbeat/Squid.in
index a99892d75..0b3c8ea86 100644
--- a/heartbeat/Squid.in
+++ b/heartbeat/Squid.in
@@ -96,12 +96,9 @@ for a squid instance managed by this RA.
<content type="string" default=""/>
</parameter>
-<parameter name="squid_pidfile" required="1" unique="1">
-<longdesc lang="en">
-This is a required parameter. This parameter specifies a process id file
-for a squid instance managed by this RA.
-</longdesc>
-<shortdesc lang="en">Pidfile</shortdesc>
+<parameter name="squid_pidfile" required="0" unique="1">
+<longdesc lang="en">Deprecated - do not use anymore</longdesc>
+<shortdesc lang="en">deprecated - do not use anymore</shortdesc>
<content type="string" default=""/>
</parameter>
@@ -175,8 +172,8 @@ get_pids()
# Seek by pattern
SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN")
- # Seek by pidfile
- SQUID_PIDS[1]=$(awk '1{print $1}' $SQUID_PIDFILE 2>/dev/null)
+ # Seek by child process
+ SQUID_PIDS[1]=$(pgrep -P ${SQUID_PIDS[0]})
if [[ -n "${SQUID_PIDS[1]}" ]]; then
typeset exe
@@ -306,7 +303,6 @@ stop_squid()
while true; do
get_pids
if is_squid_dead; then
- rm -f $SQUID_PIDFILE
return $OCF_SUCCESS
fi
(( lapse_sec = lapse_sec + 1 ))
@@ -326,7 +322,6 @@ stop_squid()
kill -KILL ${SQUID_PIDS[0]} ${SQUID_PIDS[2]}
sleep 1
if is_squid_dead; then
- rm -f $SQUID_PIDFILE
return $OCF_SUCCESS
fi
done
@@ -389,12 +384,6 @@ if [[ ! -x "$SQUID_EXE" ]]; then
exit $OCF_ERR_CONFIGURED
fi
-SQUID_PIDFILE="${OCF_RESKEY_squid_pidfile}"
-if [[ -z "$SQUID_PIDFILE" ]]; then
- ocf_exit_reason "SQUID_PIDFILE is not defined"
- exit $OCF_ERR_CONFIGURED
-fi
-
SQUID_PORT="${OCF_RESKEY_squid_port}"
if [[ -z "$SQUID_PORT" ]]; then
ocf_exit_reason "SQUID_PORT is not defined"

View File

@ -1,24 +0,0 @@
From e370845f41d39d93f76fa34502d62e2513d5eb73 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 29 May 2019 14:07:46 +0200
Subject: [PATCH] Squid: dont run pgrep -P without PID
---
heartbeat/Squid.in | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/heartbeat/Squid.in b/heartbeat/Squid.in
index 0b3c8ea86..e62e7ee66 100644
--- a/heartbeat/Squid.in
+++ b/heartbeat/Squid.in
@@ -173,7 +173,9 @@ get_pids()
SQUID_PIDS[0]=$(pgrep -f "$PROCESS_PATTERN")
# Seek by child process
- SQUID_PIDS[1]=$(pgrep -P ${SQUID_PIDS[0]})
+ if [[ -n "${SQUID_PIDS[0]}" ]]; then
+ SQUID_PIDS[1]=$(pgrep -P ${SQUID_PIDS[0]})
+ fi
if [[ -n "${SQUID_PIDS[1]}" ]]; then
typeset exe

View File

@ -1,31 +0,0 @@
From 9273b83edf6ee72a59511f307e168813ca3d31fd Mon Sep 17 00:00:00 2001
From: colttt <shadow_7@gmx.net>
Date: Fri, 12 Oct 2018 15:29:48 +0200
Subject: [PATCH] possible fix for #1026
add an if-condition and remove an useless 'targetcli create'
---
heartbeat/iSCSITarget.in | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/iSCSITarget.in b/heartbeat/iSCSITarget.in
index e49a79016..9128fdc55 100644
--- a/heartbeat/iSCSITarget.in
+++ b/heartbeat/iSCSITarget.in
@@ -340,13 +340,13 @@ iSCSITarget_start() {
ocf_take_lock $TARGETLOCKFILE
ocf_release_lock_on_exit $TARGETLOCKFILE
ocf_run targetcli /iscsi set global auto_add_default_portal=false || exit $OCF_ERR_GENERIC
- ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
+ if ! [ -d /sys/kernel/config/target/iscsi/${OCF_RESKEY_iqn} ] ; then
+ ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
+ fi
for portal in ${OCF_RESKEY_portals}; do
if [ $portal != ${OCF_RESKEY_portals_default} ] ; then
IFS=':' read -a sep_portal <<< "$portal"
ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/portals create "${sep_portal[0]}" "${sep_portal[1]}" || exit $OCF_ERR_GENERIC
- else
- ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC
fi
done
# in lio, we can set target parameters by manipulating

View File

@ -1,24 +0,0 @@
From 0d53e80957a00016418080967892337b1b13f99d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 30 Jul 2019 11:23:07 +0200
Subject: [PATCH] iSCSILogicalUnit: only create acls if it doesnt exist
---
heartbeat/iSCSILogicalUnit.in | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/iSCSILogicalUnit.in b/heartbeat/iSCSILogicalUnit.in
index 0fe85b593..02045d754 100644
--- a/heartbeat/iSCSILogicalUnit.in
+++ b/heartbeat/iSCSILogicalUnit.in
@@ -420,8 +420,8 @@ iSCSILogicalUnit_start() {
if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
for initiator in ${OCF_RESKEY_allowed_initiators}; do
- ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC
- ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
+ [ -d "/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls" ] || ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC
+ [ -d "/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls/${initiator}" ] || ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
done
fi

View File

@ -1,93 +0,0 @@
From db6d12f4b7b10e214526512abe35307270f81c03 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 8 Aug 2019 14:48:13 +0200
Subject: [PATCH] mysql/mariadb/galera: use runuser/su to avoid using SELinux
DAC_OVERRIDE
---
heartbeat/galera | 11 ++++++-----
heartbeat/mysql-common.sh | 16 ++++++++++++----
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/heartbeat/galera b/heartbeat/galera
index 9b9fe5569..056281fb8 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -624,8 +624,7 @@ detect_last_commit()
local recover_args="--defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
- --datadir=$OCF_RESKEY_datadir \
- --user=$OCF_RESKEY_user"
+ --datadir=$OCF_RESKEY_datadir"
local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p'
local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
@@ -654,7 +653,8 @@ detect_last_commit()
ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
- ${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null
+ $SU - $OCF_RESKEY_user -s /bin/sh -c \
+ "${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null"
last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)"
if [ -z "$last_commit" ]; then
@@ -670,8 +670,9 @@ detect_last_commit()
# we can only rollback the transaction, but that's OK
# since the DB will get resynchronized anyway
ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
- ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
- --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null
+ $SU - $OCF_RESKEY_user -s /bin/sh -c \
+ "${OCF_RESKEY_binary} $recover_args --wsrep-recover \
+ --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null"
last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)"
if [ ! -z "$last_commit" ]; then
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index d5ac972cd..65db9bf85 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -2,6 +2,13 @@
#######################################################################
+# Use runuser if available for SELinux.
+if [ -x /sbin/runuser ]; then
+ SU=runuser
+else
+ SU=su
+fi
+
# Attempt to detect a default binary
OCF_RESKEY_binary_default=$(which mysqld_safe 2> /dev/null)
if [ "$OCF_RESKEY_binary_default" = "" ]; then
@@ -207,7 +214,7 @@ mysql_common_prepare_dirs()
# already existed, check whether it is writable by the configured
# user
for dir in $pid_dir $socket_dir; do
- if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
+ if ! $SU -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
ocf_exit_reason "Directory $dir is not writable by $OCF_RESKEY_user"
exit $OCF_ERR_PERM;
fi
@@ -219,14 +226,15 @@ mysql_common_start()
local mysql_extra_params="$1"
local pid
- ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
+ $SU - $OCF_RESKEY_user -s /bin/sh -c \
+ "${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
--datadir=$OCF_RESKEY_datadir \
--log-error=$OCF_RESKEY_log \
- --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
+ $OCF_RESKEY_additional_parameters \
$mysql_extra_params >/dev/null 2>&1 &
- pid=$!
+ pid=$!"
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required.

View File

@ -1,104 +0,0 @@
From 57f695d336cab33c61e754e463654ad6400f7b58 Mon Sep 17 00:00:00 2001
From: gguifelixamz <fguilher@amazon.com>
Date: Tue, 27 Nov 2018 17:06:05 +0000
Subject: [PATCH 1/4] Enable --query flag in DescribeRouteTable API call to
avoid race condition with grep
---
heartbeat/aws-vpc-move-ip | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 9b2043aca..d2aed7490 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -167,9 +167,10 @@ ec2ip_validate() {
ec2ip_monitor() {
if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then
ocf_log info "monitor: check routing table (API call)"
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table"
+ cmd=''$OCF_RESKEY_awscli' --profile '$OCF_RESKEY_profile' --output text ec2 describe-route-tables --route-table-ids '$OCF_RESKEY_routing_table' --query 'RouteTables[*].Routes[?DestinationCidrBlock==\`$OCF_RESKEY_address/32\`].InstanceId''
ocf_log debug "executing command: $cmd"
- ROUTE_TO_INSTANCE="$($cmd | grep $OCF_RESKEY_ip | awk '{ print $3 }')"
+ ROUTE_TO_INSTANCE=$($cmd)
+ ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"
if [ -z "$ROUTE_TO_INSTANCE" ]; then
ROUTE_TO_INSTANCE="<unknown>"
fi
From 4d6371aca5dca35b902a480e07a08c1dc3373ca5 Mon Sep 17 00:00:00 2001
From: gguifelixamz <fguilher@amazon.com>
Date: Thu, 29 Nov 2018 11:39:26 +0000
Subject: [PATCH 2/4] aws-vpc-move-ip: Fixed outer quotes and removed inner
quotes
---
heartbeat/aws-vpc-move-ip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index d2aed7490..ced69bd13 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -167,7 +167,7 @@ ec2ip_validate() {
ec2ip_monitor() {
if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then
ocf_log info "monitor: check routing table (API call)"
- cmd=''$OCF_RESKEY_awscli' --profile '$OCF_RESKEY_profile' --output text ec2 describe-route-tables --route-table-ids '$OCF_RESKEY_routing_table' --query 'RouteTables[*].Routes[?DestinationCidrBlock==\`$OCF_RESKEY_address/32\`].InstanceId''
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table --query RouteTables[*].Routes[?DestinationCidrBlock==\`$OCF_RESKEY_address/32\`].InstanceId"
ocf_log debug "executing command: $cmd"
ROUTE_TO_INSTANCE=$($cmd)
ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"
From 09f4b061690a0e681aaf7314f1fc3e6f4e597cc8 Mon Sep 17 00:00:00 2001
From: gguifelixamz <fguilher@amazon.com>
Date: Thu, 29 Nov 2018 11:55:05 +0000
Subject: [PATCH 3/4] aws-vpc-move-ip: Replaced indentation spaces with tabs
for consistency with the rest of the code
---
heartbeat/aws-vpc-move-ip | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index ced69bd13..3e827283e 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -167,10 +167,10 @@ ec2ip_validate() {
ec2ip_monitor() {
if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then
ocf_log info "monitor: check routing table (API call)"
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table --query RouteTables[*].Routes[?DestinationCidrBlock==\`$OCF_RESKEY_address/32\`].InstanceId"
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table --query RouteTables[*].Routes[?DestinationCidrBlock==\`$OCF_RESKEY_address/32\`].InstanceId"
ocf_log debug "executing command: $cmd"
- ROUTE_TO_INSTANCE=$($cmd)
- ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"
+ ROUTE_TO_INSTANCE=$($cmd)
+ ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"
if [ -z "$ROUTE_TO_INSTANCE" ]; then
ROUTE_TO_INSTANCE="<unknown>"
fi
From fcf85551ce70cb4fb7ce24e21c361fdbe6fcce6b Mon Sep 17 00:00:00 2001
From: gguifelixamz <fguilher@amazon.com>
Date: Thu, 29 Nov 2018 13:07:32 +0000
Subject: [PATCH 4/4] aws-vpc-move-ip: In cmd variable on ec2ip_monitor():
replaced _address with _ip and modified to use single quotes
---
heartbeat/aws-vpc-move-ip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 3e827283e..331ee184f 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -167,7 +167,7 @@ ec2ip_validate() {
ec2ip_monitor() {
if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then
ocf_log info "monitor: check routing table (API call)"
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table --query RouteTables[*].Routes[?DestinationCidrBlock==\`$OCF_RESKEY_address/32\`].InstanceId"
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId"
ocf_log debug "executing command: $cmd"
ROUTE_TO_INSTANCE=$($cmd)
ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"

View File

@ -1,82 +0,0 @@
From 4ee9a7026d7ed15b0b5cd26f06a21d04fc05d14e Mon Sep 17 00:00:00 2001
From: Roger Zhou <zzhou@suse.com>
Date: Mon, 1 Apr 2019 22:57:26 +0800
Subject: [PATCH 1/2] LVM-activate: return OCF_NOT_RUNNING on initial probe
In the use case of lvm on top of cluster md/raid. When the fenced node
rejoins to the cluster, Pacemaker will run the monitor action for the
probe operation. At that time, LVM PV and VG won't exist before cluster
md/raid get assembled, and the probe should return $OCF_NOT_RUNNING
instead of $OCF_ERR_CONFIGURED.
Signed-off-by: Roger Zhou <zzhou@suse.com>
---
heartbeat/LVM-activate | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 3c462c75c..91ac05c34 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -329,6 +329,7 @@ lvmlockd_check()
# Good: lvmlockd is running, and clvmd is not running
if ! pgrep lvmlockd >/dev/null 2>&1 ; then
if ocf_is_probe; then
+ ocf_log info "initial probe: lvmlockd is not running yet."
exit $OCF_NOT_RUNNING
fi
@@ -481,6 +482,11 @@ lvm_validate() {
exit $OCF_SUCCESS
fi
+ if ocf_is_probe; then
+ ocf_log info "initial probe: VG [${VG}] is not found on any block device yet."
+ exit $OCF_NOT_RUNNING
+ fi
+
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
exit $OCF_ERR_CONFIGURED
fi
From df2f58c400b1f6f239f9e1c1fdf6ce0875639b43 Mon Sep 17 00:00:00 2001
From: Roger Zhou <zzhou@suse.com>
Date: Mon, 1 Apr 2019 23:02:54 +0800
Subject: [PATCH 2/2] LVM-activate: align dmsetup report command to standard
Namely to change 'vgname/lvname' to 'vg_name/lv_name'. The dmsetup
report command follows lvm2 selection criteria field name standard.
- dmsetup v1.02.86 (lvm2 v2_02_107) - 23rd June 2014
"Add dmsetup -S/--select to define selection criteria"
- dmsetup info -c -S help
Signed-off-by: Roger Zhou <zzhou@suse.com>
---
heartbeat/LVM-activate | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 91ac05c34..730d9a09d 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -707,7 +707,7 @@ tagging_deactivate() {
# method:
#
# lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null | tr -d '[:blank:]')
-# dm_count=$(dmsetup --noheadings info -c -S "vgname=${VG}" 2>/dev/null | grep -c "${VG}-")
+# dm_count=$(dmsetup --noheadings info -c -S "vg_name=${VG}" 2>/dev/null | grep -c "${VG}-")
# test $lv_count -eq $dm_count
#
# It works, but we cannot afford to use LVM command in lvm_status. LVM command is expensive
@@ -730,9 +730,9 @@ lvm_status() {
if [ -n "${LV}" ]; then
# dmsetup ls? It cannot accept device name. It's
# too heavy to list all DM devices.
- dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" | grep -c -v '^No devices found')
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG} && lv_name=${LV}" | grep -c -v '^No devices found')
else
- dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG}" | grep -c -v '^No devices found')
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG}" | grep -c -v '^No devices found')
fi
if [ $dm_count -eq 0 ]; then

View File

@ -1,46 +0,0 @@
From 17fe1dfeef1534b270e4765277cb8d7b42c4a9c4 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 5 Apr 2019 09:15:40 +0200
Subject: [PATCH] gcp-vpc-move-route/gcp-vpc-move-vip: fix Python 3 encoding
issue
---
heartbeat/gcp-vpc-move-route.in | 2 +-
heartbeat/gcp-vpc-move-vip.in | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in
index 591b97b1c..7dd47150d 100644
--- a/heartbeat/gcp-vpc-move-route.in
+++ b/heartbeat/gcp-vpc-move-route.in
@@ -193,7 +193,7 @@ def get_metadata(metadata_key, params=None, timeout=None):
url = '%s?%s' % (metadata_url, params)
request = urlrequest.Request(url, headers=METADATA_HEADERS)
request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
- return request_opener.open(request, timeout=timeout * 1.1).read()
+ return request_opener.open(request, timeout=timeout * 1.1).read().decode("utf-8")
def validate(ctx):
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
index bd6cf86cd..953d61ed7 100755
--- a/heartbeat/gcp-vpc-move-vip.in
+++ b/heartbeat/gcp-vpc-move-vip.in
@@ -106,7 +106,7 @@ def get_metadata(metadata_key, params=None, timeout=None):
url = '%s?%s' % (metadata_url, params)
request = urlrequest.Request(url, headers=METADATA_HEADERS)
request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
- return request_opener.open(request, timeout=timeout * 1.1).read()
+ return request_opener.open(request, timeout=timeout * 1.1).read().decode("utf-8")
def get_instance(project, zone, instance):
@@ -162,7 +162,7 @@ def get_alias(project, zone, instance):
def get_localhost_alias():
net_iface = get_metadata('instance/network-interfaces', {'recursive': True})
- net_iface = json.loads(net_iface.decode('utf-8'))
+ net_iface = json.loads(net_iface)
try:
return net_iface[0]['ipAliases'][0]
except (KeyError, IndexError):

View File

@ -1,122 +0,0 @@
--- a/heartbeat/aws-vpc-move-ip 2019-05-20 10:54:01.527329668 +0200
+++ b/heartbeat/aws-vpc-move-ip 2019-05-20 11:33:35.386089091 +0200
@@ -93,11 +93,19 @@
<content type="string" default="" />
</parameter>
+<parameter name="address">
+<longdesc lang="en">
+Deprecated IP address param. Use the ip param instead.
+</longdesc>
+<shortdesc lang="en">Deprecated VPC private IP Address</shortdesc>
+<content type="string" default="" />
+</parameter>
+
<parameter name="routing_table" required="1">
<longdesc lang="en">
-Name of the routing table, where the route for the IP address should be changed, i.e. rtb-...
+Name of the routing table(s), where the route for the IP address should be changed. If declaring multiple routing tables they should be separated by comma. Example: rtb-XXXXXXXX,rtb-YYYYYYYYY
</longdesc>
-<shortdesc lang="en">routing table name</shortdesc>
+<shortdesc lang="en">routing table name(s)</shortdesc>
<content type="string" default="" />
</parameter>
@@ -129,6 +137,13 @@
END
}
+ec2ip_set_address_param_compat(){
+ # Include backward compatibility for the deprecated address parameter
+ if [ -z "$OCF_RESKEY_ip" ] && [ -n "$OCF_RESKEY_address" ]; then
+ OCF_RESKEY_ip="$OCF_RESKEY_address"
+ fi
+}
+
ec2ip_validate() {
for cmd in aws ip curl; do
check_binary "$cmd"
@@ -150,20 +165,29 @@
}
ec2ip_monitor() {
- if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ]; then
- ocf_log info "monitor: check routing table (API call)"
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $OCF_RESKEY_routing_table --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId"
- ocf_log debug "executing command: $cmd"
- ROUTE_TO_INSTANCE=$($cmd)
- ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"
- if [ -z "$ROUTE_TO_INSTANCE" ]; then
- ROUTE_TO_INSTANCE="<unknown>"
- fi
+ MON_RES=""
+ if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then
+ for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do
+ ocf_log info "monitor: check routing table (API call) - $rtb"
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId"
+ ocf_log debug "executing command: $cmd"
+ ROUTE_TO_INSTANCE="$($cmd)"
+ ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}"
+ if [ -z "$ROUTE_TO_INSTANCE" ]; then
+ ROUTE_TO_INSTANCE="<unknown>"
+ fi
+
+ if [ "$EC2_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ]; then
+ ocf_log warn "not routed to this instance ($EC2_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE on $rtb"
+ MON_RES="$MON_RES $rtb"
+ fi
+ sleep 1
+ done
- if [ "$EC2_INSTANCE_ID" != "$ROUTE_TO_INSTANCE" ];then
- ocf_log warn "not routed to this instance ($EC2_INSTANCE_ID) but to instance $ROUTE_TO_INSTANCE"
+ if [ ! -z "$MON_RES" ]; then
return $OCF_NOT_RUNNING
fi
+
else
ocf_log debug "monitor: Enhanced Monitoring disabled - omitting API call"
fi
@@ -195,19 +219,23 @@
}
ec2ip_get_and_configure() {
- # Adjusting the routing table
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile ec2 replace-route --route-table-id $OCF_RESKEY_routing_table --destination-cidr-block ${OCF_RESKEY_ip}/32 --instance-id $EC2_INSTANCE_ID"
- ocf_log debug "executing command: $cmd"
- $cmd
- rc=$?
- if [ "$rc" != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
- fi
+ for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --instance-id $EC2_INSTANCE_ID"
+ ocf_log debug "executing command: $cmd"
+ $cmd
+ rc=$?
+ if [ "$rc" != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ sleep 1
+ done
# Reconfigure the local ip address
ec2ip_drop
- ip addr add "${OCF_RESKEY_ip}/32" dev $OCF_RESKEY_interface
+ cmd="ip addr add ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface"
+ ocf_log debug "executing command: $cmd"
+ $cmd
rc=$?
if [ $rc != 0 ]; then
ocf_log warn "command failed, rc: $rc"
@@ -289,6 +317,8 @@
exit $OCF_ERR_PERM
fi
+ec2ip_set_address_param_compat
+
ec2ip_validate
case $__OCF_ACTION in

View File

@ -1,221 +0,0 @@
From 9f2b9cc09f7e2df163ff95585374f860f3dc58eb Mon Sep 17 00:00:00 2001
From: Tomas Krojzl <tomas_krojzl@cz.ibm.com>
Date: Tue, 16 Apr 2019 18:40:29 +0200
Subject: [PATCH 1/6] Fix for VM having multiple network interfaces
---
heartbeat/aws-vpc-move-ip | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 090956434..a91c2dd11 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -219,8 +219,28 @@ ec2ip_drop() {
}
ec2ip_get_and_configure() {
+ cmd="ip -br link show dev $OCF_RESKEY_interface | tr -s ' ' | cut -d' ' -f3"
+ ocf_log debug "executing command: $cmd"
+ MAC_ADDR="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
+
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-instances --instance-ids $EC2_INSTANCE_ID --query 'Reservations[*].Instances[*].NetworkInterfaces[*].[NetworkInterfaceId,MacAddress]' | grep ${MAC_ADDR} | cut -f1"
+ ocf_log debug "executing command: $cmd"
+ EC2_NETWORK_INTERFACE_ID="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "network interface id associated MAC address ${MAC_ADDR}: ${EC2_NETWORK_INTERFACE_ID}"
+
for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --instance-id $EC2_INSTANCE_ID"
+ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID"
ocf_log debug "executing command: $cmd"
$cmd
rc=$?
From a871a463134ebb2456b5f37a343bf9034f5f4074 Mon Sep 17 00:00:00 2001
From: krojzl <tomas_krojzl@cz.ibm.com>
Date: Tue, 16 Apr 2019 18:49:32 +0200
Subject: [PATCH 2/6] Fixing indentation
---
heartbeat/aws-vpc-move-ip | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index a91c2dd11..a46d10d30 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -227,7 +227,7 @@ ec2ip_get_and_configure() {
ocf_log warn "command failed, rc: $rc"
return $OCF_ERR_GENERIC
fi
- ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-instances --instance-ids $EC2_INSTANCE_ID --query 'Reservations[*].Instances[*].NetworkInterfaces[*].[NetworkInterfaceId,MacAddress]' | grep ${MAC_ADDR} | cut -f1"
ocf_log debug "executing command: $cmd"
@@ -237,7 +237,7 @@ ec2ip_get_and_configure() {
ocf_log warn "command failed, rc: $rc"
return $OCF_ERR_GENERIC
fi
- ocf_log debug "network interface id associated MAC address ${MAC_ADDR}: ${EC2_NETWORK_INTERFACE_ID}"
+ ocf_log debug "network interface id associated MAC address ${MAC_ADDR}: ${EC2_NETWORK_INTERFACE_ID}"
for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do
cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID"
From 068680427dff620a948ae25f090bc154b02f17b9 Mon Sep 17 00:00:00 2001
From: krojzl <tomas_krojzl@cz.ibm.com>
Date: Wed, 17 Apr 2019 14:22:31 +0200
Subject: [PATCH 3/6] Requested fix to avoid using AWS API
---
heartbeat/aws-vpc-move-ip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index a46d10d30..2910552f2 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -229,7 +229,7 @@ ec2ip_get_and_configure() {
fi
ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-instances --instance-ids $EC2_INSTANCE_ID --query 'Reservations[*].Instances[*].NetworkInterfaces[*].[NetworkInterfaceId,MacAddress]' | grep ${MAC_ADDR} | cut -f1"
+ cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id"
ocf_log debug "executing command: $cmd"
EC2_NETWORK_INTERFACE_ID="$(eval $cmd)"
rc=$?
From 207a2ba66ba7196180d27674aa204980fcd25de2 Mon Sep 17 00:00:00 2001
From: krojzl <tomas_krojzl@cz.ibm.com>
Date: Fri, 19 Apr 2019 11:14:21 +0200
Subject: [PATCH 4/6] More robust approach of getting MAC address
---
heartbeat/aws-vpc-move-ip | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 2910552f2..3a848b7e3 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -219,15 +219,28 @@ ec2ip_drop() {
}
ec2ip_get_and_configure() {
- cmd="ip -br link show dev $OCF_RESKEY_interface | tr -s ' ' | cut -d' ' -f3"
- ocf_log debug "executing command: $cmd"
- MAC_ADDR="$(eval $cmd)"
- rc=$?
- if [ $rc != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
+ MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
+ if [ -f $MAC_FILE ]; then
+ cmd="cat ${MAC_FILE}"
+ ocf_log debug "executing command: $cmd"
+ MAC_ADDR="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
+ else
+ cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
+ ocf_log debug "executing command: $cmd"
+ MAC_ADDR="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
fi
- ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id"
ocf_log debug "executing command: $cmd"
From cdcc12a9c1431125b0d5298176e5242bfc9fbe29 Mon Sep 17 00:00:00 2001
From: krojzl <tomas_krojzl@cz.ibm.com>
Date: Fri, 19 Apr 2019 11:20:09 +0200
Subject: [PATCH 5/6] Moving shared part outside if
---
heartbeat/aws-vpc-move-ip | 25 +++++++++----------------
1 file changed, 9 insertions(+), 16 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 3a848b7e3..bfe23e5bf 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -222,26 +222,19 @@ ec2ip_get_and_configure() {
MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
if [ -f $MAC_FILE ]; then
cmd="cat ${MAC_FILE}"
- ocf_log debug "executing command: $cmd"
- MAC_ADDR="$(eval $cmd)"
- rc=$?
- if [ $rc != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
- fi
- ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
else
cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
- ocf_log debug "executing command: $cmd"
- MAC_ADDR="$(eval $cmd)"
- rc=$?
- if [ $rc != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
- fi
- ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
fi
+ ocf_log debug "executing command: $cmd"
+ MAC_ADDR="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
+
cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id"
ocf_log debug "executing command: $cmd"
EC2_NETWORK_INTERFACE_ID="$(eval $cmd)"
From c3fc114fc64f6feb015c5342923fd2afc367ae28 Mon Sep 17 00:00:00 2001
From: krojzl <tomas_krojzl@cz.ibm.com>
Date: Fri, 19 Apr 2019 11:22:55 +0200
Subject: [PATCH 6/6] Linting adjustment
---
heartbeat/aws-vpc-move-ip | 1 -
1 file changed, 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index bfe23e5bf..2757c27d0 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -225,7 +225,6 @@ ec2ip_get_and_configure() {
else
cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
fi
-
ocf_log debug "executing command: $cmd"
MAC_ADDR="$(eval $cmd)"
rc=$?

View File

@ -1,32 +0,0 @@
From aae26ca70ef910e83485778c1fb450941fe79e8a Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Mon, 3 Dec 2018 16:48:14 +0100
Subject: [PATCH] Do not log at debug log level when HA_debug is unset
There might be situations (e.g. bundles) where the HA_debug variable
is unset. It makes little sense to enable debug logging when the HA_debug env
variable is unset.
So let's skip debug logs when HA_debug is set to 0 or is unset.
Tested inside a bundle and observed that previously seen 'ocf_log debug'
calls are now correctly suppressed (w/ HA_debug being unset inside the
container)
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/ocf-shellfuncs.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 043ab9bf2..b17297e1a 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -257,7 +257,7 @@ ha_log()
ha_debug() {
- if [ "x${HA_debug}" = "x0" ] ; then
+ if [ "x${HA_debug}" = "x0" ] || [ -z "${HA_debug}" ] ; then
return 0
fi
if tty >/dev/null; then

View File

@ -1,22 +0,0 @@
From 73b35b74b743403aeebab43205475be6f2938cd5 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 12 Jun 2019 10:11:07 +0200
Subject: [PATCH] ocf_is_true: add True to regexp
---
heartbeat/ocf-shellfuncs.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index b17297e1a..7a97558a5 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -101,7 +101,7 @@ ocf_is_decimal() {
ocf_is_true() {
case "$1" in
- yes|true|1|YES|TRUE|ja|on|ON) true ;;
+ yes|true|1|YES|TRUE|True|ja|on|ON) true ;;
*) false ;;
esac
}

View File

@ -1,21 +0,0 @@
From d1fc6920718284431a2c2cc28562498d6c8ea792 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 19 Jun 2019 11:12:33 +0200
Subject: [PATCH] Filesystem: remove removed notify-action from metadata
---
heartbeat/Filesystem | 1 -
1 file changed, 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 780ba63a4..c46ec3cca 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -221,7 +221,6 @@ block if unresponsive nfs mounts are in use on the system.
<actions>
<action name="start" timeout="60s" />
<action name="stop" timeout="60s" />
-<action name="notify" timeout="60s" />
<action name="monitor" depth="0" timeout="40s" interval="20s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />

View File

@ -1,46 +0,0 @@
From d8400a30604229d349f36855c30a6a438204023b Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 12 Jun 2019 11:29:17 +0200
Subject: [PATCH] Avoid double call to podman inspect in podman_simple_status()
Right now podman_simple_status() does the following:
- It calls container_exists() which then calls "podman inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1"
- Then it calls "podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null"
This duplication is unnecessary and we can rely on the second podman inspect
call. We need to do this because podman inspect calls are very expensive as
soon as moderate I/O kicks in.
Tested as follows:
1) Injected the change on an existing bundle-based cluster
2) Observed that monitoring operations kept working okay
3) Verified by adding set -x that only a single podman inspect per monitor
operation was called (as opposed to two before)
4) Restarted a bundle with an OCF resource inside correctly
5) Did a podman stop of a bundle and correctly observed that:
5.a) It was detected as non running:
* haproxy-bundle-podman-1_monitor_60000 on controller-0 'not running' (7): call=192, status=complete, exitreason='',
last-rc-change='Wed Jun 12 09:22:18 2019', queued=0ms, exec=0ms
5.b) It was correctly started afterwards
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/podman | 5 -----
1 file changed, 5 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 34e11da6b..b2b3081f9 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -238,11 +238,6 @@ podman_simple_status()
{
local val
- container_exists
- if [ $? -ne 0 ]; then
- return $OCF_NOT_RUNNING
- fi
-
# retrieve the 'Running' attribute for the container
val=$(podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then

View File

@ -1,63 +0,0 @@
From 9685e8e6bf2896377a9cf0e07a85de5dd5fcf2df Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 12 Jun 2019 12:00:31 +0200
Subject: [PATCH] Simplify podman_monitor()
Before this change podman_monitor() does two things:
\-> podman_simple_status()
\-> podman inspect {{.State.Running}}
\-> if podman_simple_status == 0 then monitor_cmd_exec()
\-> if [ -z "$OCF_RESKEY_monitor_cmd" ]; then # so if OCF_RESKEY_monitor_cmd is empty we just return SUCCESS
return $rc
fi
# if OCF_RESKEY_monitor_cmd is set to something we execute it
podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd
Let's actually only rely on podman exec as invoked inside monitor_cmd_exec
when $OCF_RESKEY_monitor_cmd is non empty (which is the default as it is set to "/bin/true").
When there is no monitor_cmd command defined then it makes sense to rely on podman inspect
calls container in podman_simple_status().
Tested as follows:
1) Injected the change on an existing bundle-based cluster
2) Observed that monitoring operations kept working okay
3) Restarted rabbitmq-bundle and galera-bundle successfully
4) Killed a container and we correctly detected the monitor failure
Jun 12 09:52:12 controller-0 pacemaker-controld[25747]: notice: controller-0-haproxy-bundle-podman-1_monitor_60000:230 [ ocf-exit-reason:monitor cmd failed (rc=125), output: cannot exec into container that is not running\n ]
5) Container correctly got restarted after the monitor failure:
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-0
6) Stopped and removed a container and pcmk detected it correctly:
Jun 12 09:55:15 controller-0 podman(haproxy-bundle-podman-1)[841411]: ERROR: monitor cmd failed (rc=125), output: unable to exec into haproxy-bundle-podman-1: no container with name or ID haproxy-bundle-podman-1 found: no such container
Jun 12 09:55:15 controller-0 pacemaker-execd[25744]: notice: haproxy-bundle-podman-1_monitor_60000:841411:stderr [ ocf-exit-reason:monitor cmd failed (rc=125), output: unable to exec into haproxy-bundle-podman-1: no container with name or ID haproxy-bundle-podman-1 found: no such container ]
7) pcmk was able to start the container that was stopped and removed:
Jun 12 09:55:16 controller-0 pacemaker-controld[25747]: notice: Result of start operation for haproxy-bundle-podman-1 on controller-0: 0 (ok)
8) Added 'set -x' to the RA and correctly observed that no 'podman inspect' has been invoked during monitoring operations
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/podman | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index b2b3081f9..a9bd57dea 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -255,15 +255,10 @@ podman_simple_status()
podman_monitor()
{
- local rc=0
-
- podman_simple_status
- rc=$?
-
- if [ $rc -ne 0 ]; then
- return $rc
+ if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
+ podman_simple_status
+ return $?
fi
-
monitor_cmd_exec
}

View File

@ -1,34 +0,0 @@
From 69c5d35a7a5421d4728db824558007bbb91a9d4a Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 12 Jun 2019 12:02:06 +0200
Subject: [PATCH] Remove unneeded podman exec --help call
There are no podman releases that do not have the exec argument, so
let's just drop this remnant that came from the docker RA.
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/podman | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index a9bd57dea..858023555 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -190,14 +190,8 @@ monitor_cmd_exec()
return $rc
fi
- if podman exec --help >/dev/null 2>&1; then
- out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
- rc=$?
- else
- out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(podman inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
- rc=$?
- fi
-
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
if [ $rc -eq 127 ]; then
ocf_log err "monitor cmd failed (rc=$rc), output: $out"
ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."

View File

@ -1,161 +0,0 @@
From 6016283dfdcb45bf750f96715fc653a4c0904bca Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Fri, 28 Jun 2019 13:34:40 +0200
Subject: [PATCH] podman: only use exec to manage container's lifecycle
Under heavy IO load, podman may be impacted and take a long time
to execute some actions. If that takes more than the default
20s container monitoring timeout, containers will restart unexpectedly.
Replace all IO-sensitive podman calls (inspect, exists...) by
equivalent "podman exec" calls, because the latter command seems
less prone to performance degradation under IO load.
With this commit, the resource agent now requires podman 1.0.2+,
because it relies on of two different patches [1,2] that improve
IO performance and enable to distinguish "container stopped"
"container doesn't exist" error codes.
Tested on an OpenStack environment with podman 1.0.2, with the
following scenario:
. regular start/stop/monitor operations
. probe operations (pcs resource cleanup/refresh)
. unmanage/manage operations
. reboot
[1] https://github.com/containers/libpod/commit/90b835db69d589de559462d988cb3fae5cf1ef49
[2] https://github.com/containers/libpod/commit/a19975f96d2ee7efe186d9aa0be42285cfafa3f4
---
heartbeat/podman | 75 ++++++++++++++++++++++++------------------------
1 file changed, 37 insertions(+), 38 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 51f6ba883..8fc2c4695 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -129,9 +129,6 @@ the health of the container. This command must return 0 to indicate that
the container is healthy. A non-zero return code will indicate that the
container has failed and should be recovered.
-If 'podman exec' is supported, it is used to execute the command. If not,
-nsenter is used.
-
Note: Using this method for monitoring processes inside a container
is not recommended, as containerd tries to track processes running
inside the container and does not deal well with many short-lived
@@ -192,17 +189,13 @@ monitor_cmd_exec()
local rc=$OCF_SUCCESS
local out
- if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
- return $rc
- fi
-
out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
rc=$?
- if [ $rc -eq 127 ]; then
- ocf_log err "monitor cmd failed (rc=$rc), output: $out"
- ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
- # there is no recovering from this, exit immediately
- exit $OCF_ERR_ARGS
+ # 125: no container with name or ID ${CONTAINER} found
+ # 126: container state improper (not running)
+ # 127: any other error
+ if [ $rc -eq 125 ] || [ $rc -eq 126 ]; then
+ rc=$OCF_NOT_RUNNING
elif [ $rc -ne 0 ]; then
ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out"
rc=$OCF_ERR_GENERIC
@@ -215,7 +208,16 @@ monitor_cmd_exec()
container_exists()
{
- podman inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
+ local rc
+ local out
+
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
+ # 125: no container with name or ID ${CONTAINER} found
+ if [ $rc -ne 125 ]; then
+ return 0
+ fi
+ return 1
}
remove_container()
@@ -236,30 +238,30 @@ remove_container()
podman_simple_status()
{
- local val
-
- # retrieve the 'Running' attribute for the container
- val=$(podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
- if [ $? -ne 0 ]; then
- #not running as a result of container not being found
- return $OCF_NOT_RUNNING
- fi
+ local rc
- if ocf_is_true "$val"; then
- # container exists and is running
- return $OCF_SUCCESS
+ # simple status is implemented via podman exec
+ # everything besides success is considered "not running"
+ monitor_cmd_exec
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ rc=$OCF_NOT_RUNNING;
fi
-
- return $OCF_NOT_RUNNING
+ return $rc
}
podman_monitor()
{
- if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
- podman_simple_status
- return $?
- fi
+ # We rely on running podman exec to monitor the container
+ # state because that command seems to be less prone to
+ # performance issue under IO load.
+ #
+ # For probes to work, we expect cmd_exec to be able to report
+ # when a container is not running. Here, we're not interested
+ # in distinguishing whether it's stopped or non existing
+ # (there's function container_exists for that)
monitor_cmd_exec
+ return $?
}
podman_create_mounts() {
@@ -416,14 +418,6 @@ podman_validate()
exit $OCF_ERR_CONFIGURED
fi
- if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
- podman exec --help >/dev/null 2>&1
- if [ ! $? ]; then
- ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
- check_binary nsenter
- fi
- fi
-
image_exists
if [ $? -ne 0 ]; then
ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
@@ -457,6 +451,11 @@ fi
CONTAINER=$OCF_RESKEY_name
+# Note: we currently monitor podman containers by with the "podman exec"
+# command, so make sure that invocation is always valid by enforcing the
+# exec command to be non-empty
+: ${OCF_RESKEY_monitor_cmd:=/bin/true}
+
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;

View File

@ -1,28 +0,0 @@
From c8c073ed81884128b0b3955fb0b0bd23661044a2 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 12 Jun 2019 12:45:08 +0200
Subject: [PATCH] dhcpd: keep SELinux context
---
heartbeat/dhcpd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd
index 8b2d8b695..46027b39b 100755
--- a/heartbeat/dhcpd
+++ b/heartbeat/dhcpd
@@ -337,12 +337,12 @@ dhcpd_initialize_chroot() {
done | sort -u`
for i in $cplibs ; do
if [ -s "$i" ]; then
- cp -pL "$i" "${OCF_RESKEY_chrooted_path}/$libdir/" ||
+ cp -aL "$i" "${OCF_RESKEY_chrooted_path}/$libdir/" ||
{ ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
fi
done
- return $OCF_SUCCESS
+ return $OCF_SUCCESS
}
# Initialize a non-chroot environment

View File

@ -1,22 +0,0 @@
From ef37f8a2461b5763f4510d51e08d27d8b1f76937 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 23 Jul 2019 15:47:17 +0200
Subject: [PATCH] LVM-activate: fix monitor might hang due to lvm_validate
which was added by accident
---
heartbeat/LVM-activate | 1 -
1 file changed, 1 deletion(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 9c7c721bf..3df40c894 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -852,7 +852,6 @@ stop)
lvm_stop
;;
monitor)
- lvm_validate
lvm_status
;;
validate-all)

View File

@ -1,39 +0,0 @@
From 1ff4ce7cbe58b5309f00ac1bbe124c562b6dcaf6 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 27 Jul 2018 16:02:26 +0200
Subject: [PATCH] CTDB: explicitly use bash shell
Upcoming recovery lock substring processing is bash specific.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
configure.ac | 1 +
heartbeat/{CTDB => CTDB.in} | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
rename heartbeat/{CTDB => CTDB.in} (99%)
diff --git a/configure.ac b/configure.ac
index 039b4942c..10f5314da 100644
--- a/configure.ac
+++ b/configure.ac
@@ -978,6 +978,7 @@ AC_CONFIG_FILES([heartbeat/slapd], [chmod +x heartbeat/slapd])
AC_CONFIG_FILES([heartbeat/sybaseASE], [chmod +x heartbeat/sybaseASE])
AC_CONFIG_FILES([heartbeat/syslog-ng], [chmod +x heartbeat/syslog-ng])
AC_CONFIG_FILES([heartbeat/vsftpd], [chmod +x heartbeat/vsftpd])
+AC_CONFIG_FILES([heartbeat/CTDB], [chmod +x heartbeat/CTDB])
AC_CONFIG_FILES([rgmanager/src/resources/ASEHAagent.sh], [chmod +x rgmanager/src/resources/ASEHAagent.sh])
AC_CONFIG_FILES([rgmanager/src/resources/apache.sh], [chmod +x rgmanager/src/resources/apache.sh])
AC_CONFIG_FILES([rgmanager/src/resources/bind-mount.sh], [chmod +x rgmanager/src/resources/bind-mount.sh])
diff --git a/heartbeat/CTDB b/heartbeat/CTDB.in
similarity index 99%
rename from heartbeat/CTDB
rename to heartbeat/CTDB.in
index 28e58cea0..7d87a4ef7 100755
--- a/heartbeat/CTDB
+++ b/heartbeat/CTDB.in
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!@BASH_SHELL@
#
# OCF Resource Agent for managing CTDB
#

View File

@ -1,40 +0,0 @@
From 61f7cb5954d1727f58fab6d642a124ef342c8641 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 20 Feb 2019 11:24:28 +0100
Subject: [PATCH] CTDB: add ctdb_max_open_files parameter
---
heartbeat/CTDB.in | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 0d58c850a..bbf8ef627 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -288,6 +288,14 @@ What debug level to run at (0-10). Higher means more verbose.
<content type="integer" default="2" />
</parameter>
+<parameter name="ctdb_max_open_files" required="0">
+<longdesc lang="en">
+Maximum number of open files (for ulimit -n)
+</longdesc>
+<shortdesc lang="en">Max open files</shortdesc>
+<content type="integer" default="" />
+</parameter>
+
<parameter name="smb_conf" unique="0" required="0">
<longdesc lang="en">
Path to default samba config file. Only necessary if CTDB
@@ -611,6 +619,11 @@ ctdb_start() {
start_as_disabled="--start-as-disabled"
ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
+ # set nofile ulimit for ctdbd process
+ if [ -n "$OCF_RESKEY_ctdb_max_open_files" ]; then
+ ulimit -n "$OCF_RESKEY_ctdb_max_open_files"
+ fi
+
# Start her up
"$OCF_RESKEY_ctdbd_binary" \
--reclock="$OCF_RESKEY_ctdb_recovery_lock" \

View File

@ -1,131 +0,0 @@
From 8c61f2019d11781b737251b5cf839437b25fc53f Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 25 Jul 2018 23:15:10 +0200
Subject: [PATCH 1/3] CTDB: fix incorrect db corruption reports (bsc#1101668)
If a database was disconnected during an active transaction, then
tdbdump may fail with e.g.:
> /usr/bin/tdbdump /var/lib/ctdb/persistent/secrets.tdb.1
Failed to open /var/lib/ctdb/persistent/secrets.tdb.1
tdb(/var/lib/ctdb/persistent/secrets.tdb.1): FATAL:
tdb_transaction_recover: attempt to recover read only database
This does *not* indicate corruption, only that tdbdump, which opens the
database readonly, isn't able to perform recovery.
Using tdbtool check, instead of tdbdump, passes:
> tdbtool /var/lib/ctdb/persistent/secrets.tdb.1 check
tdb_transaction_recover: recovered 2146304 byte database
Database integrity is OK and has 2 records.
Drop the tdbdump checks, and instead rely on the core ctdb event script,
which performs the same checks with tdbtool.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
heartbeat/CTDB.in | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 1456ea32b..28e58cea0 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -392,6 +392,8 @@ enable_event_scripts() {
local event_dir
event_dir=$OCF_RESKEY_ctdb_config_dir/events.d
+ chmod u+x "$event_dir/00.ctdb" # core database health check
+
if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
chmod u+x "$event_dir/10.interface"
else
@@ -563,17 +565,6 @@ ctdb_start() {
rv=$?
[ $rv -ne 0 ] && return $rv
- # Die if databases are corrupted
- persistent_db_dir="${OCF_RESKEY_ctdb_dbdir}/persistent"
- mkdir -p $persistent_db_dir 2>/dev/null
- for pdbase in $persistent_db_dir/*.tdb.[0-9]; do
- [ -f "$pdbase" ] || break
- /usr/bin/tdbdump "$pdbase" >/dev/null 2>/dev/null || {
- ocf_exit_reason "Persistent database $pdbase is corrupted! CTDB will not start."
- return $OCF_ERR_GENERIC
- }
- done
-
# Add necessary configuration to smb.conf
init_smb_conf
if [ $? -ne 0 ]; then
@@ -737,9 +728,8 @@ ctdb_monitor() {
ctdb_validate() {
- # Required binaries (full path to tdbdump is intentional, as that's
- # what's used in ctdb_start, which was lifted from the init script)
- for binary in pkill /usr/bin/tdbdump; do
+ # Required binaries
+ for binary in pkill; do
check_binary $binary
done
From 1ff4ce7cbe58b5309f00ac1bbe124c562b6dcaf6 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 27 Jul 2018 16:02:26 +0200
Subject: [PATCH 2/3] CTDB: explicitly use bash shell
Upcoming recovery lock substring processing is bash specific.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
configure.ac | 1 +
heartbeat/CTDB.in | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 7d87a4ef7..f9b5c564f 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -134,8 +134,8 @@ For more information see http://linux-ha.org/wiki/CTDB_(resource_agent)
<parameter name="ctdb_recovery_lock" unique="1" required="1">
<longdesc lang="en">
-The location of a shared lock file, common across all nodes.
-This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock
+The location of a shared lock file or helper binary, common across all nodes.
+See CTDB documentation for details.
</longdesc>
<shortdesc lang="en">CTDB shared lock file</shortdesc>
<content type="string" default="" />
@@ -757,13 +757,24 @@ ctdb_validate() {
return $OCF_ERR_CONFIGURED
fi
- lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
- touch "$lock_dir/$$" 2>/dev/null
- if [ $? != 0 ]; then
- ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
- return $OCF_ERR_ARGS
+ if [ "${OCF_RESKEY_ctdb_recovery_lock:0:1}" == '!' ]; then
+ # '!' prefix means recovery lock is handled via a helper binary
+ binary="${OCF_RESKEY_ctdb_recovery_lock:1}"
+ binary="${binary%% *}" # trim any parameters
+ if [ -z "$binary" ]; then
+ ocf_exit_reason "ctdb_recovery_lock invalid helper"
+ return $OCF_ERR_CONFIGURED
+ fi
+ check_binary "${binary}"
+ else
+ lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
+ touch "$lock_dir/$$" 2>/dev/null
+ if [ $? != 0 ]; then
+ ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
+ return $OCF_ERR_ARGS
+ fi
+ rm "$lock_dir/$$"
fi
- rm "$lock_dir/$$"
return $OCF_SUCCESS
}

View File

@ -1,452 +0,0 @@
From 30b9f55325d2acfba27aa6859c7360e10b7201d7 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 5 Jun 2019 00:41:13 +0200
Subject: [PATCH 1/3] CTDB: support Samba 4.9+
With Samba 4.9+, all ctdbd parameters have moved to config files.
Generate a new /etc/ctdb/ctdb.conf file during ctdb startup, based on RA
configuration.
Event scripts in Samba 4.9+ are also no longer enabled/disabled based on
file mode. Use the "ctdb event script enable/disable" helpers, which now
work without a running ctdbd.
Fixes: https://github.com/ClusterLabs/resource-agents/issues/1196
Signed-off-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Noel Power <noel.power@suse.com>
Signed-off-by: Amitay Isaacs <amitay@samba.org>
---
heartbeat/CTDB.in | 214 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 167 insertions(+), 47 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 4dd646896..79a2f97e7 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -143,6 +143,10 @@ OCF_RESKEY_smb_fileid_algorithm_default=""
#######################################################################
+ctdb_version() {
+ $OCF_RESKEY_ctdb_binary version | awk '{print $NF}' | sed "s/[-\.]\?[[:alpha:]].*//"
+}
+
meta_data() {
cat <<END
<?xml version="1.0"?>
@@ -256,7 +260,7 @@ host any public ip addresses.
<longdesc lang="en">
The directory containing various CTDB configuration files.
The "nodes" and "notify.sh" scripts are expected to be
-in this directory, as is the "events.d" subdirectory.
+in this directory.
</longdesc>
<shortdesc lang="en">CTDB config file directory</shortdesc>
<content type="string" default="/etc/ctdb" />
@@ -282,8 +286,10 @@ Full path to the CTDB cluster daemon binary.
<longdesc lang="en">
Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
+With CTDB 4.9.0 and later the socket path is hardcoded at build
+time, so this parameter is ignored.
</longdesc>
-<shortdesc lang="en">CTDB socket location</shortdesc>
+<shortdesc lang="en">CTDB socket location (ignored with CTDB 4.9+)</shortdesc>
<content type="string" default="${OCF_RESKEY_ctdb_socket}" />
</parameter>
@@ -421,16 +427,28 @@ invoke_ctdb() {
timeout=$((OCF_RESKEY_CRM_meta_timeout/1000))
timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000))
fi
- $OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" \
- -t $timeout -T $timelimit \
- "$@"
+
+ local vers=$(ctdb_version)
+ ocf_version_cmp "$vers" "4.9.0"
+
+ # if version < 4.9.0 specify '--socket' otherwise it's
+ # a compiled option
+ if [ "$?" -eq "0" ]; then
+ $OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" \
+ -t $timeout -T $timelimit \
+ "$@"
+ else
+ $OCF_RESKEY_ctdb_binary \
+ -t $timeout -T $timelimit \
+ "$@"
+ fi
}
# Enable any event scripts that are explicitly required.
# Any others will ultimately be invoked or not based on how they ship
# with CTDB, but will generally have no effect, beacuase the relevant
# CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb.
-enable_event_scripts() {
+enable_event_scripts_chmod() {
local event_dir
event_dir=$OCF_RESKEY_ctdb_config_dir/events.d
@@ -454,6 +472,36 @@ enable_event_scripts() {
fi
}
+enable_event_scripts_symlink() {
+ # event scripts are symlinked once enabled, with the link source in...
+ mkdir -p "$OCF_RESKEY_ctdb_config_dir/events/legacy" 2>/dev/null
+
+ invoke_ctdb event script enable legacy 00.ctdb
+
+ if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
+ invoke_ctdb event script enable legacy 10.interface
+ else
+ invoke_ctdb event script disable legacy 10.interface
+ fi
+ if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then
+ invoke_ctdb event script enable legacy 11.routing
+ else
+ invoke_ctdb event script disable legacy 11.routing
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then
+ invoke_ctdb event script enable legacy 49.winbind
+ else
+ invoke_ctdb event script disable legacy 49.winbind
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba"; then
+ invoke_ctdb event script enable legacy 50.samba
+ else
+ invoke_ctdb event script disable legacy 50.samba
+ fi
+}
+
# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
set_ctdb_variables() {
@@ -556,6 +604,46 @@ append_ctdb_sysconfig() {
[ -n "$2" ] && echo "$1=$2" >> "$CTDB_SYSCONFIG"
}
+generate_ctdb_config() {
+ local ctdb_config="$OCF_RESKEY_ctdb_config_dir/ctdb.conf"
+
+ # Backup existing config if we're not already using an auto-generated one
+ grep -qa '# CTDB-RA: Auto-generated' $ctdb_config || cp -p $ctdb_config ${ctdb_config}.ctdb-ra-orig
+ if [ $? -ne 0 ]; then
+ ocf_log warn "Unable to backup $ctdb_config to ${ctdb_config}.ctdb-ra-orig"
+ fi
+
+ local log_option="file:$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="syslog"
+ fi
+
+ local start_as_disabled="false"
+ ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" && start_as_disabled="true"
+
+ local dbdir_volatile="$OCF_RESKEY_ctdb_dbdir/volatile"
+ [ -d "$dbdir_volatile" ] || mkdir -p "$dbdir_volatile" 2>/dev/null
+ local dbdir_persistent="$OCF_RESKEY_ctdb_dbdir/persistent"
+ [ -d "$dbdir_persistent" ] || mkdir -p "$dbdir_persistent" 2>/dev/null
+ local dbdir_state="$OCF_RESKEY_ctdb_dbdir/state"
+ [ -d "$dbdir_state" ] || mkdir -p "$dbdir_state" 2>/dev/null
+
+cat >$ctdb_config <<EOF
+# CTDB-RA: Auto-generated
+[logging]
+ location = $log_option
+ log level = $OCF_RESKEY_ctdb_debuglevel
+[cluster]
+ recovery lock = $OCF_RESKEY_ctdb_recovery_lock
+[database]
+ volatile database directory = $dbdir_volatile
+ persistent database directory = $dbdir_persistent
+ state database directory = $dbdir_state
+[legacy]
+ start as disabled = $start_as_disabled
+EOF
+}
+
# Generate a new, minimal CTDB config file that's just enough
# to get CTDB running as configured by the RA parameters.
generate_ctdb_sysconfig() {
@@ -589,6 +677,58 @@ EOF
}
+invoke_ctdbd() {
+ local vers="$1"
+
+ ocf_version_cmp "$vers" "4.9.0"
+ if [ "$?" -ne "0" ]; then
+ # With 4.9+, all ctdbd binary parameters are provided as
+ # config settings
+ $OCF_RESKEY_ctdbd_binary
+ return
+ fi
+
+ # Use logfile by default, or syslog if asked for
+ local log_option
+ # --logging supported from v4.3.0 and --logfile / --syslog support
+ # has been removed from newer versions
+ ocf_version_cmp "$vers" "4.2.14"
+ if [ "$?" -eq "2" ]; then
+ log_option="--logging=file:$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--logging=syslog"
+ fi
+ else
+ log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--syslog"
+ fi
+ fi
+
+ # public addresses file (should not be present, but need to set for correctness if it is)
+ local pub_addr_option
+ pub_addr_option=""
+ [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \
+ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses"
+ # start as disabled
+ local start_as_disabled
+ start_as_disabled="--start-as-disabled"
+ ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
+
+ $OCF_RESKEY_ctdbd_binary \
+ --reclock="$OCF_RESKEY_ctdb_recovery_lock" \
+ --nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \
+ --socket="$OCF_RESKEY_ctdb_socket" \
+ --dbdir="$OCF_RESKEY_ctdb_dbdir" \
+ --dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \
+ --event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \
+ --notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \
+ --transport=tcp \
+ $start_as_disabled $log_option $pub_addr_option \
+ -d "$OCF_RESKEY_ctdb_debuglevel"
+}
+
+
ctdb_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
@@ -614,27 +754,26 @@ ctdb_start() {
return $OCF_ERR_GENERIC
fi
- # Generate new CTDB sysconfig
- generate_ctdb_sysconfig
- enable_event_scripts
+ local version=$(ctdb_version)
- # Use logfile by default, or syslog if asked for
- local log_option
- # --logging supported from v4.3.0 and --logfile / --syslog support
- # has been removed from newer versions
- version=$(ctdb version | awk '{print $NF}')
- ocf_version_cmp "$version" "4.2.14"
- if [ "$?" -eq "2" ]; then
- log_option="--logging=file:$OCF_RESKEY_ctdb_logfile"
- if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
- log_option="--logging=syslog"
- fi
+ ocf_version_cmp "$version" "4.9.0"
+ if [ "$?" -eq "0" ]; then
+ # prior to 4.9, ctdbd parameters are in sysconfig or passed as
+ # binary arguments
+ generate_ctdb_sysconfig
+
+ # prior to 4.9, event script enablement without a running
+ # ctdbd is done by chmoding the scripts directly
+ enable_event_scripts_chmod
else
- log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
- if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
- log_option="--syslog"
- fi
+ # 4.9+ moves all ctdbd parameters to ctdb.conf
+ generate_ctdb_config
+
+ # 4.9+ event scripts can be enabled with ctdb directly, which
+ # performs a symlink
+ enable_event_scripts_symlink
fi
+
if [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
# ensure the logfile's directory exists, otherwise ctdb will fail to start
mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile)
@@ -643,33 +782,14 @@ ctdb_start() {
# ensure ctdb's rundir exists, otherwise it will fail to start
mkdir -p $OCF_RESKEY_ctdb_rundir 2>/dev/null
- # public addresses file (should not be present, but need to set for correctness if it is)
- local pub_addr_option
- pub_addr_option=""
- [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \
- pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses"
- # start as disabled
- local start_as_disabled
- start_as_disabled="--start-as-disabled"
- ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
-
# set nofile ulimit for ctdbd process
if [ -n "$OCF_RESKEY_ctdb_max_open_files" ]; then
ulimit -n "$OCF_RESKEY_ctdb_max_open_files"
fi
# Start her up
- "$OCF_RESKEY_ctdbd_binary" \
- --reclock="$OCF_RESKEY_ctdb_recovery_lock" \
- --nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \
- --socket="$OCF_RESKEY_ctdb_socket" \
- --dbdir="$OCF_RESKEY_ctdb_dbdir" \
- --dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \
- --event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \
- --notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \
- --transport=tcp \
- $start_as_disabled $log_option $pub_addr_option \
- -d "$OCF_RESKEY_ctdb_debuglevel"
+ invoke_ctdbd "$version"
+
if [ $? -ne 0 ]; then
# cleanup smb.conf
cleanup_smb_conf
@@ -688,7 +808,7 @@ ctdb_start() {
if [ $? -ne 0 ]; then
# CTDB will be running, kill it before returning
ctdb_stop
- ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status"
+ ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary status"
return $OCF_ERR_GENERIC
fi
if ! echo "$status" | grep -qs 'UNHEALTHY (THIS'; then
@@ -725,7 +845,7 @@ ctdb_stop() {
[ $count -gt 10 ] && {
ocf_log info "killing ctdbd "
pkill -9 -f "$OCF_RESKEY_ctdbd_binary"
- pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events.d/"
+ pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events"
}
done
From b4753b7cb46045bb9e7ed5e3a0a20f6104264b12 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 10 Jul 2019 17:11:50 +0200
Subject: [PATCH 2/3] CTDB: generate script.options file for 4.9+
Event scripts in CTDB 4.9+ ignore sysconfig configuration and instead
parse parameters in ctdb_config_dir/script.options .
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
heartbeat/CTDB.in | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 79a2f97e7..0906f3da9 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -242,6 +242,7 @@ If the amount of free memory drops below this value the node will
become unhealthy and ctdb and all managed services will be shutdown.
Once this occurs, the administrator needs to find the reason for the
OOM situation, rectify it and restart ctdb with "service ctdb start".
+With CTDB 4.4.0 and later this parameter is ignored.
</longdesc>
<shortdesc lang="en">Minimum amount of free memory (MB)</shortdesc>
<content type="integer" default="${OCF_RESKEY_ctdb_monitor_free_memory_default}" />
@@ -600,8 +601,10 @@ cleanup_smb_conf() {
mv "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf"
}
-append_ctdb_sysconfig() {
- [ -n "$2" ] && echo "$1=$2" >> "$CTDB_SYSCONFIG"
+append_conf() {
+ local file_path="$1"
+ shift
+ [ -n "$2" ] && echo "$1=$2" >> "$file_path"
}
generate_ctdb_config() {
@@ -644,6 +647,25 @@ cat >$ctdb_config <<EOF
EOF
}
+generate_event_script_options() {
+ local script_options="$OCF_RESKEY_ctdb_config_dir/script.options"
+
+ # Backup existing config if we're not already using an auto-generated one
+ grep -qa '# CTDB-RA: Auto-generated' $script_options || cp -p $script_options ${script_options}.ctdb-ra-orig
+ if [ $? -ne 0 ]; then
+ ocf_log warn "Unable to backup $script_options to ${script_options}.ctdb-ra-orig"
+ fi
+
+cat >$script_options <<EOF
+# CTDB-RA: Auto-generated
+CTDB_SAMBA_SKIP_SHARE_CHECK=$(ocf_is_true "$OCF_RESKEY_ctdb_samba_skip_share_check" && echo 'yes' || echo 'no')
+EOF
+
+ append_conf "$script_options" CTDB_SERVICE_SMB $OCF_RESKEY_ctdb_service_smb
+ append_conf "$script_options" CTDB_SERVICE_NMB $OCF_RESKEY_ctdb_service_nmb
+ append_conf "$script_options" CTDB_SERVICE_WINBIND $OCF_RESKEY_ctdb_service_winbind
+}
+
# Generate a new, minimal CTDB config file that's just enough
# to get CTDB running as configured by the RA parameters.
generate_ctdb_sysconfig() {
@@ -671,9 +693,9 @@ CTDB_SAMBA_SKIP_SHARE_CHECK=$(ocf_is_true "$OCF_RESKEY_ctdb_samba_skip_share_che
CTDB_MANAGES_SAMBA=$(ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && echo 'yes' || echo 'no')
CTDB_MANAGES_WINBIND=$(ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind" && echo 'yes' || echo 'no')
EOF
- append_ctdb_sysconfig CTDB_SERVICE_SMB $OCF_RESKEY_ctdb_service_smb
- append_ctdb_sysconfig CTDB_SERVICE_NMB $OCF_RESKEY_ctdb_service_nmb
- append_ctdb_sysconfig CTDB_SERVICE_WINBIND $OCF_RESKEY_ctdb_service_winbind
+ append_conf "$CTDB_SYSCONFIG" CTDB_SERVICE_SMB $OCF_RESKEY_ctdb_service_smb
+ append_conf "$CTDB_SYSCONFIG" CTDB_SERVICE_NMB $OCF_RESKEY_ctdb_service_nmb
+ append_conf "$CTDB_SYSCONFIG" CTDB_SERVICE_WINBIND $OCF_RESKEY_ctdb_service_winbind
}
@@ -769,6 +791,9 @@ ctdb_start() {
# 4.9+ moves all ctdbd parameters to ctdb.conf
generate_ctdb_config
+ # 4.9+ event script options are in script.options
+ generate_event_script_options
+
# 4.9+ event scripts can be enabled with ctdb directly, which
# performs a symlink
enable_event_scripts_symlink
From 0a8610711f90c4cc7a2b380a4795f463532d9520 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 10 Jul 2019 17:54:01 +0200
Subject: [PATCH 3/3] CTDB: drop sysconfig presence check during validate
There are two reasons to avoid this check:
- for ctdb versions prior to 4.9.0, the sysconfig file is generated by
the resource agent start hook *after* ctdb_validate() is called.
- post 4.9.0 versions don't use the sysconfig file.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
heartbeat/CTDB.in | 5 -----
1 file changed, 5 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 0906f3da9..15d78902e 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -925,11 +925,6 @@ ctdb_validate() {
check_binary $binary
done
- if [ -z "$CTDB_SYSCONFIG" ]; then
- ocf_exit_reason "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
- return $OCF_ERR_INSTALLED
- fi
-
if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then
ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
return $OCF_ERR_INSTALLED

View File

@ -1,193 +0,0 @@
From 462ada6164cb77c81f5291d88287d68506d38056 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Tue, 9 Jul 2019 23:14:21 +0200
Subject: [PATCH] Generate addition drop-in dependencies for podman containers
When podman creates a container, it creates two additional systemd
scope files dynamically:
- libpod-conmon-<CONTAINERID>.scope - runs a conmon process that
tracks a container's pid1 into a dedicated pidfile.
- libpod-<CONTAINERID>.scope - created dynamically by runc,
for cgroups accounting
On shutdown, it can happen that systemd stops those scope early,
which in turn sends a SIGTERM to pacemaker-managed containers
before pacemaker has scheduled any stop operation. That
confuses the cluster and may break shutdown.
Add a new option in the resource-agent to inject additional
dependencies into the dynamically created scope files, so that
systemd is not allowed to stop scopes before the pacemaker
service itself is stopped.
When that option is enabled, the scopes look like:
# podman ps | grep galera
c329819a1227 192.168.122.8:8787/rhosp15/openstack-mariadb:latest dumb-init -- /bin... About an hour ago Up About an hour ago galera-bundle-podman-0
# systemctl cat libpod*c329819a1227*
# /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
# This is a transient unit file, created programmatically via the systemd API. Do not edit.
[Scope]
Slice=machine.slice
Delegate=yes
[Unit]
DefaultDependencies=no
# /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
[Unit]
Before=pacemaker.service
# /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
# This is a transient unit file, created programmatically via the systemd API. Do not edit.
[Unit]
Description=libcontainer container c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b
[Scope]
Slice=machine.slice
Delegate=yes
MemoryAccounting=yes
CPUAccounting=yes
BlockIOAccounting=yes
[Unit]
DefaultDependencies=no
# /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
[Unit]
Before=pacemaker.service
Effectively, this prevents systemd from managing the shutdown of any
pacemaker-managed podman container.
Related: rhbz#1726442
---
heartbeat/podman | 82 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 1 deletion(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 8fc2c4695..8a916eb8c 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -158,6 +158,16 @@ to have the particular one persist when this happens.
<shortdesc lang="en">reuse container</shortdesc>
<content type="boolean" default="${OCF_RESKEY_reuse_default}"/>
</parameter>
+
+<parameter name="drop_in_dependency" required="0" unique="0">
+<longdesc lang="en">
+Use transient drop-in files to add extra dependencies to the systemd
+scopes associated to the container. During reboot, this prevents systemd
+to stop the container before pacemaker.
+</longdesc>
+<shortdesc lang="en">drop-in dependency</shortdesc>
+<content type="boolean"/>
+</parameter>
</parameters>
<actions>
@@ -273,8 +283,57 @@ podman_create_mounts() {
IFS="$oldIFS"
}
+podman_container_id()
+{
+ # Retrieve the container ID by doing a "podman ps" rather than
+ # a "podman inspect", because the latter has performance issues
+ # under IO load.
+ # We could have run "podman start $CONTAINER" to get the ID back
+ # but if the container is stopped, the command will return a
+ # name instead of a container ID. This would break us.
+ podman ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$CONTAINER" | cut -d' ' -f1
+}
+
+
+create_transient_drop_in_dependency()
+{
+ local cid=$1
+ local rc=$OCF_SUCCESS
+
+ if [ -z "$cid" ]; then
+ ocf_log error "Container ID not found for \"$CONTAINER\". Not creating drop-in dependency"
+ return $OCF_ERR_GENERIC
+ fi
+
+ ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)"
+ for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do
+ if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then
+ mkdir -p /run/systemd/transient/"$scope" && \
+ echo -e "[Unit]\nBefore=pacemaker.service" > /run/systemd/transient/"$scope"/dep.conf && \
+ chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
+ rc=$?
+ fi
+ done
+
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)"
+ else
+ systemctl daemon-reload
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\""
+ fi
+ fi
+
+ return $rc
+}
+
+
podman_start()
{
+ local cid
+ local rc
+
podman_create_mounts
local run_opts="-d --name=${CONTAINER}"
# check to see if the container has already started
@@ -306,8 +365,17 @@ podman_start()
ocf_log info "running container $CONTAINER for the first time"
ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
fi
+ rc=$?
- if [ $? -ne 0 ]; then
+ # if the container was stopped or didn't exist before, systemd
+ # removed the libpod* scopes. So always try to recreate the drop-ins
+ if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then
+ cid=$(podman_container_id)
+ create_transient_drop_in_dependency "$cid"
+ rc=$?
+ fi
+
+ if [ $rc -ne 0 ]; then
ocf_exit_reason "podman failed to launch container"
return $OCF_ERR_GENERIC
fi
@@ -353,6 +421,8 @@ podman_stop()
else
ocf_log debug "waiting $timeout second[s] before killing container"
ocf_run podman stop -t=$timeout $CONTAINER
+ # on stop, systemd will automatically delete any transient
+ # drop-in conf that has been created earlier
fi
if [ $? -ne 0 ]; then
@@ -456,6 +526,16 @@ CONTAINER=$OCF_RESKEY_name
# exec command to be non-empty
: ${OCF_RESKEY_monitor_cmd:=/bin/true}
+# When OCF_RESKEY_drop_in_dependency is not populated, we
+# look at another file-based way of enabling the option.
+# Otherwise, consider it disabled.
+if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then
+ if [ -f "/etc/sysconfig/podman_drop_in" ] || \
+ [ -f "/etc/default/podman_drop_in" ]; then
+ OCF_RESKEY_drop_in_dependency=yes
+ fi
+fi
+
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;

View File

@ -1,48 +0,0 @@
From 6c24147ebe0e979c48db93a5f8ec6094b8707591 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 26 Sep 2019 12:52:39 +0200
Subject: [PATCH] LVM-activate: move pvscan --cache to validate
It needs to be called before validate attempts to look at the VG.
---
configure.ac | 2 +-
heartbeat/LVM-activate | 6 +++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/configure.ac b/configure.ac
index 97dac7cf8..1eb65cf34 100644
--- a/configure.ac
+++ b/configure.ac
@@ -21,7 +21,7 @@ dnl checks for system services
AC_INIT([resource-agents],
m4_esyscmd([make/git-version-gen .tarball-version]),
- [to_be_defined@foobar.org])
+ [developers@clusterlabs.org])
AC_USE_SYSTEM_EXTENSIONS
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 3df40c894..9b7c0aa7f 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -489,6 +489,11 @@ lvm_validate() {
check_binary lvm
check_binary dmsetup
+ # This is necessary when using system ID to update lvm hints,
+ # or in older versions of lvm, this is necessary to update the
+ # lvmetad cache.
+ pvscan --cache
+
if ! vgs --foreign ${VG} >/dev/null 2>&1 ; then
# stop action exits successfully if the VG cannot be accessed...
if [ $__OCF_ACTION = "stop" ]; then
@@ -627,7 +632,6 @@ clvmd_activate() {
systemid_activate() {
local cur_systemid
- pvscan --cache
cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')
# Put our system ID on the VG

View File

@ -1,66 +0,0 @@
From 34b46b172857babbb2bca5e012c7827ed6a26b01 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 6 Nov 2019 10:00:31 +0100
Subject: [PATCH] IPaddr2: add noprefixroute parameter
---
heartbeat/IPaddr2 | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 1d39ae514..6f8e8c734 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -88,6 +88,7 @@ OCF_RESKEY_arp_sender_default=""
OCF_RESKEY_send_arp_opts_default=""
OCF_RESKEY_flush_routes_default="false"
OCF_RESKEY_run_arping_default=false
+OCF_RESKEY_noprefixroute_default="false"
OCF_RESKEY_preferred_lft_default="forever"
OCF_RESKEY_network_namespace_default=""
@@ -109,6 +110,7 @@ OCF_RESKEY_network_namespace_default=""
: ${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}}
: ${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}}
: ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}}
+: ${OCF_RESKEY_noprefixroute=${OCF_RESKEY_noprefixroute_default}}
: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}}
: ${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}}
@@ -377,6 +379,14 @@ Whether or not to run arping for IPv4 collision detection check.
<content type="string" default="${OCF_RESKEY_run_arping_default}"/>
</parameter>
+<parameter name="noprefixroute">
+<longdesc lang="en">
+Use noprefixroute flag (see 'man ip-address').
+</longdesc>
+<shortdesc lang="en">Use noprefixroute flag</shortdesc>
+<content type="string" default="${OCF_RESKEY_noprefixroute_default}"/>
+</parameter>
+
<parameter name="preferred_lft">
<longdesc lang="en">
For IPv6, set the preferred lifetime of the IP address.
@@ -397,8 +407,8 @@ the namespace.
<shortdesc lang="en">Network namespace to use</shortdesc>
<content type="string" default="${OCF_RESKEY_network_namespace_default}"/>
</parameter>
-
</parameters>
+
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
@@ -640,6 +650,11 @@ add_interface () {
msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface"
fi
+ if ocf_is_true "${OCF_RESKEY_noprefixroute}"; then
+ cmd="$cmd noprefixroute"
+ msg="${msg} (with noprefixroute)"
+ fi
+
if [ ! -z "$label" ]; then
cmd="$cmd label $label"
msg="${msg} (with label $label)"

View File

@ -1,69 +0,0 @@
diff -uNr a/heartbeat/LVM-activate b/heartbeat/LVM-activate
--- a/heartbeat/LVM-activate 2019-10-08 12:10:11.755991580 +0200
+++ b/heartbeat/LVM-activate 2019-10-08 12:14:38.388288176 +0200
@@ -42,6 +42,11 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Parameter defaults
+OCF_RESKEY_partial_activation_default="false"
+
+: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}}
+
# If LV is given, only activate this named LV; otherwise, activate all
# LVs in the named VG.
VG=${OCF_RESKEY_vgname}
@@ -150,6 +155,16 @@
<content type="string" default="pacemaker" />
</parameter>
+<parameter name="partial_activation" unique="0" required="0">
+<longdesc lang="en">
+If set, the volume group will be activated partially even with some
+physical volumes missing. It helps to set to true when using mirrored
+logical volumes.
+</longdesc>
+<shortdesc lang="en">Activate VG partially when missing PVs</shortdesc>
+<content type="string" default="${OCF_RESKEY_partial_activation_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -486,6 +501,25 @@
exit $OCF_ERR_CONFIGURED
fi
+ # Inconsistency might be due to missing physical volumes, which doesn't
+ # automatically mean we should fail. If partial_activation=true then
+ # we should let start try to handle it, or if no PVs are listed as
+ # "unknown device" then another node may have marked a device missing
+ # where we have access to all of them and can start without issue.
+ case $(vgs -o attr --noheadings $VG | tr -d ' ') in
+ ???p??*)
+ if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ # We are missing devices and cannot activate partially
+ ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially"
+ exit $OCF_ERR_GENERIC
+ else
+ # We are missing devices but are allowed to activate partially.
+ # Assume that caused the vgck failure and carry on
+ ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
+ fi
+ ;;
+ esac
+
# Get the access mode from VG metadata and check if it matches the input
# value. Skip to check "tagging" mode because there's no reliable way to
# automatically check if "tagging" mode is being used.
@@ -545,6 +579,10 @@
do_activate() {
local activate_opt=$1
+ if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ activate_opt="${activate_opt} --partial"
+ fi
+
# Only activate the specific LV if it's given
if [ -n "$LV" ]; then
ocf_run lvchange $activate_opt ${VG}/${LV}

View File

@ -1,39 +0,0 @@
From 2aa8015bc4ff0bd61eca13eceb59aaa672335b76 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nwahl@redhat.com>
Date: Thu, 30 Aug 2018 18:36:11 -0700
Subject: [PATCH] Filesystem: Support symlink as mountpoint directory
Filesystem monitor operation fails when the `directory` attribute is a
symlink.
The monitor operation calls the `list_mounts` function, which cats
`/proc/mounts` if it exists, else cats `/etc/mtab` if it exists, else
runs the `mount` command. It then greps for `" $MOUNTPOINT "` in the
output, where `$MOUNTPOINT` is the value of the `directory` attribute.
`/proc/mounts`, `/etc/mtab`, and the `mount` command resolve symlinks
to their canonical targets. So while the monitor operation greps for
the symlink path (surrounded by spaces) as defined in the directory
attribute, the symlink will not be present in the `list_mounts` output.
Only the symlink's target will be present.
This patch uses `readlink -f $MOUNTPOINT` to resolve the symlink to its
canonical name before using it as a grep pattern in the
`Filesystem_status` function.
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 7c73b0b97..fc4b8fcd5 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -580,7 +580,7 @@ Filesystem_stop()
#
Filesystem_status()
{
- if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
+ if list_mounts | grep -q " $(readlink -f $MOUNTPOINT) " >/dev/null 2>&1; then
rc=$OCF_SUCCESS
msg="$MOUNTPOINT is mounted (running)"
else

View File

@ -1,43 +0,0 @@
From e2c3ec91cdd123b8afc6010f45ecd22ee6d8ecf7 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nwahl@redhat.com>
Date: Mon, 3 Sep 2018 00:30:01 -0700
Subject: [PATCH] Filesystem: Canonicalize mountpoint symlinks
Commit 2aa8015 added support to `Filesystem_status()` for mountpoints
that are symlinks. However, it missed two other places where `readlink`
calls should have been added to canonicalize symlinks.
---
heartbeat/Filesystem | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index fc4b8fcd5..2a43d1daa 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -278,7 +278,7 @@ determine_blockdevice() {
nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none)
: ;;
*)
- DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1`
+ DEVICE=`list_mounts | grep " $(readlink -f "$MOUNTPOINT" ) " | cut -d' ' -f1`
if [ -b "$DEVICE" ]; then
blockdevice=yes
fi
@@ -396,7 +396,7 @@ fstype_supported()
Filesystem_start()
{
# Check if there are any mounts mounted under the mountpoint
- if list_mounts | grep -q -E " $MOUNTPOINT/\w+" >/dev/null 2>&1; then
+ if list_mounts | grep -q -E " $(readlink -f "$MOUNTPOINT" )/\w+" >/dev/null 2>&1; then
ocf_log err "There is one or more mounts mounted under $MOUNTPOINT."
return $OCF_ERR_CONFIGURED
fi
@@ -580,7 +580,7 @@ Filesystem_stop()
#
Filesystem_status()
{
- if list_mounts | grep -q " $(readlink -f $MOUNTPOINT) " >/dev/null 2>&1; then
+ if list_mounts | grep -q " $(readlink -f "$MOUNTPOINT" ) " >/dev/null 2>&1; then
rc=$OCF_SUCCESS
msg="$MOUNTPOINT is mounted (running)"
else

View File

@ -1,53 +0,0 @@
From 69d607dc7568168e874f99d5a8b6bdb66b579d8b Mon Sep 17 00:00:00 2001
From: "yusk.iida" <yusk.iida@gmail.com>
Date: Tue, 7 May 2019 19:37:26 +0900
Subject: [PATCH] Low: Filesystem: Fix a problem umount is not executed in the
event of a disk failure
---
heartbeat/Filesystem | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 2a43d1daa..bd974f8f3 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -278,7 +278,7 @@ determine_blockdevice() {
nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none)
: ;;
*)
- DEVICE=`list_mounts | grep " $(readlink -f "$MOUNTPOINT" ) " | cut -d' ' -f1`
+ DEVICE=`list_mounts | grep " $CANONICALIZED_MOUNTPOINT " | cut -d' ' -f1`
if [ -b "$DEVICE" ]; then
blockdevice=yes
fi
@@ -396,7 +396,7 @@ fstype_supported()
Filesystem_start()
{
# Check if there are any mounts mounted under the mountpoint
- if list_mounts | grep -q -E " $(readlink -f "$MOUNTPOINT" )/\w+" >/dev/null 2>&1; then
+ if list_mounts | grep -q -E " $CANONICALIZED_MOUNTPOINT/\w+" >/dev/null 2>&1; then
ocf_log err "There is one or more mounts mounted under $MOUNTPOINT."
return $OCF_ERR_CONFIGURED
fi
@@ -580,7 +580,7 @@ Filesystem_stop()
#
Filesystem_status()
{
- if list_mounts | grep -q " $(readlink -f "$MOUNTPOINT" ) " >/dev/null 2>&1; then
+ if list_mounts | grep -q " $CANONICALIZED_MOUNTPOINT " >/dev/null 2>&1; then
rc=$OCF_SUCCESS
msg="$MOUNTPOINT is mounted (running)"
else
@@ -804,6 +804,11 @@ if [ -z "$OCF_RESKEY_directory" ]; then
else
MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
: ${MOUNTPOINT:=/}
+ CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT")
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
+ exit $OCF_ERR_GENERIC
+ fi
# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
# kill the whole system. Is that a good idea?

View File

@ -1,32 +0,0 @@
From 48a7ebcea5ce0522021cf3079b62107a06b530b9 Mon Sep 17 00:00:00 2001
From: James Oakley <jfunk@funktronics.ca>
Date: Thu, 8 Aug 2019 05:56:14 -0700
Subject: [PATCH] Don't call readlink on path if it does not exist
---
heartbeat/Filesystem | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 4bbbc06d3..738e3c08e 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -858,10 +858,14 @@ if [ -z "$OCF_RESKEY_directory" ]; then
else
MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
: ${MOUNTPOINT:=/}
- CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT")
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
- exit $OCF_ERR_GENERIC
+ if [ -e "$MOUNTPOINT" ] ; then
+ CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT")
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
+ exit $OCF_ERR_GENERIC
+ fi
+ else
+ CANONICALIZED_MOUNTPOINT="$MOUNTPOINT"
fi
# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll

View File

@ -1,46 +0,0 @@
From b67278bc92cfb0b9947ff5fff65f46f420a42c2c Mon Sep 17 00:00:00 2001
From: Kazutomo Nakahira <kazutomo_nakahira@newson.co.jp>
Date: Fri, 10 May 2019 14:30:51 +0900
Subject: [PATCH] Low: Filesystem: Fix missing mount point due to corrupted
mount list
---
heartbeat/Filesystem | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 2a43d1daa..c38ae12d4 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -255,16 +255,26 @@ is_bind_mount() {
}
list_mounts() {
local inpf=""
+ local mount_list=""
+ local check_list="x"
+
if [ -e "/proc/mounts" ] && ! is_bind_mount; then
inpf=/proc/mounts
elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then
inpf=/etc/mtab
fi
- if [ "$inpf" ]; then
- cut -d' ' -f1,2,3 < $inpf
- else
- $MOUNT | cut -d' ' -f1,3,5
- fi
+
+ # Make sure that the mount list has not been changed while reading.
+ while [ "$mount_list" != "$check_list" ]; do
+ check_list=$mount_list
+ if [ "$inpf" ]; then
+ mount_list=$(cut -d' ' -f1,2,3 < $inpf)
+ else
+ mount_list=$($MOUNT | cut -d' ' -f1,3,5)
+ fi
+ done
+
+ echo "$mount_list"
}
determine_blockdevice() {

View File

@ -1,52 +0,0 @@
From bfbc99003ebd96d79bbf8ad50be0b5e714a92fd7 Mon Sep 17 00:00:00 2001
From: ytakeshita <y.takeshita0311@gmail.com>
Date: Fri, 7 Jun 2019 15:20:52 +0900
Subject: [PATCH] Medium: Filesystem: Prevents to all root user processes are
killed when bind mounting a directory on rootfs.
if a directory is bound mounting on rootfs and "force_umount" is not set "safe", change "force_umount" to "safe".
---
heartbeat/Filesystem | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index c46ec3cca..1b29a08b3 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -314,6 +314,24 @@ bind_kernel_check() {
[ $? -ne 0 ] &&
ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
+
+bind_rootfs_check() {
+ local SOURCE
+ local TARGET
+ local ROOTFS
+
+ SOURCE=$1
+ TARGET=$(df --output=target $SOURCE | tail -n 1)
+
+ ROOTFS=$(list_mounts | grep -w rootfs | cut -d' ' -f 2)
+
+ if [ "${TARGET}" = "${ROOTFS}" ]; then
+ return 1
+ else
+ return 0
+ fi
+}
+
bind_mount() {
if is_bind_mount && [ "$options" != "-o bind" ]
then
@@ -476,6 +494,11 @@ get_pids()
local procs
local mmap_procs
+ if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_rootfs_check "$DEVICE"; then
+ ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'"
+ FORCE_UNMOUNT=safe
+ fi
+
if ocf_is_true "$FORCE_UNMOUNT"; then
if [ "X${HOSTOS}" = "XOpenBSD" ];then
fstat | grep $dir | awk '{print $3}'

View File

@ -1,42 +0,0 @@
From f8e5d2afc5b9bbf676ac20894f0f26e6ec998557 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 10 Sep 2019 15:40:12 +0200
Subject: [PATCH] Filesystem: improve "/" check for bind mounts
---
heartbeat/Filesystem | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 738e3c08e..e66ddc77f 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -337,17 +337,8 @@ bind_kernel_check() {
ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
-bind_rootfs_check() {
- local SOURCE
- local TARGET
- local ROOTFS
-
- SOURCE=$1
- TARGET=$(df --output=target $SOURCE | tail -n 1)
-
- ROOTFS=$(list_mounts | grep -w rootfs | cut -d' ' -f 2)
-
- if [ "${TARGET}" = "${ROOTFS}" ]; then
+bind_root_mount_check() {
+ if [ "$(df -P "$1" | awk 'END{print $6}')" = "/" ]; then
return 1
else
return 0
@@ -516,7 +507,7 @@ get_pids()
local procs
local mmap_procs
- if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_rootfs_check "$DEVICE"; then
+ if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_root_mount_check "$DEVICE"; then
ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'"
FORCE_UNMOUNT=safe
fi

View File

@ -1,34 +0,0 @@
From f8e1b1407b613657ebd90381d53e6a567b92b241 Mon Sep 17 00:00:00 2001
From: Kazutomo Nakahira <kazutomo_nakahira@newson.co.jp>
Date: Mon, 17 Dec 2018 14:15:24 +0900
Subject: [PATCH] Medium: pgsql: Set initial score for primary and hot standby
in the probe.
---
heartbeat/pgsql | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 842dc0ac4..8ef84dd3e 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -974,11 +974,19 @@ pgsql_real_monitor() {
case "$output" in
f) ocf_log debug "PostgreSQL is running as a primary."
if [ "$OCF_RESKEY_monitor_sql" = "$OCF_RESKEY_monitor_sql_default" ]; then
+ if ocf_is_probe; then
+ # Set initial score for primary.
+ exec_with_retry 0 $CRM_MASTER -v $PROMOTE_ME
+ fi
return $OCF_RUNNING_MASTER
fi
;;
t) ocf_log debug "PostgreSQL is running as a hot standby."
+ if ocf_is_probe; then
+ # Set initial score for hot standby.
+ exec_with_retry 0 $CRM_MASTER -v $CAN_NOT_PROMOTE
+ fi
return $OCF_SUCCESS;;
*) ocf_exit_reason "$CHECK_MS_SQL output is $output"

View File

@ -1,34 +0,0 @@
From ac430f79c333d73e6cd59ae59178c7040e7dbfda Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 8 May 2019 18:23:59 +0900
Subject: [PATCH] pgsql: enhance checks in pgsql_real_start to prevent
incorrect status gets
---
heartbeat/pgsql | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 842dc0ac4..5d04618e6 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -483,7 +483,7 @@ runasowner() {
"-q")
quietrun="-q"
shift 1;;
- "warn"|"err")
+ "info"|"warn"|"err")
loglevel="-$1"
shift 1;;
*)
@@ -544,7 +544,9 @@ pgsql_real_start() {
local postgres_options
local rc
- if pgsql_status; then
+ pgsql_real_monitor info
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS -o $rc -eq $OCF_RUNNING_MASTER ]; then
ocf_log info "PostgreSQL is already running. PID=`cat $PIDFILE`"
if is_replication; then
return $OCF_ERR_GENERIC

View File

@ -1,202 +0,0 @@
--- ClusterLabs-resource-agents-e711383f/heartbeat/IPsrcaddr 2019-08-15 16:02:10.055827624 +0200
+++ /home/oalbrigt/src/resource-agents/heartbeat/IPsrcaddr 2019-08-15 15:45:50.690757838 +0200
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Description: IPsrcaddr - Preferred source address modification
+# Description: IPsrcaddr - Preferred source(/dest) address modification
#
# Author: John Sutton <john@scl.co.uk>
# Support: users@clusterlabs.org
@@ -11,7 +11,7 @@
#
# This script manages the preferred source address associated with
# packets which originate on the localhost and are routed through the
-# default route. By default, i.e. without the use of this script or
+# matching route. By default, i.e. without the use of this script or
# similar, these packets will carry the IP of the primary i.e. the
# non-aliased interface. This can be a nuisance if you need to ensure
# that such packets carry the same IP irrespective of which host in
@@ -27,7 +27,7 @@
#
# NOTES:
#
-# 1) There must be one and not more than 1 default route! Mainly because
+# 1) There must be one and not more than 1 matching route! Mainly because
# I can't see why you should have more than one. And if there is more
# than one, we would have to box clever to find out which one is to be
# modified, or we would have to pass its identity as an argument.
@@ -54,16 +54,25 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
+OCF_RESKEY_ipaddress_default=""
+OCF_RESKEY_cidr_netmask_default=""
+OCF_RESKEY_destination_default="0.0.0.0/0"
OCF_RESKEY_proto_default=""
+OCF_RESKEY_table_default=""
+: ${OCF_RESKEY_ipaddress=${OCF_RESKEY_ipaddress_default}}
+: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}}
+: ${OCF_RESKEY_destination=${OCF_RESKEY_destination_default}}
: ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}}
+: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}}
#######################################################################
[ -z "$OCF_RESKEY_proto" ] && PROTO="" || PROTO="proto $OCF_RESKEY_proto"
+[ -z "$OCF_RESKEY_table" ] && TABLE="" || TABLE="table $OCF_RESKEY_table"
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
- CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0"
+ CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination"
CMDCHANGE="$IP2UTIL route change to "
SYSTYPE="`uname -s`"
@@ -91,7 +100,7 @@
The IP address.
</longdesc>
<shortdesc lang="en">IP address</shortdesc>
-<content type="string" default="" />
+<content type="string" default="${OCF_RESKEY_ipaddress_default}" />
</parameter>
<parameter name="cidr_netmask">
@@ -100,7 +109,15 @@
dotted quad notation 255.255.255.0).
</longdesc>
<shortdesc lang="en">Netmask</shortdesc>
-<content type="string" default=""/>
+<content type="string" default="${OCF_RESKEY_cidr_netmask_default}"/>
+</parameter>
+
+<parameter name="destination">
+<longdesc lang="en">
+The destination IP/subnet for the route (default: $OCF_RESKEY_destination_default)
+</longdesc>
+<shortdesc lang="en">Destination IP/subnet</shortdesc>
+<content type="string" default="${OCF_RESKEY_destination_default}" />
</parameter>
<parameter name="proto">
@@ -108,7 +125,17 @@
Proto to match when finding network. E.g. "kernel".
</longdesc>
<shortdesc lang="en">Proto</shortdesc>
-<content type="string" default="" />
+<content type="string" default="${OCF_RESKEY_proto_default}" />
+</parameter>
+
+<parameter name="table">
+<longdesc lang="en">
+Table to modify. E.g. "local".
+
+The table has to have a route matching the "destination" parameter.
+</longdesc>
+<shortdesc lang="en">Table</shortdesc>
+<content type="string" default="${OCF_RESKEY_table_default}" />
</parameter>
</parameters>
@@ -151,21 +178,22 @@
export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
srca_read() {
- # Capture the default route - doublequotes prevent word splitting...
- DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed"
-
- # ... so we can make sure there is only 1 default route
- [ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \
- errorexit "more than 1 default route exists"
+ # Capture matching route - doublequotes prevent word splitting...
+ ROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed"
- # But there might still be no default route
- [ -z "$DEFROUTE" ] && errorexit "no default route exists"
+ # ... so we can make sure there is only 1 matching route
+ [ 1 -eq `echo "$ROUTE" | wc -l` ] || \
+ errorexit "more than 1 matching route exists"
+
+ # But there might still be no matching route
+ [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && [ -z "$ROUTE" ] && \
+ ! ocf_is_probe && errorexit "no matching route exists"
# Sed out the source ip address if it exists
- SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"`
+ SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\3/p"`
# and what remains after stripping out the source ip address clause
- ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"`
+ ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\5/"`
[ -z "$SRCIP" ] && return 1
[ $SRCIP = $1 ] && return 0
@@ -185,11 +213,13 @@
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
- $IP2UTIL route replace $NETWORK dev $INTERFACE src $1 || \
- errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE src $1 || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE src $1' failed"
- $CMDCHANGE $ROUTE_WO_SRC src $1 || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
+ if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+ $CMDCHANGE $ROUTE_WO_SRC src $1 || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
+ fi
rc=$?
fi
@@ -201,7 +231,7 @@
# If one exists but it's not the same as the one specified, that's
# an error. Maybe that's the wrong behaviour because if this fails
# then when IPaddr releases the associated interface (if there is one)
-# your default route will also get dropped ;-(
+# your matching route will also get dropped ;-(
# The exit code should conform to LSB exit codes.
#
@@ -217,11 +247,13 @@
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- $IP2UTIL route replace $NETWORK dev $INTERFACE || \
- errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE' failed"
- $CMDCHANGE $ROUTE_WO_SRC || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
+ if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+ $CMDCHANGE $ROUTE_WO_SRC || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
+ fi
return $?
}
@@ -406,6 +438,10 @@
return $OCF_ERR_CONFIGURED
fi
+ if ! echo "$OCF_RESKEY_destination" | grep -q "/"; then
+ return $OCF_ERR_CONFIGURED
+ fi
+
if ! [ "x$SYSTYPE" = "xLinux" ]; then
# checks after this point are only relevant for linux.
@@ -486,7 +522,11 @@
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
-NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+ NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+else
+ NETWORK="$OCF_RESKEY_destination"
+fi
case $1 in
start) srca_start $ipaddress

View File

@ -1,42 +0,0 @@
From 0e73d3f474d08779b64ed99fb3f80c1e806ff1b7 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 28 Nov 2019 16:11:51 +0100
Subject: [PATCH] IPsrcaddr: fixes to replace local rule if using local table,
and set src back to primary for device on stop
---
heartbeat/IPsrcaddr | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index d80b72165..f9085f082 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -75,6 +75,10 @@ USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination"
CMDCHANGE="$IP2UTIL route change to "
+if [ "$OCF_RESKEY_table" = "local" ]; then
+ TABLE="$TABLE local"
+fi
+
SYSTYPE="`uname -s`"
usage() {
@@ -247,8 +251,14 @@ srca_stop() {
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE' failed"
+ OPTS=""
+ if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] ;then
+ PRIMARY_IP="$($IP2UTIL -4 -o addr show dev eth0 primary | awk '{split($4,a,"/");print a[1]}')"
+ OPTS="proto kernel scope host src $PRIMARY_IP"
+ fi
+
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
$CMDCHANGE $ROUTE_WO_SRC || \

View File

@ -1,45 +0,0 @@
From 7afc581f6cd8fc37c3e14ece12fb16d31f1886f9 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 10 Jan 2020 14:35:56 +0100
Subject: [PATCH] IPsrcaddr: fixes to avoid failing during probe
---
heartbeat/IPsrcaddr | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index f9085f082..0ef8b391f 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -75,6 +75,10 @@ USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination"
CMDCHANGE="$IP2UTIL route change to "
+if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then
+ CMDSHOW="$CMDSHOW src $OCF_RESKEY_ipaddress"
+fi
+
if [ "$OCF_RESKEY_table" = "local" ]; then
TABLE="$TABLE local"
fi
@@ -183,7 +187,7 @@ export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
srca_read() {
# Capture matching route - doublequotes prevent word splitting...
- ROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed"
+ ROUTE="`$CMDSHOW 2> /dev/null`" || errorexit "command '$CMDSHOW' failed"
# ... so we can make sure there is only 1 matching route
[ 1 -eq `echo "$ROUTE" | wc -l` ] || \
@@ -199,6 +203,11 @@ srca_read() {
# and what remains after stripping out the source ip address clause
ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\5/"`
+ # using "src <ip>" only returns output if there's a match
+ if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then
+ [ -z "$ROUTE" ] && return 1 || return 0
+ fi
+
[ -z "$SRCIP" ] && return 1
[ $SRCIP = $1 ] && return 0
return 2

View File

@ -1,23 +0,0 @@
From 5f0d15ad70098510a3782d6fd18d6eacfb51b0cf Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 16 Jan 2020 14:59:26 +0100
Subject: [PATCH] IPsrcaddr: remove hardcoded device when using destination
parameter
---
heartbeat/IPsrcaddr | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 0ef8b391f..7cdc3a9fe 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -262,7 +262,7 @@ srca_stop() {
OPTS=""
if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] ;then
- PRIMARY_IP="$($IP2UTIL -4 -o addr show dev eth0 primary | awk '{split($4,a,"/");print a[1]}')"
+ PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
OPTS="proto kernel scope host src $PRIMARY_IP"
fi

View File

@ -1,57 +0,0 @@
From fcaa52bb98a8686d993550c6f4ab7867625c8059 Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Wed, 29 Aug 2018 16:18:55 -0400
Subject: [PATCH] rabbitmq-cluster: get cluster status from mnesia during
monitor
If mnesia is not running (for example if `rabbitmqctl stop_app` has
been called, or the service has paused during partition due to the
pause_minority strategy) then the cluster_status command to
rabbitmqctl will read the cached cluster status from disk and the
command returns 0 even though the service isn't really running at all.
Instead, force the cluster status to be read from mnesia. If mnesia
is not running due to the above or similar circumstances, the command
will catch that and properly fail the monitor action.
Resolves: RHBZ#1595753
---
heartbeat/rabbitmq-cluster | 20 +++++---------------
1 file changed, 5 insertions(+), 15 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index a7d2db614..204917475 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -181,26 +181,16 @@ remove_pid () {
rmq_monitor() {
local rc
- $RMQ_CTL cluster_status > /dev/null 2>&1
- rc=$?
- case "$rc" in
- 0)
+ if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
ocf_log debug "RabbitMQ server is running normally"
rmq_write_nodename
-
+
return $OCF_SUCCESS
- ;;
- 2|68|69|70|75|78)
- ocf_log info "RabbitMQ server is not running"
+ else
+ ocf_log info "RabbitMQ server could not get cluster status from mnesia"
rmq_delete_nodename
return $OCF_NOT_RUNNING
- ;;
- *)
- ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc"
- rmq_delete_nodename
- return $OCF_ERR_GENERIC
- ;;
- esac
+ fi
}
rmq_init_and_wait()

View File

@ -1,96 +0,0 @@
From cc23c5523a0185fa557a5ab9056d50a60300d12a Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Tue, 16 Oct 2018 16:21:25 -0400
Subject: [PATCH] rabbitmq-cluster: fail monitor when node is in minority
partition
It's possible for mnesia to still be running, but for mnesia to be
partitioned. And it's also possible to get into this state without
pacemaker seeing the node go down so no corrective action is taken.
When monitoring, check the number of nodes that pacemaker thinks is
running, and compare to the number of nodes that mnesia thinks is
running. If mnesia only sees a minority of the total nodes, fail it
so corrective action can be taken to rejoin the cluster.
This also adds a new function, rmq_app_running, which simply checks
whether the app is running or not and does not care about the
partition status. This is now used instead of the full monitor in a
few places where we don't care about partition state.
Resolves: RHBZ#1639826
---
heartbeat/rabbitmq-cluster | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 204917475..78b2bbadf 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -178,10 +178,31 @@ remove_pid () {
rm -f ${RMQ_PID_FILE} > /dev/null 2>&1
}
+rmq_app_running() {
+ if $RMQ_CTL eval 'application:which_applications().' | grep -q '{rabbit,'; then
+ ocf_log debug "RabbitMQ application is running"
+ return $OCF_SUCCESS
+ else
+ ocf_log debug "RabbitMQ application is stopped"
+ return $OCF_NOT_RUNNING
+ fi
+}
+
rmq_monitor() {
local rc
if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
+ pcs_running=$(rmq_join_list | wc -w)
+ ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
+ rmq_running=$($RMQ_CTL eval 'length(mnesia:system_info(running_db_nodes)).')
+ ocf_log debug "RabbitMQ thinks ${rmq_running} RabbitMQ nodes are running"
+
+ if [ $(( $rmq_running * 2 )) -lt $pcs_running ]; then
+ ocf_log info "RabbitMQ is a minority partition, failing monitor"
+ rmq_delete_nodename
+ return $OCF_ERR_GENERIC
+ fi
+
ocf_log debug "RabbitMQ server is running normally"
rmq_write_nodename
@@ -215,7 +236,7 @@ rmq_init_and_wait()
return $OCF_ERR_GENERIC
fi
- rmq_monitor
+ rmq_app_running
return $?
}
@@ -236,6 +257,7 @@ rmq_start_first()
if [ $rc -eq 0 ]; then
rc=$OCF_SUCCESS
ocf_log info "cluster bootstrapped"
+ rmq_write_nodename
if [ -n "$OCF_RESKEY_set_policy" ]; then
# do not quote set_policy, we are passing in arguments
@@ -492,7 +514,7 @@ rmq_stop() {
end.
"
- rmq_monitor
+ rmq_app_running
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
@@ -508,7 +530,7 @@ rmq_stop() {
#TODO add kill logic
stop_wait=1
while [ $stop_wait = 1 ]; do
- rmq_monitor
+ rmq_app_running
rc=$?
if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
stop_wait=0

View File

@ -1,63 +0,0 @@
From 19ee29342f8bb573722991b8cbe4503309ad0bf9 Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Fri, 2 Nov 2018 13:12:53 -0400
Subject: [PATCH] rabbitmq-cluster: fix regression in rmq_stop
This regression was introduced in PR#1249 (cc23c55). The stop action
was modified to use rmq_app_running in order to check the service
status, which allows for the following sequence of events:
- service is started, unclustered
- stop_app is called
- cluster_join is attempted and fails
- stop is called
Because stop_app was called, rmq_app_running returns $OCF_NOT_RUNNING
and the stop action is a no-op. This means the erlang VM continues
running.
When the start action is attempted again, a new erlang VM is launched,
but this VM fails to boot because the old one is still running and is
registered with the same name (rabbit@nodename).
This adds a new function, rmq_node_alive, which does a simple eval to
test whether the erlang VM is up, independent of the rabbit app. The
stop action now uses rmq_node_alive to check the service status, so
even if stop_app was previously called, the erlang VM will be stopped
properly.
Resolves: RHBZ#1639826
---
heartbeat/rabbitmq-cluster | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 78b2bbadf..a2de9dc20 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -188,6 +188,16 @@ rmq_app_running() {
fi
}
+rmq_node_alive() {
+ if $RMQ_CTL eval 'ok.'; then
+ ocf_log debug "RabbitMQ node is alive"
+ return $OCF_SUCCESS
+ else
+ ocf_log debug "RabbitMQ node is down"
+ return $OCF_NOT_RUNNING
+ fi
+}
+
rmq_monitor() {
local rc
@@ -514,7 +524,7 @@ rmq_stop() {
end.
"
- rmq_app_running
+ rmq_node_alive
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi

View File

@ -1,83 +0,0 @@
From 63c9449bfa9a7fecbc0f00394699a475a384671d Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Thu, 9 Aug 2018 16:33:26 +0200
Subject: [PATCH] rabbitmq-cluster: retry start when cluster join fails
When a node tries to join an existing cluster, it fetches a node
list to try to connect from any of those running nodes.
If the nodes from this list become unavailable while we're joining
the cluster, the rabbitmq server will fail to get clustered and
make the start operation fail.
Give the resource a chance to start anyway by retrying the entire
start actions until it succeeds or until the start timeout is
reached and pacemaker stops the start operation.
Co-Authored-by: <michele@acksyn.org>
Suggested-by: <abeekhof@redhat.com>
---
heartbeat/rabbitmq-cluster | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 9ff49e075..84f383460 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -31,6 +31,12 @@
#######################################################################
+# This arbitrary value here is used by the rmq_start action to
+# signify that the resource agent must retry the start process
+# It might potentially conflict with OCF assigned error code
+# in the future.
+RMQ_TRY_RESTART_ERROR_CODE=126
+
RMQ_SERVER=/usr/sbin/rabbitmq-server
RMQ_CTL=/usr/sbin/rabbitmqctl
RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
@@ -354,7 +360,7 @@ rmq_notify() {
return $OCF_SUCCESS
}
-rmq_start() {
+rmq_try_start() {
local join_list=""
local rc
@@ -384,8 +390,16 @@ rmq_start() {
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
- return $OCF_ERR_GENERIC
+ # we could not join the rabbitmq cluster from any of the running nodes
+ # this might be due to a unexpected reset of those nodes. Give ourself
+ # a chance to start by retrying the entire start sequence.
+
+ ocf_log warn "Failed to join the RabbitMQ cluster from nodes ${join_list}. Stopping local unclustered rabbitmq"
+ rmq_stop
+
+ ocf_log warn "Re-detect available rabbitmq nodes and try to start again"
+ # return an unused OCF value to signify a "retry" condition
+ return $RMQ_TRY_RESTART_ERROR_CODE
fi
# Restore users, user permissions, and policies (if any)
@@ -443,6 +457,15 @@ rmq_start() {
return $OCF_SUCCESS
}
+rmq_start() {
+ local rc=$RMQ_TRY_RESTART_ERROR_CODE
+ while [ $rc -eq $RMQ_TRY_RESTART_ERROR_CODE ]; do
+ rmq_try_start
+ rc=$?
+ done
+ return $rc
+}
+
rmq_stop() {
# Backup users, user permissions, and policies
BaseDataDir=`dirname $RMQ_DATA_DIR`

View File

@ -1,42 +0,0 @@
From 8ed87936e9ad06318cc49ea767885a405dfde11e Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Wed, 5 Dec 2018 11:45:43 -0500
Subject: [PATCH] rabbitmq-cluster: better ensure node attributes are removed
Ensure that the attribute is removed at the end of the stop action.
Also if rmq_app_running or rmq_node_alive shows the service as down,
ensure the attribute is deleted as well.
Resolves: RHBZ#1656368
---
heartbeat/rabbitmq-cluster | 3 +++
1 file changed, 3 insertions(+)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 1643dd1e7..2dca3e216 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -184,6 +184,7 @@ rmq_app_running() {
return $OCF_SUCCESS
else
ocf_log debug "RabbitMQ application is stopped"
+ rmq_delete_nodename
return $OCF_NOT_RUNNING
fi
}
@@ -194,6 +195,7 @@ rmq_node_alive() {
return $OCF_SUCCESS
else
ocf_log debug "RabbitMQ node is down"
+ rmq_delete_nodename
return $OCF_NOT_RUNNING
fi
}
@@ -554,6 +556,7 @@ rmq_stop() {
sleep 1
done
+ rmq_delete_nodename
remove_pid
return $OCF_SUCCESS
}

View File

@ -1,32 +0,0 @@
From 2b6e4a94c847129dd014a1efa733cd1b4a2448e6 Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Fri, 2 Nov 2018 10:11:41 -0400
Subject: [PATCH] rabbitmq-cluster: debug log detailed output when mnesia query
fails
---
heartbeat/rabbitmq-cluster | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 78b2bbadf..fabfeedfb 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -191,7 +191,8 @@ rmq_app_running() {
rmq_monitor() {
local rc
- if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
+ status=$($RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)
+ if echo "${status}" | grep -q '^{ok'; then
pcs_running=$(rmq_join_list | wc -w)
ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
rmq_running=$($RMQ_CTL eval 'length(mnesia:system_info(running_db_nodes)).')
@@ -209,6 +210,7 @@ rmq_monitor() {
return $OCF_SUCCESS
else
ocf_log info "RabbitMQ server could not get cluster status from mnesia"
+ ocf_log debug "${status}"
rmq_delete_nodename
return $OCF_NOT_RUNNING
fi

View File

@ -1,87 +0,0 @@
From 5a33171b2c40e2e1587e82aad0cb7e39abcf615d Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Thu, 13 Dec 2018 12:58:43 -0500
Subject: [PATCH] rabbitmq-cluster: always use quiet flag for eval calls
On older rabbitmq versions, rabbitmqctl appends "...done." at the end
of the output. However we expect eval without this extra output so it
can be used for further processing. The -q option to rabbitmqctl
suppresses the extra output, so ensure we always pass that when
calling eval.
Resolves: RHBZ#1659072
---
heartbeat/rabbitmq-cluster | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 2dca3e216..e82ac2399 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -39,6 +39,7 @@ RMQ_TRY_RESTART_ERROR_CODE=126
RMQ_SERVER=/usr/sbin/rabbitmq-server
RMQ_CTL=/usr/sbin/rabbitmqctl
+RMQ_EVAL="${RMQ_CTL} eval -q"
RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
RMQ_PID_DIR="/var/run/rabbitmq"
RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
@@ -179,7 +180,7 @@ remove_pid () {
}
rmq_app_running() {
- if $RMQ_CTL eval 'application:which_applications().' | grep -q '{rabbit,'; then
+ if $RMQ_EVAL 'application:which_applications().' | grep -q '{rabbit,'; then
ocf_log debug "RabbitMQ application is running"
return $OCF_SUCCESS
else
@@ -190,7 +191,7 @@ rmq_app_running() {
}
rmq_node_alive() {
- if $RMQ_CTL eval 'ok.'; then
+ if $RMQ_EVAL 'ok.'; then
ocf_log debug "RabbitMQ node is alive"
return $OCF_SUCCESS
else
@@ -203,11 +204,11 @@ rmq_node_alive() {
rmq_monitor() {
local rc
- status=$($RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)
+ status=$($RMQ_EVAL 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)
if echo "${status}" | grep -q '^{ok'; then
pcs_running=$(rmq_join_list | wc -w)
ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
- rmq_running=$($RMQ_CTL eval 'length(mnesia:system_info(running_db_nodes)).')
+ rmq_running=$($RMQ_EVAL 'length(mnesia:system_info(running_db_nodes)).')
ocf_log debug "RabbitMQ thinks ${rmq_running} RabbitMQ nodes are running"
if [ $(( $rmq_running * 2 )) -lt $pcs_running ]; then
@@ -294,7 +295,7 @@ rmq_start_first()
rmq_is_clustered()
{
- $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true
+ $RMQ_EVAL 'rabbit_mnesia:is_clustered().' | grep -q true
}
rmq_join_existing()
@@ -432,7 +433,7 @@ rmq_try_start() {
# Restore users, user permissions, and policies (if any)
BaseDataDir=`dirname $RMQ_DATA_DIR`
- $RMQ_CTL eval "
+ $RMQ_EVAL "
%% Run only if Mnesia is ready.
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
begin
@@ -497,7 +498,7 @@ rmq_start() {
rmq_stop() {
# Backup users, user permissions, and policies
BaseDataDir=`dirname $RMQ_DATA_DIR`
- $RMQ_CTL eval "
+ $RMQ_EVAL "
%% Run only if Mnesia is still available.
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
begin

View File

@ -1,88 +0,0 @@
From 5949405d0031a4aba91c81cb28c24821ad2d439a Mon Sep 17 00:00:00 2001
From: Reid Wahl <nwahl@redhat.com>
Date: Thu, 3 Jan 2019 15:05:20 -0800
Subject: [PATCH] docker: Fix issues with stop operation
The docker RA's stop operation doesn't behave properly in some cases.
1. It returns a false success code in case of an error response from
the daemon.
2. It fails at `remove_container()` if the container does not exist
but another docker object of the same name does exist.
In case #1, the `container_exists()` function returns the same exit code
(1) if the container is not found (an expected error) or if there is an
error response from the docker daemon (an unexpected error). These types
of errors should be handled differently.
In case #2, the `docker inspect` calls do not limit their search to
containers. So if a non-container object is found with a matching name,
the RA attempts to remove a container by that name. Such a container may
not exist.
This patch fixes these issues as follows:
1. Match an error response in `container_exists()` against the string
"No such container".
2. Add `--type=container` to the `docker inspect` calls to restrict
the match.
---
heartbeat/docker | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/heartbeat/docker b/heartbeat/docker
index f5ba83ff2..c206344ad 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -215,7 +215,7 @@ monitor_cmd_exec()
out=$(docker exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
rc=$?
else
- out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
+ out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --type=container --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
rc=$?
fi
@@ -236,7 +236,25 @@ monitor_cmd_exec()
container_exists()
{
- docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
+ local err
+
+ err=$(docker inspect --type=container $CONTAINER 2>&1 >/dev/null)
+
+ if [ $? -ne $OCF_SUCCESS ]; then
+ case $err in
+ *"No such container"*)
+ # Return failure instead of exiting if container does not exist
+ return 1
+ ;;
+ *)
+ # Exit if error running command
+ ocf_exit_reason "$err"
+ exit $OCF_ERR_GENERIC
+ ;;
+ esac
+ fi
+
+ return $OCF_SUCCESS
}
remove_container()
@@ -265,7 +283,7 @@ docker_simple_status()
fi
# retrieve the 'Running' attribute for the container
- val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
+ val=$(docker inspect --type=container --format {{.State.Running}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then
#not running as a result of container not being found
return $OCF_NOT_RUNNING
@@ -295,7 +313,7 @@ docker_health_status()
# if starting takes longer than monitor timeout then upstream will make this fail.
while
- val=$(docker inspect --format {{.State.Health.Status}} $CONTAINER 2>/dev/null)
+ val=$(docker inspect --type=container --format {{.State.Health.Status}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then
#not healthy as a result of container not being found
return $OCF_NOT_RUNNING

Some files were not shown because too many files have changed in this diff Show More