- fence_aws: add --skip-race-check parameter to allow running outside

of AWS network
  Resolves: rhbz#2183162
This commit is contained in:
Oyvind Albrigtsen 2023-05-03 10:29:27 +02:00
parent 242bca3411
commit 514735c181
3 changed files with 196 additions and 1 deletions

View File

@ -0,0 +1,165 @@
From 73fdae1b9da5aa1ba1d371dcc47fe31a4d22bb31 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 30 Mar 2023 12:20:05 +0200
Subject: [PATCH] fence_aws: fixes to allow running outside of AWS network
- add --skip-race-check parameter to allow running outside of AWS
network e.g. for openshift
- fixed and improved logging logic
- use --debug-file parameter for file logging
---
agents/aws/fence_aws.py | 50 ++++++++++++++++++++-----------
tests/data/metadata/fence_aws.xml | 5 ++++
2 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
index c947bf29c..5d1677144 100644
--- a/agents/aws/fence_aws.py
+++ b/agents/aws/fence_aws.py
@@ -16,13 +16,13 @@
except ImportError:
pass
-logger = logging.getLogger("fence_aws")
+logger = logging.getLogger()
logger.propagate = False
logger.setLevel(logging.INFO)
logger.addHandler(SyslogLibHandler())
logging.getLogger('botocore.vendored').propagate = False
-def get_instance_id():
+def get_instance_id(options):
try:
token = requests.put('http://169.254.169.254/latest/api/token', headers={"X-aws-ec2-metadata-token-ttl-seconds" : "21600"}).content.decode("UTF-8")
r = requests.get('http://169.254.169.254/latest/meta-data/instance-id', headers={"X-aws-ec2-metadata-token" : token}).content.decode("UTF-8")
@@ -30,12 +30,15 @@ def get_instance_id():
except HTTPError as http_err:
logger.error('HTTP error occurred while trying to access EC2 metadata server: %s', http_err)
except Exception as err:
- logger.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
+ if "--skip-race-check" not in options:
+ logger.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
+ else:
+ logger.debug('A fatal error occurred while trying to access EC2 metadata server: %s', err)
return None
-
+
def get_nodes_list(conn, options):
- logger.info("Starting monitor operation")
+ logger.debug("Starting monitor operation")
result = {}
try:
if "--filter" in options:
@@ -63,7 +66,7 @@ def get_power_status(conn, options):
try:
instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [options["--plug"]]}])
state = list(instance)[0].state["Name"]
- logger.info("Status operation for EC2 instance %s returned state: %s",options["--plug"],state.upper())
+ logger.debug("Status operation for EC2 instance %s returned state: %s",options["--plug"],state.upper())
if state == "running":
return "on"
elif state == "stopped":
@@ -78,7 +81,7 @@ def get_power_status(conn, options):
except IndexError:
fail(EC_STATUS)
except Exception as e:
- logging.error("Failed to get power status: %s", e)
+ logger.error("Failed to get power status: %s", e)
fail(EC_STATUS)
def get_self_power_status(conn, instance_id):
@@ -86,10 +89,10 @@ def get_self_power_status(conn, instance_id):
instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
state = list(instance)[0].state["Name"]
if state == "running":
- logging.debug("Captured my (%s) state and it %s - returning OK - Proceeding with fencing",instance_id,state.upper())
+ logger.debug("Captured my (%s) state and it %s - returning OK - Proceeding with fencing",instance_id,state.upper())
return "ok"
else:
- logging.debug("Captured my (%s) state it is %s - returning Alert - Unable to fence other nodes",instance_id,state.upper())
+ logger.debug("Captured my (%s) state it is %s - returning Alert - Unable to fence other nodes",instance_id,state.upper())
return "alert"
except ClientError:
@@ -100,18 +103,18 @@ def get_self_power_status(conn, instance_id):
return "fail"
def set_power_status(conn, options):
- my_instance = get_instance_id()
+ my_instance = get_instance_id(options)
try:
if (options["--action"]=="off"):
- if (get_self_power_status(conn,my_instance) == "ok"):
+ if "--skip-race-check" in options or get_self_power_status(conn,my_instance) == "ok":
conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
- logger.info("Called StopInstance API call for %s", options["--plug"])
+ logger.debug("Called StopInstance API call for %s", options["--plug"])
else:
- logger.info("Skipping fencing as instance is not in running status")
+ logger.debug("Skipping fencing as instance is not in running status")
elif (options["--action"]=="on"):
conn.instances.filter(InstanceIds=[options["--plug"]]).start()
except Exception as e:
- logger.error("Failed to power %s %s: %s", \
+ logger.debug("Failed to power %s %s: %s", \
options["--action"], options["--plug"], e)
def define_new_opts():
@@ -156,12 +159,20 @@ def define_new_opts():
"default": "False",
"order": 6
}
+ all_opt["skip_race_check"] = {
+ "getopt" : "",
+ "longopt" : "skip-race-check",
+ "help" : "--skip-race-check Skip race condition check",
+ "shortdesc": "Skip race condition check",
+ "required": "0",
+ "order": 7
+ }
# Main agent method
def main():
conn = None
- device_opt = ["port", "no_password", "region", "access_key", "secret_key", "filter", "boto3_debug"]
+ device_opt = ["port", "no_password", "region", "access_key", "secret_key", "filter", "boto3_debug", "skip_race_check"]
atexit.register(atexit_handler)
@@ -183,12 +194,15 @@ def main():
run_delay(options)
- if options.get("--verbose") is not None:
- lh = logging.FileHandler('/var/log/fence_aws_debug.log')
+ if "--debug-file" in options:
+ for handler in logger.handlers:
+ if isinstance(handler, logging.FileHandler):
+ logger.removeHandler(handler)
+ lh = logging.FileHandler(options["--debug-file"])
logger.addHandler(lh)
lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
lh.setFormatter(lhf)
- logger.setLevel(logging.DEBUG)
+ lh.setLevel(logging.DEBUG)
if options["--boto3_debug"].lower() not in ["1", "yes", "on", "true"]:
boto3.set_stream_logger('boto3',logging.INFO)
diff --git a/tests/data/metadata/fence_aws.xml b/tests/data/metadata/fence_aws.xml
index 76995ecf2..32de4418a 100644
--- a/tests/data/metadata/fence_aws.xml
+++ b/tests/data/metadata/fence_aws.xml
@@ -46,6 +46,11 @@ For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.ht
<content type="string" default="False" />
<shortdesc lang="en">Boto Lib debug</shortdesc>
</parameter>
+ <parameter name="skip_race_check" unique="0" required="0">
+ <getopt mixed="--skip-race-check" />
+ <content type="boolean" />
+ <shortdesc lang="en">Skip race condition check</shortdesc>
+ </parameter>
<parameter name="quiet" unique="0" required="0">
<getopt mixed="-q, --quiet" />
<content type="boolean" />

View File

@ -0,0 +1,21 @@
From a2e2432cfec0af9a8a90f9d7fed18759da6f9b0c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 13 Apr 2023 10:14:31 +0200
Subject: [PATCH] fence_aws: fail when power action request fails
---
agents/aws/fence_aws.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
index 5d1677144..0a375bbec 100644
--- a/agents/aws/fence_aws.py
+++ b/agents/aws/fence_aws.py
@@ -116,6 +116,7 @@ def set_power_status(conn, options):
except Exception as e:
logger.debug("Failed to power %s %s: %s", \
options["--action"], options["--plug"], e)
+ fail(EC_STATUS)
def define_new_opts():
all_opt["region"] = {

View File

@ -60,7 +60,7 @@
Name: fence-agents
Summary: Set of unified programs capable of host isolation ("fencing")
Version: 4.10.0
Release: 44%{?alphatag:.%{alphatag}}%{?dist}
Release: 45%{?alphatag:.%{alphatag}}%{?dist}
License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/fence-agents
Source0: https://fedorahosted.org/releases/f/e/fence-agents/%{name}-%{version}.tar.gz
@ -232,6 +232,8 @@ Patch37: bz2160480-fence_scsi-fix-validate-all.patch
Patch38: bz2152107-fencing-1-add-plug_separator.patch
Patch39: bz2152107-fencing-2-update-DEPENDENCY_OPT.patch
Patch40: bz2187327-fence_scsi-detect-devices-in-shared-vgs.patch
Patch41: bz2183162-fence_aws-1-add-skip-race-check-parameter.patch
Patch42: bz2183162-fence_aws-2-fail-when-power-action-request-fails.patch
%global supportedagents amt_ws apc apc_snmp bladecenter brocade cisco_mds cisco_ucs compute drac5 eaton_snmp emerson eps evacuate hpblade ibmblade ibm_powervs ibm_vpc ifmib ilo ilo_moonshot ilo_mp ilo_ssh intelmodular ipdu ipmilan kdump kubevirt lpar mpath redfish rhevm rsa rsb sbd scsi vmware_rest vmware_soap wti
%ifarch x86_64
@ -383,6 +385,8 @@ BuildRequires: %{systemd_units}
%patch38 -p1
%patch39 -p1
%patch40 -p1
%patch41 -p1
%patch42 -p1
# prevent compilation of something that won't get used anyway
sed -i.orig 's|FENCE_ZVM=1|FENCE_ZVM=0|' configure.ac
@ -1449,6 +1453,11 @@ are located on corosync cluster nodes.
%endif
%changelog
* Wed May 3 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-45
- fence_aws: add --skip-race-check parameter to allow running outside
of AWS network
Resolves: rhbz#2183162
* Thu Apr 20 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-44
- fence_scsi: detect devices in shared VGs
Resolves: rhbz#2187327