From 0ee4c62105ee8f90a43fe0bf8a65bc9b9da2e7e0 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Wed, 18 Jul 2018 11:54:40 -0300 Subject: [PATCH 1/4] gcp-vpc-move-route.in: python implementation of gcp-vpc-move-ip.in gcloud api is not reliable and it is slow, add a python version of gcp-vpc-move-ip.in --- configure.ac | 1 + doc/man/Makefile.am | 1 + heartbeat/Makefile.am | 1 + heartbeat/gcp-vpc-move-route.in | 441 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 444 insertions(+) create mode 100644 heartbeat/gcp-vpc-move-route.in diff --git a/configure.ac b/configure.ac index 3d8f9ca74..039b4942c 100644 --- a/configure.ac +++ b/configure.ac @@ -960,6 +960,7 @@ AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88]) AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio]) AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip]) AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip]) +AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route]) AC_CONFIG_FILES([heartbeat/iSCSILogicalUnit], [chmod +x heartbeat/iSCSILogicalUnit]) AC_CONFIG_FILES([heartbeat/iSCSITarget], [chmod +x heartbeat/iSCSITarget]) AC_CONFIG_FILES([heartbeat/jira], [chmod +x heartbeat/jira]) diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index e9eaf369f..3ac0569de 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -115,6 +115,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_garbd.7 \ ocf_heartbeat_gcp-vpc-move-ip.7 \ ocf_heartbeat_gcp-vpc-move-vip.7 \ + ocf_heartbeat_gcp-vpc-move-route.7 \ ocf_heartbeat_iSCSILogicalUnit.7 \ ocf_heartbeat_iSCSITarget.7 \ ocf_heartbeat_iface-bridge.7 \ diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index 36b271956..d4750bf09 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -112,6 +112,7 @@ ocf_SCRIPTS = AoEtarget \ garbd \ gcp-vpc-move-ip \ gcp-vpc-move-vip \ + gcp-vpc-move-route \ iSCSILogicalUnit \ 
iSCSITarget \ ids \ diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in new file mode 100644 index 000000000..5f4569baa --- /dev/null +++ b/heartbeat/gcp-vpc-move-route.in @@ -0,0 +1,441 @@ +#!@PYTHON@ -tt +# - *- coding: utf- 8 - *- +# +# +# OCF resource agent to move an IP address within a VPC in GCP +# +# License: GNU General Public License (GPL) +# Copyright (c) 2018 Hervé Werner (MFG Labs) +# Copyright 2018 Google Inc. +# Based on code from Markus Guertler (aws-vpc-move-ip) +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + + +####################################################################### + +import atexit +import logging +import os +import sys +import time + +try: + import googleapiclient.discovery + import pyroute2 +except ImportError: + pass + +if sys.version_info >= (3, 0): + # Python 3 imports. + import urllib.parse as urlparse + import urllib.request as urlrequest +else: + # Python 2 imports. 
+ import urllib as urlparse + import urllib2 as urlrequest + + +OCF_SUCCESS = 0 +OCF_ERR_GENERIC = 1 +OCF_ERR_UNIMPLEMENTED = 3 +OCF_ERR_PERM = 4 +OCF_ERR_CONFIGURED = 6 +OCF_NOT_RUNNING = 7 +GCP_API_URL_PREFIX = 'https://www.googleapis.com/compute/v1' +METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' +METADATA_HEADERS = {'Metadata-Flavor': 'Google'} +METADATA = \ +''' + + +1.0 + +Resource Agent that can move a floating IP address within a GCP VPC by changing an +entry in the routing table. This agent also configures the floating IP locally +on the instance OS. +Requirements : +- IP forwarding must be enabled on all instances in order to be able to +terminate the route +- The floating IP address must be chosen so that it is outside all existing +subnets in the VPC network +- IAM permissions +(see https://cloud.google.com/compute/docs/access/iam-permissions) : +1) compute.routes.delete, compute.routes.get and compute.routes.update on the +route +2) compute.networks.updatePolicy on the network (to add a new route) +3) compute.networks.get on the network (to check the VPC network existence) +4) compute.routes.list on the project (to check conflicting routes) + +Move IP within a GCP VPC + + + + +If enabled (set to true), IP failover logs will be posted to stackdriver logging +Stackdriver-logging support + + + + + +Floating IP address. Note that this IP must be chosen outside of all existing +subnet ranges + +Floating IP + + + + + +Name of the VPC network + +VPC network + + + + + +Name of the network interface + +Network interface name + + + + + +Route name + +Route name + + + + + + + + + + + + +''' + + +class Context(object): + __slots__ = 'conn', 'iface_idx', 'instance', 'instance_url', 'interface', \ + 'ip', 'iproute', 'project', 'route_name', 'vpc_network', \ + 'vpc_network_url', 'zone' + + +def wait_for_operation(ctx, response): + """Blocks until operation completes. 
+ Code from GitHub's GoogleCloudPlatform/python-docs-samples + + Args: + response: dict, a request's response + """ + def _OperationGetter(response): + operation = response[u'name'] + if response.get(u'zone'): + return ctx.conn.zoneOperations().get( + project=ctx.project, zone=ctx.zone, operation=operation) + else: + return ctx.conn.globalOperations().get( + project=ctx.project, operation=operation) + + while True: + result = _OperationGetter(response).execute() + + if result['status'] == 'DONE': + if 'error' in result: + raise Exception(result['error']) + return result + + time.sleep(1) + + +def get_metadata(metadata_key, params=None, timeout=None): + """Performs a GET request with the metadata headers. + + Args: + metadata_key: string, the metadata to perform a GET request on. + params: dictionary, the query parameters in the GET request. + timeout: int, timeout in seconds for metadata requests. + + Returns: + HTTP response from the GET request. + + Raises: + urlerror.HTTPError: raises when the GET request fails. 
+ """ + timeout = timeout or 60 + metadata_url = os.path.join(METADATA_SERVER, metadata_key) + params = urlparse.urlencode(params or {}) + url = '%s?%s' % (metadata_url, params) + request = urlrequest.Request(url, headers=METADATA_HEADERS) + request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) + return request_opener.open(request, timeout=timeout * 1.1).read().decode('utf-8') + + +def validate(ctx): + if os.geteuid() != 0: + logging.error('You must run this agent as root') + sys.exit(OCF_ERR_PERM) + + try: + ctx.conn = googleapiclient.discovery.build('compute', 'v1') + except Exception as e: + logging.error('Couldn\'t connect with google api: ' + str(e)) + sys.exit(OCF_ERR_CONFIGURED) + + ctx.ip = os.environ.get('OCF_RESKEY_ip') + if not ctx.ip: + logging.error('Missing ip parameter') + sys.exit(OCF_ERR_CONFIGURED) + + try: + ctx.instance = get_metadata('instance/name') + ctx.zone = get_metadata('instance/zone').split('/')[-1] + ctx.project = get_metadata('project/project-id') + except Exception as e: + logging.error( + 'Instance information not found. 
Is this a GCE instance ?: %s', str(e)) + sys.exit(OCF_ERR_CONFIGURED) + + ctx.instance_url = '%s/projects/%s/zones/%s/instances/%s' % ( + GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance) + ctx.vpc_network = os.environ.get('OCF_RESKEY_vpc_network', 'default') + ctx.vpc_network_url = '%s/projects/%s/global/networks/%s' % ( + GCP_API_URL_PREFIX, ctx.project, ctx.vpc_network) + ctx.interface = os.environ.get('OCF_RESKEY_interface', 'eth0') + ctx.route_name = os.environ.get( + 'OCF_RESKEY_route_name', 'ra-%s' % os.environ['__SCRIPT_NAME']) + ctx.iproute = pyroute2.IPRoute() + atexit.register(ctx.iproute.close) + idxs = ctx.iproute.link_lookup(ifname=ctx.interface) + if not idxs: + logging.error('Network interface not found') + sys.exit(OCF_ERR_CONFIGURED) + ctx.iface_idx = idxs[0] + + +def check_conflicting_routes(ctx): + fl = '(destRange = "%s*") AND (network = "%s") AND (name != "%s")' % ( + ctx.ip, ctx.vpc_network_url, ctx.route_name) + request = ctx.conn.routes().list(project=ctx.project, filter=fl) + response = request.execute() + route_list = response.get('items', None) + if route_list: + logging.error( + 'Conflicting unnmanaged routes for destination %s/32 in VPC %s found : %s', + ctx.ip, ctx.vpc_network, str(route_list)) + sys.exit(OCF_ERR_CONFIGURED) + + +def route_release(ctx): + request = ctx.conn.routes().delete(project=ctx.project, route=ctx.route_name) + wait_for_operation(ctx, request.execute()) + + +def ip_monitor(ctx): + logging.info('IP monitor: checking local network configuration') + + def address_filter(addr): + for attr in addr['attrs']: + if attr[0] == 'IFA_LOCAL': + if attr[1] == ctx.ip: + return True + else: + return False + + route = ctx.iproute.get_addr( + index=ctx.iface_idx, match=address_filter) + if not route: + logging.warning( + 'The floating IP %s is not locally configured on this instance (%s)', + ctx.ip, ctx.instance) + return OCF_NOT_RUNNING + + logging.debug( + 'The floating IP %s is correctly configured on this instance (%s)', 
+ ctx.ip, ctx.instance) + return OCF_SUCCESS + + +def ip_release(ctx): + ctx.iproute.addr('del', index=ctx.iface_idx, address=ctx.ip, mask=32) + + +def ip_and_route_start(ctx): + logging.info('Bringing up the floating IP %s', ctx.ip) + + # Add a new entry in the routing table + # If the route entry exists and is pointing to another instance, take it over + + # Ensure that there is no route that we are not aware of that is also handling our IP + check_conflicting_routes(ctx) + + # There is no replace API, We need to first delete the existing route if any + try: + request = ctx.conn.routes().get(project=ctx.project, route=ctx.route_name) + request.execute() + # TODO: check specific exception for 404 + except googleapiclient.errors.HttpError as e: + if e.resp.status != 404: + raise + else: + route_release(ctx) + + route_body = { + 'name': ctx.route_name, + 'network': ctx.vpc_network_url, + 'destRange': '%s/32' % ctx.ip, + 'nextHopInstance': ctx.instance_url, + } + try: + request = ctx.conn.routes().insert(project=ctx.project, body=route_body) + wait_for_operation(ctx, request.execute()) + except googleapiclient.errors.HttpError: + try: + request = ctx.conn.networks().get( + project=ctx.project, network=ctx.vpc_network) + request.execute() + except googleapiclient.errors.HttpError as e: + if e.resp.status == 404: + logging.error('VPC network not found') + sys.exit(OCF_ERR_CONFIGURED) + else: + raise + else: + raise + + # Configure the IP address locally + # We need to release the IP first + if ip_monitor(ctx) == OCF_SUCCESS: + ip_release(ctx) + + ctx.iproute.addr('add', index=ctx.iface_idx, address=ctx.ip, mask=32) + ctx.iproute.link('set', index=ctx.iface_idx, state='up') + logging.info('Successfully brought up the floating IP %s', ctx.ip) + + +def route_monitor(ctx): + logging.info('GCP route monitor: checking route table') + + # Ensure that there is no route that we are not aware of that is also handling our IP + check_conflicting_routes(ctx) + + try: + request = 
ctx.conn.routes().get(project=ctx.project, route=ctx.route_name) + response = request.execute() + except googleapiclient.errors.HttpError as e: + if 'Insufficient Permission' in e.content: + return OCF_ERR_PERM + elif e.resp.status == 404: + return OCF_NOT_RUNNING + else: + raise + + routed_to_instance = response.get('nextHopInstance', '') + instance_url = '%s/projects/%s/zones/%s/instances/%s' % ( + GCP_API_URL_PREFIX, ctx.project, ctx.zone, ctx.instance) + if routed_to_instance != instance_url: + logging.warning( + 'The floating IP %s is not routed to this instance (%s) but to instance %s', + ctx.ip, ctx.instance, routed_to_instance.split('/')[-1]) + return OCF_NOT_RUNNING + + logging.debug( + 'The floating IP %s is correctly routed to this instance (%s)', + ctx.ip, ctx.instance) + return OCF_SUCCESS + + +def ip_and_route_stop(ctx): + logging.info('Bringing down the floating IP %s', ctx.ip) + + # Delete the route entry + # If the route entry exists and is pointing to another instance, don't touch it + if route_monitor(ctx) == OCF_NOT_RUNNING: + logging.info( + 'The floating IP %s is already not routed to this instance (%s)', + ctx.ip, ctx.instance) + else: + route_release(ctx) + + if ip_monitor(ctx) == OCF_NOT_RUNNING: + logging.info('The floating IP %s is already down', ctx.ip) + else: + ip_release(ctx) + + +def configure_logs(ctx): + # Prepare logging + logging.basicConfig( + format='gcp:route - %(levelname)s - %(message)s', level=logging.INFO) + logging.getLogger('googleapiclient').setLevel(logging.WARN) + logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging') + if logging_env: + logging_env = logging_env.lower() + if any(x in logging_env for x in ['yes', 'true', 'enabled']): + try: + import google.cloud.logging.handlers + client = google.cloud.logging.Client() + handler = google.cloud.logging.handlers.CloudLoggingHandler( + client, name=ctx.instance) + handler.setLevel(logging.INFO) + formatter = logging.Formatter('gcp:route "%(message)s"') + 
handler.setFormatter(formatter) + root_logger = logging.getLogger() + root_logger.addHandler(handler) + except ImportError: + logging.error('Couldn\'t import google.cloud.logging, ' + 'disabling Stackdriver-logging support') + + +def main(): + if 'meta-data' in sys.argv[1]: + print(METADATA) + return + + ctx = Context() + + validate(ctx) + if 'validate-all' in sys.argv[1]: + return + + configure_logs(ctx) + if 'start' in sys.argv[1]: + ip_and_route_start(ctx) + elif 'stop' in sys.argv[1]: + ip_and_route_stop(ctx) + elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]: + sys.exit(ip_monitor(ctx)) + else: + usage = 'usage: $0 {start|stop|monitor|status|meta-data|validate-all}' + logging.error(usage) + sys.exit(OCF_ERR_UNIMPLEMENTED) + + +if __name__ == "__main__": + main() From 6590c99f462403808854114ec1031755e5ce6b36 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Thu, 19 Jul 2018 12:33:44 -0300 Subject: [PATCH 2/4] gcp-vpc-move-ip.in: add deprecation message --- heartbeat/gcp-vpc-move-ip.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/heartbeat/gcp-vpc-move-ip.in b/heartbeat/gcp-vpc-move-ip.in index 4a6c343a8..3b8d998b3 100755 --- a/heartbeat/gcp-vpc-move-ip.in +++ b/heartbeat/gcp-vpc-move-ip.in @@ -348,6 +348,8 @@ ip_and_route_stop() { # ############################################################################### +ocf_log warn "gcp-vpc-move-ip is deprecated, prefer to use gcp-vpc-move-route instead" + case $__OCF_ACTION in meta-data) metadata exit $OCF_SUCCESS From 73608196d21068c6c2d5fb9f77e3d40179c85fee Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Fri, 20 Jul 2018 08:26:17 -0300 Subject: [PATCH 3/4] gcp-vpc-move-route.in: move stackdriver parameter Move stackdriver parameter to the bottom of metadata list --- heartbeat/gcp-vpc-move-route.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in index 5f4569baa..8d5bfff36 100644 --- 
a/heartbeat/gcp-vpc-move-route.in +++ b/heartbeat/gcp-vpc-move-route.in @@ -90,12 +90,6 @@ route - -If enabled (set to true), IP failover logs will be posted to stackdriver logging -Stackdriver-logging support - - - Floating IP address. Note that this IP must be chosen outside of all existing @@ -128,6 +122,12 @@ Route name Route name + + +If enabled (set to true), IP failover logs will be posted to stackdriver logging +Stackdriver-logging support + + From e54565ec69f809b28337c0471ad0a9b26a64f8bf Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Fri, 20 Jul 2018 08:45:53 -0300 Subject: [PATCH 4/4] gcp-vpc-move-route.in: minor fixes --- heartbeat/gcp-vpc-move-route.in | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-route.in index 8d5bfff36..566a70f86 100644 --- a/heartbeat/gcp-vpc-move-route.in +++ b/heartbeat/gcp-vpc-move-route.in @@ -104,7 +104,7 @@ subnet ranges Name of the VPC network VPC network - + @@ -112,7 +112,7 @@ Name of the VPC network Name of the network interface Network interface name - + @@ -120,7 +120,7 @@ Name of the network interface Route name Route name - + @@ -138,7 +138,7 @@ Route name -''' +''' % os.path.basename(sys.argv[0]) class Context(object): @@ -229,7 +229,7 @@ def validate(ctx): GCP_API_URL_PREFIX, ctx.project, ctx.vpc_network) ctx.interface = os.environ.get('OCF_RESKEY_interface', 'eth0') ctx.route_name = os.environ.get( - 'OCF_RESKEY_route_name', 'ra-%s' % os.environ['__SCRIPT_NAME']) + 'OCF_RESKEY_route_name', 'ra-%s' % os.path.basename(sys.argv[0])) ctx.iproute = pyroute2.IPRoute() atexit.register(ctx.iproute.close) idxs = ctx.iproute.link_lookup(ifname=ctx.interface) @@ -432,7 +432,8 @@ def main(): elif 'status' in sys.argv[1] or 'monitor' in sys.argv[1]: sys.exit(ip_monitor(ctx)) else: - usage = 'usage: $0 {start|stop|monitor|status|meta-data|validate-all}' + usage = 'usage: %s {start|stop|monitor|status|meta-data|validate-all}' % \ + 
os.path.basename(sys.argv[0]) logging.error(usage) sys.exit(OCF_ERR_UNIMPLEMENTED)