From 90b595650d7d8a6f6a69a9f7060c6406aa731c18 Mon Sep 17 00:00:00 2001 From: "Fabio M. Di Nitto" Date: Wed, 28 Jul 2021 10:08:10 +0200 Subject: [PATCH] Add storage-mon pacemaker health check Signed-off-by: Fabio M. Di Nitto --- .gitignore | 41 ++++++ configure.ac | 1 + doc/man/Makefile.am | 3 +- heartbeat/Makefile.am | 17 +-- heartbeat/storage-mon.in | 263 +++++++++++++++++++++++++++++++++++++++ tools/Makefile.am | 5 +- tools/storage_mon.c | 263 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 583 insertions(+), 10 deletions(-) create mode 100644 heartbeat/storage-mon.in create mode 100644 tools/storage_mon.c diff --git a/.gitignore b/.gitignore index 38d3566205..f7277bf04e 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,46 @@ heartbeat/ocf-shellfuncs heartbeat/send_ua heartbeat/shellfuncs heartbeat/*.pyc +heartbeat/AoEtarget +heartbeat/CTDB +heartbeat/ManageRAID +heartbeat/ManageVE +heartbeat/Squid +heartbeat/SysInfo +heartbeat/aws-vpc-route53 +heartbeat/azure-events +heartbeat/clvm +heartbeat/conntrackd +heartbeat/dnsupdate +heartbeat/dummypy +heartbeat/eDir88 +heartbeat/fio +heartbeat/galera +heartbeat/gcp-pd-move +heartbeat/gcp-vpc-move-ip +heartbeat/gcp-vpc-move-route +heartbeat/gcp-vpc-move-vip +heartbeat/iSCSILogicalUnit +heartbeat/iSCSITarget +heartbeat/jira +heartbeat/kamailio +heartbeat/lxc +heartbeat/lxd-info +heartbeat/machine-info +heartbeat/mariadb +heartbeat/mpathpersist +heartbeat/nfsnotify +heartbeat/openstack-info +heartbeat/rabbitmq-cluster +heartbeat/redis +heartbeat/rsyslog +heartbeat/sg_persist +heartbeat/slapd +heartbeat/smb-share +heartbeat/storage-mon +heartbeat/sybaseASE +heartbeat/syslog-ng +heartbeat/vsftpd include/agent_config.h include/config.h include/config.h.in @@ -61,6 +101,7 @@ systemd/resource-agents.conf tools/findif tools/ocf-tester tools/send_arp +tools/storage_mon tools/tickle_tcp tools/ocft/README tools/ocft/README.zh_CN diff --git a/configure.ac b/configure.ac index 717fb95432..c125df98f6 100644 --- 
a/configure.ac +++ b/configure.ac @@ -1002,6 +1002,7 @@ AC_CONFIG_FILES([heartbeat/rsyslog], [chmod +x heartbeat/rsyslog]) AC_CONFIG_FILES([heartbeat/smb-share], [chmod +x heartbeat/smb-share]) AC_CONFIG_FILES([heartbeat/sg_persist], [chmod +x heartbeat/sg_persist]) AC_CONFIG_FILES([heartbeat/slapd], [chmod +x heartbeat/slapd]) +AC_CONFIG_FILES([heartbeat/storage-mon], [chmod +x heartbeat/storage-mon]) AC_CONFIG_FILES([heartbeat/sybaseASE], [chmod +x heartbeat/sybaseASE]) AC_CONFIG_FILES([heartbeat/syslog-ng], [chmod +x heartbeat/syslog-ng]) AC_CONFIG_FILES([heartbeat/vsftpd], [chmod +x heartbeat/vsftpd]) diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index 947d83cb2b..97904ccb16 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -138,6 +138,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_mariadb.7 \ ocf_heartbeat_mdraid.7 \ ocf_heartbeat_minio.7 \ + ocf_heartbeat_mpathpersist.7 \ ocf_heartbeat_mysql.7 \ ocf_heartbeat_mysql-proxy.7 \ ocf_heartbeat_nagios.7 \ @@ -175,7 +176,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ ocf_heartbeat_smb-share.7 \ ocf_heartbeat_sybaseASE.7 \ ocf_heartbeat_sg_persist.7 \ - ocf_heartbeat_mpathpersist.7 \ + ocf_heartbeat_storage-mon.7 \ ocf_heartbeat_symlink.7 \ ocf_heartbeat_syslog-ng.7 \ ocf_heartbeat_tomcat.7 \ diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am index 9af44cc127..5d52d211f2 100644 --- a/heartbeat/Makefile.am +++ b/heartbeat/Makefile.am @@ -32,22 +32,22 @@ ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat dtddir = $(datadir)/$(PACKAGE_NAME) dtd_DATA = ra-api-1.dtd metadata.rng +ocf_PROGRAMS = + if USE_IPV6ADDR_AGENT -ocf_PROGRAMS = IPv6addr -else -ocf_PROGRAMS = +ocf_PROGRAMS += IPv6addr endif +halib_PROGRAMS = + if IPV6ADDR_COMPATIBLE -halib_PROGRAMS = send_ua -else -halib_PROGRAMS = +halib_PROGRAMS += send_ua endif IPv6addr_SOURCES = IPv6addr.c IPv6addr_utils.c -send_ua_SOURCES = send_ua.c IPv6addr_utils.c - IPv6addr_LDADD = -lplumb $(LIBNETLIBS) + +send_ua_SOURCES = send_ua.c 
IPv6addr_utils.c send_ua_LDADD = $(LIBNETLIBS) osp_SCRIPTS = nova-compute-wait \ @@ -170,6 +170,7 @@ ocf_SCRIPTS = AoEtarget \ mpathpersist \ slapd \ + storage-mon \ sybaseASE \ symlink \ syslog-ng \ tomcat \ diff --git a/heartbeat/storage-mon.in b/heartbeat/storage-mon.in new file mode 100644 index 0000000000..5b289fe554 --- /dev/null +++ b/heartbeat/storage-mon.in @@ -0,0 +1,263 @@ +#!@BASH_SHELL@ +# +# Copyright (C) 2021 Red Hat, Inc. All rights reserved. +# +# Authors: Christine Caulfield +# Fabio M. Di Nitto +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# + +# +# Checks storage I/O status of all given drives and writes the #health-storage +# status into the CIB +# Implementation is heavily based on ocf:pacemaker:HealthSMART +# +# It sends a single block of IO to a random location on the device and reports any errors returned. +# If the IO hangs, that will also be returned. (bear in mind that may also hang the C app in some +# instances). 
+# +# It's worth making a note in the RA description that the smartmon RA is also recommended (this +# does not replace it), and that Pacemaker health checking should be configured. +# +# https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Explained/singlehtml/index.html#tracking-node-health + +####################################################################### + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# +STORAGEMON=$HA_BIN/storage_mon +ATTRDUP=/usr/sbin/attrd_updater + +OCF_RESKEY_CRM_meta_interval_default="0" +OCF_RESKEY_io_timeout_default="10" +OCF_RESKEY_inject_errors_default="" +OCF_RESKEY_state_file_default="${HA_RSCTMP%%/}/storage-mon-${OCF_RESOURCE_INSTANCE}.state" + +# Explicitly list all environment variables used, to make static analysis happy +: ${OCF_RESKEY_CRM_meta_interval:=${OCF_RESKEY_CRM_meta_interval_default}} +: ${OCF_RESKEY_drives:=""} +: ${OCF_RESKEY_io_timeout:=${OCF_RESKEY_io_timeout_default}} +: ${OCF_RESKEY_inject_errors:=${OCF_RESKEY_inject_errors_default}} +: ${OCF_RESKEY_state_file:=${OCF_RESKEY_state_file_default}} + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +System health agent that checks the storage I/O status of the given drives and +updates the #health-storage attribute. Usage is highly recommended in combination +with storage-mon monitoring agent. The agent currently support a maximum of 25 +devices per instance. + +storage I/O health status + + + + + +Location to store the resource state in. + +State file + + + + + +The drive(s) to check as a SPACE separated list. Enter the full path to the device, e.g. "/dev/sda". + +Drives to check + + + + + +Specify disk I/O timeout in seconds. Minimum 1, recommeded 10 (default). + +Disk I/O timeout + + + + + +Used only for testing! 
Specify % of I/O errors to simulate drives failures. + +Specify % of I/O errors to simulate drives failures + + + + + + + + + + + + + +END + return $OCF_SUCCESS +} + +####################################################################### + +storage-mon_usage() { + cat < +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __FreeBSD__ +#include +#endif + +#define MAX_DEVICES 25 +#define DEFAULT_TIMEOUT 10 + +static void usage(char *name, FILE *f) +{ + fprintf(f, "usage: %s [-hv] [-d ]... [-s ]... [-t ]\n", name); + fprintf(f, " --device device to test, up to %d instances\n", MAX_DEVICES); + fprintf(f, " --score score if device fails the test. Must match --device count\n"); + fprintf(f, " --timeout max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT); + fprintf(f, " --inject-errors-percent Generate EIO errors %% of the time (for testing only)\n"); + fprintf(f, " --verbose emit extra output to stdout\n"); + fprintf(f, " --help print this messages\n"); +} + +/* Check one device */ +static void *test_device(const char *device, int verbose, int inject_error_percent) +{ + uint64_t devsize; + int device_fd; + int res; + off_t seek_spot; + char buffer[512]; + + if (verbose) { + printf("Testing device %s\n", device); + } + + device_fd = open(device, O_RDONLY); + if (device_fd < 0) { + fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno)); + exit(-1); + } +#ifdef __FreeBSD__ + res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize); +#else + res = ioctl(device_fd, BLKGETSIZE64, &devsize); +#endif + if (res != 0) { + fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno)); + close(device_fd); + exit(-1); + } + if (verbose) { + fprintf(stderr, "%s: size=%zu\n", device, devsize); + } + /* Don't fret about real randomness */ + srand(time(NULL) + getpid()); + /* Pick a random place on the device - sector aligned */ + seek_spot 
= (rand() % (devsize-1024)) & 0xFFFFFFFFFFFFFE00; + res = lseek(device_fd, seek_spot, SEEK_SET); + if (res < 0) { + fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno)); + close(device_fd); + exit(-1); + } + + if (verbose) { + printf("%s: reading from pos %ld\n", device, seek_spot); + } + + res = read(device_fd, buffer, sizeof(buffer)); + if (res < 0) { + fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno)); + close(device_fd); + exit(-1); + } + if (res < (int)sizeof(buffer)) { + fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res); + close(device_fd); + exit(-1); + } + + /* Fake an error */ + if (inject_error_percent && ((rand() % 100) < inject_error_percent)) { + fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n"); + close(device_fd); + exit(-1); + } + res = close(device_fd); + if (res != 0) { + fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno)); + close(device_fd); + exit(-1); + } + + if (verbose) { + printf("%s: done\n", device); + } + exit(0); +} + +int main(int argc, char *argv[]) +{ + char *devices[MAX_DEVICES]; + int scores[MAX_DEVICES]; + pid_t test_forks[MAX_DEVICES]; + size_t device_count = 0; + size_t score_count = 0; + size_t finished_count = 0; + int timeout = DEFAULT_TIMEOUT; + struct timespec ts; + time_t start_time; + size_t i; + int final_score = 0; + int opt, option_index; + int verbose = 0; + int inject_error_percent = 0; + struct option long_options[] = { + {"timeout", required_argument, 0, 't' }, + {"device", required_argument, 0, 'd' }, + {"score", required_argument, 0, 's' }, + {"inject-errors-percent", required_argument, 0, 0 }, + {"verbose", no_argument, 0, 'v' }, + {"help", no_argument, 0, 'h' }, + {0, 0, 0, 0 } + }; + while ( (opt = getopt_long(argc, argv, "hvt:d:s:", + long_options, &option_index)) != -1 ) { + switch (opt) { + case 0: /* Long-only options */ + if (strcmp(long_options[option_index].name, "inject-errors-percent") 
== 0) { + inject_error_percent = atoi(optarg); + if (inject_error_percent < 1 || inject_error_percent > 100) { + fprintf(stderr, "inject_error_percent should be between 1 and 100\n"); + return -1; + } + } + break; + case 'd': + if (device_count < MAX_DEVICES) { + devices[device_count++] = strdup(optarg); + } else { + fprintf(stderr, "too many devices, max is %d\n", MAX_DEVICES); + return -1; + } + break; + case 's': + if (device_count < MAX_DEVICES) { + int score = atoi(optarg); + if (score < 1 || score > 10) { + fprintf(stderr, "Score must be between 1 and 10 inclusive\n"); + return -1; + } + scores[score_count++] = score; + } else { + fprintf(stderr, "too many scores, max is %d\n", MAX_DEVICES); + return -1; + } + break; + case 'v': + verbose++; + break; + case 't': + timeout = atoi(optarg); + if (timeout < 1) { + fprintf(stderr, "invalid timeout %d. Min 1, recommended %d (default)\n", timeout, DEFAULT_TIMEOUT); + return -1; + } + break; + case 'h': + usage(argv[0], stdout); + break; + default: + usage(argv[0], stderr); + break; + } + + } + if (device_count == 0) { + fprintf(stderr, "No devices to test, use the -d or --device argument\n"); + return -1; + } + + if (device_count != score_count) { + fprintf(stderr, "There must be the same number of devices and scores\n"); + return -1; + } + + openlog("storage_mon", 0, LOG_DAEMON); + + memset(test_forks, 0, sizeof(test_forks)); + for (i=0; i ts.tv_sec)) { + for (i=0; i 0) { + w = waitpid(test_forks[i], &wstatus, WUNTRACED | WNOHANG | WCONTINUED); + if (w < 0) { + fprintf(stderr, "waitpid on %s failed: %s\n", devices[i], strerror(errno)); + return -1; + } + + if (w == test_forks[i]) { + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == 0) { + finished_count++; + test_forks[i] = 0; + } else { + syslog(LOG_ERR, "Error reading from device %s", devices[i]); + final_score += scores[i]; + } + } + } + } + } + + usleep(100000); + + clock_gettime(CLOCK_REALTIME, &ts); + } + + /* See which threads have not finished */ + 
for (i=0; i