- mysql: fix replication issues
- LVM-activate: failover with missing PVs Resolves: rhbz#2179003 Resolves: rhbz#2174911
This commit is contained in:
parent
99121b9174
commit
87b017967c
156
bz2174911-LVM-activate-failover-with-missing-pvs.patch
Normal file
156
bz2174911-LVM-activate-failover-with-missing-pvs.patch
Normal file
@ -0,0 +1,156 @@
|
||||
From 51dd5d5d051aa3b3f0c104f8e80f212cd5780fc3 Mon Sep 17 00:00:00 2001
|
||||
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||
Date: Tue, 14 Mar 2023 09:14:28 +0100
|
||||
Subject: [PATCH] LVM-activate: failover with missing PVs
|
||||
|
||||
There are two changes included:
|
||||
|
||||
- Allow the system ID to be changed on a VG when the VG is
|
||||
missing PVs, as long as a majority of PVs are still present.
|
||||
This requires a recent version of lvm that supports the
|
||||
--majoritypvs option for vgchange.
|
||||
|
||||
- Use --activationmode degraded when activating LVs so that
|
||||
raid LVs can be activated when legs are missing, as long as
|
||||
sufficient devices are available for raid to provide all the
|
||||
data in the LV.
|
||||
|
||||
By David Teigland.
|
||||
---
|
||||
heartbeat/LVM-activate | 82 ++++++++++++++++++++++++++++++++----------
|
||||
1 file changed, 64 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
|
||||
index e951a08e9c..f6f24a3b52 100755
|
||||
--- a/heartbeat/LVM-activate
|
||||
+++ b/heartbeat/LVM-activate
|
||||
@@ -50,6 +50,8 @@ OCF_RESKEY_vg_access_mode_default=""
|
||||
OCF_RESKEY_activation_mode_default="exclusive"
|
||||
OCF_RESKEY_tag_default="pacemaker"
|
||||
OCF_RESKEY_partial_activation_default="false"
|
||||
+OCF_RESKEY_degraded_activation_default="false"
|
||||
+OCF_RESKEY_majority_pvs_default="false"
|
||||
|
||||
: ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}}
|
||||
: ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}}
|
||||
@@ -57,6 +59,8 @@ OCF_RESKEY_partial_activation_default="false"
|
||||
: ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}}
|
||||
: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}}
|
||||
: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}}
|
||||
+: ${OCF_RESKEY_degraded_activation=${OCF_RESKEY_degraded_activation_default}}
|
||||
+: ${OCF_RESKEY_majority_pvs=${OCF_RESKEY_majority_pvs_default}}
|
||||
|
||||
# If LV is given, only activate this named LV; otherwise, activate all
|
||||
# LVs in the named VG.
|
||||
@@ -191,6 +195,29 @@ logical volumes.
|
||||
<content type="string" default="${OCF_RESKEY_partial_activation_default}" />
|
||||
</parameter>
|
||||
|
||||
+<parameter name="degraded_activation" unique="0" required="0">
|
||||
+<longdesc lang="en">
|
||||
+Activate RAID LVs using the "degraded" activation mode. This allows RAID
|
||||
+LVs to be activated with missing PVs if all data can be provided with
|
||||
+RAID redundancy. The RAID level determines the number of PVs that are
|
||||
+required for degraded activation to succeed. If fewer PVs are available,
|
||||
+then degraded activation will fail. Also enable majority_pvs.
|
||||
+</longdesc>
|
||||
+<shortdesc lang="en">Activate RAID LVs in degraded mode when missing PVs</shortdesc>
|
||||
+<content type="string" default="${OCF_RESKEY_degraded_activation_default}" />
|
||||
+</parameter>
|
||||
+
|
||||
+<parameter name="majority_pvs" unique="0" required="0">
|
||||
+<longdesc lang="en">
|
||||
+If set, the VG system ID can be reassigned to a new host if a majority
|
||||
+of PVs in the VG are present. Otherwise, VG failover with system ID
|
||||
+will fail when the VG is missing PVs. Also enable degraded_activation
|
||||
+when RAID LVs are used.
|
||||
+</longdesc>
|
||||
+<shortdesc lang="en">Allow changing the system ID of a VG with a majority of PVs</shortdesc>
|
||||
+<content type="string" default="${OCF_RESKEY_majority_pvs_default}" />
|
||||
+</parameter>
|
||||
+
|
||||
</parameters>
|
||||
|
||||
<actions>
|
||||
@@ -524,24 +551,27 @@ lvm_validate() {
|
||||
exit $OCF_ERR_GENERIC
|
||||
fi
|
||||
|
||||
- # Inconsistency might be due to missing physical volumes, which doesn't
|
||||
- # automatically mean we should fail. If partial_activation=true then
|
||||
- # we should let start try to handle it, or if no PVs are listed as
|
||||
- # "unknown device" then another node may have marked a device missing
|
||||
- # where we have access to all of them and can start without issue.
|
||||
- case $(vgs -o attr --noheadings $VG | tr -d ' ') in
|
||||
- ???p??*)
|
||||
- if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||
- # We are missing devices and cannot activate partially
|
||||
- ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially"
|
||||
- exit $OCF_ERR_GENERIC
|
||||
+ vg_missing_pv_count=$(vgs -o missing_pv_count --noheadings ${VG} 2>/dev/null)
|
||||
+
|
||||
+ if [ $vg_missing_pv_count -gt 0 ]; then
|
||||
+ ocf_log warn "Volume Group ${VG} is missing $vg_missing_pv_count PVs."
|
||||
+
|
||||
+ # Setting new system ID will succeed if over half of PVs remain.
|
||||
+ # Don't try to calculate here if a majority is present,
|
||||
+ # but leave this up to the vgchange command to determine.
|
||||
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
|
||||
+ ocf_log warn "Attempting fail over with missing PVs (majority.)"
|
||||
+
|
||||
+ # Setting new system ID will fail, and behavior is undefined for
|
||||
+ # other access modes.
|
||||
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||
+ ocf_log warn "Attempting fail over with missing PVs (partial.)"
|
||||
+
|
||||
else
|
||||
- # We are missing devices but are allowed to activate partially.
|
||||
- # Assume that caused the vgck failure and carry on
|
||||
- ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
|
||||
+ ocf_exit_reason "Volume group [$VG] has devices missing. Consider majority_pvs=true"
|
||||
+ exit $OCF_ERR_GENERIC
|
||||
fi
|
||||
- ;;
|
||||
- esac
|
||||
+ fi
|
||||
|
||||
# Get the access mode from VG metadata and check if it matches the input
|
||||
# value. Skip to check "tagging" mode because there's no reliable way to
|
||||
@@ -601,7 +631,18 @@ lvm_validate() {
|
||||
do_activate() {
|
||||
do_activate_opt=$1
|
||||
|
||||
- if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||
+ if ocf_is_true "$OCF_RESKEY_degraded_activation" ; then
|
||||
+ # This will allow a RAID LV to be activated if sufficient
|
||||
+ # devices are available to allow the LV to be usable
|
||||
+ do_activate_opt="${do_activate_opt} --activationmode degraded"
|
||||
+
|
||||
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||
+ # This will allow a mirror LV to be activated if any
|
||||
+ # devices are missing, but the activated LV may not be
|
||||
+ # usable, so it is not recommended. Also, other LV
|
||||
+ # types without data redundancy will be activated
|
||||
+ # when partial is set.
|
||||
+ # RAID LVs and degraded_activation should be used instead.
|
||||
do_activate_opt="${do_activate_opt} --partial"
|
||||
fi
|
||||
|
||||
@@ -661,11 +702,16 @@ clvmd_activate() {
|
||||
}
|
||||
|
||||
systemid_activate() {
|
||||
+ majority_opt=""
|
||||
set_autoactivation=0
|
||||
cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')
|
||||
|
||||
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
|
||||
+ vgchange --help | grep '\--majoritypvs' >/dev/null 2>&1 && majority_opt="--majoritypvs"
|
||||
+ fi
|
||||
+
|
||||
# Put our system ID on the VG
|
||||
- vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
|
||||
+ vgchange -y $majority_opt --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
|
||||
--systemid ${SYSTEM_ID} ${VG}
|
||||
vgchange --help | grep '\--setautoactivation' >/dev/null 2>&1 && set_autoactivation=1
|
||||
|
70
bz2179003-mysql-replication-fixes.patch
Normal file
70
bz2179003-mysql-replication-fixes.patch
Normal file
@ -0,0 +1,70 @@
|
||||
From 706b48fd93a75a582c538013aea1418b6ed69dd0 Mon Sep 17 00:00:00 2001
|
||||
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||
Date: Thu, 9 Mar 2023 15:57:59 +0100
|
||||
Subject: [PATCH] mysql: promotable fixes to avoid nodes getting bounced around
|
||||
by setting -v 1/-v 2, and added OCF_CHECK_LEVEL=10 for promotable resources
|
||||
to be able to distinguish between promoted and not
|
||||
|
||||
---
|
||||
heartbeat/mysql | 19 +++++++++++++------
|
||||
1 file changed, 13 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/heartbeat/mysql b/heartbeat/mysql
|
||||
index 9ab49ab20e..29ed427319 100755
|
||||
--- a/heartbeat/mysql
|
||||
+++ b/heartbeat/mysql
|
||||
@@ -757,6 +757,10 @@ mysql_monitor() {
|
||||
status_loglevel="info"
|
||||
fi
|
||||
|
||||
+ if ocf_is_ms; then
|
||||
+ OCF_CHECK_LEVEL=10
|
||||
+ fi
|
||||
+
|
||||
mysql_common_status $status_loglevel
|
||||
rc=$?
|
||||
|
||||
@@ -777,7 +781,13 @@ mysql_monitor() {
|
||||
return $rc
|
||||
fi
|
||||
|
||||
- if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
|
||||
+ if [ $OCF_CHECK_LEVEL -eq 10 ]; then
|
||||
+ if [ -z "$OCF_RESKEY_test_table" ]; then
|
||||
+ ocf_exit_reason "test_table not set"
|
||||
+ return $OCF_ERR_CONFIGURED
|
||||
+
|
||||
+ fi
|
||||
+
|
||||
# Check if this instance is configured as a slave, and if so
|
||||
# check slave status
|
||||
if is_slave; then
|
||||
@@ -795,18 +805,16 @@ mysql_monitor() {
|
||||
ocf_exit_reason "Failed to select from $test_table";
|
||||
return $OCF_ERR_GENERIC;
|
||||
fi
|
||||
- else
|
||||
- # In case no exnteded tests are enabled and we are in master/slave mode _always_ set the master score to 1 if we reached this point
|
||||
- ocf_is_ms && $CRM_MASTER -v 1
|
||||
fi
|
||||
|
||||
if ocf_is_ms && ! get_read_only; then
|
||||
ocf_log debug "MySQL monitor succeeded (master)";
|
||||
# Always set master score for the master
|
||||
- $CRM_MASTER -v 2
|
||||
+ $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
|
||||
return $OCF_RUNNING_MASTER
|
||||
else
|
||||
ocf_log debug "MySQL monitor succeeded";
|
||||
+ ocf_is_ms && $CRM_MASTER -v 1
|
||||
return $OCF_SUCCESS
|
||||
fi
|
||||
}
|
||||
@@ -873,7 +881,6 @@ mysql_start() {
|
||||
# preference set by the administrator. We choose a low
|
||||
# greater-than-zero preference.
|
||||
$CRM_MASTER -v 1
|
||||
-
|
||||
fi
|
||||
|
||||
# Initial monitor action
|
@ -45,7 +45,7 @@
|
||||
Name: resource-agents
|
||||
Summary: Open Source HA Reusable Cluster Resource Scripts
|
||||
Version: 4.10.0
|
||||
Release: 35%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||
Release: 36%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||
License: GPLv2+ and LGPLv2+
|
||||
URL: https://github.com/ClusterLabs/resource-agents
|
||||
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
||||
@ -96,6 +96,8 @@ Patch43: bz2157872-5-pgsqlms-alidate-all-OCF_CHECK_LEVEL-10.patch
|
||||
Patch44: bz2142518-IPaddr2-IPsrcaddr-support-policy-based-routing.patch
|
||||
Patch45: bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch
|
||||
Patch46: bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch
|
||||
Patch47: bz2179003-mysql-replication-fixes.patch
|
||||
Patch48: bz2174911-LVM-activate-failover-with-missing-pvs.patch
|
||||
|
||||
# bundled ha-cloud-support libs
|
||||
Patch500: ha-cloud-support-aws.patch
|
||||
@ -266,6 +268,8 @@ exit 1
|
||||
%patch44 -p1
|
||||
%patch45 -p1
|
||||
%patch46 -p1
|
||||
%patch47 -p1
|
||||
%patch48 -p1
|
||||
|
||||
# bundled ha-cloud-support libs
|
||||
%patch500 -p1
|
||||
@ -587,6 +591,13 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
|
||||
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
||||
|
||||
%changelog
|
||||
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-36
|
||||
- mysql: fix replication issues
|
||||
- LVM-activate: failover with missing PVs
|
||||
|
||||
Resolves: rhbz#2179003
|
||||
Resolves: rhbz#2174911
|
||||
|
||||
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-35
|
||||
- IPaddr2/IPsrcaddr: support policy-based routing
|
||||
- lvmlockd: add "use_lvmlockd = 1" if it's commented out or missing
|
||||
|
Loading…
Reference in New Issue
Block a user