- mysql: fix replication issues
- LVM-activate: failover with missing PVs Resolves: rhbz#2179003 Resolves: rhbz#2174911
This commit is contained in:
parent
99121b9174
commit
87b017967c
156
bz2174911-LVM-activate-failover-with-missing-pvs.patch
Normal file
156
bz2174911-LVM-activate-failover-with-missing-pvs.patch
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
From 51dd5d5d051aa3b3f0c104f8e80f212cd5780fc3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Tue, 14 Mar 2023 09:14:28 +0100
|
||||||
|
Subject: [PATCH] LVM-activate: failover with missing PVs
|
||||||
|
|
||||||
|
There are two changes included:
|
||||||
|
|
||||||
|
- Allow the system ID to be changed on a VG when the VG is
|
||||||
|
missing PVs, as long as a majority of PVs are still present.
|
||||||
|
This requires a recent version of lvm that supports the
|
||||||
|
--majoritypvs option for vgchange.
|
||||||
|
|
||||||
|
- Use --activationmode degraded when activating LVs so that
|
||||||
|
raid LVs can be activated when legs are missing, as long as
|
||||||
|
sufficient devices are available for raid to provide all the
|
||||||
|
data in the LV.
|
||||||
|
|
||||||
|
By David Teigland.
|
||||||
|
---
|
||||||
|
heartbeat/LVM-activate | 82 ++++++++++++++++++++++++++++++++----------
|
||||||
|
1 file changed, 64 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
|
||||||
|
index e951a08e9c..f6f24a3b52 100755
|
||||||
|
--- a/heartbeat/LVM-activate
|
||||||
|
+++ b/heartbeat/LVM-activate
|
||||||
|
@@ -50,6 +50,8 @@ OCF_RESKEY_vg_access_mode_default=""
|
||||||
|
OCF_RESKEY_activation_mode_default="exclusive"
|
||||||
|
OCF_RESKEY_tag_default="pacemaker"
|
||||||
|
OCF_RESKEY_partial_activation_default="false"
|
||||||
|
+OCF_RESKEY_degraded_activation_default="false"
|
||||||
|
+OCF_RESKEY_majority_pvs_default="false"
|
||||||
|
|
||||||
|
: ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}}
|
||||||
|
: ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}}
|
||||||
|
@@ -57,6 +59,8 @@ OCF_RESKEY_partial_activation_default="false"
|
||||||
|
: ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}}
|
||||||
|
: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}}
|
||||||
|
: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}}
|
||||||
|
+: ${OCF_RESKEY_degraded_activation=${OCF_RESKEY_degraded_activation_default}}
|
||||||
|
+: ${OCF_RESKEY_majority_pvs=${OCF_RESKEY_majority_pvs_default}}
|
||||||
|
|
||||||
|
# If LV is given, only activate this named LV; otherwise, activate all
|
||||||
|
# LVs in the named VG.
|
||||||
|
@@ -191,6 +195,29 @@ logical volumes.
|
||||||
|
<content type="string" default="${OCF_RESKEY_partial_activation_default}" />
|
||||||
|
</parameter>
|
||||||
|
|
||||||
|
+<parameter name="degraded_activation" unique="0" required="0">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+Activate RAID LVs using the "degraded" activation mode. This allows RAID
|
||||||
|
+LVs to be activated with missing PVs if all data can be provided with
|
||||||
|
+RAID redundancy. The RAID level determines the number of PVs that are
|
||||||
|
+required for degraded activation to succeed. If fewer PVs are available,
|
||||||
|
+then degraded activation will fail. Also enable majority_pvs.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">Activate RAID LVs in degraded mode when missing PVs</shortdesc>
|
||||||
|
+<content type="string" default="${OCF_RESKEY_degraded_activation_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
+<parameter name="majority_pvs" unique="0" required="0">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+If set, the VG system ID can be reassigned to a new host if a majority
|
||||||
|
+of PVs in the VG are present. Otherwise, VG failover with system ID
|
||||||
|
+will fail when the VG is missing PVs. Also enable degraded_activation
|
||||||
|
+when RAID LVs are used.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">Allow changing the system ID of a VG with a majority of PVs</shortdesc>
|
||||||
|
+<content type="string" default="${OCF_RESKEY_majority_pvs_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<actions>
|
||||||
|
@@ -524,24 +551,27 @@ lvm_validate() {
|
||||||
|
exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
- # Inconsistency might be due to missing physical volumes, which doesn't
|
||||||
|
- # automatically mean we should fail. If partial_activation=true then
|
||||||
|
- # we should let start try to handle it, or if no PVs are listed as
|
||||||
|
- # "unknown device" then another node may have marked a device missing
|
||||||
|
- # where we have access to all of them and can start without issue.
|
||||||
|
- case $(vgs -o attr --noheadings $VG | tr -d ' ') in
|
||||||
|
- ???p??*)
|
||||||
|
- if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||||
|
- # We are missing devices and cannot activate partially
|
||||||
|
- ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially"
|
||||||
|
- exit $OCF_ERR_GENERIC
|
||||||
|
+ vg_missing_pv_count=$(vgs -o missing_pv_count --noheadings ${VG} 2>/dev/null)
|
||||||
|
+
|
||||||
|
+ if [ $vg_missing_pv_count -gt 0 ]; then
|
||||||
|
+ ocf_log warn "Volume Group ${VG} is missing $vg_missing_pv_count PVs."
|
||||||
|
+
|
||||||
|
+ # Setting new system ID will succeed if over half of PVs remain.
|
||||||
|
+ # Don't try to calculate here if a majority is present,
|
||||||
|
+ # but leave this up to the vgchange command to determine.
|
||||||
|
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
|
||||||
|
+ ocf_log warn "Attempting fail over with missing PVs (majority.)"
|
||||||
|
+
|
||||||
|
+ # Setting new system ID will fail, and behavior is undefined for
|
||||||
|
+ # other access modes.
|
||||||
|
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||||
|
+ ocf_log warn "Attempting fail over with missing PVs (partial.)"
|
||||||
|
+
|
||||||
|
else
|
||||||
|
- # We are missing devices but are allowed to activate partially.
|
||||||
|
- # Assume that caused the vgck failure and carry on
|
||||||
|
- ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
|
||||||
|
+ ocf_exit_reason "Volume group [$VG] has devices missing. Consider majority_pvs=true"
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
- ;;
|
||||||
|
- esac
|
||||||
|
+ fi
|
||||||
|
|
||||||
|
# Get the access mode from VG metadata and check if it matches the input
|
||||||
|
# value. Skip to check "tagging" mode because there's no reliable way to
|
||||||
|
@@ -601,7 +631,18 @@ lvm_validate() {
|
||||||
|
do_activate() {
|
||||||
|
do_activate_opt=$1
|
||||||
|
|
||||||
|
- if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||||
|
+ if ocf_is_true "$OCF_RESKEY_degraded_activation" ; then
|
||||||
|
+ # This will allow a RAID LV to be activated if sufficient
|
||||||
|
+ # devices are available to allow the LV to be usable
|
||||||
|
+ do_activate_opt="${do_activate_opt} --activationmode degraded"
|
||||||
|
+
|
||||||
|
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
|
||||||
|
+ # This will allow a mirror LV to be activated if any
|
||||||
|
+ # devices are missing, but the activated LV may not be
|
||||||
|
+ # usable, so it is not recommended. Also, other LV
|
||||||
|
+ # types without data redundancy will be activated
|
||||||
|
+ # when partial is set.
|
||||||
|
+ # RAID LVs and degraded_activation should be used instead.
|
||||||
|
do_activate_opt="${do_activate_opt} --partial"
|
||||||
|
fi
|
||||||
|
|
||||||
|
@@ -661,11 +702,16 @@ clvmd_activate() {
|
||||||
|
}
|
||||||
|
|
||||||
|
systemid_activate() {
|
||||||
|
+ majority_opt=""
|
||||||
|
set_autoactivation=0
|
||||||
|
cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')
|
||||||
|
|
||||||
|
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
|
||||||
|
+ vgchange --help | grep '\--majoritypvs' >/dev/null 2>&1 && majority_opt="--majoritypvs"
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
# Put our system ID on the VG
|
||||||
|
- vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
|
||||||
|
+ vgchange -y $majority_opt --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
|
||||||
|
--systemid ${SYSTEM_ID} ${VG}
|
||||||
|
vgchange --help | grep '\--setautoactivation' >/dev/null 2>&1 && set_autoactivation=1
|
||||||
|
|
70
bz2179003-mysql-replication-fixes.patch
Normal file
70
bz2179003-mysql-replication-fixes.patch
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
From 706b48fd93a75a582c538013aea1418b6ed69dd0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Thu, 9 Mar 2023 15:57:59 +0100
|
||||||
|
Subject: [PATCH] mysql: promotable fixes to avoid nodes getting bounced around
|
||||||
|
by setting -v 1/-v 2, and added OCF_CHECK_LEVEL=10 for promotable resources
|
||||||
|
to be able to distinguish between promoted and not
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/mysql | 19 +++++++++++++------
|
||||||
|
1 file changed, 13 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/mysql b/heartbeat/mysql
|
||||||
|
index 9ab49ab20e..29ed427319 100755
|
||||||
|
--- a/heartbeat/mysql
|
||||||
|
+++ b/heartbeat/mysql
|
||||||
|
@@ -757,6 +757,10 @@ mysql_monitor() {
|
||||||
|
status_loglevel="info"
|
||||||
|
fi
|
||||||
|
|
||||||
|
+ if ocf_is_ms; then
|
||||||
|
+ OCF_CHECK_LEVEL=10
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
mysql_common_status $status_loglevel
|
||||||
|
rc=$?
|
||||||
|
|
||||||
|
@@ -777,7 +781,13 @@ mysql_monitor() {
|
||||||
|
return $rc
|
||||||
|
fi
|
||||||
|
|
||||||
|
- if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
|
||||||
|
+ if [ $OCF_CHECK_LEVEL -eq 10 ]; then
|
||||||
|
+ if [ -z "$OCF_RESKEY_test_table" ]; then
|
||||||
|
+ ocf_exit_reason "test_table not set"
|
||||||
|
+ return $OCF_ERR_CONFIGURED
|
||||||
|
+
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
# Check if this instance is configured as a slave, and if so
|
||||||
|
# check slave status
|
||||||
|
if is_slave; then
|
||||||
|
@@ -795,18 +805,16 @@ mysql_monitor() {
|
||||||
|
ocf_exit_reason "Failed to select from $test_table";
|
||||||
|
return $OCF_ERR_GENERIC;
|
||||||
|
fi
|
||||||
|
- else
|
||||||
|
- # In case no exnteded tests are enabled and we are in master/slave mode _always_ set the master score to 1 if we reached this point
|
||||||
|
- ocf_is_ms && $CRM_MASTER -v 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ocf_is_ms && ! get_read_only; then
|
||||||
|
ocf_log debug "MySQL monitor succeeded (master)";
|
||||||
|
# Always set master score for the master
|
||||||
|
- $CRM_MASTER -v 2
|
||||||
|
+ $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
|
||||||
|
return $OCF_RUNNING_MASTER
|
||||||
|
else
|
||||||
|
ocf_log debug "MySQL monitor succeeded";
|
||||||
|
+ ocf_is_ms && $CRM_MASTER -v 1
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
@@ -873,7 +881,6 @@ mysql_start() {
|
||||||
|
# preference set by the administrator. We choose a low
|
||||||
|
# greater-than-zero preference.
|
||||||
|
$CRM_MASTER -v 1
|
||||||
|
-
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Initial monitor action
|
@ -45,7 +45,7 @@
|
|||||||
Name: resource-agents
|
Name: resource-agents
|
||||||
Summary: Open Source HA Reusable Cluster Resource Scripts
|
Summary: Open Source HA Reusable Cluster Resource Scripts
|
||||||
Version: 4.10.0
|
Version: 4.10.0
|
||||||
Release: 35%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
Release: 36%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||||
License: GPLv2+ and LGPLv2+
|
License: GPLv2+ and LGPLv2+
|
||||||
URL: https://github.com/ClusterLabs/resource-agents
|
URL: https://github.com/ClusterLabs/resource-agents
|
||||||
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
||||||
@ -96,6 +96,8 @@ Patch43: bz2157872-5-pgsqlms-alidate-all-OCF_CHECK_LEVEL-10.patch
|
|||||||
Patch44: bz2142518-IPaddr2-IPsrcaddr-support-policy-based-routing.patch
|
Patch44: bz2142518-IPaddr2-IPsrcaddr-support-policy-based-routing.patch
|
||||||
Patch45: bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch
|
Patch45: bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch
|
||||||
Patch46: bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch
|
Patch46: bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch
|
||||||
|
Patch47: bz2179003-mysql-replication-fixes.patch
|
||||||
|
Patch48: bz2174911-LVM-activate-failover-with-missing-pvs.patch
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
Patch500: ha-cloud-support-aws.patch
|
Patch500: ha-cloud-support-aws.patch
|
||||||
@ -266,6 +268,8 @@ exit 1
|
|||||||
%patch44 -p1
|
%patch44 -p1
|
||||||
%patch45 -p1
|
%patch45 -p1
|
||||||
%patch46 -p1
|
%patch46 -p1
|
||||||
|
%patch47 -p1
|
||||||
|
%patch48 -p1
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
%patch500 -p1
|
%patch500 -p1
|
||||||
@ -587,6 +591,13 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
|
|||||||
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-36
|
||||||
|
- mysql: fix replication issues
|
||||||
|
- LVM-activate: failover with missing PVs
|
||||||
|
|
||||||
|
Resolves: rhbz#2179003
|
||||||
|
Resolves: rhbz#2174911
|
||||||
|
|
||||||
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-35
|
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-35
|
||||||
- IPaddr2/IPsrcaddr: support policy-based routing
|
- IPaddr2/IPsrcaddr: support policy-based routing
|
||||||
- lvmlockd: add "use_lvmlockd = 1" if it's commented out or missing
|
- lvmlockd: add "use_lvmlockd = 1" if it's commented out or missing
|
||||||
|
Loading…
Reference in New Issue
Block a user