From 426b9269b8bc2de8d53d5cf99ffad08b1796a820 Mon Sep 17 00:00:00 2001 From: Lichen Liu Date: Tue, 11 Jun 2024 10:46:56 +0800 Subject: [PATCH] Re apply upstream patches We should use PatchXX to apply upstream kdump-utils patches instead of directly merge them into git. Resolves: RHEL-36415 Resolves: RHEL-37670 Upstream: https://github.com/rhkdump/kdump-utils/ Conflict: None Signed-off-by: Lichen Liu --- ...tup-Fix-missing-systemd-system.conf-.patch | 81 +++++++++++++++++++ ...prd-Fix-makedumpfile-parameter-check.patch | 55 +++++++++++++ ...-PHY-and-MDIO-bus-drivers-explicitly.patch | 53 ++++++++++++ ...er-of-physical-device-for-a-SR-IOV-v.patch | 50 ++++++++++++ ...ate-98-kexec-rules-for-crash-hotplug.patch | 47 +++++++++++ ...ie_ports-compat-to-KDUMP_COMMANDLINE.patch | 61 ++++++++++++++ ...ot-re-register-fadump-if-kernel-hotp.patch | 41 ++++++++++ kexec-tools.spec | 14 ++++ 8 files changed, 402 insertions(+) create mode 100644 0001-dracut-module-setup-Fix-missing-systemd-system.conf-.patch create mode 100644 0002-mkdumprd-Fix-makedumpfile-parameter-check.patch create mode 100644 0003-Try-to-install-PHY-and-MDIO-bus-drivers-explicitly.patch create mode 100644 0004-Install-the-driver-of-physical-device-for-a-SR-IOV-v.patch create mode 100644 0005-update-98-kexec-rules-for-crash-hotplug.patch create mode 100644 0006-sysconfig-add-pcie_ports-compat-to-KDUMP_COMMANDLINE.patch create mode 100644 0007-fadump-udev-do-not-re-register-fadump-if-kernel-hotp.patch diff --git a/0001-dracut-module-setup-Fix-missing-systemd-system.conf-.patch b/0001-dracut-module-setup-Fix-missing-systemd-system.conf-.patch new file mode 100644 index 0000000..3ab42fe --- /dev/null +++ b/0001-dracut-module-setup-Fix-missing-systemd-system.conf-.patch @@ -0,0 +1,81 @@ +From a078cd8dfe9fe7956ef04c04c97b7c7354808833 Mon Sep 17 00:00:00 2001 +From: Philipp Rudo +Date: Thu, 11 Apr 2024 16:58:34 +0200 +Subject: [PATCH 1/7] dracut-module-setup: Fix missing systemd/system.conf + error + +There is a bug 
report for RHEL10 about a grep error reading + + grep: /var/tmp/dracut.DiZuKp/initramfs/etc/systemd/system.conf*: No such file or directory + +that shows up when rebuilding the initrd. This is caused by systemd +v255 that allows installing the default systemd config files to +/usr/lib/systemd instead of /etc/systemd [1][2] which is done for RHEL. +So unless a user manually adds /etc/systemd/system.conf the file no +longer exists. + +However the test that requires the call to grep is somewhat wonky. IIUC +the test is there so we don't overwrite a setting the user might have +made. In my opinion this only makes sense as long as the timeout set is +larger than what we would set. But this part of the logic is missing. +So fix the error message by removing the test and add our config +unconditionally. + +While at it rename the created drop-ins to 99-kdump.conf to follow +the recommended naming convention and to make sure that our value takes +precedence. + +Note: In case the test is still needed we can fall back to use +'systemd-analyze cat-config' that automatically considers all potential +locations for the config and its drop-ins. + +[1] 6495361c7d ("meson: add build option for install path of main config files") +[2] 6378f257e7 ("various: use new config loader instead of config_parse_config_file()") + +Signed-off-by: Philipp Rudo +--- + dracut-module-setup.sh | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/dracut-module-setup.sh b/dracut-module-setup.sh +index 7e1cb9f..acc0b28 100755 +--- a/dracut-module-setup.sh ++++ b/dracut-module-setup.sh +@@ -973,21 +973,24 @@ kdump_install_random_seed() { + + kdump_install_systemd_conf() { + # Kdump turns out to require longer default systemd mount timeout +- # than 1st kernel(90s by default), we use default 300s for kdump. +- if ! 
grep -q -r "^[[:space:]]*DefaultTimeoutStartSec=" "${initdir}/etc/systemd/system.conf"*; then +- mkdir -p "${initdir}/etc/systemd/system.conf.d" +- echo "[Manager]" > "${initdir}/etc/systemd/system.conf.d/kdump.conf" +- echo "DefaultTimeoutStartSec=300s" >> "${initdir}/etc/systemd/system.conf.d/kdump.conf" +- fi ++ # than 1st kernel(45s by default), we use default 300s for kdump. ++ mkdir -p "${initdir}/etc/systemd/system.conf.d" ++ cat > "${initdir}/etc/systemd/system.conf.d/99-kdump.conf" << EOF ++[Manager] ++DefaultTimeoutStartSec=300s ++EOF ++ + + # Forward logs to console directly, and don't read Kmsg, this avoids + # unneccessary memory consumption and make console output more useful. + # Only do so for non fadump image. + mkdir -p "${initdir}/etc/systemd/journald.conf.d" +- echo "[Journal]" > "${initdir}/etc/systemd/journald.conf.d/kdump.conf" +- echo "Storage=volatile" >> "${initdir}/etc/systemd/journald.conf.d/kdump.conf" +- echo "ReadKMsg=no" >> "${initdir}/etc/systemd/journald.conf.d/kdump.conf" +- echo "ForwardToConsole=yes" >> "${initdir}/etc/systemd/journald.conf.d/kdump.conf" ++ cat > "${initdir}/etc/systemd/journald.conf.d/99-kdump.conf" << EOF ++[Journal] ++Storage=volatile ++ReadKMsg=no ++ForwardToConsole=yes ++EOF + } + + remove_cpu_online_rule() { +-- +2.44.0 + diff --git a/0002-mkdumprd-Fix-makedumpfile-parameter-check.patch b/0002-mkdumprd-Fix-makedumpfile-parameter-check.patch new file mode 100644 index 0000000..974417c --- /dev/null +++ b/0002-mkdumprd-Fix-makedumpfile-parameter-check.patch @@ -0,0 +1,55 @@ +From 247c7a5f39b305f9a83bad2d936d00237165b7e0 Mon Sep 17 00:00:00 2001 +From: "Mamoru Nishibe (Fujitsu)" +Date: Wed, 24 Apr 2024 08:11:12 +0000 +Subject: [PATCH 2/7] mkdumprd: Fix makedumpfile parameter check. + +If only "makedumpfile" is written in "core_collector" of /etc/kdump.conf +and try to run makedumpfile without options, +"makedumpfile --check-params" fails and terminates abnormally. 
+ + # grep ^core_collector /etc/kdump.conf + core_collector makedumpfile + # /usr/bin/kdumpctl start + : + Commandline parameter is invalid. + Try `makedumpfile --help' for more information. + kdump: makedumpfile parameter check failed. + kdump: mkdumprd: failed to make kdump initrd + kdump: Starting kdump: [FAILED] + +On the other hand, "makedumpfile --check-params" works fine without any options. + + # makedumpfile --check-params vmcore dumpfile + # echo $? + 0 + +In addition, before verify_core_collector() was implemented, +initial RAM for kdump was successfully created using only "core_collector makedumpfile". +I consider it a regression. + +This is due to a parameter extraction error in verify_core_collector(). +Fix it to correctly extract only the options as follows. + +Fixes: a1c28126 ("mkdumprd: Use makedumpfile --check-params option") +Signed-off-by: Mamoru Nishibe +Reviewed-by: Coiby Xu +--- + mkdumprd | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mkdumprd b/mkdumprd +index 31c4b76..27eed5e 100644 +--- a/mkdumprd ++++ b/mkdumprd +@@ -256,7 +256,7 @@ check_user_configured_target() + verify_core_collector() + { + local _cmd="${1%% *}" +- local _params="${1#* }" ++ local _params="${1#${_cmd}}" + + if [[ $_cmd != "makedumpfile" ]]; then + if is_raw_dump_target; then +-- +2.44.0 + diff --git a/0003-Try-to-install-PHY-and-MDIO-bus-drivers-explicitly.patch b/0003-Try-to-install-PHY-and-MDIO-bus-drivers-explicitly.patch new file mode 100644 index 0000000..483de9c --- /dev/null +++ b/0003-Try-to-install-PHY-and-MDIO-bus-drivers-explicitly.patch @@ -0,0 +1,53 @@ +From d057153a1c3c36612a14143b29c0ff0be34e4fc2 Mon Sep 17 00:00:00 2001 +From: Coiby Xu +Date: Thu, 21 Sep 2023 11:50:14 +0800 +Subject: [PATCH 3/7] Try to install PHY and MDIO bus drivers explicitly + +Resolves: https://issues.redhat.com/browse/RHEL-7028 + +Currently, nfs dumping fails on some machines that has a dedicated PHY +driver (dealing with the physical layer) or MDIO bus 
(connecting the MAC +to PHY devices) driver. This is because kexec-tools doesn't install +a dedicated PHY or MDIO driver explicitly. Usually a NIC driver shouldn't +specify the dependency on the needed PHY or MDIO driver because +a NIC (medium access control, MAC) driver is for dealing with +the data link layer and a PHY driver is for the physical layer. So as long +as a MAC driver can talk to the PHY layer via APIs, it shouldn't care +which PHY driver or device it's talking to. So when the +dependency on a PHY driver or MDIO driver is not found by dracut's +instmods, the PHY or MDIO driver won't be installed. + +This patch passes =drivers/net/phy and =drivers/net/mdio to dracut's +instmods which will only install in-use PHY or MDIO driver(s). + +Note ideally we should find out which PHY driver is used by a NIC but +unfortunately no universal way to do so can currently be found +(/sys/class/net/NIC_NAME/phydev/driver/module can be used to find the + name of the PHY driver for some NICs but it doesn't exist for some NICs +like Qualcomm Atheros AR8031). The same is true for an MDIO bus driver. +Fortunately no huge memory consumption has been found so far for a PHY or +MDIO driver. 
+ +Fixes: a65dde2d ("Reduce kdump memory consumption by only installing needed NIC drivers") +Reported-by: Doreen Alongi +Signed-off-by: Coiby Xu +--- + dracut-module-setup.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/dracut-module-setup.sh b/dracut-module-setup.sh +index acc0b28..3c9bdbe 100755 +--- a/dracut-module-setup.sh ++++ b/dracut-module-setup.sh +@@ -384,7 +384,7 @@ _get_hpyerv_physical_driver() { + kdump_install_nic_driver() { + local _netif _driver _drivers + +- _drivers=() ++ _drivers=('=drivers/net/phy' '=drivers/net/mdio') + + for _netif in $1; do + [[ $_netif == lo ]] && continue +-- +2.44.0 + diff --git a/0004-Install-the-driver-of-physical-device-for-a-SR-IOV-v.patch b/0004-Install-the-driver-of-physical-device-for-a-SR-IOV-v.patch new file mode 100644 index 0000000..c8d6a6a --- /dev/null +++ b/0004-Install-the-driver-of-physical-device-for-a-SR-IOV-v.patch @@ -0,0 +1,50 @@ +From 7a8edc8de67dccae23b01461bc3b17c0ad42aa5f Mon Sep 17 00:00:00 2001 +From: Coiby Xu +Date: Wed, 27 Sep 2023 09:31:39 +0800 +Subject: [PATCH 4/7] Install the driver of physical device for a SR-IOV + virtual device + +Currently, network dumping failed over a NIC that is a Single Root I/O +Virtualization (SR-IOV) virtual device. Usually the driver of the +virtual device won't specify the dependency on the driver of the +physical device. So to fix this issue, the driver of the physical device +needs to be found and installed as well. 
+ +Fixes: a65dde2d ("Reduce kdump memory consumption by only installing needed NIC drivers") +Signed-off-by: Coiby Xu +--- + dracut-module-setup.sh | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/dracut-module-setup.sh b/dracut-module-setup.sh +index 3c9bdbe..89d9959 100755 +--- a/dracut-module-setup.sh ++++ b/dracut-module-setup.sh +@@ -381,6 +381,14 @@ _get_hpyerv_physical_driver() { + _get_nic_driver "$_physical_nic" + } + ++_get_physical_function_driver() { ++ local _physfn_dir=/sys/class/net/"$1"/device/physfn ++ ++ if [[ -e "$_physfn_dir" ]]; then ++ basename "$(readlink -f "$_physfn_dir"/driver)" ++ fi ++} ++ + kdump_install_nic_driver() { + local _netif _driver _drivers + +@@ -408,6 +416,9 @@ kdump_install_nic_driver() { + fi + + _drivers+=("$_driver") ++ # For a Single Root I/O Virtualization (SR-IOV) virtual device, ++ # the driver of physical device needs to be installed as well ++ _drivers+=("$(_get_physical_function_driver "$_netif")") + done + + [[ -n ${_drivers[*]} ]] || return +-- +2.44.0 + diff --git a/0005-update-98-kexec-rules-for-crash-hotplug.patch b/0005-update-98-kexec-rules-for-crash-hotplug.patch new file mode 100644 index 0000000..09bfd73 --- /dev/null +++ b/0005-update-98-kexec-rules-for-crash-hotplug.patch @@ -0,0 +1,47 @@ +From 659e0aae8f00570c85e82e1317153bf89e59929c Mon Sep 17 00:00:00 2001 +From: Baoquan He +Date: Thu, 7 Sep 2023 10:37:20 -0500 +Subject: [PATCH 5/7] update 98-kexec rules for crash hotplug + +In kernel, with the support of cpu/memory hotplug on crash, kdump +reloading only needs to update the elfcorehdr. + +To realize the benefits, we need prevent udev from updating kdump +kernel on hot un/plug changes when detecting that the crash_hotplug +sysfs nodes are present. 
+ +Link: https://lore.kernel.org/lkml/20230814214446.6659-1-eric.devolder@oracle.com/ +Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d68b4b6f307d155475cce541f2aee938032ed22e +Signed-off-by: Baoquan He +--- + 98-kexec.rules | 4 ++++ + 98-kexec.rules.ppc64 | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/98-kexec.rules b/98-kexec.rules +index b73b701..52b2ee8 100644 +--- a/98-kexec.rules ++++ b/98-kexec.rules +@@ -1,3 +1,7 @@ ++# The kernel updates the crash elfcorehdr for CPU and memory changes ++SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" ++SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" ++ + SUBSYSTEM=="cpu", ACTION=="add", GOTO="kdump_reload" + SUBSYSTEM=="cpu", ACTION=="remove", GOTO="kdump_reload" + SUBSYSTEM=="memory", ACTION=="online", GOTO="kdump_reload" +diff --git a/98-kexec.rules.ppc64 b/98-kexec.rules.ppc64 +index e9db276..e7735b3 100644 +--- a/98-kexec.rules.ppc64 ++++ b/98-kexec.rules.ppc64 +@@ -1,3 +1,7 @@ ++# The kernel updates the crash elfcorehdr for CPU and memory changes ++SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" ++SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" ++ + SUBSYSTEM=="cpu", ACTION=="online", GOTO="kdump_reload_cpu" + SUBSYSTEM=="memory", ACTION=="online", GOTO="kdump_reload_mem" + SUBSYSTEM=="memory", ACTION=="offline", GOTO="kdump_reload_mem" +-- +2.44.0 + diff --git a/0006-sysconfig-add-pcie_ports-compat-to-KDUMP_COMMANDLINE.patch b/0006-sysconfig-add-pcie_ports-compat-to-KDUMP_COMMANDLINE.patch new file mode 100644 index 0000000..986c855 --- /dev/null +++ b/0006-sysconfig-add-pcie_ports-compat-to-KDUMP_COMMANDLINE.patch @@ -0,0 +1,61 @@ +From ada6f5edf1ae06fc88759aa2f94d09e2a98d21ef Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 1 May 2024 16:53:19 +0800 +Subject: [PATCH 6/7] sysconfig: add pcie_ports compat to + KDUMP_COMMANDLINE_APPEND on x86_64 + +There have been some of 
failing cases of kdump in the 2nd kernel, where +usually only one cpu is enabled by "nr_cpus=1", but with a large +number of devices, which may easily exceed the maximum IRQ resources that +one cpu can handle. As a result, the 2nd kernel will hang and kdump +fails. This issue is often observed on machines with many cpus and many +devices. + +On those systems, pcieports consume quite a proportion of IRQ resources; +many messages like the following can be seen in the dmesg log: + + pcieport 0000:18:01.0: PME: Signaling with IRQ 109 + +According to kernel doc[1], when "pcie_ports=compat" is applied, it will disable +native PCIe services (PME, AER, DPC, PCIe hotplug). Those functions are +power management events, error reporting, port containment, hotplug related, +which are not the must-have functions for kdump. In addition, after +testing, no side effects such as being unable to write vmcore to sdx, nvme +etc. have been noticed. + +This patch will disable native PCIe services for the 2nd kernel, to save the +scarce IRQ resources and increase the kdump success rate. + +Attach Prarit's comments: + +This makes sense to me. The only concern anyone should have is that a PCIE +error could have been responsible for taking down the kernel in the first +place, and booting into the second kernel could then also have a fatal +problem. I'm not sure we can ever fix that type of cascade of panics :) +so it makes sense to disable these features. 
+ +[1]: https://www.kernel.org/doc/html/v6.9-rc1/admin-guide/kernel-parameters.html + +Signed-off-by: Tao Liu +Acked-by: Prarit Bhargava +Acked-by: Dave Young +--- + gen-kdump-sysconfig.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gen-kdump-sysconfig.sh b/gen-kdump-sysconfig.sh +index 78b0bb7..1a2cd92 100755 +--- a/gen-kdump-sysconfig.sh ++++ b/gen-kdump-sysconfig.sh +@@ -104,7 +104,7 @@ s390x) + x86_64) + update_param KEXEC_ARGS "-s" + update_param KDUMP_COMMANDLINE_APPEND \ +- "irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off numa=off udev.children-max=2 panic=10 acpi_no_memhotplug transparent_hugepage=never nokaslr hest_disable novmcoredd cma=0 hugetlb_cma=0" ++ "irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off numa=off udev.children-max=2 panic=10 acpi_no_memhotplug transparent_hugepage=never nokaslr hest_disable novmcoredd cma=0 hugetlb_cma=0 pcie_ports=compat" + ;; + *) + echo "Warning: Unknown architecture '$1', using default sysconfig template." >&2 +-- +2.44.0 + diff --git a/0007-fadump-udev-do-not-re-register-fadump-if-kernel-hotp.patch b/0007-fadump-udev-do-not-re-register-fadump-if-kernel-hotp.patch new file mode 100644 index 0000000..d29e7e1 --- /dev/null +++ b/0007-fadump-udev-do-not-re-register-fadump-if-kernel-hotp.patch @@ -0,0 +1,41 @@ +From b4e3d3724cf372493b404586126067ff66e550d6 Mon Sep 17 00:00:00 2001 +From: Sourabh Jain +Date: Fri, 26 Apr 2024 18:17:49 +0530 +Subject: [PATCH 7/7] fadump/udev: do not re-register fadump if kernel hotplug + ready + +With the introduction of kernel commit c6c5b14dac0d ("powerpc: make fadump +resilient with memory add/remove events") linux kernel now internally manages +the update of elfcorehdr during memory add/remove events. So no need to +re-register fadump if the /sys/kernel/fadump/hotplug_ready is set to 1. + +No impact for kernels that do not have /sys/kernel/fadump/hotplug_ready +sysfs. + +Relevant kernel commit links: +1. 
https://msgid.link/20240422195932.1583833-2-sourabhjain@linux.ibm.com +2. https://msgid.link/20240422195932.1583833-3-sourabhjain@linux.ibm.com + +Signed-off-by: Sourabh Jain +--- + 98-kexec.rules.ppc64 | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/98-kexec.rules.ppc64 b/98-kexec.rules.ppc64 +index e7735b3..85fe0b1 100644 +--- a/98-kexec.rules.ppc64 ++++ b/98-kexec.rules.ppc64 +@@ -15,7 +15,9 @@ GOTO="kdump_reload_end" + + LABEL="kdump_reload_mem" + +-RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'" ++# Don't re-register fadump if /sys/kernel/fadump/hotplug_ready sysfs is set to 1. ++ ++RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; ! test -f /sys/kernel/fadump/hotplug_ready || cat /sys/kernel/fadump/hotplug_ready | grep 1 || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'" + + GOTO="kdump_reload_end" + +-- +2.44.0 + diff --git a/kexec-tools.spec b/kexec-tools.spec index 2fae15b..6601c00 100644 --- a/kexec-tools.spec +++ b/kexec-tools.spec @@ -55,6 +55,13 @@ Patch101: kexec-tools-2.0.28-Fix-building-on-x86_64-with-binutils-2.41.patch Patch601: kexec-tools-2.0.28-kexec-don-t-use-kexec_file_load-on-XEN.patch Patch602: kexec-tools-2.0.28-makedumfpile-0001-PATCH-ppc64-get-vmalloc-start-address-from-vmcoreinf.patch +Patch603: 0001-dracut-module-setup-Fix-missing-systemd-system.conf-.patch +Patch604: 0002-mkdumprd-Fix-makedumpfile-parameter-check.patch +Patch605: 0003-Try-to-install-PHY-and-MDIO-bus-drivers-explicitly.patch +Patch606: 0004-Install-the-driver-of-physical-device-for-a-SR-IOV-v.patch +Patch607: 0005-update-98-kexec-rules-for-crash-hotplug.patch +Patch608: 0006-sysconfig-add-pcie_ports-compat-to-KDUMP_COMMANDLINE.patch +Patch609: 0007-fadump-udev-do-not-re-register-fadump-if-kernel-hotp.patch %description kexec-tools provides /sbin/kexec binary that facilitates a new @@ 
-140,6 +147,13 @@ tar -z -x -v -f %{SOURCE19} %patch 101 -p1 %patch 601 -p1 %patch 602 -p1 +%patch 603 -p1 -d kdump-utils-%{kdump_utils_ver} +%patch 604 -p1 -d kdump-utils-%{kdump_utils_ver} +%patch 605 -p1 -d kdump-utils-%{kdump_utils_ver} +%patch 606 -p1 -d kdump-utils-%{kdump_utils_ver} +%patch 607 -p1 -d kdump-utils-%{kdump_utils_ver} +%patch 608 -p1 -d kdump-utils-%{kdump_utils_ver} +%patch 609 -p1 -d kdump-utils-%{kdump_utils_ver} %ifarch ppc %define archdef ARCH=ppc