From d593bfa6fc5e2e894798e22fa9c4c433517de4b3 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Thu, 21 Jul 2022 19:00:19 +0800 Subject: [PATCH] KDUMP_COMMANDLINE: remove irqpoll parameter on aws aarch64 platform Currently, kdump may fail on some AWS aarch64 platforms. The last message seen on the console is: [ 79.145089] printk: console [ttyS0] disabled After that, the system stops responding, and after a reboot no vmcore has been generated under /var/crash/. See [1] for more details. In short, the failure is caused by the irqpoll policy combined with an unknown ACPI issue: the serial device, which is a PCI device, gets hot-removed. In more detail, the irqpoll policy iterates over all interrupt handlers and dispatches every handler that sits on a shared interrupt line. The ACPI handler acpi_irq() is on a shared interrupt line, so it is called. For some unknown reason, the ACPI hardware registers hold a wrong state, so the ACPI driver decides that a hot-remove event has happened on a PCI slot and removes the PCI serial device. Work around this issue by removing the irqpoll parameter on AWS aarch64 platforms until the real root cause in ACPI is found and resolved. [1]: https://bugzilla.redhat.com/show_bug.cgi?id=2080468#c0 Signed-off-by: Pingfan Liu Acked-by: Coiby Xu --- kdumpctl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kdumpctl b/kdumpctl index 2157371..7ff635c 100755 --- a/kdumpctl +++ b/kdumpctl @@ -641,6 +641,18 @@ function remove_kdump_kernel_key() keyctl unlink "$KDUMP_KEY_ID" %:.ima } +function is_aws_aarch64() +{ + local _bios_model + + _bios_model=$(lscpu | grep "BIOS Model name") + if [[ "${_bios_model}" =~ "AWS Graviton" ]]; then + return 0 + fi + + return 1 +} + # Load the kdump kernel specified in /etc/sysconfig/kdump # If none is specified, try to load a kdump kernel with the same version # as the currently running kernel. 
@@ -650,6 +662,10 @@ load_kdump() KEXEC_ARGS=$(prepare_kexec_args "${KEXEC_ARGS}") KDUMP_COMMANDLINE=$(prepare_cmdline "${KDUMP_COMMANDLINE}" "${KDUMP_COMMANDLINE_REMOVE}" "${KDUMP_COMMANDLINE_APPEND}") + # This is a workaround on AWS platform, since irqpoll may cause the hot-remove of some pci hotplug device + if is_aws_aarch64; then + KDUMP_COMMANDLINE=$(remove_cmdline_param "${KDUMP_COMMANDLINE}" irqpoll) + fi # For secureboot enabled machines, use new kexec file based syscall. # Old syscall will always fail as it does not have capability to