From eee7917ef5272691b9d4ee6341463f3c78f7f909 Mon Sep 17 00:00:00 2001
From: Robin Jarry
Date: Wed, 12 Jul 2023 17:49:13 +0200
Subject: [PATCH 6/7] activate_mapping: only blacklist irq if error is considered permanent

Some errors reported when writing to smp_affinity are transient. For
example, when a CPU interrupt controller does not have enough room to
map the IRQ, the kernel will return "No space left on device".

This kind of situation can change over time. Do not mark the IRQ
affinity as "unmanaged". Let irqbalance try again later.

Signed-off-by: Robin Jarry
---
 activate.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/activate.c b/activate.c
index 4418cda..7353692 100644
--- a/activate.c
+++ b/activate.c
@@ -91,9 +91,23 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un
 	info->moved = 0; /*migration is done*/
 error:
 	log(TO_ALL, LOG_WARNING,
-		"Cannot change IRQ %i affinity: %s. Will never try again.\n",
+		"Cannot change IRQ %i affinity: %s\n",
 		info->irq, strerror(errsave));
-	info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED;
+	switch (errsave) {
+	case ENOSPC: /* Specified CPU APIC is full. */
+	case EAGAIN: /* Interrupted by signal. */
+	case EBUSY: /* Affinity change already in progress. */
+	case EINVAL: /* IRQ would be bound to no CPU. */
+	case ERANGE: /* CPU in mask is offline. */
+	case ENOMEM: /* Kernel cannot allocate CPU mask. */
+		/* Do not blacklist the IRQ on transient errors. */
+		break;
+	default:
+		/* Any other error is considered permanent. */
+		info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED;
+		log(TO_ALL, LOG_WARNING, "IRQ %i affinity is now unmanaged\n",
+			info->irq);
+	}
 }
 
 void activate_mappings(void)
-- 
2.40.1