From d7861c877555805e731ace3d930460a8c23e4ae6 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Tue, 18 Jul 2023 09:24:18 +0800 Subject: [PATCH] Release 1.9.2-2 Rebase to latest upstream commit (50699824c7) Resolves:rhbz2184735 Signed-off-by: Tao Liu --- ...-fflush-the-buffered-data-to-smp_aff.patch | 35 +++++ ...mapping-fflush-the-buffered-data-to-.patch | 30 ++++ ...-avoid-use-after-free-when-affinity-.patch | 144 ++++++++++++++++++ ...apping-make-sure-to-catch-all-errors.patch | 48 ++++++ ...activate_mapping-report-error-reason.patch | 64 ++++++++ ...-only-blacklist-irq-if-error-is-cons.patch | 51 +++++++ ...-avoid-logging-error-when-there-is-n.patch | 28 ++++ irqbalance.spec | 19 ++- 8 files changed, 418 insertions(+), 1 deletion(-) create mode 100644 0001-activate_mapping-fflush-the-buffered-data-to-smp_aff.patch create mode 100644 0002-Revert-activate_mapping-fflush-the-buffered-data-to-.patch create mode 100644 0003-activate_mapping-avoid-use-after-free-when-affinity-.patch create mode 100644 0004-activate_mapping-make-sure-to-catch-all-errors.patch create mode 100644 0005-activate_mapping-report-error-reason.patch create mode 100644 0006-activate_mapping-only-blacklist-irq-if-error-is-cons.patch create mode 100644 0007-activate_mapping-avoid-logging-error-when-there-is-n.patch diff --git a/0001-activate_mapping-fflush-the-buffered-data-to-smp_aff.patch b/0001-activate_mapping-fflush-the-buffered-data-to-smp_aff.patch new file mode 100644 index 0000000..2524c42 --- /dev/null +++ b/0001-activate_mapping-fflush-the-buffered-data-to-smp_aff.patch @@ -0,0 +1,35 @@ +From 8bbc0aeca0187ad3f5f942408198bc0fc055b0f8 Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Tue, 4 Jul 2023 10:04:10 +0800 +Subject: [PATCH 1/7] activate_mapping: fflush the buffered data to + smp_affinity + +Previously irqbalance uses the return value of fprintf() to decide whether +the modification of smp_affinity is successful or not. However it is not +reliable because fprintf() is stream buffered, a fflush() should be used to +check the buffered data been successfully written into the file before the +judgement. + +This patch fixes the issue by introducing fflush() after fprintf(). + +Signed-off-by: Tao Liu +--- + activate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/activate.c b/activate.c +index 62cfd08..6d0b096 100644 +--- a/activate.c ++++ b/activate.c +@@ -76,7 +76,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); +- if (ret < 0) { ++ if (ret < 0 || fflush(file)) { + log(TO_ALL, LOG_WARNING, "cannot change irq %i's affinity, add it to banned list", info->irq); + add_banned_irq(info->irq); + remove_one_irq_from_db(info->irq); +-- +2.40.1 + diff --git a/0002-Revert-activate_mapping-fflush-the-buffered-data-to-.patch b/0002-Revert-activate_mapping-fflush-the-buffered-data-to-.patch new file mode 100644 index 0000000..ceb5faf --- /dev/null +++ b/0002-Revert-activate_mapping-fflush-the-buffered-data-to-.patch @@ -0,0 +1,30 @@ +From 4efc1923f8fd161d131d2b8e8930bf5c9ed66668 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Sun, 9 Jul 2023 10:09:17 -0400 +Subject: [PATCH 2/7] Revert "activate_mapping: fflush the buffered data to + smp_affinity" + +This reverts commit 8bbc0aeca0187ad3f5f942408198bc0fc055b0f8. + +Was causing a segfault: +https://github.com/Irqbalance/irqbalance/issues/267 +--- + activate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/activate.c b/activate.c +index 6d0b096..62cfd08 100644 +--- a/activate.c ++++ b/activate.c +@@ -76,7 +76,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); +- if (ret < 0 || fflush(file)) { ++ if (ret < 0) { + log(TO_ALL, LOG_WARNING, "cannot change irq %i's affinity, add it to banned list", info->irq); + add_banned_irq(info->irq); + remove_one_irq_from_db(info->irq); +-- +2.40.1 + diff --git a/0003-activate_mapping-avoid-use-after-free-when-affinity-.patch b/0003-activate_mapping-avoid-use-after-free-when-affinity-.patch new file mode 100644 index 0000000..e545b55 --- /dev/null +++ b/0003-activate_mapping-avoid-use-after-free-when-affinity-.patch @@ -0,0 +1,144 @@ +From f589bdced6e1fe885969f2833fc3cacdfb60ea79 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Tue, 11 Jul 2023 15:17:55 +0200 +Subject: [PATCH 3/7] activate_mapping: avoid use-after-free when affinity + cannot be set + +add_banned_irq appends the irq_info to the banned_irqs list. +remove_one_irq_from_db removes it from the interrupts_db and free()s it. + +This leaves an invalid pointer dangling in banned_irqs *and* potentially +in rebalance_irq_list which can cause use-after-free errors. + +Do not move the irq_info around. Only add a flag to indicate that this +irq's affinity cannot be managed and ignore the irq when this flag is +set. + +Link: https://github.com/Irqbalance/irqbalance/issues/267 +Fixes: 55c5c321c73e ("arm64: Add irq aff change check For aarch64...") +Signed-off-by: Robin Jarry +--- + activate.c | 8 +++++--- + classify.c | 28 +++------------------------- + irqbalance.h | 2 -- + types.h | 3 ++- + 4 files changed, 10 insertions(+), 31 deletions(-) + +diff --git a/activate.c b/activate.c +index 62cfd08..6f8af27 100644 +--- a/activate.c ++++ b/activate.c +@@ -60,6 +60,9 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + if (!info->assigned_obj) + return; + ++ if (info->flags & IRQ_FLAG_AFFINITY_UNMANAGED) ++ return; ++ + /* activate only online cpus, otherwise writing to procfs returns EOVERFLOW */ + cpus_and(applied_mask, cpu_online_map, info->assigned_obj->mask); + +@@ -77,9 +80,8 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); + if (ret < 0) { +- log(TO_ALL, LOG_WARNING, "cannot change irq %i's affinity, add it to banned list", info->irq); +- add_banned_irq(info->irq); +- remove_one_irq_from_db(info->irq); ++ log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); ++ info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; + } + fclose(file); + info->moved = 0; /*migration is done*/ +diff --git a/classify.c b/classify.c +index dac813c..1601a1e 100644 +--- a/classify.c ++++ b/classify.c +@@ -257,7 +257,7 @@ static gint compare_ints(gconstpointer a, gconstpointer b) + return ai->irq - bi->irq; + } + +-static void __add_banned_irq(int irq, GList **list) ++static void add_banned_irq(int irq, GList **list) + { + struct irq_info find, *new; + GList *entry; +@@ -281,14 +281,9 @@ static void __add_banned_irq(int irq, GList **list) + return; + } + +-void add_banned_irq(int irq) +-{ +- __add_banned_irq(irq, &banned_irqs); +-} +- + void add_cl_banned_irq(int irq) + { +- __add_banned_irq(irq, &cl_banned_irqs); ++ add_banned_irq(irq, &cl_banned_irqs); + } + + gint substr_find(gconstpointer a, gconstpointer b) +@@ -392,23 +387,6 @@ get_numa_node: + return new; + } + +-void remove_one_irq_from_db(int irq) +-{ +- struct irq_info find, *tmp; +- GList *entry = NULL; +- +- find.irq = irq; +- entry = g_list_find_custom(interrupts_db, &find, compare_ints); +- if (!entry) +- return; +- +- tmp = entry->data; +- interrupts_db = g_list_remove(interrupts_db, tmp); +- free(tmp); +- log(TO_CONSOLE, LOG_INFO, "IRQ %d was removed from db.\n", irq); +- return; +-} +- + static void parse_user_policy_key(char *buf, int irq, struct user_irq_policy *pol) + { + char *key, *value, *end; +@@ -629,7 +607,7 @@ static void add_new_irq(char *path, struct irq_info *hint) + /* Set NULL devpath for the irq has no sysfs entries */ + get_irq_user_policy(path, irq, &pol); + if ((pol.ban == 1) || check_for_irq_ban(hint, mod)) { /*FIXME*/ +- __add_banned_irq(irq, &banned_irqs); ++ add_banned_irq(irq, &banned_irqs); + new = get_irq_info(irq); + } else + new = add_one_irq_to_db(path, hint, &pol); +diff --git a/irqbalance.h b/irqbalance.h +index e7f6b94..46e05ca 100644 +--- a/irqbalance.h ++++ b/irqbalance.h +@@ -109,8 +109,6 @@ extern struct irq_info *get_irq_info(int irq); + extern void migrate_irq(GList **from, GList **to, struct irq_info *info); + extern void free_cl_opts(void); + extern void add_cl_banned_module(char *modname); +-extern void add_banned_irq(int irq); +-extern void remove_one_irq_from_db(int irq); + #define irq_numa_node(irq) ((irq)->numa_node) + + +diff --git a/types.h b/types.h +index 9693cf4..c63cfea 100644 +--- a/types.h ++++ b/types.h +@@ -34,7 +34,8 @@ + /* + * IRQ Internal tracking flags + */ +-#define IRQ_FLAG_BANNED 1 ++#define IRQ_FLAG_BANNED (1ULL << 0) ++#define IRQ_FLAG_AFFINITY_UNMANAGED (1ULL << 1) + + enum obj_type_e { + OBJ_TYPE_CPU, +-- +2.40.1 + diff --git a/0004-activate_mapping-make-sure-to-catch-all-errors.patch b/0004-activate_mapping-make-sure-to-catch-all-errors.patch new file mode 100644 index 0000000..6c964e6 --- /dev/null +++ b/0004-activate_mapping-make-sure-to-catch-all-errors.patch @@ -0,0 +1,48 @@ +From 470a64b190628574c28a266bdcf8960291463191 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Wed, 12 Jul 2023 08:51:08 +0200 +Subject: [PATCH 4/7] activate_mapping: make sure to catch all errors + +fprintf() is buffered and may not report an error which may be deferred +when fflush() is called (either explicitly or internally by fclose()). + +Check for errors returned by fopen(), fprintf() and fclose() and add +IRQ_FLAG_AFFINITY_UNMANAGED accordingly. + +Fixes: 55c5c321c73e ("arm64: Add irq aff change check For aarch64, ...") +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2184735 +Signed-off-by: Robin Jarry +--- + activate.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/activate.c b/activate.c +index 6f8af27..a4112e0 100644 +--- a/activate.c ++++ b/activate.c +@@ -75,16 +75,16 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + sprintf(buf, "/proc/irq/%i/smp_affinity", info->irq); + file = fopen(buf, "w"); + if (!file) +- return; ++ goto error; + + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); +- if (ret < 0) { +- log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); +- info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; +- } +- fclose(file); ++ if (fclose(file) || ret < 0) ++ goto error; + info->moved = 0; /*migration is done*/ ++error: ++ log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); ++ info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; + } + + void activate_mappings(void) +-- +2.40.1 + diff --git a/0005-activate_mapping-report-error-reason.patch b/0005-activate_mapping-report-error-reason.patch new file mode 100644 index 0000000..3424657 --- /dev/null +++ b/0005-activate_mapping-report-error-reason.patch @@ -0,0 +1,64 @@ +From 9a1fd29a82c9762c3676f613075d44a8d1fcbe82 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Wed, 12 Jul 2023 08:59:45 +0200 +Subject: [PATCH 5/7] activate_mapping: report error reason + +If a given IRQ affinity cannot be set, include strerror in the warning +message. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2184735 +Signed-off-by: Robin Jarry +--- + activate.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/activate.c b/activate.c +index a4112e0..4418cda 100644 +--- a/activate.c ++++ b/activate.c +@@ -25,10 +25,12 @@ + * of interrupts to the kernel. + */ + #include "config.h" ++#include + #include + #include + #include + #include ++#include + + #include "irqbalance.h" + +@@ -48,7 +50,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + { + char buf[PATH_MAX]; + FILE *file; +- int ret = 0; ++ int errsave, ret; + cpumask_t applied_mask; + + /* +@@ -79,11 +81,18 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + + cpumask_scnprintf(buf, PATH_MAX, applied_mask); + ret = fprintf(file, "%s", buf); +- if (fclose(file) || ret < 0) ++ errsave = errno; ++ if (fclose(file)) { ++ errsave = errno; ++ goto error; ++ } ++ if (ret < 0) + goto error; + info->moved = 0; /*migration is done*/ + error: +- log(TO_ALL, LOG_WARNING, "cannot change IRQ %i affinity, will never try again\n", info->irq); ++ log(TO_ALL, LOG_WARNING, ++ "Cannot change IRQ %i affinity: %s. Will never try again.\n", ++ info->irq, strerror(errsave)); + info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; + } + +-- +2.40.1 + diff --git a/0006-activate_mapping-only-blacklist-irq-if-error-is-cons.patch b/0006-activate_mapping-only-blacklist-irq-if-error-is-cons.patch new file mode 100644 index 0000000..7e3a5e6 --- /dev/null +++ b/0006-activate_mapping-only-blacklist-irq-if-error-is-cons.patch @@ -0,0 +1,51 @@ +From eee7917ef5272691b9d4ee6341463f3c78f7f909 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Wed, 12 Jul 2023 17:49:13 +0200 +Subject: [PATCH 6/7] activate_mapping: only blacklist irq if error is + considered permanent + +Some errors reported when writing to smp_affinity are transient. For +example, when a CPU interrupt controller does not have enough room to +map the IRQ, the kernel will return "No space left on device". + +This kind of situation can change over time. Do not mark the IRQ +affinity as "unmanaged". Let irqbalance try again later. + +Signed-off-by: Robin Jarry +--- + activate.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/activate.c b/activate.c +index 4418cda..7353692 100644 +--- a/activate.c ++++ b/activate.c +@@ -91,9 +91,23 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + info->moved = 0; /*migration is done*/ + error: + log(TO_ALL, LOG_WARNING, +- "Cannot change IRQ %i affinity: %s. Will never try again.\n", ++ "Cannot change IRQ %i affinity: %s\n", + info->irq, strerror(errsave)); +- info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; ++ switch (errsave) { ++ case ENOSPC: /* Specified CPU APIC is full. */ ++ case EAGAIN: /* Interrupted by signal. */ ++ case EBUSY: /* Affinity change already in progress. */ ++ case EINVAL: /* IRQ would be bound to no CPU. */ ++ case ERANGE: /* CPU in mask is offline. */ ++ case ENOMEM: /* Kernel cannot allocate CPU mask. */ ++ /* Do not blacklist the IRQ on transient errors. */ ++ break; ++ default: ++ /* Any other error is considered permanent. */ ++ info->flags |= IRQ_FLAG_AFFINITY_UNMANAGED; ++ log(TO_ALL, LOG_WARNING, "IRQ %i affinity is now unmanaged\n", ++ info->irq); ++ } + } + + void activate_mappings(void) +-- +2.40.1 + diff --git a/0007-activate_mapping-avoid-logging-error-when-there-is-n.patch b/0007-activate_mapping-avoid-logging-error-when-there-is-n.patch new file mode 100644 index 0000000..f34232b --- /dev/null +++ b/0007-activate_mapping-avoid-logging-error-when-there-is-n.patch @@ -0,0 +1,28 @@ +From bc7794dc78474c463a26926749537f23abc4c082 Mon Sep 17 00:00:00 2001 +From: Robin Jarry +Date: Thu, 13 Jul 2023 20:49:16 +0200 +Subject: [PATCH 7/7] activate_mapping: avoid logging error when there is none + +Add missing return statement. + +Fixes: 470a64b19062 ("activate_mapping: make sure to catch all errors") +Signed-off-by: Robin Jarry +--- + activate.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/activate.c b/activate.c +index 7353692..548a401 100644 +--- a/activate.c ++++ b/activate.c +@@ -89,6 +89,7 @@ static void activate_mapping(struct irq_info *info, void *data __attribute__((un + if (ret < 0) + goto error; + info->moved = 0; /*migration is done*/ ++ return; + error: + log(TO_ALL, LOG_WARNING, + "Cannot change IRQ %i affinity: %s\n", +-- +2.40.1 + diff --git a/irqbalance.spec b/irqbalance.spec index 2676137..2eb6419 100644 --- a/irqbalance.spec +++ b/irqbalance.spec @@ -1,6 +1,6 @@ Name: irqbalance Version: 1.9.2 -Release: 1%{?dist} +Release: 2%{?dist} Epoch: 2 Summary: IRQ balancing daemon @@ -37,6 +37,13 @@ Patch11: 0010-Revert-Fix-CPU-number-condition-in-service-file.patch Patch12: 0011-Fix-signedness-of-error-handling.patch Patch13: 0012-Fix-it-so-we-actually-stop-when-we-hit-an-interrupt-.patch Patch14: 0013-procinterrupts-fix-initialisation-of-regex_t-struct.patch +Patch15: 0001-activate_mapping-fflush-the-buffered-data-to-smp_aff.patch +Patch16: 0002-Revert-activate_mapping-fflush-the-buffered-data-to-.patch +Patch17: 0003-activate_mapping-avoid-use-after-free-when-affinity-.patch +Patch18: 0004-activate_mapping-make-sure-to-catch-all-errors.patch +Patch19: 0005-activate_mapping-report-error-reason.patch +Patch20: 0006-activate_mapping-only-blacklist-irq-if-error-is-cons.patch +Patch21: 0007-activate_mapping-avoid-logging-error-when-there-is-n.patch %description irqbalance is a daemon that evenly distributes IRQ load across @@ -58,6 +65,13 @@ multiple CPUs for enhanced performance. %patch12 -p1 %patch13 -p1 %patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 %build ./autogen.sh @@ -100,6 +114,9 @@ fi /sbin/chkconfig --del irqbalance >/dev/null 2>&1 || : %changelog +* Tue Jul 18 2023 Tao Liu - 2:1.9.2-2 +- Rebase to latest upstream commit (50699824c7) + * Thu May 18 2023 Tao Liu - 2:1.9.2-1 - Rebase to latest upstream commit (184c95029e)