diff --git a/0001-Add-sample-irqbalance-environment-file.patch b/0001-Add-sample-irqbalance-environment-file.patch new file mode 100644 index 0000000..ec6f25e --- /dev/null +++ b/0001-Add-sample-irqbalance-environment-file.patch @@ -0,0 +1,74 @@ +From 626dded557de1e7b90cb847df9e900d40be5af1a Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Wed, 14 Dec 2011 07:09:07 -0500 +Subject: [PATCH 1/8] Add sample irqbalance environment file + +It was pointed out that the example systemd unit file pointed to a corresponding +environment file that had no sample. Fix that up, and modify the unit file to +pass available options via environment variables rather than command line options +since that looks a little cleaner. + +Signed-off-by: Neil Horman + +add irqbalance args variable to env file + +Allow users to pass general arguments to irqbalance through systemd + +Signed-off-by: Neil Horman +--- + misc/irqbalance.env | 26 ++++++++++++++++++++++++++ + misc/irqbalance.service | 5 ++--- + 2 files changed, 28 insertions(+), 3 deletions(-) + create mode 100644 misc/irqbalance.env + +diff --git a/misc/irqbalance.env b/misc/irqbalance.env +new file mode 100644 +index 0000000..bd87e3d +--- /dev/null ++++ b/misc/irqbalance.env +@@ -0,0 +1,26 @@ ++# irqbalance is a daemon process that distributes interrupts across ++# CPUS on SMP systems. The default is to rebalance once every 10 ++# seconds. This is the environment file that is specified to systemd via the ++# EnvironmentFile key in the service unit file (or via whatever method the init ++# system you're using has). ++# ++# ONESHOT=yes ++# after starting, wait for a minute, then look at the interrupt ++# load and balance it once; after balancing exit and do not change ++# it again. ++#IRQBALANCE_ONESHOT= ++ ++# ++# IRQBALANCE_BANNED_CPUS ++# 64 bit bitmask which allows you to indicate which cpu's should ++# be skipped when rebalancing irqs.
Cpu numbers which have their ++# corresponding bits set to one in this mask will not have any ++# irq's assigned to them on rebalance ++# ++#IRQBALANCE_BANNED_CPUS= ++ ++# ++# IRQBALANCE_ARGS ++# append any args here to the irqbalance daemon as documented in the man page ++# ++#IRQBALANCE_ARGS= +diff --git a/misc/irqbalance.service b/misc/irqbalance.service +index f349616..aae2b03 100644 +--- a/misc/irqbalance.service ++++ b/misc/irqbalance.service +@@ -3,9 +3,8 @@ Description=irqbalance daemon + After=syslog.target + + [Service] +-EnvironmentFile=/etc/sysconfig/irqbalance +-Type=forking +-ExecStart=/usr/sbin/irqbalance $ONESHOT ++EnvironmentFile=/path/to/irqbalance.env ++ExecStart=/usr/sbin/irqbalance $IRQBALANCE_ARGS + + [Install] + WantedBy=multi-user.target +-- +1.7.11.4 + diff --git a/0002-introduce-banirq-option.patch b/0002-introduce-banirq-option.patch new file mode 100644 index 0000000..137de84 --- /dev/null +++ b/0002-introduce-banirq-option.patch @@ -0,0 +1,172 @@ +From 4da232bbf763e535ec2512087aa9ac8a96fba3d9 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Fri, 17 Feb 2012 14:27:11 -0500 +Subject: [PATCH 2/8] introduce banirq option + +Fixing bug http://code.google.com/p/irqbalance/issues/detail?id=25 + +It was pointed out that during the rewrite of irqbalance I inadvertently removed +the support for the IRQBALANCE_BANNED_IRQS environment variable. While going to +return it to the build, it occurred to me that, given the availability of msi[x] +irqs, a single system can literally have thousands of interrupt sources, making +the environment variable a non-scalable solution. Instead I'm adding a new +option, banirq, which takes its place. It lets you build a list of irqs that +you want irqbalance to leave alone.
+ +Signed-off-by: Neil Horman +--- + classify.c | 32 ++++++++++++++++++++++++++++++++ + irqbalance.1 | 11 +++++++---- + irqbalance.c | 15 ++++++++++++--- + irqbalance.h | 1 + + 4 files changed, 52 insertions(+), 7 deletions(-) + +diff --git a/classify.c b/classify.c +index 124dab0..d59da7f 100644 +--- a/classify.c ++++ b/classify.c +@@ -52,6 +52,7 @@ static short class_codes[MAX_CLASS] = { + }; + + static GList *interrupts_db; ++static GList *banned_irqs; + + #define SYSDEV_DIR "/sys/bus/pci/devices" + +@@ -63,6 +64,30 @@ static gint compare_ints(gconstpointer a, gconstpointer b) + return ai->irq - bi->irq; + } + ++void add_banned_irq(int irq) ++{ ++ struct irq_info find, *new; ++ GList *entry; ++ ++ find.irq = irq; ++ entry = g_list_find_custom(banned_irqs, &find, compare_ints); ++ if (entry) ++ return; ++ ++ new = calloc(sizeof(struct irq_info), 1); ++ if (!new) { ++ if (debug_mode) ++ printf("No memory to ban irq %d\n", irq); ++ return; ++ } ++ ++ new->irq = irq; ++ ++ banned_irqs = g_list_append(banned_irqs, new); ++ return; ++} ++ ++ + /* + * Inserts an irq_info struct into the intterupts_db list + * devpath points to the device directory in sysfs for the +@@ -90,6 +115,13 @@ static struct irq_info *add_one_irq_to_db(const char *devpath, int irq) + return NULL; + } + ++ entry = g_list_find_custom(banned_irqs, &find, compare_ints); ++ if (entry) { ++ if (debug_mode) ++ printf("SKIPPING BANNED IRQ %d\n", irq); ++ return NULL; ++ } ++ + new = calloc(sizeof(struct irq_info), 1); + if (!new) + return NULL; +diff --git a/irqbalance.1 b/irqbalance.1 +index 55fc15f..978c7c1 100644 +--- a/irqbalance.1 ++++ b/irqbalance.1 +@@ -62,6 +62,13 @@ average cpu softirq workload, and no cpus are more than 1 standard deviation + above (and have more than 1 irq assigned to them), attempt to place 1 cpu in + powersave mode. In powersave mode, a cpu will not have any irqs balanced to it, + in an effort to prevent that cpu from waking up without need. 
++ ++.TP ++.B --banirq= ++Add the specified irq list to the set of banned irqs. irqbalance will not affect ++the affinity of any irqs on the banned list, allowing them to be specified ++manually. This option is addative and can be specified multiple times ++ + .SH "ENVIRONMENT VARIABLES" + .TP + .B IRQBALANCE_ONESHOT +@@ -75,10 +82,6 @@ Same as --debug + .B IRQBALANCE_BANNED_CPUS + Provides a mask of cpus which irqbalance should ignore and never assign interrupts to + +-.TP +-.B IRQBALANCE_BANNED_INTERRUPTS +-A list of space delimited IRQ numbers that irqbalance should not touch +- + .SH "Homepage" + http://code.google.com/p/irqbalance + +diff --git a/irqbalance.c b/irqbalance.c +index 99c5db7..c613e2b 100644 +--- a/irqbalance.c ++++ b/irqbalance.c +@@ -72,7 +72,7 @@ struct option lopts[] = { + static void usage(void) + { + printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]\n"); +- printf(" [--powerthresh= | -p | ]\n"); ++ printf(" [--powerthresh= | -p | ] [--banirq= | -i ]\n"); + } + + static void parse_command_line(int argc, char **argv) +@@ -81,7 +81,7 @@ static void parse_command_line(int argc, char **argv) + int longind; + + while ((opt = getopt_long(argc, argv, +- "odh:p:", ++ "odh:p:b:", + lopts, &longind)) != -1) { + + switch(opt) { +@@ -103,6 +103,14 @@ static void parse_command_line(int argc, char **argv) + exit(1); + } + break; ++ case 'i': ++ val = strtoull(optarg, NULL, 10); ++ if (val == ULONG_MAX) { ++ usage(); ++ exit(1); ++ } ++ add_banned_irq((int)val); ++ break; + case 'p': + if (!strncmp(optarg, "off", strlen(optarg))) + power_thresh = ULONG_MAX; +@@ -179,8 +187,9 @@ int main(int argc, char** argv) + #ifdef HAVE_GETOPT_LONG + parse_command_line(argc, argv); + #else +- if (argc>1 && strstr(argv[1],"--debug")) ++ if (argc>1 && strstr(argv[1],"--debug")) { + debug_mode=1; ++ } + if (argc>1 && strstr(argv[1],"--oneshot")) + one_shot_mode=1; + #endif +diff --git a/irqbalance.h b/irqbalance.h +index 
4e85325..956aa8c 100644 +--- a/irqbalance.h ++++ b/irqbalance.h +@@ -103,6 +103,7 @@ extern int get_cpu_count(void); + */ + extern void rebuild_irq_db(void); + extern void free_irq_db(void); ++extern void add_banned_irq(int irq); + extern void for_each_irq(GList *list, void (*cb)(struct irq_info *info, void *data), void *data); + extern struct irq_info *get_irq_info(int irq); + extern void migrate_irq(GList **from, GList **to, struct irq_info *info); +-- +1.7.11.4 + diff --git a/0003-When-IRQBALANCE_BANNED_CPUS-is-set-proc-stat-is-not-.patch b/0003-When-IRQBALANCE_BANNED_CPUS-is-set-proc-stat-is-not-.patch new file mode 100644 index 0000000..3eac789 --- /dev/null +++ b/0003-When-IRQBALANCE_BANNED_CPUS-is-set-proc-stat-is-not-.patch @@ -0,0 +1,44 @@ +From 718561bc79c095909f0c9d3fb2f0c1c163478b1e Mon Sep 17 00:00:00 2001 +From: Petr Holasek +Date: Mon, 20 Feb 2012 16:59:05 +0100 +Subject: [PATCH 3/8] When IRQBALANCE_BANNED_CPUS is set, /proc/stat is not + parsed properly. + +proc stats counts all the cpus in /proc/stat, but compares that number to the +value in get_cpu_count(), which returns the number of cpus actively being +balanced. Since that value doesn't include banned cpus, it's incorrect. Since +we don't want to measure the load on banned cpus anyway, just skip those lines +so cpucount doesn't increment and the count remains equal.
+ +Signed-off-by: Petr Holasek +Signed-off-by: Neil Horman +--- + procinterrupts.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/procinterrupts.c b/procinterrupts.c +index 4d3b07b..c032caf 100644 +--- a/procinterrupts.c ++++ b/procinterrupts.c +@@ -32,6 +32,8 @@ + + #define LINESIZE 4096 + ++extern cpumask_t banned_cpus; ++ + static int proc_int_has_msi = 0; + static int msi_found_in_sysfs = 0; + +@@ -217,6 +219,9 @@ void parse_proc_stat(void) + + cpunr = strtoul(&line[3], NULL, 10); + ++ if (cpu_isset(cpunr, banned_cpus)) ++ continue; ++ + rc = sscanf(line, "%*s %*d %*d %*d %*d %*d %d %d", &irq_load, &softirq_load); + if (rc < 2) + break; +-- +1.7.11.4 + diff --git a/0004-Make-irqbalance-scan-for-new-irqs-when-it-detects-ne.patch b/0004-Make-irqbalance-scan-for-new-irqs-when-it-detects-ne.patch new file mode 100644 index 0000000..045892e --- /dev/null +++ b/0004-Make-irqbalance-scan-for-new-irqs-when-it-detects-ne.patch @@ -0,0 +1,91 @@ +From 0edc531b0a2ebb41eb5cf49168e2897640cba0ec Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Mon, 2 Jul 2012 13:27:14 -0400 +Subject: [PATCH 4/8] Make irqbalance scan for new irqs when it detects new + irqs + +Like cpu hotplug, irqbalance needs to rebuild its topo map and irq db when it +detects new irqs in the system. 
This patch adds that ability. + +Resolves: http://code.google.com/p/irqbalance/issues/detail?id=32 + +Signed-off-by: Neil Horman +--- + irqbalance.c | 6 +++--- + irqbalance.h | 2 +- + procinterrupts.c | 14 ++++++++++++-- + 3 files changed, 16 insertions(+), 6 deletions(-) + +diff --git a/irqbalance.c b/irqbalance.c +index c613e2b..5d40321 100644 +--- a/irqbalance.c ++++ b/irqbalance.c +@@ -40,7 +40,7 @@ volatile int keep_going = 1; + int one_shot_mode; + int debug_mode; + int numa_avail; +-int need_cpu_rescan; ++int need_rescan; + extern cpumask_t banned_cpus; + enum hp_e hint_policy = HINT_POLICY_SUBSET; + unsigned long power_thresh = ULONG_MAX; +@@ -256,8 +256,8 @@ int main(int argc, char** argv) + parse_proc_stat(); + + /* cope with cpu hotplug -- detected during /proc/interrupts parsing */ +- if (need_cpu_rescan) { +- need_cpu_rescan = 0; ++ if (need_rescan) { ++ need_rescan = 0; + /* if there's a hotplug event we better turn off power mode for a bit until things settle */ + power_mode = 0; + if (debug_mode) +diff --git a/irqbalance.h b/irqbalance.h +index 956aa8c..043bfe6 100644 +--- a/irqbalance.h ++++ b/irqbalance.h +@@ -64,7 +64,7 @@ enum hp_e { + extern int debug_mode; + extern int one_shot_mode; + extern int power_mode; +-extern int need_cpu_rescan; ++extern int need_rescan; + extern enum hp_e hint_policy; + extern unsigned long long cycle_count; + extern unsigned long power_thresh; +diff --git a/procinterrupts.c b/procinterrupts.c +index c032caf..4559b16 100644 +--- a/procinterrupts.c ++++ b/procinterrupts.c +@@ -82,8 +82,18 @@ void parse_proc_interrupts(void) + c++; + number = strtoul(line, NULL, 10); + info = get_irq_info(number); +- if (!info) ++ if (!info) { ++ /* ++ * If this is our 0th pass through this routine ++ * this is an irq that wasn't reported in sysfs ++ * and we should just add it.
If we've been running ++ * a while then this irq just appeared and it's time ++ * to rescan our irqs ++ */ ++ if (cycle_count) ++ need_rescan = 1; + info = add_misc_irq(number); ++ } + + count = 0; + cpunr = 0; +@@ -99,7 +109,7 @@ void parse_proc_interrupts(void) + cpunr++; + } + if (cpunr != core_count) +- need_cpu_rescan = 1; ++ need_rescan = 1; + + info->last_irq_count = info->irq_count; + info->irq_count = count; +-- +1.7.11.4 + diff --git a/0005-Add-banscript-option.patch b/0005-Add-banscript-option.patch new file mode 100644 index 0000000..bf02d1e --- /dev/null +++ b/0005-Add-banscript-option.patch @@ -0,0 +1,218 @@ +From b18eb8f6b28cc9b0816be0fb8fe3468c9f64f345 Mon Sep 17 00:00:00 2001 +From: Neil Horman +Date: Thu, 5 Jul 2012 14:54:35 -0400 +Subject: [PATCH 5/8] Add banscript option + +It's been requested in several different ways that irqbalance have a more robust +mechanism for setting balancing policy at run time. While I don't feel it's +appropriate to have irqbalance be able to implement arbitrary balance policy +(having a flexible mechanism to define which irqs should be placed where can +become exceedingly complex), I do think we need some mechanism that easily +allows users to dynamically exclude irqs from the irqbalance policy at run time. +The banscript option does exactly this. It allows the user to point irqbalance +toward an executable file that is run once for each irq discovered, passing the +sysfs path of the device and an irq vector as arguments. A zero exit code tells +irqbalance to manage the irq as it normally would, while a non-zero exit tells +irqbalance to ignore the interrupt entirely. This provides administrators a code +point with which to exclude irqs dynamically based on any programmatic +information available, and to manage those irqs independently, either via +another irqbalance-like program, or via static affinity setting.
+ +Signed-off-by: Neil Horman + +Resolves: http://code.google.com/p/irqbalance/issues/detail?id=33 +--- + classify.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ + irqbalance.1 | 11 +++++++++++ + irqbalance.c | 25 +++++++++++++++++++++---- + irqbalance.h | 1 + + 4 files changed, 79 insertions(+), 4 deletions(-) + +diff --git a/classify.c b/classify.c +index d59da7f..750d946 100644 +--- a/classify.c ++++ b/classify.c +@@ -207,6 +207,43 @@ out: + return new; + } + ++static int check_for_irq_ban(char *path, int irq) ++{ ++ char *cmd; ++ int rc; ++ ++ if (!banscript) ++ return 0; ++ ++ cmd = alloca(strlen(path)+strlen(banscript)+32); ++ if (!cmd) ++ return 0; ++ ++ sprintf(cmd, "%s %s %d",banscript, path, irq); ++ rc = system(cmd); ++ ++ /* ++ * The system command itself failed ++ */ ++ if (rc == -1) { ++ if (debug_mode) ++ printf("%s failed, please check the --banscript option\n", cmd); ++ else ++ syslog(LOG_INFO, "%s failed, please check the --banscript option\n", cmd); ++ return 0; ++ } ++ ++ if (WEXITSTATUS(rc)) { ++ if (debug_mode) ++ printf("irq %d is baned by %s\n", irq, banscript); ++ else ++ syslog(LOG_INFO, "irq %d is baned by %s\n", irq, banscript); ++ return 1; ++ } ++ return 0; ++ ++} ++ + /* + * Figures out which interrupt(s) relate to the device we're looking at in dirname + */ +@@ -231,6 +268,10 @@ static void build_one_dev_entry(const char *dirname) + irqnum = strtol(entry->d_name, NULL, 10); + if (irqnum) { + sprintf(path, "%s/%s", SYSDEV_DIR, dirname); ++ if (check_for_irq_ban(path, irqnum)) { ++ add_banned_irq(irqnum); ++ continue; ++ } + new = add_one_irq_to_db(path, irqnum); + if (!new) + continue; +@@ -253,6 +294,11 @@ static void build_one_dev_entry(const char *dirname) + */ + if (irqnum) { + sprintf(path, "%s/%s", SYSDEV_DIR, dirname); ++ if (check_for_irq_ban(path, irqnum)) { ++ add_banned_irq(irqnum); ++ goto done; ++ } ++ + new = add_one_irq_to_db(path, irqnum); + if (!new) + goto done; +diff --git a/irqbalance.1 b/irqbalance.1
+index 978c7c1..63b0e26 100644 +--- a/irqbalance.1 ++++ b/irqbalance.1 +@@ -69,6 +69,17 @@ Add the specified irq list to the set of banned irqs. irqbalance will not affect + the affinity of any irqs on the banned list, allowing them to be specified + manually. This option is addative and can be specified multiple times + ++.TP ++.B --banscript=