From d696c1a26aaf8e482eba00c6a6c4e04af2d82b7e Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 19 Dec 2010 20:44:06 -0500 Subject: [PATCH] Linux 2.6.37-rc6-git5 sched-cure-more-NO_HZ-load-average-woes.patch merged upstream. --- .gitignore | 1 + kernel.spec | 12 +- sched-cure-more-NO_HZ-load-average-woes.patch | 273 ------------------ sources | 1 + 4 files changed, 7 insertions(+), 280 deletions(-) delete mode 100644 sched-cure-more-NO_HZ-load-average-woes.patch diff --git a/.gitignore b/.gitignore index 2a9f5306b..ce9b02aee 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ clog *.rpm kernel-2.6.*/ /patch-2.6.37-rc6.bz2 +/patch-2.6.37-rc6-git5.bz2 diff --git a/kernel.spec b/kernel.spec index 052fe2456..51d3f3c36 100644 --- a/kernel.spec +++ b/kernel.spec @@ -84,7 +84,7 @@ Summary: The Linux kernel # The rc snapshot level %define rcrev 6 # The git snapshot level -%define gitrev 0 +%define gitrev 5 # Set rpm version accordingly %define rpmversion 2.6.%{upstream_sublevel} %endif @@ -716,9 +716,6 @@ Patch12401: debug-tty-print-dev-name.patch Patch12410: mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch Patch12411: mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch -# rhbz#650934 -Patch12420: sched-cure-more-NO_HZ-load-average-woes.patch - Patch12421: fs-call-security_d_instantiate-in-d_obtain_alias.patch Patch12422: net-AF_PACKET-vmalloc.patch @@ -1330,9 +1327,6 @@ ApplyPatch debug-tty-print-dev-name.patch ApplyPatch mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch ApplyPatch mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch -# rhbz#650934 -ApplyPatch sched-cure-more-NO_HZ-load-average-woes.patch - # rhbz#662344,600690 ApplyPatch fs-call-security_d_instantiate-in-d_obtain_alias.patch @@ -1952,6 +1946,10 @@ fi # || || %changelog +* Sun Dec 19 2010 Kyle McMartin 2.6.37-0.rc6.git5.1 +- Linux 2.6.37-rc6-git5 +- sched-cure-more-NO_HZ-load-average-woes.patch: upstream. + * Sat Dec 18 2010 Kyle McMartin - Patch from nhorman against f13: Enhance AF_PACKET to allow non-contiguous buffer alloc (#637619) diff --git a/sched-cure-more-NO_HZ-load-average-woes.patch b/sched-cure-more-NO_HZ-load-average-woes.patch deleted file mode 100644 index a4053c3b4..000000000 --- a/sched-cure-more-NO_HZ-load-average-woes.patch +++ /dev/null @@ -1,273 +0,0 @@ -From bounces.tip@hpa.at.zytor.com Wed Dec 8 15:40:48 2010 -From: tip-bot for Peter Zijlstra -In-Reply-To: <1291129145.32004.874.camel@laptop> -References: <1291129145.32004.874.camel@laptop> -Subject: [tip:sched/urgent] sched: Cure more NO_HZ load average woes -Message-ID: -Git-Commit-ID: 0f004f5a696a9434b7214d0d3cbd0525ee77d428 - -Commit-ID: 0f004f5a696a9434b7214d0d3cbd0525ee77d428 -Gitweb: http://git.kernel.org/tip/0f004f5a696a9434b7214d0d3cbd0525ee77d428 -Author: Peter Zijlstra -AuthorDate: Tue, 30 Nov 2010 19:48:45 +0100 -Committer: Ingo Molnar -CommitDate: Wed, 8 Dec 2010 20:15:04 +0100 - -sched: Cure more NO_HZ load average woes - -There's a long-running regression that proved difficult to fix and -which is hitting certain people and is rather annoying in its effects. - -Damien reported that after 74f5187ac8 (sched: Cure load average vs -NO_HZ woes) his load average is unnaturally high, he also noted that -even with that patch reverted the load avgerage numbers are not -correct. - -The problem is that the previous patch only solved half the NO_HZ -problem, it addressed the part of going into NO_HZ mode, not of -comming out of NO_HZ mode. This patch implements that missing half. - -When comming out of NO_HZ mode there are two important things to take -care of: - - - Folding the pending idle delta into the global active count. - - Correctly aging the averages for the idle-duration. - -So with this patch the NO_HZ interaction should be complete and -behaviour between CONFIG_NO_HZ=[yn] should be equivalent. - -Furthermore, this patch slightly changes the load average computation -by adding a rounding term to the fixed point multiplication. - -Reported-by: Damien Wyart -Reported-by: Tim McGrath -Tested-by: Damien Wyart -Tested-by: Orion Poplawski -Tested-by: Kyle McMartin -Signed-off-by: Peter Zijlstra -Cc: stable@kernel.org -Cc: Chase Douglas -LKML-Reference: <1291129145.32004.874.camel@laptop> -Signed-off-by: Ingo Molnar ---- - include/linux/sched.h | 2 +- - kernel/sched.c | 150 +++++++++++++++++++++++++++++++++++++++++++++---- - kernel/timer.c | 2 +- - 3 files changed, 141 insertions(+), 13 deletions(-) - -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 2c79e92..2238745 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu); - extern unsigned long this_cpu_load(void); - - --extern void calc_global_load(void); -+extern void calc_global_load(unsigned long ticks); - - extern unsigned long get_parent_ip(unsigned long addr); - -diff --git a/kernel/sched.c b/kernel/sched.c -index dc91a4d..6b7c26a 100644 ---- a/kernel/sched.c -+++ b/kernel/sched.c -@@ -3119,6 +3119,15 @@ static long calc_load_fold_active(struct rq *this_rq) - return delta; - } - -+static unsigned long -+calc_load(unsigned long load, unsigned long exp, unsigned long active) -+{ -+ load *= exp; -+ load += active * (FIXED_1 - exp); -+ load += 1UL << (FSHIFT - 1); -+ return load >> FSHIFT; -+} -+ - #ifdef CONFIG_NO_HZ - /* - * For NO_HZ we delay the active fold to the next LOAD_FREQ update. -@@ -3148,6 +3157,128 @@ static long calc_load_fold_idle(void) - - return delta; - } -+ -+/** -+ * fixed_power_int - compute: x^n, in O(log n) time -+ * -+ * @x: base of the power -+ * @frac_bits: fractional bits of @x -+ * @n: power to raise @x to. -+ * -+ * By exploiting the relation between the definition of the natural power -+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and -+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, -+ * (where: n_i \elem {0, 1}, the binary vector representing n), -+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is -+ * of course trivially computable in O(log_2 n), the length of our binary -+ * vector. -+ */ -+static unsigned long -+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) -+{ -+ unsigned long result = 1UL << frac_bits; -+ -+ if (n) for (;;) { -+ if (n & 1) { -+ result *= x; -+ result += 1UL << (frac_bits - 1); -+ result >>= frac_bits; -+ } -+ n >>= 1; -+ if (!n) -+ break; -+ x *= x; -+ x += 1UL << (frac_bits - 1); -+ x >>= frac_bits; -+ } -+ -+ return result; -+} -+ -+/* -+ * a1 = a0 * e + a * (1 - e) -+ * -+ * a2 = a1 * e + a * (1 - e) -+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e) -+ * = a0 * e^2 + a * (1 - e) * (1 + e) -+ * -+ * a3 = a2 * e + a * (1 - e) -+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) -+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) -+ * -+ * ... -+ * -+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] -+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) -+ * = a0 * e^n + a * (1 - e^n) -+ * -+ * [1] application of the geometric series: -+ * -+ * n 1 - x^(n+1) -+ * S_n := \Sum x^i = ------------- -+ * i=0 1 - x -+ */ -+static unsigned long -+calc_load_n(unsigned long load, unsigned long exp, -+ unsigned long active, unsigned int n) -+{ -+ -+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); -+} -+ -+/* -+ * NO_HZ can leave us missing all per-cpu ticks calling -+ * calc_load_account_active(), but since an idle CPU folds its delta into -+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold -+ * in the pending idle delta if our idle period crossed a load cycle boundary. -+ * -+ * Once we've updated the global active value, we need to apply the exponential -+ * weights adjusted to the number of cycles missed. -+ */ -+static void calc_global_nohz(unsigned long ticks) -+{ -+ long delta, active, n; -+ -+ if (time_before(jiffies, calc_load_update)) -+ return; -+ -+ /* -+ * If we crossed a calc_load_update boundary, make sure to fold -+ * any pending idle changes, the respective CPUs might have -+ * missed the tick driven calc_load_account_active() update -+ * due to NO_HZ. -+ */ -+ delta = calc_load_fold_idle(); -+ if (delta) -+ atomic_long_add(delta, &calc_load_tasks); -+ -+ /* -+ * If we were idle for multiple load cycles, apply them. -+ */ -+ if (ticks >= LOAD_FREQ) { -+ n = ticks / LOAD_FREQ; -+ -+ active = atomic_long_read(&calc_load_tasks); -+ active = active > 0 ? active * FIXED_1 : 0; -+ -+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); -+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); -+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); -+ -+ calc_load_update += n * LOAD_FREQ; -+ } -+ -+ /* -+ * Its possible the remainder of the above division also crosses -+ * a LOAD_FREQ period, the regular check in calc_global_load() -+ * which comes after this will take care of that. -+ * -+ * Consider us being 11 ticks before a cycle completion, and us -+ * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will -+ * age us 4 cycles, and the test in calc_global_load() will -+ * pick up the final one. -+ */ -+} - #else - static void calc_load_account_idle(struct rq *this_rq) - { -@@ -3157,6 +3288,10 @@ static inline long calc_load_fold_idle(void) - { - return 0; - } -+ -+static void calc_global_nohz(unsigned long ticks) -+{ -+} - #endif - - /** -@@ -3174,24 +3309,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) - loads[2] = (avenrun[2] + offset) << shift; - } - --static unsigned long --calc_load(unsigned long load, unsigned long exp, unsigned long active) --{ -- load *= exp; -- load += active * (FIXED_1 - exp); -- return load >> FSHIFT; --} -- - /* - * calc_load - update the avenrun load estimates 10 ticks after the - * CPUs have updated calc_load_tasks. - */ --void calc_global_load(void) -+void calc_global_load(unsigned long ticks) - { -- unsigned long upd = calc_load_update + 10; - long active; - -- if (time_before(jiffies, upd)) -+ calc_global_nohz(ticks); -+ -+ if (time_before(jiffies, calc_load_update + 10)) - return; - - active = atomic_long_read(&calc_load_tasks); -diff --git a/kernel/timer.c b/kernel/timer.c -index 68a9ae7..7bd715f 100644 ---- a/kernel/timer.c -+++ b/kernel/timer.c -@@ -1319,7 +1319,7 @@ void do_timer(unsigned long ticks) - { - jiffies_64 += ticks; - update_wall_time(); -- calc_global_load(); -+ calc_global_load(ticks); - } - - #ifdef __ARCH_WANT_SYS_ALARM - diff --git a/sources b/sources index f90d2b8d6..26d5eb3fc 100644 --- a/sources +++ b/sources @@ -1,2 +1,3 @@ 61f3739a73afb6914cb007f37fb09b62 linux-2.6.36.tar.bz2 bdc7a207c56dc6e2fc4a548c50e63289 patch-2.6.37-rc6.bz2 +e3f789b7a9c30a62b6dd7940d0ec6279 patch-2.6.37-rc6-git5.bz2