diff --git a/kernel.spec b/kernel.spec index bb437aac3..bd84f2d91 100644 --- a/kernel.spec +++ b/kernel.spec @@ -711,9 +711,10 @@ Patch12205: runtime_pm_fixups.patch Patch12303: dmar-disable-when-ricoh-multifunction.patch -Patch12400: mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch -Patch12401: mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch -Patch12402: mm-slub-default-slub_max_order-to-0.patch +Patch12400: mm-vmscan-correct-use-pgdat_balanced-in-sleeping_prematurely.patch +Patch12401: mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch +Patch12402: mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch +Patch12403: mm-vmscan-if-kswapd-has-been-running-too-long-allow-it-to-sleep.patch Patch12500: x86-amd-fix-another-erratum-400-bug.patch @@ -1322,9 +1323,10 @@ ApplyPatch acpi_reboot.patch # rhbz#605888 ApplyPatch dmar-disable-when-ricoh-multifunction.patch +ApplyPatch mm-vmscan-correct-use-pgdat_balanced-in-sleeping_prematurely.patch ApplyPatch mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch ApplyPatch mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch -ApplyPatch mm-slub-default-slub_max_order-to-0.patch +ApplyPatch mm-vmscan-if-kswapd-has-been-running-too-long-allow-it-to-sleep.patch ApplyPatch x86-amd-fix-another-erratum-400-bug.patch @@ -1936,6 +1938,9 @@ fi # and build. 
%changelog +* Sat May 14 2011 Kyle McMartin +- Update to v2 of Mel Gorman's SLUB patchset + * Sat May 14 2011 Kyle McMartin 2.6.39-0.rc7.git6.1 - tmpfs: implement generic xattr support Merge Eric Paris' patch to add xattr support to tmpfs, so that it can be diff --git a/mm-slub-default-slub_max_order-to-0.patch b/mm-slub-default-slub_max_order-to-0.patch deleted file mode 100644 index 266673411..000000000 --- a/mm-slub-default-slub_max_order-to-0.patch +++ /dev/null @@ -1,67 +0,0 @@ -From owner-linux-mm@kvack.org Wed May 11 11:35:30 2011 -From: Mel Gorman -To: Andrew Morton -Subject: [PATCH 3/3] mm: slub: Default slub_max_order to 0 -Date: Wed, 11 May 2011 16:29:33 +0100 -Message-Id: <1305127773-10570-4-git-send-email-mgorman@suse.de> - -To avoid locking and per-cpu overhead, SLUB optimisically uses -high-order allocations up to order-3 by default and falls back to -lower allocations if they fail. While care is taken that the caller -and kswapd take no unusual steps in response to this, there are -further consequences like shrinkers who have to free more objects to -release any memory. There is anecdotal evidence that significant time -is being spent looping in shrinkers with insufficient progress being -made (https://lkml.org/lkml/2011/4/28/361) and keeping kswapd awake. - -SLUB is now the default allocator and some bug reports have been -pinned down to SLUB using high orders during operations like -copying large amounts of data. SLUBs use of high-orders benefits -applications that are sized to memory appropriately but this does not -necessarily apply to large file servers or desktops. This patch -causes SLUB to use order-0 pages like SLAB does by default. -There is further evidence that this keeps kswapd's usage lower -(https://lkml.org/lkml/2011/5/10/383). 
- -Signed-off-by: Mel Gorman ---- - Documentation/vm/slub.txt | 2 +- - mm/slub.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt -index 07375e7..778e9fa 100644 ---- a/Documentation/vm/slub.txt -+++ b/Documentation/vm/slub.txt -@@ -117,7 +117,7 @@ can be influenced by kernel parameters: - - slub_min_objects=x (default 4) - slub_min_order=x (default 0) --slub_max_order=x (default 1) -+slub_max_order=x (default 0) - - slub_min_objects allows to specify how many objects must at least fit - into one slab in order for the allocation order to be acceptable. -diff --git a/mm/slub.c b/mm/slub.c -index 1071723..23a4789 100644 ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2198,7 +2198,7 @@ EXPORT_SYMBOL(kmem_cache_free); - * take the list_lock. - */ - static int slub_min_order; --static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; -+static int slub_max_order; - static int slub_min_objects; - - /* --- -1.7.3.4 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . 
-Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ -Don't email: email@kvack.org - diff --git a/mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch b/mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch index 70191d54b..f07c75bad 100644 --- a/mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch +++ b/mm-slub-do-not-take-expensive-steps-for-slubs-speculative-high-order-allocations.patch @@ -1,9 +1,23 @@ -From owner-linux-mm@kvack.org Wed May 11 11:29:53 2011 -From: Mel Gorman -To: Andrew Morton -Subject: [PATCH 2/3] mm: slub: Do not take expensive steps for SLUBs speculative high-order allocations -Date: Wed, 11 May 2011 16:29:32 +0100 -Message-Id: <1305127773-10570-3-git-send-email-mgorman@suse.de> +From linux-fsdevel-owner@vger.kernel.org Fri May 13 10:04:18 2011 +From: Mel Gorman +To: Andrew Morton +Cc: James Bottomley , + Colin King , + Raghavendra D Prabhu , + Jan Kara , Chris Mason , + Christoph Lameter , + Pekka Enberg , + Rik van Riel , + Johannes Weiner , + linux-fsdevel , + linux-mm , + linux-kernel , + linux-ext4 , + Mel Gorman +Subject: [PATCH 3/4] mm: slub: Do not take expensive steps for SLUBs speculative high-order allocations +Date: Fri, 13 May 2011 15:03:23 +0100 +Message-Id: <1305295404-12129-4-git-send-email-mgorman@suse.de> +X-Mailing-List: linux-fsdevel@vger.kernel.org To avoid locking and per-cpu overhead, SLUB optimisically uses high-order allocations and falls back to lower allocations if they @@ -13,14 +27,13 @@ benefit of using high-order pages in SLUB. On a desktop system, two users report that the system is getting stalled with kswapd using large amounts of CPU. -This patch prevents SLUB taking any expensive steps when trying to -use high-order allocations. Instead, it is expected to fall back to -smaller orders more aggressively. 
Testing from users was somewhat -inconclusive on how much this helped but local tests showed it made -a positive difference. It makes sense that falling back to order-0 -allocations is faster than entering compaction or direct reclaim. +This patch prevents SLUB taking any expensive steps when trying to use +high-order allocations. Instead, it is expected to fall back to smaller +orders more aggressively. Testing was somewhat inconclusive on how much +this helped but it makes sense that falling back to order-0 allocations +is faster than entering compaction or direct reclaim. -Signed-off-yet: Mel Gorman +Signed-off-by: Mel Gorman --- mm/page_alloc.c | 3 ++- mm/slub.c | 3 ++- @@ -48,7 +61,7 @@ index 9f8a97b..057f1e2 100644 * Not worth trying to allocate harder for * __GFP_NOMEMALLOC even if it can't schedule. diff --git a/mm/slub.c b/mm/slub.c -index 98c358d..1071723 100644 +index 98c358d..c5797ab 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1170,7 +1170,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -56,18 +69,10 @@ index 98c358d..1071723 100644 * so we fall-back to the minimum order allocation. */ - alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY | __GFP_NO_KSWAPD) & ~__GFP_NOFAIL; -+ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY | __GFP_NO_KSWAPD) & -+ ~(__GFP_NOFAIL | __GFP_WAIT); ++ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NO_KSWAPD) & ++ ~(__GFP_NOFAIL | __GFP_WAIT | __GFP_REPEAT); page = alloc_slab_page(alloc_gfp, node, oo); if (unlikely(!page)) { -- 1.7.3.4 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . 
-Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ -Don't email: email@kvack.org - diff --git a/mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch b/mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch index 2010a1c13..d730d971c 100644 --- a/mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch +++ b/mm-slub-do-not-wake-kswapd-for-slubs-speculative-high-order-allocations.patch @@ -1,9 +1,23 @@ -From owner-linux-mm@kvack.org Wed May 11 11:29:50 2011 -From: Mel Gorman -To: Andrew Morton -Subject: [PATCH 1/3] mm: slub: Do not wake kswapd for SLUBs speculative high-order allocations -Date: Wed, 11 May 2011 16:29:31 +0100 -Message-Id: <1305127773-10570-2-git-send-email-mgorman@suse.de> +From linux-fsdevel-owner@vger.kernel.org Fri May 13 10:04:00 2011 +From: Mel Gorman +To: Andrew Morton +Cc: James Bottomley , + Colin King , + Raghavendra D Prabhu , + Jan Kara , Chris Mason , + Christoph Lameter , + Pekka Enberg , + Rik van Riel , + Johannes Weiner , + linux-fsdevel , + linux-mm , + linux-kernel , + linux-ext4 , + Mel Gorman +Subject: [PATCH 2/4] mm: slub: Do not wake kswapd for SLUBs speculative high-order allocations +Date: Fri, 13 May 2011 15:03:22 +0100 +Message-Id: <1305295404-12129-3-git-send-email-mgorman@suse.de> +X-Mailing-List: linux-fsdevel@vger.kernel.org To avoid locking and per-cpu overhead, SLUB optimisically uses high-order allocations and falls back to lower allocations if they @@ -36,11 +50,3 @@ index 9d2e5e4..98c358d 100644 if (unlikely(!page)) { -- 1.7.3.4 - --- -To unsubscribe, send a message with 'unsubscribe linux-mm' in -the body to majordomo@kvack.org. For more info on Linux MM, -see: http://www.linux-mm.org/ . 
-Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ -Don't email: email@kvack.org - diff --git a/mm-vmscan-correct-use-pgdat_balanced-in-sleeping_prematurely.patch b/mm-vmscan-correct-use-pgdat_balanced-in-sleeping_prematurely.patch new file mode 100644 index 000000000..907a74d98 --- /dev/null +++ b/mm-vmscan-correct-use-pgdat_balanced-in-sleeping_prematurely.patch @@ -0,0 +1,48 @@ +From linux-fsdevel-owner@vger.kernel.org Fri May 13 10:03:38 2011 +From: Mel Gorman +To: Andrew Morton +Cc: James Bottomley , + Colin King , + Raghavendra D Prabhu , + Jan Kara , Chris Mason , + Christoph Lameter , + Pekka Enberg , + Rik van Riel , + Johannes Weiner , + linux-fsdevel , + linux-mm , + linux-kernel , + linux-ext4 , + Mel Gorman +Subject: [PATCH 1/4] mm: vmscan: Correct use of pgdat_balanced in sleeping_prematurely +Date: Fri, 13 May 2011 15:03:21 +0100 +Message-Id: <1305295404-12129-2-git-send-email-mgorman@suse.de> +X-Mailing-List: linux-fsdevel@vger.kernel.org + +Johannes Weiner pointed out that the logic in commit [1741c877: mm: +kswapd: keep kswapd awake for high-order allocations until a percentage +of the node is balanced] is backwards. Instead of allowing kswapd to go +to sleep when balancing for high order allocations, it keeps kswapd +running uselessly. 
+ +From-but-was-not-signed-off-by: Johannes Weiner +Will-sign-off-after-Johannes: Mel Gorman +--- + mm/vmscan.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index f6b435c..af24d1e 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2286,7 +2286,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining, + * must be balanced + */ + if (order) +- return pgdat_balanced(pgdat, balanced, classzone_idx); ++ return !pgdat_balanced(pgdat, balanced, classzone_idx); + else + return !all_zones_ok; + } +-- +1.7.3.4 diff --git a/mm-vmscan-if-kswapd-has-been-running-too-long-allow-it-to-sleep.patch b/mm-vmscan-if-kswapd-has-been-running-too-long-allow-it-to-sleep.patch new file mode 100644 index 000000000..f2c3510ca --- /dev/null +++ b/mm-vmscan-if-kswapd-has-been-running-too-long-allow-it-to-sleep.patch @@ -0,0 +1,47 @@ +From linux-fsdevel-owner@vger.kernel.org Fri May 13 10:03:52 2011 +From: Mel Gorman +To: Andrew Morton +Cc: James Bottomley , + Colin King , + Raghavendra D Prabhu , + Jan Kara , Chris Mason , + Christoph Lameter , + Pekka Enberg , + Rik van Riel , + Johannes Weiner , + linux-fsdevel , + linux-mm , + linux-kernel , + linux-ext4 , + Mel Gorman +Subject: [PATCH 4/4] mm: vmscan: If kswapd has been running too long, allow it to sleep +Date: Fri, 13 May 2011 15:03:24 +0100 +Message-Id: <1305295404-12129-5-git-send-email-mgorman@suse.de> +X-Mailing-List: linux-fsdevel@vger.kernel.org + +Under constant allocation pressure, kswapd can be in the situation where +sleeping_prematurely() will always return true even if kswapd has been +running a long time. Check if kswapd needs to be scheduled. 
+ +Signed-off-by: Mel Gorman +--- + mm/vmscan.c | 4 ++++ + 1 files changed, 4 insertions(+), 0 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index af24d1e..4d24828 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2251,6 +2251,10 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining, + unsigned long balanced = 0; + bool all_zones_ok = true; + ++ /* If kswapd has been running too long, just sleep */ ++ if (need_resched()) ++ return false; ++ + /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ + if (remaining) + return true; +-- +1.7.3.4