From ad0b6d368abfab33ae31bd8edeb0165116ba06d3 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 29 Mar 2019 19:36:44 +0000 Subject: [PATCH] Pick up a mm fix causing hangs (rhbz 1693525) --- ...-when-resetting-pageblock-skip-hints.patch | 113 ++++++++++++++++++ kernel.spec | 3 + 2 files changed, 116 insertions(+) create mode 100644 Correct-zone-boundary-handling-when-resetting-pageblock-skip-hints.patch diff --git a/Correct-zone-boundary-handling-when-resetting-pageblock-skip-hints.patch b/Correct-zone-boundary-handling-when-resetting-pageblock-skip-hints.patch new file mode 100644 index 000000000..c0eb9bcab --- /dev/null +++ b/Correct-zone-boundary-handling-when-resetting-pageblock-skip-hints.patch @@ -0,0 +1,113 @@ +From patchwork Wed Mar 27 08:54:24 2019 +Date: Wed, 27 Mar 2019 08:54:24 +0000 +From: Mel Gorman +To: Andrew Morton +Cc: Mikhail Gavrilov , + Daniel Jordan , Qian Cai , + linux-mm@kvack.org, vbabka@suse.cz, linux-kernel@vger.kernel.org +Subject: [PATCH] Correct zone boundary handling when resetting pageblock skip + hints + +Mikhail Gavrilov reported the following bug being triggered in a Fedora +kernel based on 5.1-rc1 but it is relevant to a vanilla kernel. + + kernel: page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) + kernel: ------------[ cut here ]------------ + kernel: kernel BUG at include/linux/mm.h:1021! 
+ kernel: invalid opcode: 0000 [#1] SMP NOPTI + kernel: CPU: 6 PID: 116 Comm: kswapd0 Tainted: G C 5.1.0-0.rc1.git1.3.fc31.x86_64 #1 + kernel: Hardware name: System manufacturer System Product Name/ROG STRIX X470-I GAMING, BIOS 1201 12/07/2018 + kernel: RIP: 0010:__reset_isolation_pfn+0x244/0x2b0 + kernel: Code: fe 06 e8 0f 8e fc ff 44 0f b6 4c 24 04 48 85 c0 0f 85 dc fe ff ff e9 68 fe ff ff 48 c7 c6 58 b7 2e 8c 4c 89 ff e8 0c 75 00 00 <0f> 0b 48 c7 c6 58 b7 2e 8c e8 fe 74 00 00 0f 0b 48 89 fa 41 b8 01 + kernel: RSP: 0018:ffff9e2d03f0fde8 EFLAGS: 00010246 + kernel: RAX: 0000000000000034 RBX: 000000000081f380 RCX: ffff8cffbddd6c20 + kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: ffff8cffbddd6c20 + kernel: RBP: 0000000000000001 R08: 0000009898b94613 R09: 0000000000000000 + kernel: R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000100000 + kernel: R13: 0000000000100000 R14: 0000000000000001 R15: ffffca7de07ce000 + kernel: FS: 0000000000000000(0000) GS:ffff8cffbdc00000(0000) knlGS:0000000000000000 + kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + kernel: CR2: 00007fc1670e9000 CR3: 00000007f5276000 CR4: 00000000003406e0 + kernel: Call Trace: + kernel: __reset_isolation_suitable+0x62/0x120 + kernel: reset_isolation_suitable+0x3b/0x40 + kernel: kswapd+0x147/0x540 + kernel: ? finish_wait+0x90/0x90 + kernel: kthread+0x108/0x140 + kernel: ? balance_pgdat+0x560/0x560 + kernel: ? kthread_park+0x90/0x90 + kernel: ret_from_fork+0x27/0x50 + +He bisected it down to commit e332f741a8dd ("mm, compaction: be selective +about what pageblocks to clear skip hints"). The problem is that the patch +in question was sloppy with respect to the handling of zone boundaries. In +some instances, it was possible for PFNs outside of a zone to be examined +and if those were not properly initialised or poisoned then it would +trigger the VM_BUG_ON. 
This patch corrects the zone boundary issues when +resetting pageblock skip hints and Mikhail reported that the bug did not +trigger after 30 hours of testing. + +Fixes: e332f741a8dd ("mm, compaction: be selective about what pageblocks to clear skip hints") +Reported-and-tested-by: Mikhail Gavrilov +Signed-off-by: Mel Gorman +--- + mm/compaction.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +diff --git a/mm/compaction.c b/mm/compaction.c +index f171a83707ce..b4930bf93c8a 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -242,6 +242,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, + bool check_target) + { + struct page *page = pfn_to_online_page(pfn); ++ struct page *block_page; + struct page *end_page; + unsigned long block_pfn; + +@@ -267,20 +268,26 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, + get_pageblock_migratetype(page) != MIGRATE_MOVABLE) + return false; + ++ /* Ensure the start of the pageblock or zone is online and valid */ ++ block_pfn = pageblock_start_pfn(pfn); ++ block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn)); ++ if (block_page) { ++ page = block_page; ++ pfn = block_pfn; ++ } ++ ++ /* Ensure the end of the pageblock or zone is online and valid */ ++ block_pfn += pageblock_nr_pages; ++ block_pfn = min(block_pfn, zone_end_pfn(zone) - 1); ++ end_page = pfn_to_online_page(block_pfn); ++ if (!end_page) ++ return false; ++ + /* + * Only clear the hint if a sample indicates there is either a + * free page or an LRU page in the block. One or other condition + * is necessary for the block to be a migration source/target. 
+ */ +- block_pfn = pageblock_start_pfn(pfn); +- pfn = max(block_pfn, zone->zone_start_pfn); +- page = pfn_to_page(pfn); +- if (zone != page_zone(page)) +- return false; +- pfn = block_pfn + pageblock_nr_pages; +- pfn = min(pfn, zone_end_pfn(zone)); +- end_page = pfn_to_page(pfn); +- + do { + if (pfn_valid_within(pfn)) { + if (check_source && PageLRU(page)) { +@@ -309,7 +316,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, + static void __reset_isolation_suitable(struct zone *zone) + { + unsigned long migrate_pfn = zone->zone_start_pfn; +- unsigned long free_pfn = zone_end_pfn(zone); ++ unsigned long free_pfn = zone_end_pfn(zone) - 1; + unsigned long reset_migrate = free_pfn; + unsigned long reset_free = migrate_pfn; + bool source_set = false; diff --git a/kernel.spec b/kernel.spec index a4d474f0e..4b2415f75 100644 --- a/kernel.spec +++ b/kernel.spec @@ -591,6 +591,9 @@ Patch508: 0001-virt-vbox-Implement-passing-requestor-info-to-the-ho.patch # rhbz 1688283 Patch512: v3-tpm-fix-an-invalid-condition-in-tpm_common_poll.patch +# https://patchwork.kernel.org/patch/10872997/ +Patch513: Correct-zone-boundary-handling-when-resetting-pageblock-skip-hints.patch + # END OF PATCH DEFINITIONS %endif