Linux v3.4-9208-gaf56e0a
This commit is contained in:
parent
ad6b974304
commit
478adfe8fb
@ -3333,7 +3333,7 @@ CONFIG_MFD_VX855=m
|
|||||||
CONFIG_MFD_SM501=m
|
CONFIG_MFD_SM501=m
|
||||||
CONFIG_MFD_SM501_GPIO=y
|
CONFIG_MFD_SM501_GPIO=y
|
||||||
# CONFIG_MFD_TC6393XB is not set
|
# CONFIG_MFD_TC6393XB is not set
|
||||||
CONFIG_MFD_WM8400=m
|
# CONFIG_MFD_WM8400 is not set
|
||||||
# CONFIG_MFD_WM8350_I2C is not set
|
# CONFIG_MFD_WM8350_I2C is not set
|
||||||
# CONFIG_MFD_WM8350 is not set
|
# CONFIG_MFD_WM8350 is not set
|
||||||
# CONFIG_MFD_WM831X is not set
|
# CONFIG_MFD_WM831X is not set
|
||||||
@ -3342,6 +3342,7 @@ CONFIG_MFD_WM8400=m
|
|||||||
# CONFIG_MFD_WM8994 is not set
|
# CONFIG_MFD_WM8994 is not set
|
||||||
# CONFIG_MFD_88PM860X is not set
|
# CONFIG_MFD_88PM860X is not set
|
||||||
# CONFIG_LPC_SCH is not set
|
# CONFIG_LPC_SCH is not set
|
||||||
|
# CONFIG_LPC_ICH is not set
|
||||||
# CONFIG_HTC_I2CPLD is not set
|
# CONFIG_HTC_I2CPLD is not set
|
||||||
# CONFIG_MFD_MAX8925 is not set
|
# CONFIG_MFD_MAX8925 is not set
|
||||||
# CONFIG_MFD_ASIC3 is not set
|
# CONFIG_MFD_ASIC3 is not set
|
||||||
@ -3358,6 +3359,8 @@ CONFIG_MFD_WM8400=m
|
|||||||
# CONFIG_MFD_TC3589X is not set
|
# CONFIG_MFD_TC3589X is not set
|
||||||
# CONFIG_MFD_WL1273_CORE is not set
|
# CONFIG_MFD_WL1273_CORE is not set
|
||||||
# CONFIG_MFD_TPS65217 is not set
|
# CONFIG_MFD_TPS65217 is not set
|
||||||
|
# CONFIG_MFD_LM3533 is not set
|
||||||
|
# CONFIG_MFD_MC13XXX_I2C is not set
|
||||||
|
|
||||||
#
|
#
|
||||||
# File systems
|
# File systems
|
||||||
@ -3494,6 +3497,7 @@ CONFIG_CUSE=m
|
|||||||
#
|
#
|
||||||
CONFIG_NETWORK_FILESYSTEMS=y
|
CONFIG_NETWORK_FILESYSTEMS=y
|
||||||
CONFIG_NFS_FS=m
|
CONFIG_NFS_FS=m
|
||||||
|
CONFIG_NFS_V2=y
|
||||||
CONFIG_NFS_V3=y
|
CONFIG_NFS_V3=y
|
||||||
CONFIG_NFS_V3_ACL=y
|
CONFIG_NFS_V3_ACL=y
|
||||||
CONFIG_NFS_V4=y
|
CONFIG_NFS_V4=y
|
||||||
@ -3682,6 +3686,7 @@ CONFIG_LOCKUP_DETECTOR=y
|
|||||||
CONFIG_ATOMIC64_SELFTEST=y
|
CONFIG_ATOMIC64_SELFTEST=y
|
||||||
CONFIG_MEMORY_FAILURE=y
|
CONFIG_MEMORY_FAILURE=y
|
||||||
CONFIG_HWPOISON_INJECT=m
|
CONFIG_HWPOISON_INJECT=m
|
||||||
|
CONFIG_CROSS_MEMORY_ATTACH=y
|
||||||
# CONFIG_DEBUG_SECTION_MISMATCH is not set
|
# CONFIG_DEBUG_SECTION_MISMATCH is not set
|
||||||
# CONFIG_BACKTRACE_SELF_TEST is not set
|
# CONFIG_BACKTRACE_SELF_TEST is not set
|
||||||
CONFIG_LATENCYTOP=y
|
CONFIG_LATENCYTOP=y
|
||||||
@ -3977,6 +3982,7 @@ CONFIG_LEDS_TRIGGER_IDE_DISK=y
|
|||||||
CONFIG_LEDS_TRIGGER_HEARTBEAT=m
|
CONFIG_LEDS_TRIGGER_HEARTBEAT=m
|
||||||
CONFIG_LEDS_TRIGGER_BACKLIGHT=m
|
CONFIG_LEDS_TRIGGER_BACKLIGHT=m
|
||||||
CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
|
CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
|
||||||
|
CONFIG_LEDS_TRIGGER_TRANSIENT=m
|
||||||
CONFIG_LEDS_ALIX2=m
|
CONFIG_LEDS_ALIX2=m
|
||||||
CONFIG_LEDS_CLEVO_MAIL=m
|
CONFIG_LEDS_CLEVO_MAIL=m
|
||||||
CONFIG_LEDS_INTEL_SS4200=m
|
CONFIG_LEDS_INTEL_SS4200=m
|
||||||
|
@ -357,6 +357,9 @@ CONFIG_TOSHIBA_BT_RFKILL=m
|
|||||||
|
|
||||||
CONFIG_VGA_SWITCHEROO=y
|
CONFIG_VGA_SWITCHEROO=y
|
||||||
CONFIG_LPC_SCH=m
|
CONFIG_LPC_SCH=m
|
||||||
|
CONFIG_LPC_ICH=m
|
||||||
|
|
||||||
|
CONFIG_GPIO_ICH=m
|
||||||
|
|
||||||
CONFIG_PCI_CNB20LE_QUIRK=y
|
CONFIG_PCI_CNB20LE_QUIRK=y
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
|
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
|
||||||
index 71c7096..5defdd5 100644
|
index 296cfc2..516e1e2 100644
|
||||||
--- a/drivers/gpu/drm/i915/intel_dp.c
|
--- a/drivers/gpu/drm/i915/intel_dp.c
|
||||||
+++ b/drivers/gpu/drm/i915/intel_dp.c
|
+++ b/drivers/gpu/drm/i915/intel_dp.c
|
||||||
@@ -347,7 +347,7 @@ intel_dp_check_edp(struct intel_dp *intel_dp)
|
@@ -350,7 +350,7 @@ intel_dp_check_edp(struct intel_dp *intel_dp)
|
||||||
if (!is_edp(intel_dp))
|
if (!is_edp(intel_dp))
|
||||||
return;
|
return;
|
||||||
if (!ironlake_edp_have_panel_power(intel_dp) && !ironlake_edp_have_panel_vdd(intel_dp)) {
|
if (!ironlake_edp_have_panel_power(intel_dp) && !ironlake_edp_have_panel_vdd(intel_dp)) {
|
||||||
@ -11,7 +11,7 @@ index 71c7096..5defdd5 100644
|
|||||||
DRM_DEBUG_KMS("Status 0x%08x Control 0x%08x\n",
|
DRM_DEBUG_KMS("Status 0x%08x Control 0x%08x\n",
|
||||||
I915_READ(PCH_PP_STATUS),
|
I915_READ(PCH_PP_STATUS),
|
||||||
I915_READ(PCH_PP_CONTROL));
|
I915_READ(PCH_PP_CONTROL));
|
||||||
@@ -397,7 +397,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
|
@@ -400,7 +400,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (try == 3) {
|
if (try == 3) {
|
||||||
@ -20,7 +20,7 @@ index 71c7096..5defdd5 100644
|
|||||||
I915_READ(ch_ctl));
|
I915_READ(ch_ctl));
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
}
|
}
|
||||||
@@ -1018,8 +1018,8 @@ static void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp)
|
@@ -1024,8 +1024,8 @@ static void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp)
|
||||||
return;
|
return;
|
||||||
DRM_DEBUG_KMS("Turn eDP VDD on\n");
|
DRM_DEBUG_KMS("Turn eDP VDD on\n");
|
||||||
|
|
||||||
@ -31,7 +31,7 @@ index 71c7096..5defdd5 100644
|
|||||||
|
|
||||||
intel_dp->want_panel_vdd = true;
|
intel_dp->want_panel_vdd = true;
|
||||||
|
|
||||||
@@ -1084,7 +1084,8 @@ static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
|
@@ -1090,7 +1090,8 @@ static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
DRM_DEBUG_KMS("Turn eDP VDD off %d\n", intel_dp->want_panel_vdd);
|
DRM_DEBUG_KMS("Turn eDP VDD off %d\n", intel_dp->want_panel_vdd);
|
||||||
@ -41,14 +41,13 @@ index 71c7096..5defdd5 100644
|
|||||||
|
|
||||||
intel_dp->want_panel_vdd = false;
|
intel_dp->want_panel_vdd = false;
|
||||||
|
|
||||||
@@ -1154,7 +1155,9 @@ static void ironlake_edp_panel_off(struct intel_dp *intel_dp)
|
@@ -1160,7 +1161,8 @@ static void ironlake_edp_panel_off(struct intel_dp *intel_dp)
|
||||||
|
|
||||||
DRM_DEBUG_KMS("Turn eDP power off\n");
|
DRM_DEBUG_KMS("Turn eDP power off\n");
|
||||||
|
|
||||||
- WARN(intel_dp->want_panel_vdd, "Cannot turn power off while VDD is on\n");
|
- WARN(!intel_dp->want_panel_vdd, "Need VDD to turn off panel\n");
|
||||||
+ if (intel_dp->want_panel_vdd)
|
+ if (!intel_dp->want_panel_vdd)
|
||||||
+ DRM_ERROR("Cannot turn power off while VDD is on\n");
|
+ DRM_ERROR("Need VDD to turn off panel\n");
|
||||||
+
|
|
||||||
ironlake_panel_vdd_off_sync(intel_dp); /* finish any pending work */
|
|
||||||
|
|
||||||
pp = ironlake_get_pp_control(dev_priv);
|
pp = ironlake_get_pp_control(dev_priv);
|
||||||
|
pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_BLC_ENABLE);
|
||||||
|
13
kernel.spec
13
kernel.spec
@ -62,7 +62,7 @@ Summary: The Linux kernel
|
|||||||
# For non-released -rc kernels, this will be appended after the rcX and
|
# For non-released -rc kernels, this will be appended after the rcX and
|
||||||
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
|
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
|
||||||
#
|
#
|
||||||
%global baserelease 2
|
%global baserelease 1
|
||||||
%global fedora_build %{baserelease}
|
%global fedora_build %{baserelease}
|
||||||
|
|
||||||
# base_sublevel is the kernel version we're starting with and patching
|
# base_sublevel is the kernel version we're starting with and patching
|
||||||
@ -95,7 +95,7 @@ Summary: The Linux kernel
|
|||||||
# The rc snapshot level
|
# The rc snapshot level
|
||||||
%define rcrev 0
|
%define rcrev 0
|
||||||
# The git snapshot level
|
# The git snapshot level
|
||||||
%define gitrev 9
|
%define gitrev 10
|
||||||
# Set rpm version accordingly
|
# Set rpm version accordingly
|
||||||
%define rpmversion 3.%{upstream_sublevel}.0
|
%define rpmversion 3.%{upstream_sublevel}.0
|
||||||
%endif
|
%endif
|
||||||
@ -749,9 +749,6 @@ Patch22000: weird-root-dentry-name-debug.patch
|
|||||||
#selinux ptrace child permissions
|
#selinux ptrace child permissions
|
||||||
Patch22001: selinux-apply-different-permission-to-ptrace-child.patch
|
Patch22001: selinux-apply-different-permission-to-ptrace-child.patch
|
||||||
|
|
||||||
#rhbz 822825 822821 CVE-2012-2372
|
|
||||||
Patch22021: mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_populate-SMP-race-condition.patch
|
|
||||||
|
|
||||||
# END OF PATCH DEFINITIONS
|
# END OF PATCH DEFINITIONS
|
||||||
|
|
||||||
%endif
|
%endif
|
||||||
@ -1447,9 +1444,6 @@ ApplyPatch selinux-apply-different-permission-to-ptrace-child.patch
|
|||||||
#Highbank clock functions
|
#Highbank clock functions
|
||||||
ApplyPatch highbank-export-clock-functions.patch
|
ApplyPatch highbank-export-clock-functions.patch
|
||||||
|
|
||||||
#rhbz 822825 822821 CVE-2012-2372
|
|
||||||
ApplyPatch mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_populate-SMP-race-condition.patch
|
|
||||||
|
|
||||||
# END OF PATCH APPLICATIONS
|
# END OF PATCH APPLICATIONS
|
||||||
|
|
||||||
%endif
|
%endif
|
||||||
@ -2300,6 +2294,9 @@ fi
|
|||||||
# ||----w |
|
# ||----w |
|
||||||
# || ||
|
# || ||
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu May 31 2012 Josh Boyer <jwboyer@redhat.com> - 3.5.0-0.rc0.git10.1
|
||||||
|
- Linux v3.4-9208-gaf56e0a
|
||||||
|
|
||||||
* Wed May 30 2012 Josh Boyer <jwboyer@redhat.com>
|
* Wed May 30 2012 Josh Boyer <jwboyer@redhat.com>
|
||||||
- modsign: Fix 32bit ELF table interpretation from David Howells (rhbz 825944)
|
- modsign: Fix 32bit ELF table interpretation from David Howells (rhbz 825944)
|
||||||
|
|
||||||
|
@ -1,272 +0,0 @@
|
|||||||
Path: news.gmane.org!not-for-mail
|
|
||||||
From: Andrea Arcangeli <aarcange@redhat.com>
|
|
||||||
Newsgroups: gmane.linux.kernel.mm
|
|
||||||
Subject: [PATCH] mm: pmd_read_atomic: fix 32bit PAE pmd walk vs pmd_populate SMP race condition
|
|
||||||
Date: Thu, 24 May 2012 01:39:01 +0200
|
|
||||||
Lines: 208
|
|
||||||
Approved: news@gmane.org
|
|
||||||
Message-ID: <1337816341-30743-1-git-send-email-aarcange@redhat.com>
|
|
||||||
References: <20120518230028.GF32479@redhat.com>
|
|
||||||
NNTP-Posting-Host: plane.gmane.org
|
|
||||||
X-Trace: dough.gmane.org 1337816354 18906 80.91.229.3 (23 May 2012 23:39:14 GMT)
|
|
||||||
X-Complaints-To: usenet@dough.gmane.org
|
|
||||||
NNTP-Posting-Date: Wed, 23 May 2012 23:39:14 +0000 (UTC)
|
|
||||||
Cc: Andrew Morton <akpm@linux-foundation.org>, Mel Gorman <mgorman@suse.de>,
|
|
||||||
Hugh Dickins <hughd@google.com>, Larry Woodman <lwoodman@redhat.com>,
|
|
||||||
Petr Matousek <pmatouse@redhat.com>,
|
|
||||||
Ulrich Obergfell <uobergfe@redhat.com>, Rik van Riel <riel@redhat.com>
|
|
||||||
To: linux-mm@kvack.org
|
|
||||||
Original-X-From: owner-linux-mm@kvack.org Thu May 24 01:39:12 2012
|
|
||||||
Return-path: <owner-linux-mm@kvack.org>
|
|
||||||
Envelope-to: glkm-linux-mm-2@m.gmane.org
|
|
||||||
Original-Received: from kanga.kvack.org ([205.233.56.17])
|
|
||||||
by plane.gmane.org with esmtp (Exim 4.69)
|
|
||||||
(envelope-from <owner-linux-mm@kvack.org>)
|
|
||||||
id 1SXL94-0002ub-3P
|
|
||||||
for glkm-linux-mm-2@m.gmane.org; Thu, 24 May 2012 01:39:10 +0200
|
|
||||||
Original-Received: by kanga.kvack.org (Postfix)
|
|
||||||
id 1684A6B0083; Wed, 23 May 2012 19:39:09 -0400 (EDT)
|
|
||||||
Delivered-To: linux-mm-outgoing@kvack.org
|
|
||||||
Original-Received: by kanga.kvack.org (Postfix, from userid 40)
|
|
||||||
id 080DD6B0092; Wed, 23 May 2012 19:39:08 -0400 (EDT)
|
|
||||||
X-Original-To: int-list-linux-mm@kvack.org
|
|
||||||
Delivered-To: int-list-linux-mm@kvack.org
|
|
||||||
Original-Received: by kanga.kvack.org (Postfix, from userid 63042)
|
|
||||||
id C84046B00E7; Wed, 23 May 2012 19:39:08 -0400 (EDT)
|
|
||||||
X-Original-To: linux-mm@kvack.org
|
|
||||||
Delivered-To: linux-mm@kvack.org
|
|
||||||
Original-Received: from psmtp.com (na3sys010amx119.postini.com [74.125.245.119])
|
|
||||||
by kanga.kvack.org (Postfix) with SMTP id 0B2DC6B0083
|
|
||||||
for <linux-mm@kvack.org>; Wed, 23 May 2012 19:39:07 -0400 (EDT)
|
|
||||||
Original-Received: from mx1.redhat.com ([209.132.183.28]) (using TLSv1) by na3sys010amx119.postini.com ([74.125.244.10]) with SMTP;
|
|
||||||
Wed, 23 May 2012 18:39:08 CDT
|
|
||||||
Original-Received: from int-mx12.intmail.prod.int.phx2.redhat.com (int-mx12.intmail.prod.int.phx2.redhat.com [10.5.11.25])
|
|
||||||
by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id q4NNd3dP002492
|
|
||||||
(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK);
|
|
||||||
Wed, 23 May 2012 19:39:03 -0400
|
|
||||||
Original-Received: from random.random (ovpn-113-72.phx2.redhat.com [10.3.113.72])
|
|
||||||
by int-mx12.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id q4NNd1P7012233;
|
|
||||||
Wed, 23 May 2012 19:39:02 -0400
|
|
||||||
In-Reply-To: <20120518230028.GF32479@redhat.com>
|
|
||||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.11.25
|
|
||||||
X-pstn-neptune: 0/0/0.00/0
|
|
||||||
X-pstn-levels: (S:99.90000/99.90000 CV:99.9000 FC:95.5390 LC:95.5390 R:95.9108 P:95.9108 M:97.0282 C:98.6951 )
|
|
||||||
X-pstn-dkim: 0 skipped:not-enabled
|
|
||||||
X-pstn-settings: 3 (1.0000:1.0000) s cv gt3 gt2 gt1 r p m c
|
|
||||||
X-pstn-addresses: from <aarcange@redhat.com> [db-null]
|
|
||||||
X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.2
|
|
||||||
Original-Sender: owner-linux-mm@kvack.org
|
|
||||||
Precedence: bulk
|
|
||||||
X-Loop: owner-majordomo@kvack.org
|
|
||||||
List-ID: <linux-mm.kvack.org>
|
|
||||||
Xref: news.gmane.org gmane.linux.kernel.mm:78936
|
|
||||||
Archived-At: <http://permalink.gmane.org/gmane.linux.kernel.mm/78936>
|
|
||||||
|
|
||||||
When holding the mmap_sem for reading, pmd_offset_map_lock should only
|
|
||||||
run on a pmd_t that has been read atomically from the pmdp
|
|
||||||
pointer, otherwise we may read only half of it leading to this crash.
|
|
||||||
|
|
||||||
PID: 11679 TASK: f06e8000 CPU: 3 COMMAND: "do_race_2_panic"
|
|
||||||
#0 [f06a9dd8] crash_kexec at c049b5ec
|
|
||||||
#1 [f06a9e2c] oops_end at c083d1c2
|
|
||||||
#2 [f06a9e40] no_context at c0433ded
|
|
||||||
#3 [f06a9e64] bad_area_nosemaphore at c043401a
|
|
||||||
#4 [f06a9e6c] __do_page_fault at c0434493
|
|
||||||
#5 [f06a9eec] do_page_fault at c083eb45
|
|
||||||
#6 [f06a9f04] error_code (via page_fault) at c083c5d5
|
|
||||||
EAX: 01fb470c EBX: fff35000 ECX: 00000003 EDX: 00000100 EBP:
|
|
||||||
00000000
|
|
||||||
DS: 007b ESI: 9e201000 ES: 007b EDI: 01fb4700 GS: 00e0
|
|
||||||
CS: 0060 EIP: c083bc14 ERR: ffffffff EFLAGS: 00010246
|
|
||||||
#7 [f06a9f38] _spin_lock at c083bc14
|
|
||||||
#8 [f06a9f44] sys_mincore at c0507b7d
|
|
||||||
#9 [f06a9fb0] system_call at c083becd
|
|
||||||
start len
|
|
||||||
EAX: ffffffda EBX: 9e200000 ECX: 00001000 EDX: 6228537f
|
|
||||||
DS: 007b ESI: 00000000 ES: 007b EDI: 003d0f00
|
|
||||||
SS: 007b ESP: 62285354 EBP: 62285388 GS: 0033
|
|
||||||
CS: 0073 EIP: 00291416 ERR: 000000da EFLAGS: 00000286
|
|
||||||
|
|
||||||
This should be a longstanding bug affecting x86 32bit PAE without
|
|
||||||
THP. Only archs with 64bit large pmd_t and 32bit unsigned long should
|
|
||||||
be affected.
|
|
||||||
|
|
||||||
With THP enabled the barrier() in
|
|
||||||
pmd_none_or_trans_huge_or_clear_bad() would partly hide the bug when
|
|
||||||
the pmd transition from none to stable, by forcing a re-read of the
|
|
||||||
*pmd in pmd_offset_map_lock, but when THP is enabled a new set of
|
|
||||||
problems arises from the fact that the pmd could then transition freely in any of the
|
|
||||||
none, pmd_trans_huge or pmd_trans_stable states. So making the barrier
|
|
||||||
in pmd_none_or_trans_huge_or_clear_bad() unconditional isn't a good idea
|
|
||||||
and it would be a flakey solution.
|
|
||||||
|
|
||||||
This should be fully fixed by introducing a pmd_read_atomic that reads
|
|
||||||
the pmd in order with THP disabled, or by reading the pmd atomically
|
|
||||||
with cmpxchg8b with THP enabled.
|
|
||||||
|
|
||||||
Luckily this new race condition only triggers in the places that must
|
|
||||||
already be covered by pmd_none_or_trans_huge_or_clear_bad() so the fix
|
|
||||||
is localized there but this bug is not related to THP.
|
|
||||||
|
|
||||||
NOTE: this can trigger on x86 32bit systems with PAE enabled with more
|
|
||||||
than 4G of ram, otherwise the high part of the pmd will never risk to
|
|
||||||
be truncated because it would be zero at all times, in turn so hiding
|
|
||||||
the SMP race.
|
|
||||||
|
|
||||||
This bug was discovered and fully debugged by Ulrich, quote:
|
|
||||||
|
|
||||||
----
|
|
||||||
[..]
|
|
||||||
pmd_none_or_trans_huge_or_clear_bad() loads the content of edx and
|
|
||||||
eax.
|
|
||||||
|
|
||||||
496 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t
|
|
||||||
*pmd)
|
|
||||||
497 {
|
|
||||||
498 /* depend on compiler for an atomic pmd read */
|
|
||||||
499 pmd_t pmdval = *pmd;
|
|
||||||
|
|
||||||
// edi = pmd pointer
|
|
||||||
0xc0507a74 <sys_mincore+548>: mov 0x8(%esp),%edi
|
|
||||||
...
|
|
||||||
// edx = PTE page table high address
|
|
||||||
0xc0507a84 <sys_mincore+564>: mov 0x4(%edi),%edx
|
|
||||||
...
|
|
||||||
// eax = PTE page table low address
|
|
||||||
0xc0507a8e <sys_mincore+574>: mov (%edi),%eax
|
|
||||||
|
|
||||||
[..]
|
|
||||||
|
|
||||||
Please note that the PMD is not read atomically. These are two "mov"
|
|
||||||
instructions where the high order bits of the PMD entry are fetched
|
|
||||||
first. Hence, the above machine code is prone to the following race.
|
|
||||||
|
|
||||||
- The PMD entry {high|low} is 0x0000000000000000.
|
|
||||||
The "mov" at 0xc0507a84 loads 0x00000000 into edx.
|
|
||||||
|
|
||||||
- A page fault (on another CPU) sneaks in between the two "mov"
|
|
||||||
instructions and instantiates the PMD.
|
|
||||||
|
|
||||||
- The PMD entry {high|low} is now 0x00000003fda38067.
|
|
||||||
The "mov" at 0xc0507a8e loads 0xfda38067 into eax.
|
|
||||||
----
|
|
||||||
|
|
||||||
Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
|
|
||||||
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
|
|
||||||
---
|
|
||||||
arch/x86/include/asm/pgtable-3level.h | 50 +++++++++++++++++++++++++++++++++
|
|
||||||
include/asm-generic/pgtable.h | 22 +++++++++++++-
|
|
||||||
2 files changed, 70 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
|
|
||||||
index effff47..43876f1 100644
|
|
||||||
--- a/arch/x86/include/asm/pgtable-3level.h
|
|
||||||
+++ b/arch/x86/include/asm/pgtable-3level.h
|
|
||||||
@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
|
|
||||||
ptep->pte_low = pte.pte_low;
|
|
||||||
}
|
|
||||||
|
|
||||||
+#define pmd_read_atomic pmd_read_atomic
|
|
||||||
+/*
|
|
||||||
+ * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
|
|
||||||
+ * a "*pmdp" dereference done by gcc. Problem is, in certain places
|
|
||||||
+ * where pte_offset_map_lock is called, concurrent page faults are
|
|
||||||
+ * allowed, if the mmap_sem is held for reading. An example is mincore
|
|
||||||
+ * vs page faults vs MADV_DONTNEED. On the page fault side
|
|
||||||
+ * pmd_populate rightfully does a set_64bit, but if we're reading the
|
|
||||||
+ * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
|
|
||||||
+ * because gcc will not read the 64bit of the pmd atomically. To fix
|
|
||||||
+ * this all places running pmd_offset_map_lock() while holding the
|
|
||||||
+ * mmap_sem in read mode, shall read the pmdp pointer using this
|
|
||||||
+ * function to know if the pmd is null or not, and in turn to know if
|
|
||||||
+ * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
|
|
||||||
+ * operations.
|
|
||||||
+ *
|
|
||||||
+ * Without THP if the mmap_sem is held for reading, the
|
|
||||||
+ * pmd can only transition from null to not null while pmd_read_atomic runs.
|
|
||||||
+ * So there's no need of literally reading it atomically.
|
|
||||||
+ *
|
|
||||||
+ * With THP if the mmap_sem is held for reading, the pmd can become
|
|
||||||
+ * THP or null or point to a pte (and in turn become "stable") at any
|
|
||||||
+ * time under pmd_read_atomic, so it's mandatory to read it atomically
|
|
||||||
+ * with cmpxchg8b.
|
|
||||||
+ */
|
|
||||||
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
|
|
||||||
+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
|
||||||
+{
|
|
||||||
+ pmdval_t ret;
|
|
||||||
+ u32 *tmp = (u32 *)pmdp;
|
|
||||||
+
|
|
||||||
+ ret = (pmdval_t) (*tmp);
|
|
||||||
+ if (ret) {
|
|
||||||
+ /*
|
|
||||||
+ * If the low part is null, we must not read the high part
|
|
||||||
+ * or we can end up with a partial pmd.
|
|
||||||
+ */
|
|
||||||
+ smp_rmb();
|
|
||||||
+ ret |= ((pmdval_t)*(tmp + 1)) << 32;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return (pmd_t) { ret };
|
|
||||||
+}
|
|
||||||
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
||||||
+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
|
||||||
+{
|
|
||||||
+ return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
|
|
||||||
+}
|
|
||||||
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
||||||
+
|
|
||||||
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
|
|
||||||
{
|
|
||||||
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
|
|
||||||
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
|
|
||||||
index 125c54e..fa596d9 100644
|
|
||||||
--- a/include/asm-generic/pgtable.h
|
|
||||||
+++ b/include/asm-generic/pgtable.h
|
|
||||||
@@ -446,6 +446,18 @@ static inline int pmd_write(pmd_t pmd)
|
|
||||||
#endif /* __HAVE_ARCH_PMD_WRITE */
|
|
||||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
||||||
|
|
||||||
+#ifndef pmd_read_atomic
|
|
||||||
+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
|
||||||
+{
|
|
||||||
+ /*
|
|
||||||
+ * Depend on compiler for an atomic pmd read. NOTE: this is
|
|
||||||
+ * only going to work, if the pmdval_t isn't larger than
|
|
||||||
+ * an unsigned long.
|
|
||||||
+ */
|
|
||||||
+ return *pmdp;
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* This function is meant to be used by sites walking pagetables with
|
|
||||||
* the mmap_sem hold in read mode to protect against MADV_DONTNEED and
|
|
||||||
@@ -459,11 +471,17 @@ static inline int pmd_write(pmd_t pmd)
|
|
||||||
* undefined so behaving like if the pmd was none is safe (because it
|
|
||||||
* can return none anyway). The compiler level barrier() is critically
|
|
||||||
* important to compute the two checks atomically on the same pmdval.
|
|
||||||
+ *
|
|
||||||
+ * For 32bit kernels with a 64bit large pmd_t this automatically takes
|
|
||||||
+ * care of reading the pmd atomically to avoid SMP race conditions
|
|
||||||
+ * against pmd_populate() when the mmap_sem is held for reading by the
|
|
||||||
+ * caller (a special atomic read not done by "gcc" as in the generic
|
|
||||||
+ * version above, is also needed when THP is disabled because the page
|
|
||||||
+ * fault can populate the pmd from under us).
|
|
||||||
*/
|
|
||||||
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
|
|
||||||
{
|
|
||||||
- /* depend on compiler for an atomic pmd read */
|
|
||||||
- pmd_t pmdval = *pmd;
|
|
||||||
+ pmd_t pmdval = pmd_read_atomic(pmd);
|
|
||||||
/*
|
|
||||||
* The barrier will stabilize the pmdval in a register or on
|
|
||||||
* the stack so that it will stop changing under the code.
|
|
||||||
|
|
||||||
--
|
|
||||||
To unsubscribe, send a message with 'unsubscribe linux-mm' in
|
|
||||||
the body to majordomo@kvack.org. For more info on Linux MM,
|
|
||||||
see: http://www.linux-mm.org/ .
|
|
||||||
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
|
|
||||||
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user