Linux v3.3-6972-ge22057c
This commit is contained in:
parent
d5a077e500
commit
62c169cbc3
@ -1,113 +0,0 @@
|
|||||||
From davej Thu Mar 22 16:38:38 2012
|
|
||||||
Return-Path: linux-kernel-owner@vger.kernel.org
|
|
||||||
X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on
|
|
||||||
gelk.kernelslacker.org
|
|
||||||
X-Spam-Level:
|
|
||||||
X-Spam-Status: No, score=-1.2 required=5.0 tests=KB_DATE_CONTAINS_TAB,
|
|
||||||
RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD,UNPARSEABLE_RELAY autolearn=unavailable
|
|
||||||
version=3.3.2
|
|
||||||
Received: from mail.corp.redhat.com [10.5.5.51]
|
|
||||||
by gelk.kernelslacker.org with IMAP (fetchmail-6.3.21)
|
|
||||||
for <davej@localhost> (single-drop); Thu, 22 Mar 2012 16:38:38 -0400 (EDT)
|
|
||||||
Received: from zmta02.collab.prod.int.phx2.redhat.com (LHLO
|
|
||||||
zmta02.collab.prod.int.phx2.redhat.com) (10.5.5.32) by
|
|
||||||
zmail11.collab.prod.int.phx2.redhat.com with LMTP; Thu, 22 Mar 2012
|
|
||||||
16:37:12 -0400 (EDT)
|
|
||||||
Received: from localhost (localhost.localdomain [127.0.0.1])
|
|
||||||
by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id BE4B31280F5;
|
|
||||||
Thu, 22 Mar 2012 16:37:12 -0400 (EDT)
|
|
||||||
X-Quarantine-ID: <rVyHUDnYJs0w>
|
|
||||||
Received: from zmta02.collab.prod.int.phx2.redhat.com ([127.0.0.1])
|
|
||||||
by localhost (zmta02.collab.prod.int.phx2.redhat.com [127.0.0.1]) (amavisd-new, port 10024)
|
|
||||||
with ESMTP id rVyHUDnYJs0w; Thu, 22 Mar 2012 16:37:12 -0400 (EDT)
|
|
||||||
Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24])
|
|
||||||
by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id 34CCC1280EF;
|
|
||||||
Thu, 22 Mar 2012 16:37:12 -0400 (EDT)
|
|
||||||
Received: from mx1.redhat.com (ext-mx14.extmail.prod.ext.phx2.redhat.com [10.5.110.19])
|
|
||||||
by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id q2MKbBbO012811;
|
|
||||||
Thu, 22 Mar 2012 16:37:11 -0400
|
|
||||||
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
|
|
||||||
by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id q2MIJPCS018091;
|
|
||||||
Thu, 22 Mar 2012 16:37:10 -0400
|
|
||||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
|
||||||
id S1759738Ab2CVUhD (ORCPT <rfc822;agordeev@redhat.com> + 54 others);
|
|
||||||
Thu, 22 Mar 2012 16:37:03 -0400
|
|
||||||
Received: from zeniv.linux.org.uk ([195.92.253.2]:35901 "EHLO
|
|
||||||
ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
|
||||||
with ESMTP id S1758619Ab2CVUg7 (ORCPT
|
|
||||||
<rfc822;linux-kernel@vger.kernel.org>);
|
|
||||||
Thu, 22 Mar 2012 16:36:59 -0400
|
|
||||||
Received: from viro by ZenIV.linux.org.uk with local (Exim 4.76 #1 (Red Hat Linux))
|
|
||||||
id 1SAokk-0008Fi-MR; Thu, 22 Mar 2012 20:36:58 +0000
|
|
||||||
Date: Thu, 22 Mar 2012 20:36:58 +0000
|
|
||||||
From: Al Viro <viro@ZenIV.linux.org.uk>
|
|
||||||
To: Linus Torvalds <torvalds@linux-foundation.org>
|
|
||||||
Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xensource.com,
|
|
||||||
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Subject: Re: Regression introduced by
|
|
||||||
bfcfaa77bdf0f775263e906015982a608df01c76 (vfs: use 'unsigned long' accesses
|
|
||||||
for dcache name comparison and hashing)
|
|
||||||
Message-ID: <20120322203658.GC6589@ZenIV.linux.org.uk>
|
|
||||||
References: <20120322183845.GA17264@phenom.dumpdata.com>
|
|
||||||
<20120322200918.GZ6589@ZenIV.linux.org.uk>
|
|
||||||
<20120322202445.GB6589@ZenIV.linux.org.uk>
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=us-ascii
|
|
||||||
Content-Disposition: inline
|
|
||||||
In-Reply-To: <20120322202445.GB6589@ZenIV.linux.org.uk>
|
|
||||||
User-Agent: Mutt/1.5.21 (2010-09-15)
|
|
||||||
Sender: linux-kernel-owner@vger.kernel.org
|
|
||||||
Precedence: bulk
|
|
||||||
List-ID: <linux-kernel.vger.kernel.org>
|
|
||||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
|
||||||
X-RedHat-Spam-Score: -5.01 (RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD)
|
|
||||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24
|
|
||||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.110.19
|
|
||||||
Status: RO
|
|
||||||
Content-Length: 1440
|
|
||||||
Lines: 43
|
|
||||||
|
|
||||||
On Thu, Mar 22, 2012 at 08:24:45PM +0000, Al Viro wrote:
|
|
||||||
>
|
|
||||||
> OK, full_name_hash()/hash_name() definitely have a mismatch and it's on the
|
|
||||||
> names of length 8*n: trivial experiment shows that we have
|
|
||||||
> name hash_name full_name_hash
|
|
||||||
> a 61 61
|
|
||||||
> ab 6261 6261
|
|
||||||
> abc 636261 636261
|
|
||||||
> abcd 64636261 64636261
|
|
||||||
> abcdabc 64c6c4c2 64c6c4c2
|
|
||||||
> abcdabcd efcead5 c8c6c4c2
|
|
||||||
> abcdabcd9 efceb0e efceb0e
|
|
||||||
>
|
|
||||||
> Linus, which way do you prefer to shift it? Should hash_name() change to
|
|
||||||
> match full_name_hash() or should it be the other way round?
|
|
||||||
>
|
|
||||||
> What happens is that you get multiplication by 9 and adding 0 in the former,
|
|
||||||
> after having added the last full word. In the latter we add the last full
|
|
||||||
> word, see that there's nothing left and bugger off.
|
|
||||||
|
|
||||||
Guys, could you check if this fixes it?
|
|
||||||
|
|
||||||
diff --git a/fs/namei.c b/fs/namei.c
|
|
||||||
index 13e6a1f..7451d6f8 100644
|
|
||||||
--- a/fs/namei.c
|
|
||||||
+++ b/fs/namei.c
|
|
||||||
@@ -1439,10 +1439,10 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
a = *(unsigned long *)name;
|
|
||||||
- hash *= 9;
|
|
||||||
if (len < sizeof(unsigned long))
|
|
||||||
break;
|
|
||||||
hash += a;
|
|
||||||
+ hash *= 9;
|
|
||||||
name += sizeof(unsigned long);
|
|
||||||
len -= sizeof(unsigned long);
|
|
||||||
if (!len)
|
|
||||||
--
|
|
||||||
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
|
|
||||||
the body of a message to majordomo@vger.kernel.org
|
|
||||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
|
||||||
Please read the FAQ at http://www.tux.org/lkml/
|
|
||||||
|
|
36
kernel.spec
36
kernel.spec
@ -62,7 +62,7 @@ Summary: The Linux kernel
|
|||||||
# For non-released -rc kernels, this will be appended after the rcX and
|
# For non-released -rc kernels, this will be appended after the rcX and
|
||||||
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
|
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
|
||||||
#
|
#
|
||||||
%global baserelease 2
|
%global baserelease 1
|
||||||
%global fedora_build %{baserelease}
|
%global fedora_build %{baserelease}
|
||||||
|
|
||||||
# base_sublevel is the kernel version we're starting with and patching
|
# base_sublevel is the kernel version we're starting with and patching
|
||||||
@ -95,7 +95,7 @@ Summary: The Linux kernel
|
|||||||
# The rc snapshot level
|
# The rc snapshot level
|
||||||
%define rcrev 0
|
%define rcrev 0
|
||||||
# The git snapshot level
|
# The git snapshot level
|
||||||
%define gitrev 1
|
%define gitrev 2
|
||||||
# Set rpm version accordingly
|
# Set rpm version accordingly
|
||||||
%define rpmversion 3.%{upstream_sublevel}.0
|
%define rpmversion 3.%{upstream_sublevel}.0
|
||||||
%endif
|
%endif
|
||||||
@ -653,8 +653,6 @@ Patch100: taint-vbox.patch
|
|||||||
Patch160: linux-2.6-32bit-mmap-exec-randomization.patch
|
Patch160: linux-2.6-32bit-mmap-exec-randomization.patch
|
||||||
Patch161: linux-2.6-i386-nx-emulation.patch
|
Patch161: linux-2.6-i386-nx-emulation.patch
|
||||||
|
|
||||||
Patch383: linux-2.6-defaults-aspm.patch
|
|
||||||
|
|
||||||
Patch390: linux-2.6-defaults-acpi-video.patch
|
Patch390: linux-2.6-defaults-acpi-video.patch
|
||||||
Patch391: linux-2.6-acpi-video-dos.patch
|
Patch391: linux-2.6-acpi-video-dos.patch
|
||||||
Patch394: linux-2.6-acpi-debug-infinite-loop.patch
|
Patch394: linux-2.6-acpi-debug-infinite-loop.patch
|
||||||
@ -682,7 +680,6 @@ Patch900: modsign-20111207.patch
|
|||||||
|
|
||||||
# virt + ksm patches
|
# virt + ksm patches
|
||||||
Patch1555: fix_xen_guest_on_old_EC2.patch
|
Patch1555: fix_xen_guest_on_old_EC2.patch
|
||||||
Patch1556: linux-3.3-virtio-scsi.patch
|
|
||||||
|
|
||||||
# DRM
|
# DRM
|
||||||
#atch1700: drm-edid-try-harder-to-fix-up-broken-headers.patch
|
#atch1700: drm-edid-try-harder-to-fix-up-broken-headers.patch
|
||||||
@ -708,9 +705,6 @@ Patch2901: linux-2.6-v4l-dvb-experimental.patch
|
|||||||
Patch4000: ext4-fix-resize-when-resizing-within-single-group.patch
|
Patch4000: ext4-fix-resize-when-resizing-within-single-group.patch
|
||||||
|
|
||||||
# NFSv4
|
# NFSv4
|
||||||
Patch1102: linux-3.3-newidmapper-01.patch
|
|
||||||
Patch1103: linux-3.3-newidmapper-02.patch
|
|
||||||
Patch1104: linux-3.3-newidmapper-03.patch
|
|
||||||
|
|
||||||
# patches headed upstream
|
# patches headed upstream
|
||||||
Patch12016: disable-i8042-check-on-apple-mac.patch
|
Patch12016: disable-i8042-check-on-apple-mac.patch
|
||||||
@ -734,8 +728,6 @@ Patch21010: highbank-export-clock-functions.patch
|
|||||||
|
|
||||||
Patch21070: ext4-Support-check-none-nocheck-mount-options.patch
|
Patch21070: ext4-Support-check-none-nocheck-mount-options.patch
|
||||||
|
|
||||||
Patch21092: udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch
|
|
||||||
|
|
||||||
Patch21094: power-x86-destdir.patch
|
Patch21094: power-x86-destdir.patch
|
||||||
|
|
||||||
#rhbz 788260
|
#rhbz 788260
|
||||||
@ -744,7 +736,6 @@ Patch21233: jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch
|
|||||||
#rhbz 754518
|
#rhbz 754518
|
||||||
Patch21235: scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
|
Patch21235: scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
|
||||||
|
|
||||||
Patch21250: mcelog-rcu-splat.patch
|
|
||||||
Patch21260: x86-Avoid-invoking-RCU-when-CPU-is-idle.patch
|
Patch21260: x86-Avoid-invoking-RCU-when-CPU-is-idle.patch
|
||||||
|
|
||||||
#rhbz 727865 730007
|
#rhbz 727865 730007
|
||||||
@ -753,9 +744,6 @@ Patch21300: ACPICA-Fix-regression-in-FADT-revision-checks.patch
|
|||||||
#rhbz 728478
|
#rhbz 728478
|
||||||
Patch21302: sony-laptop-Enable-keyboard-backlight-by-default.patch
|
Patch21302: sony-laptop-Enable-keyboard-backlight-by-default.patch
|
||||||
|
|
||||||
#rhbz 803809 CVE-2012-1179
|
|
||||||
Patch21304: mm-thp-fix-pmd_bad-triggering.patch
|
|
||||||
|
|
||||||
#rhbz 804007
|
#rhbz 804007
|
||||||
Patch21305: mac80211-fix-possible-tid_rx-reorder_timer-use-after-free.patch
|
Patch21305: mac80211-fix-possible-tid_rx-reorder_timer-use-after-free.patch
|
||||||
|
|
||||||
@ -766,8 +754,6 @@ Patch21400: unhandled-irqs-switch-to-polling.patch
|
|||||||
|
|
||||||
Patch22000: weird-root-dentry-name-debug.patch
|
Patch22000: weird-root-dentry-name-debug.patch
|
||||||
|
|
||||||
Patch23000: fix-dentry-hash.patch
|
|
||||||
|
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
|
BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
|
||||||
@ -1344,9 +1330,6 @@ ApplyPatch ext4-fix-resize-when-resizing-within-single-group.patch
|
|||||||
# eCryptfs
|
# eCryptfs
|
||||||
|
|
||||||
# NFSv4
|
# NFSv4
|
||||||
ApplyPatch linux-3.3-newidmapper-01.patch
|
|
||||||
ApplyPatch linux-3.3-newidmapper-02.patch
|
|
||||||
ApplyPatch linux-3.3-newidmapper-03.patch
|
|
||||||
|
|
||||||
# USB
|
# USB
|
||||||
|
|
||||||
@ -1362,8 +1345,6 @@ ApplyPatch acpi-sony-nonvs-blacklist.patch
|
|||||||
#
|
#
|
||||||
# PCI
|
# PCI
|
||||||
#
|
#
|
||||||
# enable ASPM by default on hardware we expect to work
|
|
||||||
ApplyPatch linux-2.6-defaults-aspm.patch
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# SCSI Bits.
|
# SCSI Bits.
|
||||||
@ -1433,7 +1414,6 @@ ApplyOptionalPatch linux-2.6-v4l-dvb-experimental.patch
|
|||||||
|
|
||||||
# Patches headed upstream
|
# Patches headed upstream
|
||||||
ApplyPatch disable-i8042-check-on-apple-mac.patch
|
ApplyPatch disable-i8042-check-on-apple-mac.patch
|
||||||
ApplyPatch linux-3.3-virtio-scsi.patch
|
|
||||||
|
|
||||||
# rhbz#605888
|
# rhbz#605888
|
||||||
ApplyPatch dmar-disable-when-ricoh-multifunction.patch
|
ApplyPatch dmar-disable-when-ricoh-multifunction.patch
|
||||||
@ -1447,8 +1427,6 @@ ApplyPatch lis3-improve-handling-of-null-rate.patch
|
|||||||
|
|
||||||
ApplyPatch ext4-Support-check-none-nocheck-mount-options.patch
|
ApplyPatch ext4-Support-check-none-nocheck-mount-options.patch
|
||||||
|
|
||||||
ApplyPatch udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch
|
|
||||||
|
|
||||||
ApplyPatch power-x86-destdir.patch
|
ApplyPatch power-x86-destdir.patch
|
||||||
|
|
||||||
#rhbz 788269
|
#rhbz 788269
|
||||||
@ -1457,8 +1435,6 @@ ApplyPatch jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch
|
|||||||
#rhbz 754518
|
#rhbz 754518
|
||||||
ApplyPatch scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
|
ApplyPatch scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
|
||||||
|
|
||||||
ApplyPatch mcelog-rcu-splat.patch
|
|
||||||
|
|
||||||
#rhbz 727865 730007
|
#rhbz 727865 730007
|
||||||
ApplyPatch ACPICA-Fix-regression-in-FADT-revision-checks.patch
|
ApplyPatch ACPICA-Fix-regression-in-FADT-revision-checks.patch
|
||||||
|
|
||||||
@ -1475,11 +1451,6 @@ ApplyPatch unhandled-irqs-switch-to-polling.patch
|
|||||||
|
|
||||||
ApplyPatch weird-root-dentry-name-debug.patch
|
ApplyPatch weird-root-dentry-name-debug.patch
|
||||||
|
|
||||||
ApplyPatch fix-dentry-hash.patch
|
|
||||||
|
|
||||||
#rhbz 803809 CVE-2012-1179
|
|
||||||
ApplyPatch mm-thp-fix-pmd_bad-triggering.patch
|
|
||||||
|
|
||||||
#Highbank clock functions
|
#Highbank clock functions
|
||||||
ApplyPatch highbank-export-clock-functions.patch
|
ApplyPatch highbank-export-clock-functions.patch
|
||||||
|
|
||||||
@ -2336,6 +2307,9 @@ fi
|
|||||||
# ||----w |
|
# ||----w |
|
||||||
# || ||
|
# || ||
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Mar 26 2012 Justin M. Forbes <jforbes@redhat.com> - 3.4.0-0.rc0.git2.1
|
||||||
|
- Linux v3.3-6972-ge22057c
|
||||||
|
|
||||||
* Thu Mar 22 2012 Dave Jones <davej@redhat.com> 3.4.0-0.rc0.git1.2
|
* Thu Mar 22 2012 Dave Jones <davej@redhat.com> 3.4.0-0.rc0.git1.2
|
||||||
- Fix occasional EBADMSG from signed modules. (rhbz 804345)
|
- Fix occasional EBADMSG from signed modules. (rhbz 804345)
|
||||||
|
|
||||||
|
@ -1,12 +0,0 @@
|
|||||||
diff -up linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg linux-2.6.30.noarch/drivers/pci/pcie/aspm.c
|
|
||||||
--- linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg 2009-07-16 22:01:11.000000000 +0100
|
|
||||||
+++ linux-2.6.30.noarch/drivers/pci/pcie/aspm.c 2009-07-16 22:01:30.000000000 +0100
|
|
||||||
@@ -65,7 +65,7 @@ static LIST_HEAD(link_list);
|
|
||||||
#define POLICY_DEFAULT 0 /* BIOS default setting */
|
|
||||||
#define POLICY_PERFORMANCE 1 /* high performance */
|
|
||||||
#define POLICY_POWERSAVE 2 /* high power saving */
|
|
||||||
-static int aspm_policy;
|
|
||||||
+static int aspm_policy = POLICY_POWERSAVE;
|
|
||||||
static const char *policy_str[] = {
|
|
||||||
[POLICY_DEFAULT] = "default",
|
|
||||||
[POLICY_PERFORMANCE] = "performance",
|
|
@ -1,217 +0,0 @@
|
|||||||
commit e6499c6f4b5f56a16f8b8ef60529c1da28b13aea
|
|
||||||
Author: Bryan Schumaker <bjschuma@netapp.com>
|
|
||||||
Date: Thu Jan 26 16:54:23 2012 -0500
|
|
||||||
|
|
||||||
NFS: Fall back on old idmapper if request_key() fails
|
|
||||||
|
|
||||||
This patch removes the CONFIG_NFS_USE_NEW_IDMAPPER compile option.
|
|
||||||
First, the idmapper will attempt to map the id using /sbin/request-key
|
|
||||||
and nfsidmap. If this fails (if /etc/request-key.conf is not configured
|
|
||||||
properly) then the idmapper will call the legacy code to perform the
|
|
||||||
mapping. I left a comment stating where the legacy code begins to make
|
|
||||||
it easier for somebody to remove in the future.
|
|
||||||
|
|
||||||
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
|
|
||||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
|
||||||
|
|
||||||
diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c
|
|
||||||
--- linux-3.2.noarch/fs/nfs/idmap.c.orig 2012-01-27 10:07:07.209851446 -0500
|
|
||||||
+++ linux-3.2.noarch/fs/nfs/idmap.c 2012-01-27 10:15:42.914563082 -0500
|
|
||||||
@@ -142,8 +142,6 @@ static int nfs_map_numeric_to_string(__u
|
|
||||||
return snprintf(buf, buflen, "%u", id);
|
|
||||||
}
|
|
||||||
|
|
||||||
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
|
|
||||||
-
|
|
||||||
#include <linux/cred.h>
|
|
||||||
#include <linux/sunrpc/sched.h>
|
|
||||||
#include <linux/nfs4.h>
|
|
||||||
@@ -328,43 +326,7 @@ static int nfs_idmap_lookup_id(const cha
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
|
|
||||||
-{
|
|
||||||
- if (nfs_map_string_to_numeric(name, namelen, uid))
|
|
||||||
- return 0;
|
|
||||||
- return nfs_idmap_lookup_id(name, namelen, "uid", uid);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
|
|
||||||
-{
|
|
||||||
- if (nfs_map_string_to_numeric(name, namelen, gid))
|
|
||||||
- return 0;
|
|
||||||
- return nfs_idmap_lookup_id(name, namelen, "gid", gid);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
|
|
||||||
-{
|
|
||||||
- int ret = -EINVAL;
|
|
||||||
-
|
|
||||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
|
||||||
- ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
|
|
||||||
- if (ret < 0)
|
|
||||||
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
|
|
||||||
-{
|
|
||||||
- int ret = -EINVAL;
|
|
||||||
-
|
|
||||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
|
||||||
- ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
|
|
||||||
- if (ret < 0)
|
|
||||||
- ret = nfs_map_numeric_to_string(gid, buf, buflen);
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
|
|
||||||
-
|
|
||||||
+/* idmap classic begins here */
|
|
||||||
#include <linux/module.h>
|
|
||||||
#include <linux/mutex.h>
|
|
||||||
#include <linux/init.h>
|
|
||||||
@@ -796,19 +758,27 @@ static unsigned int fnvhash32(const void
|
|
||||||
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
|
|
||||||
{
|
|
||||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
|
||||||
+ int ret = -EINVAL;
|
|
||||||
|
|
||||||
if (nfs_map_string_to_numeric(name, namelen, uid))
|
|
||||||
return 0;
|
|
||||||
- return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
|
|
||||||
+ ret = nfs_idmap_lookup_id(name, namelen, "uid", uid);
|
|
||||||
+ if (ret < 0)
|
|
||||||
+ ret = nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
|
|
||||||
+ return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
|
|
||||||
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
|
|
||||||
{
|
|
||||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
|
||||||
+ int ret = -EINVAL;
|
|
||||||
|
|
||||||
- if (nfs_map_string_to_numeric(name, namelen, uid))
|
|
||||||
+ if (nfs_map_string_to_numeric(name, namelen, gid))
|
|
||||||
return 0;
|
|
||||||
- return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
|
|
||||||
+ ret = nfs_idmap_lookup_id(name, namelen, "gid", gid);
|
|
||||||
+ if (ret < 0)
|
|
||||||
+ ret = nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, gid);
|
|
||||||
+ return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
|
|
||||||
@@ -816,22 +786,26 @@ int nfs_map_uid_to_name(const struct nfs
|
|
||||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
|
||||||
int ret = -EINVAL;
|
|
||||||
|
|
||||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
|
||||||
- ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
|
|
||||||
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
|
|
||||||
+ ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
|
|
||||||
+ if (ret < 0)
|
|
||||||
+ ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
|
|
||||||
+ }
|
|
||||||
if (ret < 0)
|
|
||||||
ret = nfs_map_numeric_to_string(uid, buf, buflen);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
|
|
||||||
+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
|
|
||||||
{
|
|
||||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
|
||||||
int ret = -EINVAL;
|
|
||||||
|
|
||||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
|
||||||
- ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
|
|
||||||
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
|
|
||||||
+ ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
|
|
||||||
+ if (ret < 0)
|
|
||||||
+ ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, gid, buf);
|
|
||||||
+ }
|
|
||||||
if (ret < 0)
|
|
||||||
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
|
|
||||||
+ ret = nfs_map_numeric_to_string(gid, buf, buflen);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
-
|
|
||||||
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
|
|
||||||
diff -up linux-3.2.noarch/fs/nfs/Kconfig.orig linux-3.2.noarch/fs/nfs/Kconfig
|
|
||||||
--- linux-3.2.noarch/fs/nfs/Kconfig.orig 2012-01-04 18:55:44.000000000 -0500
|
|
||||||
+++ linux-3.2.noarch/fs/nfs/Kconfig 2012-01-27 10:15:42.913562572 -0500
|
|
||||||
@@ -132,14 +132,3 @@ config NFS_USE_KERNEL_DNS
|
|
||||||
select DNS_RESOLVER
|
|
||||||
select KEYS
|
|
||||||
default y
|
|
||||||
-
|
|
||||||
-config NFS_USE_NEW_IDMAPPER
|
|
||||||
- bool "Use the new idmapper upcall routine"
|
|
||||||
- depends on NFS_V4 && KEYS
|
|
||||||
- help
|
|
||||||
- Say Y here if you want NFS to use the new idmapper upcall functions.
|
|
||||||
- You will need /sbin/request-key (usually provided by the keyutils
|
|
||||||
- package). For details, read
|
|
||||||
- <file:Documentation/filesystems/nfs/idmapper.txt>.
|
|
||||||
-
|
|
||||||
- If you are unsure, say N.
|
|
||||||
diff -up linux-3.2.noarch/fs/nfs/sysctl.c.orig linux-3.2.noarch/fs/nfs/sysctl.c
|
|
||||||
--- linux-3.2.noarch/fs/nfs/sysctl.c.orig 2012-01-04 18:55:44.000000000 -0500
|
|
||||||
+++ linux-3.2.noarch/fs/nfs/sysctl.c 2012-01-27 10:15:42.914563082 -0500
|
|
||||||
@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = {
|
|
||||||
.extra1 = (int *)&nfs_set_port_min,
|
|
||||||
.extra2 = (int *)&nfs_set_port_max,
|
|
||||||
},
|
|
||||||
-#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
|
|
||||||
{
|
|
||||||
.procname = "idmap_cache_timeout",
|
|
||||||
.data = &nfs_idmap_cache_timeout,
|
|
||||||
@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = {
|
|
||||||
.mode = 0644,
|
|
||||||
.proc_handler = proc_dointvec_jiffies,
|
|
||||||
},
|
|
||||||
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
.procname = "nfs_mountpoint_timeout",
|
|
||||||
diff -up linux-3.2.noarch/include/linux/nfs_idmap.h.orig linux-3.2.noarch/include/linux/nfs_idmap.h
|
|
||||||
--- linux-3.2.noarch/include/linux/nfs_idmap.h.orig 2012-01-27 10:06:46.783643915 -0500
|
|
||||||
+++ linux-3.2.noarch/include/linux/nfs_idmap.h 2012-01-27 10:15:42.915563594 -0500
|
|
||||||
@@ -69,36 +69,11 @@ struct nfs_server;
|
|
||||||
struct nfs_fattr;
|
|
||||||
struct nfs4_string;
|
|
||||||
|
|
||||||
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
|
|
||||||
-
|
|
||||||
int nfs_idmap_init(void);
|
|
||||||
void nfs_idmap_quit(void);
|
|
||||||
-
|
|
||||||
-static inline int nfs_idmap_new(struct nfs_client *clp)
|
|
||||||
-{
|
|
||||||
- return 0;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static inline void nfs_idmap_delete(struct nfs_client *clp)
|
|
||||||
-{
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */
|
|
||||||
-
|
|
||||||
-static inline int nfs_idmap_init(void)
|
|
||||||
-{
|
|
||||||
- return 0;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static inline void nfs_idmap_quit(void)
|
|
||||||
-{
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
int nfs_idmap_new(struct nfs_client *);
|
|
||||||
void nfs_idmap_delete(struct nfs_client *);
|
|
||||||
|
|
||||||
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
|
|
||||||
-
|
|
||||||
void nfs_fattr_init_names(struct nfs_fattr *fattr,
|
|
||||||
struct nfs4_string *owner_name,
|
|
||||||
struct nfs4_string *group_name);
|
|
@ -1,97 +0,0 @@
|
|||||||
commit 3cd0f37a2cc9e4d6188df10041a2441eaa41d991
|
|
||||||
Author: Bryan Schumaker <bjschuma@netapp.com>
|
|
||||||
Date: Thu Jan 26 16:54:24 2012 -0500
|
|
||||||
|
|
||||||
NFS: Keep idmapper include files in one place
|
|
||||||
|
|
||||||
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
|
|
||||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
|
||||||
|
|
||||||
diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c
|
|
||||||
--- linux-3.2.noarch/fs/nfs/idmap.c.orig 2012-01-27 10:15:42.914563082 -0500
|
|
||||||
+++ linux-3.2.noarch/fs/nfs/idmap.c 2012-01-27 10:19:22.711401559 -0500
|
|
||||||
@@ -39,6 +39,36 @@
|
|
||||||
#include <linux/slab.h>
|
|
||||||
#include <linux/nfs_idmap.h>
|
|
||||||
#include <linux/nfs_fs.h>
|
|
||||||
+#include <linux/cred.h>
|
|
||||||
+#include <linux/sunrpc/sched.h>
|
|
||||||
+#include <linux/nfs4.h>
|
|
||||||
+#include <linux/nfs_fs_sb.h>
|
|
||||||
+#include <linux/keyctl.h>
|
|
||||||
+#include <linux/key-type.h>
|
|
||||||
+#include <linux/rcupdate.h>
|
|
||||||
+#include <linux/err.h>
|
|
||||||
+#include <keys/user-type.h>
|
|
||||||
+
|
|
||||||
+/* include files needed by legacy idmapper */
|
|
||||||
+#include <linux/module.h>
|
|
||||||
+#include <linux/mutex.h>
|
|
||||||
+#include <linux/init.h>
|
|
||||||
+#include <linux/socket.h>
|
|
||||||
+#include <linux/in.h>
|
|
||||||
+#include <linux/sched.h>
|
|
||||||
+#include <linux/sunrpc/clnt.h>
|
|
||||||
+#include <linux/workqueue.h>
|
|
||||||
+#include <linux/sunrpc/rpc_pipe_fs.h>
|
|
||||||
+#include <linux/nfs_fs.h>
|
|
||||||
+#include "nfs4_fs.h"
|
|
||||||
+
|
|
||||||
+#define NFS_UINT_MAXLEN 11
|
|
||||||
+#define IDMAP_HASH_SZ 128
|
|
||||||
+
|
|
||||||
+/* Default cache timeout is 10 minutes */
|
|
||||||
+unsigned int nfs_idmap_cache_timeout = 600 * HZ;
|
|
||||||
+const struct cred *id_resolver_cache;
|
|
||||||
+
|
|
||||||
|
|
||||||
/**
|
|
||||||
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
|
|
||||||
@@ -142,21 +172,6 @@ static int nfs_map_numeric_to_string(__u
|
|
||||||
return snprintf(buf, buflen, "%u", id);
|
|
||||||
}
|
|
||||||
|
|
||||||
-#include <linux/cred.h>
|
|
||||||
-#include <linux/sunrpc/sched.h>
|
|
||||||
-#include <linux/nfs4.h>
|
|
||||||
-#include <linux/nfs_fs_sb.h>
|
|
||||||
-#include <linux/keyctl.h>
|
|
||||||
-#include <linux/key-type.h>
|
|
||||||
-#include <linux/rcupdate.h>
|
|
||||||
-#include <linux/err.h>
|
|
||||||
-
|
|
||||||
-#include <keys/user-type.h>
|
|
||||||
-
|
|
||||||
-#define NFS_UINT_MAXLEN 11
|
|
||||||
-
|
|
||||||
-const struct cred *id_resolver_cache;
|
|
||||||
-
|
|
||||||
struct key_type key_type_id_resolver = {
|
|
||||||
.name = "id_resolver",
|
|
||||||
.instantiate = user_instantiate,
|
|
||||||
@@ -327,25 +342,6 @@ static int nfs_idmap_lookup_id(const cha
|
|
||||||
}
|
|
||||||
|
|
||||||
/* idmap classic begins here */
|
|
||||||
-#include <linux/module.h>
|
|
||||||
-#include <linux/mutex.h>
|
|
||||||
-#include <linux/init.h>
|
|
||||||
-#include <linux/socket.h>
|
|
||||||
-#include <linux/in.h>
|
|
||||||
-#include <linux/sched.h>
|
|
||||||
-#include <linux/sunrpc/clnt.h>
|
|
||||||
-#include <linux/workqueue.h>
|
|
||||||
-#include <linux/sunrpc/rpc_pipe_fs.h>
|
|
||||||
-
|
|
||||||
-#include <linux/nfs_fs.h>
|
|
||||||
-
|
|
||||||
-#include "nfs4_fs.h"
|
|
||||||
-
|
|
||||||
-#define IDMAP_HASH_SZ 128
|
|
||||||
-
|
|
||||||
-/* Default cache timeout is 10 minutes */
|
|
||||||
-unsigned int nfs_idmap_cache_timeout = 600 * HZ;
|
|
||||||
-
|
|
||||||
static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
|
|
||||||
{
|
|
||||||
char *endp;
|
|
@ -1,40 +0,0 @@
|
|||||||
commit a602bea3e7ccc5ce3da61d2c18245c4058983926
|
|
||||||
Author: Bryan Schumaker <bjschuma@netapp.com>
|
|
||||||
Date: Thu Jan 26 16:54:25 2012 -0500
|
|
||||||
|
|
||||||
NFS: Update idmapper documentation
|
|
||||||
|
|
||||||
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
|
|
||||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
|
||||||
|
|
||||||
diff -up linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt
|
|
||||||
--- linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig 2012-01-04 18:55:44.000000000 -0500
|
|
||||||
+++ linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt 2012-01-27 10:19:55.406740364 -0500
|
|
||||||
@@ -4,13 +4,21 @@ ID Mapper
|
|
||||||
=========
|
|
||||||
Id mapper is used by NFS to translate user and group ids into names, and to
|
|
||||||
translate user and group names into ids. Part of this translation involves
|
|
||||||
-performing an upcall to userspace to request the information. Id mapper will
|
|
||||||
-user request-key to perform this upcall and cache the result. The program
|
|
||||||
-/usr/sbin/nfs.idmap should be called by request-key, and will perform the
|
|
||||||
-translation and initialize a key with the resulting information.
|
|
||||||
+performing an upcall to userspace to request the information. There are two
|
|
||||||
+ways NFS could obtain this information: placing a call to /sbin/request-key
|
|
||||||
+or by placing a call to the rpc.idmap daemon.
|
|
||||||
+
|
|
||||||
+NFS will attempt to call /sbin/request-key first. If this succeeds, the
|
|
||||||
+result will be cached using the generic request-key cache. This call should
|
|
||||||
+only fail if /etc/request-key.conf is not configured for the id_resolver key
|
|
||||||
+type, see the "Configuring" section below if you wish to use the request-key
|
|
||||||
+method.
|
|
||||||
+
|
|
||||||
+If the call to /sbin/request-key fails (if /etc/request-key.conf is not
|
|
||||||
+configured with the id_resolver key type), then the idmapper will ask the
|
|
||||||
+legacy rpc.idmap daemon for the id mapping. This result will be stored
|
|
||||||
+in a custom NFS idmap cache.
|
|
||||||
|
|
||||||
- NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this
|
|
||||||
- feature.
|
|
||||||
|
|
||||||
===========
|
|
||||||
Configuring
|
|
@ -1,993 +0,0 @@
|
|||||||
From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001
|
|
||||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Date: Fri, 20 Jan 2012 17:27:20 +0100
|
|
||||||
Cc: <linux-scsi@vger.kernel.org>
|
|
||||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
|
||||||
Cc: kvm@vger.kernel.org
|
|
||||||
Cc: Pekka Enberg <penberg@kernel.org>
|
|
||||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>
|
|
||||||
Subject: [PATCH v5 0/3] virtio-scsi driver
|
|
||||||
|
|
||||||
This is the first implementation of the virtio-scsi driver, a virtual
|
|
||||||
HBA that will be supported by KVM. It implements a subset of the spec,
|
|
||||||
in particular it does not implement asynchronous notifications for either
|
|
||||||
LUN reset/removal/addition or CD-ROM media events, but it is already
|
|
||||||
functional and usable.
|
|
||||||
|
|
||||||
Other matching bits:
|
|
||||||
|
|
||||||
- spec at http://people.redhat.com/pbonzini/virtio-spec.pdf
|
|
||||||
|
|
||||||
- QEMU implementation at git://github.com/bonzini/qemu.git,
|
|
||||||
branch virtio-scsi
|
|
||||||
|
|
||||||
Please review. Getting this in 3.3 is starting to look like wishful thinking,
|
|
||||||
but the possibility of regressions is obviously zero so I'm still dreaming.
|
|
||||||
Otherwise, that would be 3.4.
|
|
||||||
|
|
||||||
Paolo Bonzini (3):
|
|
||||||
virtio-scsi: first version
|
|
||||||
virtio-scsi: add error handling
|
|
||||||
virtio-scsi: add power management support
|
|
||||||
|
|
||||||
v4->v5: change virtio id from 7 to 8
|
|
||||||
|
|
||||||
v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN;
|
|
||||||
fixed 32-bit compilation; added power management support;
|
|
||||||
adjusted calls to virtqueue_add_buf
|
|
||||||
|
|
||||||
drivers/scsi/Kconfig | 8 +
|
|
||||||
drivers/scsi/Makefile | 1 +
|
|
||||||
drivers/scsi/virtio_scsi.c | 594 +++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
include/linux/virtio_ids.h | 1 +
|
|
||||||
include/linux/virtio_scsi.h | 114 +++++++++
|
|
||||||
5 files changed, 718 insertions(+), 0 deletions(-)
|
|
||||||
create mode 100644 drivers/scsi/virtio_scsi.c
|
|
||||||
create mode 100644 include/linux/virtio_scsi.h
|
|
||||||
|
|
||||||
From 84ad93b7215e18ab1755a625ede0fb00175e79bb Mon Sep 17 00:00:00 2001
|
|
||||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Date: Tue, 29 Nov 2011 16:31:09 +0100
|
|
||||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org>
|
|
||||||
Subject: [PATCH v5 1/3] virtio-scsi: first version
|
|
||||||
|
|
||||||
The virtio-scsi HBA is the basis of an alternative storage stack
|
|
||||||
for QEMU-based virtual machines (including KVM). Compared to
|
|
||||||
virtio-blk it is more scalable (because it supports many LUNs
|
|
||||||
on a single PCI slot), more powerful (it more easily supports
|
|
||||||
passthrough of host devices to the guest) and more easily
|
|
||||||
extensible (new SCSI features implemented by QEMU should not
|
|
||||||
require updating the driver in the guest).
|
|
||||||
|
|
||||||
Cc: linux-scsi <linux-scsi@vger.kernel.org>
|
|
||||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
|
||||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Cc: kvm@vger.kernel.org
|
|
||||||
Acked-by: Pekka Enberg <penberg@kernel.org>
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
---
|
|
||||||
v4->v5: change virtio id from 7 to 8
|
|
||||||
|
|
||||||
v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN;
|
|
||||||
fixed 32-bit compilation; adjust call to virtqueue_add_buf
|
|
||||||
|
|
||||||
v2->v3: added mempool, formatting fixes
|
|
||||||
|
|
||||||
v1->v2: use dbg_dev, sdev_printk, scmd_printk
|
|
||||||
- renamed lock to vq_lock
|
|
||||||
- renamed cmd_vq to req_vq (and other similar changes)
|
|
||||||
- fixed missing break in VIRTIO_SCSI_S_OVERRUN
|
|
||||||
- added VIRTIO_SCSI_S_BUSY
|
|
||||||
- removed unused argument from virtscsi_map_cmd
|
|
||||||
- fixed two tabs that had slipped in
|
|
||||||
- moved max_sectors and cmd_per_lun from template to config space
|
|
||||||
- __attribute__((packed)) -> __packed
|
|
||||||
|
|
||||||
drivers/scsi/Kconfig | 8 +
|
|
||||||
drivers/scsi/Makefile | 1 +
|
|
||||||
drivers/scsi/virtio_scsi.c | 503 +++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
include/linux/virtio_ids.h | 1 +
|
|
||||||
include/linux/virtio_scsi.h | 114 ++++++++++
|
|
||||||
5 files changed, 627 insertions(+), 0 deletions(-)
|
|
||||||
create mode 100644 drivers/scsi/virtio_scsi.c
|
|
||||||
create mode 100644 include/linux/virtio_scsi.h
|
|
||||||
|
|
||||||
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
|
|
||||||
index 16570aa..827ebaf 100644
|
|
||||||
--- a/drivers/scsi/Kconfig
|
|
||||||
+++ b/drivers/scsi/Kconfig
|
|
||||||
@@ -1897,6 +1897,14 @@ config SCSI_BFA_FC
|
|
||||||
To compile this driver as a module, choose M here. The module will
|
|
||||||
be called bfa.
|
|
||||||
|
|
||||||
+config SCSI_VIRTIO
|
|
||||||
+ tristate "virtio-scsi support (EXPERIMENTAL)"
|
|
||||||
+ depends on EXPERIMENTAL && VIRTIO
|
|
||||||
+ help
|
|
||||||
+ This is the virtual HBA driver for virtio. If the kernel will
|
|
||||||
+ be used in a virtual machine, say Y or M.
|
|
||||||
+
|
|
||||||
+
|
|
||||||
endif # SCSI_LOWLEVEL
|
|
||||||
|
|
||||||
source "drivers/scsi/pcmcia/Kconfig"
|
|
||||||
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
|
|
||||||
index e4c1a69..ad24e06 100644
|
|
||||||
--- a/drivers/scsi/Makefile
|
|
||||||
+++ b/drivers/scsi/Makefile
|
|
||||||
@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_CXGB4_ISCSI) += libiscsi.o libiscsi_tcp.o cxgbi/
|
|
||||||
obj-$(CONFIG_SCSI_BNX2_ISCSI) += libiscsi.o bnx2i/
|
|
||||||
obj-$(CONFIG_BE2ISCSI) += libiscsi.o be2iscsi/
|
|
||||||
obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o
|
|
||||||
+obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o
|
|
||||||
obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o
|
|
||||||
obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o
|
|
||||||
|
|
||||||
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..3f87ae0
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/drivers/scsi/virtio_scsi.c
|
|
||||||
@@ -0,0 +1,503 @@
|
|
||||||
+/*
|
|
||||||
+ * Virtio SCSI HBA driver
|
|
||||||
+ *
|
|
||||||
+ * Copyright IBM Corp. 2010
|
|
||||||
+ * Copyright Red Hat, Inc. 2011
|
|
||||||
+ *
|
|
||||||
+ * Authors:
|
|
||||||
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
|
|
||||||
+ * Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
+ *
|
|
||||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
||||||
+ * See the COPYING file in the top-level directory.
|
|
||||||
+ *
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#include <linux/module.h>
|
|
||||||
+#include <linux/slab.h>
|
|
||||||
+#include <linux/mempool.h>
|
|
||||||
+#include <linux/virtio.h>
|
|
||||||
+#include <linux/virtio_ids.h>
|
|
||||||
+#include <linux/virtio_config.h>
|
|
||||||
+#include <linux/virtio_scsi.h>
|
|
||||||
+#include <scsi/scsi_host.h>
|
|
||||||
+#include <scsi/scsi_device.h>
|
|
||||||
+#include <scsi/scsi_cmnd.h>
|
|
||||||
+
|
|
||||||
+#define VIRTIO_SCSI_MEMPOOL_SZ 64
|
|
||||||
+
|
|
||||||
+/* Command queue element */
|
|
||||||
+struct virtio_scsi_cmd {
|
|
||||||
+ struct scsi_cmnd *sc;
|
|
||||||
+ union {
|
|
||||||
+ struct virtio_scsi_cmd_req cmd;
|
|
||||||
+ struct virtio_scsi_ctrl_tmf_req tmf;
|
|
||||||
+ struct virtio_scsi_ctrl_an_req an;
|
|
||||||
+ } req;
|
|
||||||
+ union {
|
|
||||||
+ struct virtio_scsi_cmd_resp cmd;
|
|
||||||
+ struct virtio_scsi_ctrl_tmf_resp tmf;
|
|
||||||
+ struct virtio_scsi_ctrl_an_resp an;
|
|
||||||
+ struct virtio_scsi_event evt;
|
|
||||||
+ } resp;
|
|
||||||
+} ____cacheline_aligned_in_smp;
|
|
||||||
+
|
|
||||||
+/* Driver instance state */
|
|
||||||
+struct virtio_scsi {
|
|
||||||
+ /* Protects ctrl_vq, req_vq and sg[] */
|
|
||||||
+ spinlock_t vq_lock;
|
|
||||||
+
|
|
||||||
+ struct virtio_device *vdev;
|
|
||||||
+ struct virtqueue *ctrl_vq;
|
|
||||||
+ struct virtqueue *event_vq;
|
|
||||||
+ struct virtqueue *req_vq;
|
|
||||||
+
|
|
||||||
+ /* For sglist construction when adding commands to the virtqueue. */
|
|
||||||
+ struct scatterlist sg[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static struct kmem_cache *virtscsi_cmd_cache;
|
|
||||||
+static mempool_t *virtscsi_cmd_pool;
|
|
||||||
+
|
|
||||||
+static inline struct Scsi_Host *virtio_scsi_host(struct virtio_device *vdev)
|
|
||||||
+{
|
|
||||||
+ return vdev->priv;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
|
|
||||||
+{
|
|
||||||
+ if (!resid)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ if (!scsi_bidi_cmnd(sc)) {
|
|
||||||
+ scsi_set_resid(sc, resid);
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ scsi_in(sc)->resid = min(resid, scsi_in(sc)->length);
|
|
||||||
+ scsi_out(sc)->resid = resid - scsi_in(sc)->resid;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/**
|
|
||||||
+ * virtscsi_complete_cmd - finish a scsi_cmd and invoke scsi_done
|
|
||||||
+ *
|
|
||||||
+ * Called with vq_lock held.
|
|
||||||
+ */
|
|
||||||
+static void virtscsi_complete_cmd(void *buf)
|
|
||||||
+{
|
|
||||||
+ struct virtio_scsi_cmd *cmd = buf;
|
|
||||||
+ struct scsi_cmnd *sc = cmd->sc;
|
|
||||||
+ struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
|
|
||||||
+
|
|
||||||
+ dev_dbg(&sc->device->sdev_gendev,
|
|
||||||
+ "cmd %p response %u status %#02x sense_len %u\n",
|
|
||||||
+ sc, resp->response, resp->status, resp->sense_len);
|
|
||||||
+
|
|
||||||
+ sc->result = resp->status;
|
|
||||||
+ virtscsi_compute_resid(sc, resp->resid);
|
|
||||||
+ switch (resp->response) {
|
|
||||||
+ case VIRTIO_SCSI_S_OK:
|
|
||||||
+ set_host_byte(sc, DID_OK);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_OVERRUN:
|
|
||||||
+ set_host_byte(sc, DID_ERROR);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_ABORTED:
|
|
||||||
+ set_host_byte(sc, DID_ABORT);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_BAD_TARGET:
|
|
||||||
+ set_host_byte(sc, DID_BAD_TARGET);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_RESET:
|
|
||||||
+ set_host_byte(sc, DID_RESET);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_BUSY:
|
|
||||||
+ set_host_byte(sc, DID_BUS_BUSY);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_TRANSPORT_FAILURE:
|
|
||||||
+ set_host_byte(sc, DID_TRANSPORT_DISRUPTED);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_TARGET_FAILURE:
|
|
||||||
+ set_host_byte(sc, DID_TARGET_FAILURE);
|
|
||||||
+ break;
|
|
||||||
+ case VIRTIO_SCSI_S_NEXUS_FAILURE:
|
|
||||||
+ set_host_byte(sc, DID_NEXUS_FAILURE);
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ scmd_printk(KERN_WARNING, sc, "Unknown response %d",
|
|
||||||
+ resp->response);
|
|
||||||
+ /* fall through */
|
|
||||||
+ case VIRTIO_SCSI_S_FAILURE:
|
|
||||||
+ set_host_byte(sc, DID_ERROR);
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ WARN_ON(resp->sense_len > VIRTIO_SCSI_SENSE_SIZE);
|
|
||||||
+ if (sc->sense_buffer) {
|
|
||||||
+ memcpy(sc->sense_buffer, resp->sense,
|
|
||||||
+ min_t(u32, resp->sense_len, VIRTIO_SCSI_SENSE_SIZE));
|
|
||||||
+ if (resp->sense_len)
|
|
||||||
+ set_driver_byte(sc, DRIVER_SENSE);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ mempool_free(cmd, virtscsi_cmd_pool);
|
|
||||||
+ sc->scsi_done(sc);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf))
|
|
||||||
+{
|
|
||||||
+ struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
|
||||||
+ struct virtio_scsi *vscsi = shost_priv(sh);
|
|
||||||
+ void *buf;
|
|
||||||
+ unsigned long flags;
|
|
||||||
+ unsigned int len;
|
|
||||||
+
|
|
||||||
+ spin_lock_irqsave(&vscsi->vq_lock, flags);
|
|
||||||
+
|
|
||||||
+ do {
|
|
||||||
+ virtqueue_disable_cb(vq);
|
|
||||||
+ while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
|
|
||||||
+ fn(buf);
|
|
||||||
+ } while (!virtqueue_enable_cb(vq));
|
|
||||||
+
|
|
||||||
+ spin_unlock_irqrestore(&vscsi->vq_lock, flags);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void virtscsi_req_done(struct virtqueue *vq)
|
|
||||||
+{
|
|
||||||
+ virtscsi_vq_done(vq, virtscsi_complete_cmd);
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+/* These are still stubs. */
|
|
||||||
+static void virtscsi_complete_free(void *buf)
|
|
||||||
+{
|
|
||||||
+ struct virtio_scsi_cmd *cmd = buf;
|
|
||||||
+
|
|
||||||
+ mempool_free(cmd, virtscsi_cmd_pool);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void virtscsi_ctrl_done(struct virtqueue *vq)
|
|
||||||
+{
|
|
||||||
+ virtscsi_vq_done(vq, virtscsi_complete_free);
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static void virtscsi_event_done(struct virtqueue *vq)
|
|
||||||
+{
|
|
||||||
+ virtscsi_vq_done(vq, virtscsi_complete_free);
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx,
|
|
||||||
+ struct scsi_data_buffer *sdb)
|
|
||||||
+{
|
|
||||||
+ struct sg_table *table = &sdb->table;
|
|
||||||
+ struct scatterlist *sg_elem;
|
|
||||||
+ unsigned int idx = *p_idx;
|
|
||||||
+ int i;
|
|
||||||
+
|
|
||||||
+ for_each_sg(table->sgl, sg_elem, table->nents, i)
|
|
||||||
+ sg_set_buf(&sg[idx++], sg_virt(sg_elem), sg_elem->length);
|
|
||||||
+
|
|
||||||
+ *p_idx = idx;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/**
|
|
||||||
+ * virtscsi_map_cmd - map a scsi_cmd to a virtqueue scatterlist
|
|
||||||
+ * @vscsi : virtio_scsi state
|
|
||||||
+ * @cmd : command structure
|
|
||||||
+ * @out_num : number of read-only elements
|
|
||||||
+ * @in_num : number of write-only elements
|
|
||||||
+ * @req_size : size of the request buffer
|
|
||||||
+ * @resp_size : size of the response buffer
|
|
||||||
+ *
|
|
||||||
+ * Called with vq_lock held.
|
|
||||||
+ */
|
|
||||||
+static void virtscsi_map_cmd(struct virtio_scsi *vscsi,
|
|
||||||
+ struct virtio_scsi_cmd *cmd,
|
|
||||||
+ unsigned *out_num, unsigned *in_num,
|
|
||||||
+ size_t req_size, size_t resp_size)
|
|
||||||
+{
|
|
||||||
+ struct scsi_cmnd *sc = cmd->sc;
|
|
||||||
+ struct scatterlist *sg = vscsi->sg;
|
|
||||||
+ unsigned int idx = 0;
|
|
||||||
+
|
|
||||||
+ if (sc) {
|
|
||||||
+ struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
|
|
||||||
+ BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
|
|
||||||
+
|
|
||||||
+ /* TODO: check feature bit and fail if unsupported? */
|
|
||||||
+ BUG_ON(sc->sc_data_direction == DMA_BIDIRECTIONAL);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ /* Request header. */
|
|
||||||
+ sg_set_buf(&sg[idx++], &cmd->req, req_size);
|
|
||||||
+
|
|
||||||
+ /* Data-out buffer. */
|
|
||||||
+ if (sc && sc->sc_data_direction != DMA_FROM_DEVICE)
|
|
||||||
+ virtscsi_map_sgl(sg, &idx, scsi_out(sc));
|
|
||||||
+
|
|
||||||
+ *out_num = idx;
|
|
||||||
+
|
|
||||||
+ /* Response header. */
|
|
||||||
+ sg_set_buf(&sg[idx++], &cmd->resp, resp_size);
|
|
||||||
+
|
|
||||||
+ /* Data-in buffer */
|
|
||||||
+ if (sc && sc->sc_data_direction != DMA_TO_DEVICE)
|
|
||||||
+ virtscsi_map_sgl(sg, &idx, scsi_in(sc));
|
|
||||||
+
|
|
||||||
+ *in_num = idx - *out_num;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int virtscsi_kick_cmd(struct virtio_scsi *vscsi, struct virtqueue *vq,
|
|
||||||
+ struct virtio_scsi_cmd *cmd,
|
|
||||||
+ size_t req_size, size_t resp_size, gfp_t gfp)
|
|
||||||
+{
|
|
||||||
+ unsigned int out_num, in_num;
|
|
||||||
+ unsigned long flags;
|
|
||||||
+ int ret;
|
|
||||||
+
|
|
||||||
+ spin_lock_irqsave(&vscsi->vq_lock, flags);
|
|
||||||
+
|
|
||||||
+ virtscsi_map_cmd(vscsi, cmd, &out_num, &in_num, req_size, resp_size);
|
|
||||||
+
|
|
||||||
+ ret = virtqueue_add_buf(vq, vscsi->sg, out_num, in_num, cmd, gfp);
|
|
||||||
+ if (ret >= 0)
|
|
||||||
+ virtqueue_kick(vq);
|
|
||||||
+
|
|
||||||
+ spin_unlock_irqrestore(&vscsi->vq_lock, flags);
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
|
|
||||||
+{
|
|
||||||
+ struct virtio_scsi *vscsi = shost_priv(sh);
|
|
||||||
+ struct virtio_scsi_cmd *cmd;
|
|
||||||
+ int ret;
|
|
||||||
+
|
|
||||||
+ dev_dbg(&sc->device->sdev_gendev,
|
|
||||||
+ "cmd %p CDB: %#02x\n", sc, sc->cmnd[0]);
|
|
||||||
+
|
|
||||||
+ ret = SCSI_MLQUEUE_HOST_BUSY;
|
|
||||||
+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC);
|
|
||||||
+ if (!cmd)
|
|
||||||
+ goto out;
|
|
||||||
+
|
|
||||||
+ memset(cmd, 0, sizeof(*cmd));
|
|
||||||
+ cmd->sc = sc;
|
|
||||||
+ cmd->req.cmd = (struct virtio_scsi_cmd_req){
|
|
||||||
+ .lun[0] = 1,
|
|
||||||
+ .lun[1] = sc->device->id,
|
|
||||||
+ .lun[2] = (sc->device->lun >> 8) | 0x40,
|
|
||||||
+ .lun[3] = sc->device->lun & 0xff,
|
|
||||||
+ .tag = (unsigned long)sc,
|
|
||||||
+ .task_attr = VIRTIO_SCSI_S_SIMPLE,
|
|
||||||
+ .prio = 0,
|
|
||||||
+ .crn = 0,
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
|
|
||||||
+ memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
|
|
||||||
+
|
|
||||||
+ if (virtscsi_kick_cmd(vscsi, vscsi->req_vq, cmd,
|
|
||||||
+ sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
|
|
||||||
+ GFP_ATOMIC) >= 0)
|
|
||||||
+ ret = 0;
|
|
||||||
+
|
|
||||||
+out:
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static struct scsi_host_template virtscsi_host_template = {
|
|
||||||
+ .module = THIS_MODULE,
|
|
||||||
+ .name = "Virtio SCSI HBA",
|
|
||||||
+ .proc_name = "virtio_scsi",
|
|
||||||
+ .queuecommand = virtscsi_queuecommand,
|
|
||||||
+ .this_id = -1,
|
|
||||||
+
|
|
||||||
+ .can_queue = 1024,
|
|
||||||
+ .dma_boundary = UINT_MAX,
|
|
||||||
+ .use_clustering = ENABLE_CLUSTERING,
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+#define virtscsi_config_get(vdev, fld) \
|
|
||||||
+ ({ \
|
|
||||||
+ typeof(((struct virtio_scsi_config *)0)->fld) __val; \
|
|
||||||
+ vdev->config->get(vdev, \
|
|
||||||
+ offsetof(struct virtio_scsi_config, fld), \
|
|
||||||
+ &__val, sizeof(__val)); \
|
|
||||||
+ __val; \
|
|
||||||
+ })
|
|
||||||
+
|
|
||||||
+#define virtscsi_config_set(vdev, fld, val) \
|
|
||||||
+ (void)({ \
|
|
||||||
+ typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \
|
|
||||||
+ vdev->config->set(vdev, \
|
|
||||||
+ offsetof(struct virtio_scsi_config, fld), \
|
|
||||||
+ &__val, sizeof(__val)); \
|
|
||||||
+ })
|
|
||||||
+
|
|
||||||
+static int __devinit virtscsi_init(struct virtio_device *vdev,
|
|
||||||
+ struct virtio_scsi *vscsi)
|
|
||||||
+{
|
|
||||||
+ int err;
|
|
||||||
+ struct virtqueue *vqs[3];
|
|
||||||
+ vq_callback_t *callbacks[] = {
|
|
||||||
+ virtscsi_ctrl_done,
|
|
||||||
+ virtscsi_event_done,
|
|
||||||
+ virtscsi_req_done
|
|
||||||
+ };
|
|
||||||
+ const char *names[] = {
|
|
||||||
+ "control",
|
|
||||||
+ "event",
|
|
||||||
+ "request"
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ /* Discover virtqueues and write information to configuration. */
|
|
||||||
+ err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
|
|
||||||
+ if (err)
|
|
||||||
+ return err;
|
|
||||||
+
|
|
||||||
+ vscsi->ctrl_vq = vqs[0];
|
|
||||||
+ vscsi->event_vq = vqs[1];
|
|
||||||
+ vscsi->req_vq = vqs[2];
|
|
||||||
+
|
|
||||||
+ virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
|
|
||||||
+ virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int __devinit virtscsi_probe(struct virtio_device *vdev)
|
|
||||||
+{
|
|
||||||
+ struct Scsi_Host *shost;
|
|
||||||
+ struct virtio_scsi *vscsi;
|
|
||||||
+ int err;
|
|
||||||
+ u32 sg_elems;
|
|
||||||
+ u32 cmd_per_lun;
|
|
||||||
+
|
|
||||||
+ /* We need to know how many segments before we allocate.
|
|
||||||
+ * We need an extra sg element at both head and tail.
|
|
||||||
+ */
|
|
||||||
+ sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
|
|
||||||
+
|
|
||||||
+ /* Allocate memory and link the structs together. */
|
|
||||||
+ shost = scsi_host_alloc(&virtscsi_host_template,
|
|
||||||
+ sizeof(*vscsi) + sizeof(vscsi->sg[0]) * (sg_elems + 2));
|
|
||||||
+
|
|
||||||
+ if (!shost)
|
|
||||||
+ return -ENOMEM;
|
|
||||||
+
|
|
||||||
+ shost->sg_tablesize = sg_elems;
|
|
||||||
+ vscsi = shost_priv(shost);
|
|
||||||
+ vscsi->vdev = vdev;
|
|
||||||
+ vdev->priv = shost;
|
|
||||||
+
|
|
||||||
+ /* Random initializations. */
|
|
||||||
+ spin_lock_init(&vscsi->vq_lock);
|
|
||||||
+ sg_init_table(vscsi->sg, sg_elems + 2);
|
|
||||||
+
|
|
||||||
+ err = virtscsi_init(vdev, vscsi);
|
|
||||||
+ if (err)
|
|
||||||
+ goto virtscsi_init_failed;
|
|
||||||
+
|
|
||||||
+ cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
|
|
||||||
+ shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
|
|
||||||
+ shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
|
|
||||||
+ shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1;
|
|
||||||
+ shost->max_id = virtscsi_config_get(vdev, max_target) + 1;
|
|
||||||
+ shost->max_channel = 0;
|
|
||||||
+ shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
|
|
||||||
+ err = scsi_add_host(shost, &vdev->dev);
|
|
||||||
+ if (err)
|
|
||||||
+ goto scsi_add_host_failed;
|
|
||||||
+
|
|
||||||
+ scsi_scan_host(shost);
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+scsi_add_host_failed:
|
|
||||||
+ vdev->config->del_vqs(vdev);
|
|
||||||
+virtscsi_init_failed:
|
|
||||||
+ scsi_host_put(shost);
|
|
||||||
+ return err;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev)
|
|
||||||
+{
|
|
||||||
+ /* Stop all the virtqueues. */
|
|
||||||
+ vdev->config->reset(vdev);
|
|
||||||
+
|
|
||||||
+ vdev->config->del_vqs(vdev);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void __devexit virtscsi_remove(struct virtio_device *vdev)
|
|
||||||
+{
|
|
||||||
+ struct Scsi_Host *shost = virtio_scsi_host(vdev);
|
|
||||||
+
|
|
||||||
+ scsi_remove_host(shost);
|
|
||||||
+
|
|
||||||
+ virtscsi_remove_vqs(vdev);
|
|
||||||
+ scsi_host_put(shost);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static struct virtio_device_id id_table[] = {
|
|
||||||
+ { VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
|
|
||||||
+ { 0 },
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static struct virtio_driver virtio_scsi_driver = {
|
|
||||||
+ .driver.name = KBUILD_MODNAME,
|
|
||||||
+ .driver.owner = THIS_MODULE,
|
|
||||||
+ .id_table = id_table,
|
|
||||||
+ .probe = virtscsi_probe,
|
|
||||||
+ .remove = __devexit_p(virtscsi_remove),
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int __init init(void)
|
|
||||||
+{
|
|
||||||
+ int ret = -ENOMEM;
|
|
||||||
+
|
|
||||||
+ virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
|
|
||||||
+ if (!virtscsi_cmd_cache) {
|
|
||||||
+ printk(KERN_ERR "kmem_cache_create() for "
|
|
||||||
+ "virtscsi_cmd_cache failed\n");
|
|
||||||
+ goto error;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+ virtscsi_cmd_pool =
|
|
||||||
+ mempool_create_slab_pool(VIRTIO_SCSI_MEMPOOL_SZ,
|
|
||||||
+ virtscsi_cmd_cache);
|
|
||||||
+ if (!virtscsi_cmd_pool) {
|
|
||||||
+ printk(KERN_ERR "mempool_create() for"
|
|
||||||
+ "virtscsi_cmd_pool failed\n");
|
|
||||||
+ goto error;
|
|
||||||
+ }
|
|
||||||
+ ret = register_virtio_driver(&virtio_scsi_driver);
|
|
||||||
+ if (ret < 0)
|
|
||||||
+ goto error;
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+error:
|
|
||||||
+ if (virtscsi_cmd_pool) {
|
|
||||||
+ mempool_destroy(virtscsi_cmd_pool);
|
|
||||||
+ virtscsi_cmd_pool = NULL;
|
|
||||||
+ }
|
|
||||||
+ if (virtscsi_cmd_cache) {
|
|
||||||
+ kmem_cache_destroy(virtscsi_cmd_cache);
|
|
||||||
+ virtscsi_cmd_cache = NULL;
|
|
||||||
+ }
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void __exit fini(void)
|
|
||||||
+{
|
|
||||||
+ unregister_virtio_driver(&virtio_scsi_driver);
|
|
||||||
+ mempool_destroy(virtscsi_cmd_pool);
|
|
||||||
+ kmem_cache_destroy(virtscsi_cmd_cache);
|
|
||||||
+}
|
|
||||||
+module_init(init);
|
|
||||||
+module_exit(fini);
|
|
||||||
+
|
|
||||||
+MODULE_DEVICE_TABLE(virtio, id_table);
|
|
||||||
+MODULE_DESCRIPTION("Virtio SCSI HBA driver");
|
|
||||||
+MODULE_LICENSE("GPL");
|
|
||||||
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
|
|
||||||
index 85bb0bb..d83ae52 100644
|
|
||||||
--- a/include/linux/virtio_ids.h
|
|
||||||
+++ b/include/linux/virtio_ids.h
|
|
||||||
@@ -34,6 +34,7 @@
|
|
||||||
#define VIRTIO_ID_CONSOLE 3 /* virtio console */
|
|
||||||
#define VIRTIO_ID_RNG 4 /* virtio ring */
|
|
||||||
#define VIRTIO_ID_BALLOON 5 /* virtio balloon */
|
|
||||||
+#define VIRTIO_ID_SCSI 8 /* virtio scsi */
|
|
||||||
#define VIRTIO_ID_9P 9 /* 9p virtio console */
|
|
||||||
|
|
||||||
#endif /* _LINUX_VIRTIO_IDS_H */
|
|
||||||
diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..8ddeafd
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/include/linux/virtio_scsi.h
|
|
||||||
@@ -0,0 +1,114 @@
|
|
||||||
+#ifndef _LINUX_VIRTIO_SCSI_H
|
|
||||||
+#define _LINUX_VIRTIO_SCSI_H
|
|
||||||
+/* This header is BSD licensed so anyone can use the definitions to implement
|
|
||||||
+ * compatible drivers/servers. */
|
|
||||||
+
|
|
||||||
+#define VIRTIO_SCSI_CDB_SIZE 32
|
|
||||||
+#define VIRTIO_SCSI_SENSE_SIZE 96
|
|
||||||
+
|
|
||||||
+/* SCSI command request, followed by data-out */
|
|
||||||
+struct virtio_scsi_cmd_req {
|
|
||||||
+ u8 lun[8]; /* Logical Unit Number */
|
|
||||||
+ u64 tag; /* Command identifier */
|
|
||||||
+ u8 task_attr; /* Task attribute */
|
|
||||||
+ u8 prio;
|
|
||||||
+ u8 crn;
|
|
||||||
+ u8 cdb[VIRTIO_SCSI_CDB_SIZE];
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+/* Response, followed by sense data and data-in */
|
|
||||||
+struct virtio_scsi_cmd_resp {
|
|
||||||
+ u32 sense_len; /* Sense data length */
|
|
||||||
+ u32 resid; /* Residual bytes in data buffer */
|
|
||||||
+ u16 status_qualifier; /* Status qualifier */
|
|
||||||
+ u8 status; /* Command completion status */
|
|
||||||
+ u8 response; /* Response values */
|
|
||||||
+ u8 sense[VIRTIO_SCSI_SENSE_SIZE];
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+/* Task Management Request */
|
|
||||||
+struct virtio_scsi_ctrl_tmf_req {
|
|
||||||
+ u32 type;
|
|
||||||
+ u32 subtype;
|
|
||||||
+ u8 lun[8];
|
|
||||||
+ u64 tag;
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+struct virtio_scsi_ctrl_tmf_resp {
|
|
||||||
+ u8 response;
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+/* Asynchronous notification query/subscription */
|
|
||||||
+struct virtio_scsi_ctrl_an_req {
|
|
||||||
+ u32 type;
|
|
||||||
+ u8 lun[8];
|
|
||||||
+ u32 event_requested;
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+struct virtio_scsi_ctrl_an_resp {
|
|
||||||
+ u32 event_actual;
|
|
||||||
+ u8 response;
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+struct virtio_scsi_event {
|
|
||||||
+ u32 event;
|
|
||||||
+ u8 lun[8];
|
|
||||||
+ u32 reason;
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+struct virtio_scsi_config {
|
|
||||||
+ u32 num_queues;
|
|
||||||
+ u32 seg_max;
|
|
||||||
+ u32 max_sectors;
|
|
||||||
+ u32 cmd_per_lun;
|
|
||||||
+ u32 event_info_size;
|
|
||||||
+ u32 sense_size;
|
|
||||||
+ u32 cdb_size;
|
|
||||||
+ u16 max_channel;
|
|
||||||
+ u16 max_target;
|
|
||||||
+ u32 max_lun;
|
|
||||||
+} __packed;
|
|
||||||
+
|
|
||||||
+/* Response codes */
|
|
||||||
+#define VIRTIO_SCSI_S_OK 0
|
|
||||||
+#define VIRTIO_SCSI_S_OVERRUN 1
|
|
||||||
+#define VIRTIO_SCSI_S_ABORTED 2
|
|
||||||
+#define VIRTIO_SCSI_S_BAD_TARGET 3
|
|
||||||
+#define VIRTIO_SCSI_S_RESET 4
|
|
||||||
+#define VIRTIO_SCSI_S_BUSY 5
|
|
||||||
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
|
|
||||||
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
|
|
||||||
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
|
|
||||||
+#define VIRTIO_SCSI_S_FAILURE 9
|
|
||||||
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
|
|
||||||
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
|
|
||||||
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
|
|
||||||
+
|
|
||||||
+/* Controlq type codes. */
|
|
||||||
+#define VIRTIO_SCSI_T_TMF 0
|
|
||||||
+#define VIRTIO_SCSI_T_AN_QUERY 1
|
|
||||||
+#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2
|
|
||||||
+
|
|
||||||
+/* Valid TMF subtypes. */
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
|
|
||||||
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
|
|
||||||
+
|
|
||||||
+/* Events. */
|
|
||||||
+#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000
|
|
||||||
+#define VIRTIO_SCSI_T_NO_EVENT 0
|
|
||||||
+#define VIRTIO_SCSI_T_TRANSPORT_RESET 1
|
|
||||||
+#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2
|
|
||||||
+
|
|
||||||
+#define VIRTIO_SCSI_S_SIMPLE 0
|
|
||||||
+#define VIRTIO_SCSI_S_ORDERED 1
|
|
||||||
+#define VIRTIO_SCSI_S_HEAD 2
|
|
||||||
+#define VIRTIO_SCSI_S_ACA 3
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+#endif /* _LINUX_VIRTIO_SCSI_H */
|
|
||||||
--
|
|
||||||
1.7.1
|
|
||||||
|
|
||||||
|
|
||||||
From 3c0e8846ac0fc2175dd0e06f495b16a30b549762 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Date: Tue, 29 Nov 2011 16:33:28 +0100
|
|
||||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org>
|
|
||||||
Subject: [PATCH v5 2/3] virtio-scsi: add error handling
|
|
||||||
|
|
||||||
This commit adds basic error handling to the virtio-scsi
|
|
||||||
HBA device. Task management functions are sent synchronously
|
|
||||||
via the control virtqueue.
|
|
||||||
|
|
||||||
Cc: linux-scsi <linux-scsi@vger.kernel.org>
|
|
||||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
|
||||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Cc: kvm@vger.kernel.org
|
|
||||||
Acked-by: Pekka Enberg <penberg@kernel.org>
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
---
|
|
||||||
v3->v4: fixed 32-bit compilation; adjusted call to virtscsi_kick_cmd
|
|
||||||
|
|
||||||
v2->v3: added mempool, used GFP_NOIO instead of GFP_ATOMIC,
|
|
||||||
formatting fixes
|
|
||||||
|
|
||||||
v1->v2: use scmd_printk
|
|
||||||
|
|
||||||
drivers/scsi/virtio_scsi.c | 73 +++++++++++++++++++++++++++++++++++++++++++-
|
|
||||||
1 files changed, 72 insertions(+), 1 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
|
|
||||||
index 3f87ae0..68104cd 100644
|
|
||||||
--- a/drivers/scsi/virtio_scsi.c
|
|
||||||
+++ b/drivers/scsi/virtio_scsi.c
|
|
||||||
@@ -29,6 +29,7 @@
|
|
||||||
/* Command queue element */
|
|
||||||
struct virtio_scsi_cmd {
|
|
||||||
struct scsi_cmnd *sc;
|
|
||||||
+ struct completion *comp;
|
|
||||||
union {
|
|
||||||
struct virtio_scsi_cmd_req cmd;
|
|
||||||
struct virtio_scsi_ctrl_tmf_req tmf;
|
|
||||||
@@ -168,11 +169,12 @@ static void virtscsi_req_done(struct virtqueue *vq)
|
|
||||||
virtscsi_vq_done(vq, virtscsi_complete_cmd);
|
|
||||||
};
|
|
||||||
|
|
||||||
-/* These are still stubs. */
|
|
||||||
static void virtscsi_complete_free(void *buf)
|
|
||||||
{
|
|
||||||
struct virtio_scsi_cmd *cmd = buf;
|
|
||||||
|
|
||||||
+ if (cmd->comp)
|
|
||||||
+ complete_all(cmd->comp);
|
|
||||||
mempool_free(cmd, virtscsi_cmd_pool);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -306,12 +308,81 @@ out:
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
|
|
||||||
+{
|
|
||||||
+ DECLARE_COMPLETION_ONSTACK(comp);
|
|
||||||
+ int ret;
|
|
||||||
+
|
|
||||||
+ cmd->comp = &comp;
|
|
||||||
+ ret = virtscsi_kick_cmd(vscsi, vscsi->ctrl_vq, cmd,
|
|
||||||
+ sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
|
|
||||||
+ GFP_NOIO);
|
|
||||||
+ if (ret < 0)
|
|
||||||
+ return FAILED;
|
|
||||||
+
|
|
||||||
+ wait_for_completion(&comp);
|
|
||||||
+ if (cmd->resp.tmf.response != VIRTIO_SCSI_S_OK &&
|
|
||||||
+ cmd->resp.tmf.response != VIRTIO_SCSI_S_FUNCTION_SUCCEEDED)
|
|
||||||
+ return FAILED;
|
|
||||||
+
|
|
||||||
+ return SUCCESS;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int virtscsi_device_reset(struct scsi_cmnd *sc)
|
|
||||||
+{
|
|
||||||
+ struct virtio_scsi *vscsi = shost_priv(sc->device->host);
|
|
||||||
+ struct virtio_scsi_cmd *cmd;
|
|
||||||
+
|
|
||||||
+ sdev_printk(KERN_INFO, sc->device, "device reset\n");
|
|
||||||
+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
|
|
||||||
+ if (!cmd)
|
|
||||||
+ return FAILED;
|
|
||||||
+
|
|
||||||
+ memset(cmd, 0, sizeof(*cmd));
|
|
||||||
+ cmd->sc = sc;
|
|
||||||
+ cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
|
|
||||||
+ .type = VIRTIO_SCSI_T_TMF,
|
|
||||||
+ .subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET,
|
|
||||||
+ .lun[0] = 1,
|
|
||||||
+ .lun[1] = sc->device->id,
|
|
||||||
+ .lun[2] = (sc->device->lun >> 8) | 0x40,
|
|
||||||
+ .lun[3] = sc->device->lun & 0xff,
|
|
||||||
+ };
|
|
||||||
+ return virtscsi_tmf(vscsi, cmd);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int virtscsi_abort(struct scsi_cmnd *sc)
|
|
||||||
+{
|
|
||||||
+ struct virtio_scsi *vscsi = shost_priv(sc->device->host);
|
|
||||||
+ struct virtio_scsi_cmd *cmd;
|
|
||||||
+
|
|
||||||
+ scmd_printk(KERN_INFO, sc, "abort\n");
|
|
||||||
+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
|
|
||||||
+ if (!cmd)
|
|
||||||
+ return FAILED;
|
|
||||||
+
|
|
||||||
+ memset(cmd, 0, sizeof(*cmd));
|
|
||||||
+ cmd->sc = sc;
|
|
||||||
+ cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
|
|
||||||
+ .type = VIRTIO_SCSI_T_TMF,
|
|
||||||
+ .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
|
|
||||||
+ .lun[0] = 1,
|
|
||||||
+ .lun[1] = sc->device->id,
|
|
||||||
+ .lun[2] = (sc->device->lun >> 8) | 0x40,
|
|
||||||
+ .lun[3] = sc->device->lun & 0xff,
|
|
||||||
+ .tag = (unsigned long)sc,
|
|
||||||
+ };
|
|
||||||
+ return virtscsi_tmf(vscsi, cmd);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static struct scsi_host_template virtscsi_host_template = {
|
|
||||||
.module = THIS_MODULE,
|
|
||||||
.name = "Virtio SCSI HBA",
|
|
||||||
.proc_name = "virtio_scsi",
|
|
||||||
.queuecommand = virtscsi_queuecommand,
|
|
||||||
.this_id = -1,
|
|
||||||
+ .eh_abort_handler = virtscsi_abort,
|
|
||||||
+ .eh_device_reset_handler = virtscsi_device_reset,
|
|
||||||
|
|
||||||
.can_queue = 1024,
|
|
||||||
.dma_boundary = UINT_MAX,
|
|
||||||
--
|
|
||||||
1.7.1
|
|
||||||
|
|
||||||
|
|
||||||
From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001
|
|
||||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
Date: Fri, 13 Jan 2012 15:30:08 +0100
|
|
||||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org>
|
|
||||||
Subject: [PATCH v5 3/3] virtio-scsi: add power management support
|
|
||||||
|
|
||||||
This patch adds freeze/restore handlers for the HBA. Block queues
|
|
||||||
are managed independently by the disk devices.
|
|
||||||
|
|
||||||
Cc: linux-scsi <linux-scsi@vger.kernel.org>
|
|
||||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
|
||||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
|
||||||
Cc: kvm@vger.kernel.org
|
|
||||||
Acked-by: Pekka Enberg <penberg@kernel.org>
|
|
||||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
||||||
---
|
|
||||||
The feature has been merged in the virtio core for 3.3, so the patch
|
|
||||||
is new in v4.
|
|
||||||
|
|
||||||
drivers/scsi/virtio_scsi.c | 26 +++++++++++++++++++++++---
|
|
||||||
1 files changed, 23 insertions(+), 3 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
|
|
||||||
index 68104cd..efccd72 100644
|
|
||||||
--- a/drivers/scsi/virtio_scsi.c
|
|
||||||
+++ b/drivers/scsi/virtio_scsi.c
|
|
||||||
@@ -406,8 +406,8 @@ static struct scsi_host_template virtscsi_host_template = {
|
|
||||||
&__val, sizeof(__val)); \
|
|
||||||
})
|
|
||||||
|
|
||||||
-static int __devinit virtscsi_init(struct virtio_device *vdev,
|
|
||||||
- struct virtio_scsi *vscsi)
|
|
||||||
+static int virtscsi_init(struct virtio_device *vdev,
|
|
||||||
+ struct virtio_scsi *vscsi)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
struct virtqueue *vqs[3];
|
|
||||||
@@ -491,7 +491,7 @@ virtscsi_init_failed:
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
-static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev)
|
|
||||||
+static void virtscsi_remove_vqs(struct virtio_device *vdev)
|
|
||||||
{
|
|
||||||
/* Stop all the virtqueues. */
|
|
||||||
vdev->config->reset(vdev);
|
|
||||||
@@ -509,6 +509,22 @@ static void __devexit virtscsi_remove(struct virtio_device *vdev)
|
|
||||||
scsi_host_put(shost);
|
|
||||||
}
|
|
||||||
|
|
||||||
+#ifdef CONFIG_PM
|
|
||||||
+static int virtscsi_freeze(struct virtio_device *vdev)
|
|
||||||
+{
|
|
||||||
+ virtscsi_remove_vqs(vdev);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int virtscsi_restore(struct virtio_device *vdev)
|
|
||||||
+{
|
|
||||||
+ struct Scsi_Host *sh = virtio_scsi_host(vdev);
|
|
||||||
+ struct virtio_scsi *vscsi = shost_priv(sh);
|
|
||||||
+
|
|
||||||
+ return virtscsi_init(vdev, vscsi);
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
static struct virtio_device_id id_table[] = {
|
|
||||||
{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
|
|
||||||
{ 0 },
|
|
||||||
@@ -519,6 +535,10 @@ static struct virtio_driver virtio_scsi_driver = {
|
|
||||||
.driver.owner = THIS_MODULE,
|
|
||||||
.id_table = id_table,
|
|
||||||
.probe = virtscsi_probe,
|
|
||||||
+#ifdef CONFIG_PM
|
|
||||||
+ .freeze = virtscsi_freeze,
|
|
||||||
+ .restore = virtscsi_restore,
|
|
||||||
+#endif
|
|
||||||
.remove = __devexit_p(virtscsi_remove),
|
|
||||||
};
|
|
||||||
|
|
||||||
--
|
|
||||||
1.7.1
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
|||||||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
|
|
||||||
index f22a9f7..f525f99 100644
|
|
||||||
--- a/arch/x86/kernel/cpu/mcheck/mce.c
|
|
||||||
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
|
|
||||||
@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
|
|
||||||
{
|
|
||||||
unsigned int next, i, prev = 0;
|
|
||||||
|
|
||||||
- next = rcu_dereference_check_mce(mcelog.next);
|
|
||||||
+ next = ACCESS_ONCE(mcelog.next);
|
|
||||||
|
|
||||||
do {
|
|
||||||
struct mce *m;
|
|
||||||
|
|
||||||
|
|
@ -1,447 +0,0 @@
|
|||||||
In some cases it may happen that pmd_none_or_clear_bad() is called
|
|
||||||
with the mmap_sem held in read mode. In those cases the huge page
|
|
||||||
faults can allocate hugepmds under pmd_none_or_clear_bad() and that
|
|
||||||
can trigger a false positive from pmd_bad() that will not like to see
|
|
||||||
a pmd materializing as trans huge.
|
|
||||||
|
|
||||||
khugepaged is not the problem: khugepaged holds the mmap_sem in
|
|
||||||
write mode (and all those sites must hold the mmap_sem in read mode to
|
|
||||||
prevent pagetables to go away from under them, during code review it
|
|
||||||
seems vm86 mode on 32bit kernels requires that too unless it's
|
|
||||||
restricted to 1 thread per process or UP builds). The race is only
|
|
||||||
with the huge pagefaults that can convert a pmd_none() into a
|
|
||||||
pmd_trans_huge().
|
|
||||||
|
|
||||||
Effectively all these pmd_none_or_clear_bad() sites running with
|
|
||||||
mmap_sem in read mode are somewhat speculative with the page faults,
|
|
||||||
and the result is always undefined when they run simultaneously. This
|
|
||||||
is probably why it wasn't common to run into this. For example if the
|
|
||||||
madvise(MADV_DONTNEED) runs zap_page_range() shortly before the page
|
|
||||||
fault, the hugepage will not be zapped, if the page fault runs first
|
|
||||||
it will be zapped.
|
|
||||||
|
|
||||||
Altering pmd_bad() not to error out if it finds hugepmds won't be
|
|
||||||
enough to fix this, because zap_pmd_range would then proceed to call
|
|
||||||
zap_pte_range (which would be incorrect if the pmd became a
|
|
||||||
pmd_trans_huge()).
|
|
||||||
|
|
||||||
The simplest way to fix this is to read the pmd in the local stack
|
|
||||||
(regardless of what we read, no need of actual CPU barriers, only
|
|
||||||
compiler barrier needed), and be sure it is not changing under the
|
|
||||||
code that computes its value. Even if the real pmd is changing under
|
|
||||||
the value we hold on the stack, we don't care. If we actually end up
|
|
||||||
in zap_pte_range it means the pmd was not none already and it was not
|
|
||||||
huge, and it can't become huge from under us (khugepaged locking
|
|
||||||
explained above).
|
|
||||||
|
|
||||||
All we need is to enforce that there is no way anymore that in a code
|
|
||||||
path like below, pmd_trans_huge can be false, but
|
|
||||||
pmd_none_or_clear_bad can run into a hugepmd. The overhead of a
|
|
||||||
barrier() is just a compiler tweak and should not be measurable (I
|
|
||||||
only added it for THP builds). I don't exclude different compiler
|
|
||||||
versions may have prevented the race too by caching the value of *pmd
|
|
||||||
on the stack (that hasn't been verified, but it wouldn't be impossible
|
|
||||||
considering pmd_none_or_clear_bad, pmd_bad, pmd_trans_huge, pmd_none
|
|
||||||
are all inlines and there's no external function called in between
|
|
||||||
pmd_trans_huge and pmd_none_or_clear_bad).
|
|
||||||
|
|
||||||
if (pmd_trans_huge(*pmd)) {
|
|
||||||
if (next-addr != HPAGE_PMD_SIZE) {
|
|
||||||
VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
|
|
||||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
|
||||||
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
|
|
||||||
continue;
|
|
||||||
/* fall through */
|
|
||||||
}
|
|
||||||
if (pmd_none_or_clear_bad(pmd))
|
|
||||||
|
|
||||||
Because this race condition could be exercised without special
|
|
||||||
privileges this was reported in CVE-2012-1179.
|
|
||||||
|
|
||||||
The race was identified and fully explained by Ulrich who debugged it.
|
|
||||||
I'm quoting his accurate explanation below, for reference.
|
|
||||||
|
|
||||||
====== start quote =======
|
|
||||||
mapcount 0 page_mapcount 1
|
|
||||||
kernel BUG at mm/huge_memory.c:1384!
|
|
||||||
|
|
||||||
At some point prior to the panic, a "bad pmd ..." message similar to the
|
|
||||||
following is logged on the console:
|
|
||||||
|
|
||||||
mm/memory.c:145: bad pmd ffff8800376e1f98(80000000314000e7).
|
|
||||||
|
|
||||||
The "bad pmd ..." message is logged by pmd_clear_bad() before it clears
|
|
||||||
the page's PMD table entry.
|
|
||||||
|
|
||||||
143 void pmd_clear_bad(pmd_t *pmd)
|
|
||||||
144 {
|
|
||||||
-> 145 pmd_ERROR(*pmd);
|
|
||||||
146 pmd_clear(pmd);
|
|
||||||
147 }
|
|
||||||
|
|
||||||
After the PMD table entry has been cleared, there is an inconsistency
|
|
||||||
between the actual number of PMD table entries that are mapping the page
|
|
||||||
and the page's map count (_mapcount field in struct page). When the page
|
|
||||||
is subsequently reclaimed, __split_huge_page() detects this inconsistency.
|
|
||||||
|
|
||||||
1381 if (mapcount != page_mapcount(page))
|
|
||||||
1382 printk(KERN_ERR "mapcount %d page_mapcount %d\n",
|
|
||||||
1383 mapcount, page_mapcount(page));
|
|
||||||
-> 1384 BUG_ON(mapcount != page_mapcount(page));
|
|
||||||
|
|
||||||
The root cause of the problem is a race of two threads in a multithreaded
|
|
||||||
process. Thread B incurs a page fault on a virtual address that has never
|
|
||||||
been accessed (PMD entry is zero) while Thread A is executing an madvise()
|
|
||||||
system call on a virtual address within the same 2 MB (huge page) range.
|
|
||||||
|
|
||||||
virtual address space
|
|
||||||
.---------------------.
|
|
||||||
| |
|
|
||||||
| |
|
|
||||||
.-|---------------------|
|
|
||||||
| | |
|
|
||||||
| | |<-- B(fault)
|
|
||||||
| | |
|
|
||||||
2 MB | |/////////////////////|-.
|
|
||||||
huge < |/////////////////////| > A(range)
|
|
||||||
page | |/////////////////////|-'
|
|
||||||
| | |
|
|
||||||
| | |
|
|
||||||
'-|---------------------|
|
|
||||||
| |
|
|
||||||
| |
|
|
||||||
'---------------------'
|
|
||||||
|
|
||||||
- Thread A is executing an madvise(..., MADV_DONTNEED) system call
|
|
||||||
on the virtual address range "A(range)" shown in the picture.
|
|
||||||
|
|
||||||
sys_madvise
|
|
||||||
// Acquire the semaphore in shared mode.
|
|
||||||
down_read(&current->mm->mmap_sem)
|
|
||||||
...
|
|
||||||
madvise_vma
|
|
||||||
switch (behavior)
|
|
||||||
case MADV_DONTNEED:
|
|
||||||
madvise_dontneed
|
|
||||||
zap_page_range
|
|
||||||
unmap_vmas
|
|
||||||
unmap_page_range
|
|
||||||
zap_pud_range
|
|
||||||
zap_pmd_range
|
|
||||||
//
|
|
||||||
// Assume that this huge page has never been accessed.
|
|
||||||
// I.e. content of the PMD entry is zero (not mapped).
|
|
||||||
//
|
|
||||||
if (pmd_trans_huge(*pmd)) {
|
|
||||||
// We don't get here due to the above assumption.
|
|
||||||
}
|
|
||||||
//
|
|
||||||
// Assume that Thread B incurred a page fault and
|
|
||||||
.---------> // sneaks in here as shown below.
|
|
||||||
| //
|
|
||||||
| if (pmd_none_or_clear_bad(pmd))
|
|
||||||
| {
|
|
||||||
| if (unlikely(pmd_bad(*pmd)))
|
|
||||||
| pmd_clear_bad
|
|
||||||
| {
|
|
||||||
| pmd_ERROR
|
|
||||||
| // Log "bad pmd ..." message here.
|
|
||||||
| pmd_clear
|
|
||||||
| // Clear the page's PMD entry.
|
|
||||||
| // Thread B incremented the map count
|
|
||||||
| // in page_add_new_anon_rmap(), but
|
|
||||||
| // now the page is no longer mapped
|
|
||||||
| // by a PMD entry (-> inconsistency).
|
|
||||||
| }
|
|
||||||
| }
|
|
||||||
|
|
|
||||||
v
|
|
||||||
- Thread B is handling a page fault on virtual address "B(fault)" shown
|
|
||||||
in the picture.
|
|
||||||
|
|
||||||
...
|
|
||||||
do_page_fault
|
|
||||||
__do_page_fault
|
|
||||||
// Acquire the semaphore in shared mode.
|
|
||||||
down_read_trylock(&mm->mmap_sem)
|
|
||||||
...
|
|
||||||
handle_mm_fault
|
|
||||||
if (pmd_none(*pmd) && transparent_hugepage_enabled(vma))
|
|
||||||
// We get here due to the above assumption (PMD entry is zero).
|
|
||||||
do_huge_pmd_anonymous_page
|
|
||||||
alloc_hugepage_vma
|
|
||||||
// Allocate a new transparent huge page here.
|
|
||||||
...
|
|
||||||
__do_huge_pmd_anonymous_page
|
|
||||||
...
|
|
||||||
spin_lock(&mm->page_table_lock)
|
|
||||||
...
|
|
||||||
page_add_new_anon_rmap
|
|
||||||
// Here we increment the page's map count (starts at -1).
|
|
||||||
atomic_set(&page->_mapcount, 0)
|
|
||||||
set_pmd_at
|
|
||||||
// Here we set the page's PMD entry which will be cleared
|
|
||||||
// when Thread A calls pmd_clear_bad().
|
|
||||||
...
|
|
||||||
spin_unlock(&mm->page_table_lock)
|
|
||||||
|
|
||||||
The mmap_sem does not prevent the race because both threads are acquiring
|
|
||||||
it in shared mode (down_read). Thread B holds the page_table_lock while
|
|
||||||
the page's map count and PMD table entry are updated. However, Thread A
|
|
||||||
does not synchronize on that lock.
|
|
||||||
====== end quote =======
|
|
||||||
|
|
||||||
Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
|
|
||||||
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
|
|
||||||
---
|
|
||||||
arch/x86/kernel/vm86_32.c | 2 +
|
|
||||||
fs/proc/task_mmu.c | 9 ++++++
|
|
||||||
include/asm-generic/pgtable.h | 57 +++++++++++++++++++++++++++++++++++++++++
|
|
||||||
mm/memcontrol.c | 4 +++
|
|
||||||
mm/memory.c | 14 ++++++++--
|
|
||||||
mm/mempolicy.c | 2 +-
|
|
||||||
mm/mincore.c | 2 +-
|
|
||||||
mm/pagewalk.c | 2 +-
|
|
||||||
mm/swapfile.c | 4 +--
|
|
||||||
9 files changed, 87 insertions(+), 9 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
|
|
||||||
index b466cab..328cb37 100644
|
|
||||||
--- a/arch/x86/kernel/vm86_32.c
|
|
||||||
+++ b/arch/x86/kernel/vm86_32.c
|
|
||||||
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
|
||||||
spinlock_t *ptl;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
+ down_write(&mm->mmap_sem);
|
|
||||||
pgd = pgd_offset(mm, 0xA0000);
|
|
||||||
if (pgd_none_or_clear_bad(pgd))
|
|
||||||
goto out;
|
|
||||||
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
|
||||||
}
|
|
||||||
pte_unmap_unlock(pte, ptl);
|
|
||||||
out:
|
|
||||||
+ up_write(&mm->mmap_sem);
|
|
||||||
flush_tlb();
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
|
||||||
index 7dcd2a2..3efa725 100644
|
|
||||||
--- a/fs/proc/task_mmu.c
|
|
||||||
+++ b/fs/proc/task_mmu.c
|
|
||||||
@@ -409,6 +409,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
|
||||||
} else {
|
|
||||||
spin_unlock(&walk->mm->page_table_lock);
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ if (pmd_trans_unstable(pmd))
|
|
||||||
+ return 0;
|
|
||||||
/*
|
|
||||||
* The mmap_sem held all the way back in m_start() is what
|
|
||||||
* keeps khugepaged out of here and from collapsing things
|
|
||||||
@@ -507,6 +510,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
|
|
||||||
struct page *page;
|
|
||||||
|
|
||||||
split_huge_page_pmd(walk->mm, pmd);
|
|
||||||
+ if (pmd_trans_unstable(pmd))
|
|
||||||
+ return 0;
|
|
||||||
|
|
||||||
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
|
||||||
for (; addr != end; pte++, addr += PAGE_SIZE) {
|
|
||||||
@@ -670,6 +675,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
|
||||||
int err = 0;
|
|
||||||
|
|
||||||
split_huge_page_pmd(walk->mm, pmd);
|
|
||||||
+ if (pmd_trans_unstable(pmd))
|
|
||||||
+ return 0;
|
|
||||||
|
|
||||||
/* find the first VMA at or above 'addr' */
|
|
||||||
vma = find_vma(walk->mm, addr);
|
|
||||||
@@ -961,6 +968,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
|
|
||||||
spin_unlock(&walk->mm->page_table_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (pmd_trans_unstable(pmd))
|
|
||||||
+ return 0;
|
|
||||||
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
|
|
||||||
do {
|
|
||||||
struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
|
|
||||||
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
|
|
||||||
index 76bff2b..10f8291 100644
|
|
||||||
--- a/include/asm-generic/pgtable.h
|
|
||||||
+++ b/include/asm-generic/pgtable.h
|
|
||||||
@@ -443,6 +443,63 @@ static inline int pmd_write(pmd_t pmd)
|
|
||||||
#endif /* __HAVE_ARCH_PMD_WRITE */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * This function is meant to be used by sites walking pagetables with
|
|
||||||
+ * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
|
|
||||||
+ * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
|
|
||||||
+ * into a null pmd and the transhuge page fault can convert a null pmd
|
|
||||||
+ * into an hugepmd or into a regular pmd (if the hugepage allocation
|
|
||||||
+ * fails). While holding the mmap_sem in read mode the pmd becomes
|
|
||||||
+ * stable and stops changing under us only if it's not null and not a
|
|
||||||
+ * transhuge pmd. When those races occurs and this function makes a
|
|
||||||
+ * difference vs the standard pmd_none_or_clear_bad, the result is
|
|
||||||
+ * undefined so behaving like if the pmd was none is safe (because it
|
|
||||||
+ * can return none anyway). The compiler level barrier() is critically
|
|
||||||
+ * important to compute the two checks atomically on the same pmdval.
|
|
||||||
+ */
|
|
||||||
+static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
|
|
||||||
+{
|
|
||||||
+ /* depend on compiler for an atomic pmd read */
|
|
||||||
+ pmd_t pmdval = *pmd;
|
|
||||||
+ /*
|
|
||||||
+ * The barrier will stabilize the pmdval in a register or on
|
|
||||||
+ * the stack so that it will stop changing under the code.
|
|
||||||
+ */
|
|
||||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
||||||
+ barrier();
|
|
||||||
+#endif
|
|
||||||
+ if (pmd_none(pmdval))
|
|
||||||
+ return 1;
|
|
||||||
+ if (unlikely(pmd_bad(pmdval))) {
|
|
||||||
+ if (!pmd_trans_huge(pmdval))
|
|
||||||
+ pmd_clear_bad(pmd);
|
|
||||||
+ return 1;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * This is a noop if Transparent Hugepage Support is not built into
|
|
||||||
+ * the kernel. Otherwise it is equivalent to
|
|
||||||
+ * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
|
|
||||||
+ * places that already verified the pmd is not none and they want to
|
|
||||||
+ * walk ptes while holding the mmap sem in read mode (write mode don't
|
|
||||||
+ * need this). If THP is not enabled, the pmd can't go away under the
|
|
||||||
+ * code even if MADV_DONTNEED runs, but if THP is enabled we need to
|
|
||||||
+ * run a pmd_trans_unstable before walking the ptes after
|
|
||||||
+ * split_huge_page_pmd returns (because it may have run when the pmd
|
|
||||||
+ * become null, but then a page fault can map in a THP and not a
|
|
||||||
+ * regular page).
|
|
||||||
+ */
|
|
||||||
+static inline int pmd_trans_unstable(pmd_t *pmd)
|
|
||||||
+{
|
|
||||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
||||||
+ return pmd_none_or_trans_huge_or_clear_bad(pmd);
|
|
||||||
+#else
|
|
||||||
+ return 0;
|
|
||||||
+#endif
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
#endif /* !__ASSEMBLY__ */
|
|
||||||
|
|
||||||
#endif /* _ASM_GENERIC_PGTABLE_H */
|
|
||||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
|
||||||
index d0e57a3..67b0578 100644
|
|
||||||
--- a/mm/memcontrol.c
|
|
||||||
+++ b/mm/memcontrol.c
|
|
||||||
@@ -5193,6 +5193,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
|
|
||||||
spinlock_t *ptl;
|
|
||||||
|
|
||||||
split_huge_page_pmd(walk->mm, pmd);
|
|
||||||
+ if (pmd_trans_unstable(pmd))
|
|
||||||
+ return 0;
|
|
||||||
|
|
||||||
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
|
||||||
for (; addr != end; pte++, addr += PAGE_SIZE)
|
|
||||||
@@ -5355,6 +5357,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
|
||||||
spinlock_t *ptl;
|
|
||||||
|
|
||||||
split_huge_page_pmd(walk->mm, pmd);
|
|
||||||
+ if (pmd_trans_unstable(pmd))
|
|
||||||
+ return 0;
|
|
||||||
retry:
|
|
||||||
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
|
||||||
for (; addr != end; addr += PAGE_SIZE) {
|
|
||||||
diff --git a/mm/memory.c b/mm/memory.c
|
|
||||||
index fa2f04e..e3090fc 100644
|
|
||||||
--- a/mm/memory.c
|
|
||||||
+++ b/mm/memory.c
|
|
||||||
@@ -1251,12 +1251,20 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
|
|
||||||
VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
|
|
||||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
|
||||||
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
|
|
||||||
- continue;
|
|
||||||
+ goto next;
|
|
||||||
/* fall through */
|
|
||||||
}
|
|
||||||
- if (pmd_none_or_clear_bad(pmd))
|
|
||||||
- continue;
|
|
||||||
+ /*
|
|
||||||
+ * Here there can be other concurrent MADV_DONTNEED or
|
|
||||||
+ * trans huge page faults running, and if the pmd is
|
|
||||||
+ * none or trans huge it can change under us. This is
|
|
||||||
+ * because MADV_DONTNEED holds the mmap_sem in read
|
|
||||||
+ * mode.
|
|
||||||
+ */
|
|
||||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
|
||||||
+ goto next;
|
|
||||||
next = zap_pte_range(tlb, vma, pmd, addr, next, details);
|
|
||||||
+ next:
|
|
||||||
cond_resched();
|
|
||||||
} while (pmd++, addr = next, addr != end);
|
|
||||||
|
|
||||||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
|
|
||||||
index 47296fe..0a37570 100644
|
|
||||||
--- a/mm/mempolicy.c
|
|
||||||
+++ b/mm/mempolicy.c
|
|
||||||
@@ -512,7 +512,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
|
||||||
do {
|
|
||||||
next = pmd_addr_end(addr, end);
|
|
||||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
|
||||||
- if (pmd_none_or_clear_bad(pmd))
|
|
||||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
|
||||||
continue;
|
|
||||||
if (check_pte_range(vma, pmd, addr, next, nodes,
|
|
||||||
flags, private))
|
|
||||||
diff --git a/mm/mincore.c b/mm/mincore.c
|
|
||||||
index 636a868..936b4ce 100644
|
|
||||||
--- a/mm/mincore.c
|
|
||||||
+++ b/mm/mincore.c
|
|
||||||
@@ -164,7 +164,7 @@ static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
|
||||||
}
|
|
||||||
/* fall through */
|
|
||||||
}
|
|
||||||
- if (pmd_none_or_clear_bad(pmd))
|
|
||||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
|
||||||
mincore_unmapped_range(vma, addr, next, vec);
|
|
||||||
else
|
|
||||||
mincore_pte_range(vma, pmd, addr, next, vec);
|
|
||||||
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
|
|
||||||
index 2f5cf10..aa9701e 100644
|
|
||||||
--- a/mm/pagewalk.c
|
|
||||||
+++ b/mm/pagewalk.c
|
|
||||||
@@ -59,7 +59,7 @@ again:
|
|
||||||
continue;
|
|
||||||
|
|
||||||
split_huge_page_pmd(walk->mm, pmd);
|
|
||||||
- if (pmd_none_or_clear_bad(pmd))
|
|
||||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
|
||||||
goto again;
|
|
||||||
err = walk_pte_range(pmd, addr, next, walk);
|
|
||||||
if (err)
|
|
||||||
diff --git a/mm/swapfile.c b/mm/swapfile.c
|
|
||||||
index d999f09..f31b29d 100644
|
|
||||||
--- a/mm/swapfile.c
|
|
||||||
+++ b/mm/swapfile.c
|
|
||||||
@@ -932,9 +932,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
|
||||||
pmd = pmd_offset(pud, addr);
|
|
||||||
do {
|
|
||||||
next = pmd_addr_end(addr, end);
|
|
||||||
- if (unlikely(pmd_trans_huge(*pmd)))
|
|
||||||
- continue;
|
|
||||||
- if (pmd_none_or_clear_bad(pmd))
|
|
||||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
|
||||||
continue;
|
|
||||||
ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
|
|
||||||
if (ret)
|
|
||||||
|
|
||||||
--
|
|
||||||
To unsubscribe, send a message with 'unsubscribe linux-mm' in
|
|
||||||
the body to majordomo@kvack.org. For more info on Linux MM,
|
|
||||||
see: http://www.linux-mm.org/ .
|
|
||||||
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
|
|
||||||
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
|
|
2
sources
2
sources
@ -1,2 +1,2 @@
|
|||||||
7133f5a2086a7d7ef97abac610c094f5 linux-3.3.tar.xz
|
7133f5a2086a7d7ef97abac610c094f5 linux-3.3.tar.xz
|
||||||
fe8e2b8e93695cb876cc8394b3db83c4 patch-3.3-git1.xz
|
72643cb2a29683201f2049d151564c56 patch-3.3-git2.xz
|
||||||
|
@ -1,118 +0,0 @@
|
|||||||
From 92a9c19a89af2ca219fbb040a0059f414a4b7223 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Kay Sievers <kay.sievers@vrfy.org>
|
|
||||||
Date: Sat, 28 Jan 2012 19:57:46 +0000
|
|
||||||
Subject: [PATCH] udlfb: remove sysfs framebuffer device with USB
|
|
||||||
.disconnect()
|
|
||||||
|
|
||||||
The USB graphics card driver delays the unregistering of the framebuffer
|
|
||||||
device to a workqueue, which breaks the userspace visible remove uevent
|
|
||||||
sequence. Recent userspace tools started to support USB graphics card
|
|
||||||
hotplug out-of-the-box and rely on proper events sent by the kernel.
|
|
||||||
|
|
||||||
The framebuffer device is a direct child of the USB interface which is
|
|
||||||
removed immediately after the USB .disconnect() callback. But the fb device
|
|
||||||
in /sys stays around until its final cleanup, at a time where all the parent
|
|
||||||
devices have been removed already.
|
|
||||||
|
|
||||||
To work around that, we remove the sysfs fb device directly in the USB
|
|
||||||
.disconnect() callback and leave only the cleanup of the internal fb
|
|
||||||
data to the delayed work.
|
|
||||||
|
|
||||||
Before:
|
|
||||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
|
||||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
|
||||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb0 (graphics)
|
|
||||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
|
||||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
|
||||||
remove /2-1.2:1.0/graphics/fb0 (graphics)
|
|
||||||
|
|
||||||
After:
|
|
||||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
|
||||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
|
||||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics)
|
|
||||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics)
|
|
||||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
|
||||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
|
||||||
|
|
||||||
Cc: stable@vger.kernel.org
|
|
||||||
Tested-by: Bernie Thompson <bernie@plugable.com>
|
|
||||||
Acked-by: Bernie Thompson <bernie@plugable.com>
|
|
||||||
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
|
|
||||||
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
|
|
||||||
---
|
|
||||||
drivers/video/fbmem.c | 18 +++++++++++++++++-
|
|
||||||
drivers/video/udlfb.c | 2 +-
|
|
||||||
include/linux/fb.h | 1 +
|
|
||||||
3 files changed, 19 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
|
|
||||||
index ac9141b..c6ce416 100644
|
|
||||||
--- a/drivers/video/fbmem.c
|
|
||||||
+++ b/drivers/video/fbmem.c
|
|
||||||
@@ -1665,6 +1665,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
|
|
||||||
if (ret)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
+ unlink_framebuffer(fb_info);
|
|
||||||
if (fb_info->pixmap.addr &&
|
|
||||||
(fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
|
|
||||||
kfree(fb_info->pixmap.addr);
|
|
||||||
@@ -1672,7 +1673,6 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
|
|
||||||
registered_fb[i] = NULL;
|
|
||||||
num_registered_fb--;
|
|
||||||
fb_cleanup_device(fb_info);
|
|
||||||
- device_destroy(fb_class, MKDEV(FB_MAJOR, i));
|
|
||||||
event.info = fb_info;
|
|
||||||
fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event);
|
|
||||||
|
|
||||||
@@ -1681,6 +1681,22 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+int unlink_framebuffer(struct fb_info *fb_info)
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+
|
|
||||||
+ i = fb_info->node;
|
|
||||||
+ if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
|
|
||||||
+ return -EINVAL;
|
|
||||||
+
|
|
||||||
+ if (fb_info->dev) {
|
|
||||||
+ device_destroy(fb_class, MKDEV(FB_MAJOR, i));
|
|
||||||
+ fb_info->dev = NULL;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+EXPORT_SYMBOL(unlink_framebuffer);
|
|
||||||
+
|
|
||||||
void remove_conflicting_framebuffers(struct apertures_struct *a,
|
|
||||||
const char *name, bool primary)
|
|
||||||
{
|
|
||||||
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
|
|
||||||
index a197731..a40c05e 100644
|
|
||||||
--- a/drivers/video/udlfb.c
|
|
||||||
+++ b/drivers/video/udlfb.c
|
|
||||||
@@ -1739,7 +1739,7 @@ static void dlfb_usb_disconnect(struct usb_interface *interface)
|
|
||||||
for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++)
|
|
||||||
device_remove_file(info->dev, &fb_device_attrs[i]);
|
|
||||||
device_remove_bin_file(info->dev, &edid_attr);
|
|
||||||
-
|
|
||||||
+ unlink_framebuffer(info);
|
|
||||||
usb_set_intfdata(interface, NULL);
|
|
||||||
|
|
||||||
/* if clients still have us open, will be freed on last close */
|
|
||||||
diff --git a/include/linux/fb.h b/include/linux/fb.h
|
|
||||||
index c18122f..a395b8c 100644
|
|
||||||
--- a/include/linux/fb.h
|
|
||||||
+++ b/include/linux/fb.h
|
|
||||||
@@ -1003,6 +1003,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
|
|
||||||
/* drivers/video/fbmem.c */
|
|
||||||
extern int register_framebuffer(struct fb_info *fb_info);
|
|
||||||
extern int unregister_framebuffer(struct fb_info *fb_info);
|
|
||||||
+extern int unlink_framebuffer(struct fb_info *fb_info);
|
|
||||||
extern void remove_conflicting_framebuffers(struct apertures_struct *a,
|
|
||||||
const char *name, bool primary);
|
|
||||||
extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
|
|
||||||
--
|
|
||||||
1.7.6.5
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user