Linux v3.3-6972-ge22057c
This commit is contained in:
parent
d5a077e500
commit
62c169cbc3
@ -1,113 +0,0 @@
|
||||
From davej Thu Mar 22 16:38:38 2012
|
||||
Return-Path: linux-kernel-owner@vger.kernel.org
|
||||
X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on
|
||||
gelk.kernelslacker.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-1.2 required=5.0 tests=KB_DATE_CONTAINS_TAB,
|
||||
RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD,UNPARSEABLE_RELAY autolearn=unavailable
|
||||
version=3.3.2
|
||||
Received: from mail.corp.redhat.com [10.5.5.51]
|
||||
by gelk.kernelslacker.org with IMAP (fetchmail-6.3.21)
|
||||
for <davej@localhost> (single-drop); Thu, 22 Mar 2012 16:38:38 -0400 (EDT)
|
||||
Received: from zmta02.collab.prod.int.phx2.redhat.com (LHLO
|
||||
zmta02.collab.prod.int.phx2.redhat.com) (10.5.5.32) by
|
||||
zmail11.collab.prod.int.phx2.redhat.com with LMTP; Thu, 22 Mar 2012
|
||||
16:37:12 -0400 (EDT)
|
||||
Received: from localhost (localhost.localdomain [127.0.0.1])
|
||||
by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id BE4B31280F5;
|
||||
Thu, 22 Mar 2012 16:37:12 -0400 (EDT)
|
||||
X-Quarantine-ID: <rVyHUDnYJs0w>
|
||||
Received: from zmta02.collab.prod.int.phx2.redhat.com ([127.0.0.1])
|
||||
by localhost (zmta02.collab.prod.int.phx2.redhat.com [127.0.0.1]) (amavisd-new, port 10024)
|
||||
with ESMTP id rVyHUDnYJs0w; Thu, 22 Mar 2012 16:37:12 -0400 (EDT)
|
||||
Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24])
|
||||
by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id 34CCC1280EF;
|
||||
Thu, 22 Mar 2012 16:37:12 -0400 (EDT)
|
||||
Received: from mx1.redhat.com (ext-mx14.extmail.prod.ext.phx2.redhat.com [10.5.110.19])
|
||||
by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id q2MKbBbO012811;
|
||||
Thu, 22 Mar 2012 16:37:11 -0400
|
||||
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
|
||||
by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id q2MIJPCS018091;
|
||||
Thu, 22 Mar 2012 16:37:10 -0400
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S1759738Ab2CVUhD (ORCPT <rfc822;agordeev@redhat.com> + 54 others);
|
||||
Thu, 22 Mar 2012 16:37:03 -0400
|
||||
Received: from zeniv.linux.org.uk ([195.92.253.2]:35901 "EHLO
|
||||
ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S1758619Ab2CVUg7 (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 22 Mar 2012 16:36:59 -0400
|
||||
Received: from viro by ZenIV.linux.org.uk with local (Exim 4.76 #1 (Red Hat Linux))
|
||||
id 1SAokk-0008Fi-MR; Thu, 22 Mar 2012 20:36:58 +0000
|
||||
Date: Thu, 22 Mar 2012 20:36:58 +0000
|
||||
From: Al Viro <viro@ZenIV.linux.org.uk>
|
||||
To: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xensource.com,
|
||||
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Subject: Re: Regression introduced by
|
||||
bfcfaa77bdf0f775263e906015982a608df01c76 (vfs: use 'unsigned long' accesses
|
||||
for dcache name comparison and hashing)
|
||||
Message-ID: <20120322203658.GC6589@ZenIV.linux.org.uk>
|
||||
References: <20120322183845.GA17264@phenom.dumpdata.com>
|
||||
<20120322200918.GZ6589@ZenIV.linux.org.uk>
|
||||
<20120322202445.GB6589@ZenIV.linux.org.uk>
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=us-ascii
|
||||
Content-Disposition: inline
|
||||
In-Reply-To: <20120322202445.GB6589@ZenIV.linux.org.uk>
|
||||
User-Agent: Mutt/1.5.21 (2010-09-15)
|
||||
Sender: linux-kernel-owner@vger.kernel.org
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
X-RedHat-Spam-Score: -5.01 (RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD)
|
||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24
|
||||
X-Scanned-By: MIMEDefang 2.68 on 10.5.110.19
|
||||
Status: RO
|
||||
Content-Length: 1440
|
||||
Lines: 43
|
||||
|
||||
On Thu, Mar 22, 2012 at 08:24:45PM +0000, Al Viro wrote:
|
||||
>
|
||||
> OK, full_name_hash()/hash_name() definitely have a mismatch and it's on the
|
||||
> names of length 8*n: trivial experiment shows that we have
|
||||
> name hash_name full_name_hash
|
||||
> a 61 61
|
||||
> ab 6261 6261
|
||||
> abc 636261 636261
|
||||
> abcd 64636261 64636261
|
||||
> abcdabc 64c6c4c2 64c6c4c2
|
||||
> abcdabcd efcead5 c8c6c4c2
|
||||
> abcdabcd9 efceb0e efceb0e
|
||||
>
|
||||
> Linus, which way do you prefer to shift it? Should hash_name() change to
|
||||
> match full_name_hash() or should it be the other way round?
|
||||
>
|
||||
> What happens is that you get multiplication by 9 and adding 0 in the former,
|
||||
> after having added the last full word. In the latter we add the last full
|
||||
> word, see that there's nothing left and bugger off.
|
||||
|
||||
Guys, could you check if this fixes it?
|
||||
|
||||
diff --git a/fs/namei.c b/fs/namei.c
|
||||
index 13e6a1f..7451d6f8 100644
|
||||
--- a/fs/namei.c
|
||||
+++ b/fs/namei.c
|
||||
@@ -1439,10 +1439,10 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
|
||||
|
||||
for (;;) {
|
||||
a = *(unsigned long *)name;
|
||||
- hash *= 9;
|
||||
if (len < sizeof(unsigned long))
|
||||
break;
|
||||
hash += a;
|
||||
+ hash *= 9;
|
||||
name += sizeof(unsigned long);
|
||||
len -= sizeof(unsigned long);
|
||||
if (!len)
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
||||
Please read the FAQ at http://www.tux.org/lkml/
|
||||
|
36
kernel.spec
36
kernel.spec
@ -62,7 +62,7 @@ Summary: The Linux kernel
|
||||
# For non-released -rc kernels, this will be appended after the rcX and
|
||||
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
|
||||
#
|
||||
%global baserelease 2
|
||||
%global baserelease 1
|
||||
%global fedora_build %{baserelease}
|
||||
|
||||
# base_sublevel is the kernel version we're starting with and patching
|
||||
@ -95,7 +95,7 @@ Summary: The Linux kernel
|
||||
# The rc snapshot level
|
||||
%define rcrev 0
|
||||
# The git snapshot level
|
||||
%define gitrev 1
|
||||
%define gitrev 2
|
||||
# Set rpm version accordingly
|
||||
%define rpmversion 3.%{upstream_sublevel}.0
|
||||
%endif
|
||||
@ -653,8 +653,6 @@ Patch100: taint-vbox.patch
|
||||
Patch160: linux-2.6-32bit-mmap-exec-randomization.patch
|
||||
Patch161: linux-2.6-i386-nx-emulation.patch
|
||||
|
||||
Patch383: linux-2.6-defaults-aspm.patch
|
||||
|
||||
Patch390: linux-2.6-defaults-acpi-video.patch
|
||||
Patch391: linux-2.6-acpi-video-dos.patch
|
||||
Patch394: linux-2.6-acpi-debug-infinite-loop.patch
|
||||
@ -682,7 +680,6 @@ Patch900: modsign-20111207.patch
|
||||
|
||||
# virt + ksm patches
|
||||
Patch1555: fix_xen_guest_on_old_EC2.patch
|
||||
Patch1556: linux-3.3-virtio-scsi.patch
|
||||
|
||||
# DRM
|
||||
#atch1700: drm-edid-try-harder-to-fix-up-broken-headers.patch
|
||||
@ -708,9 +705,6 @@ Patch2901: linux-2.6-v4l-dvb-experimental.patch
|
||||
Patch4000: ext4-fix-resize-when-resizing-within-single-group.patch
|
||||
|
||||
# NFSv4
|
||||
Patch1102: linux-3.3-newidmapper-01.patch
|
||||
Patch1103: linux-3.3-newidmapper-02.patch
|
||||
Patch1104: linux-3.3-newidmapper-03.patch
|
||||
|
||||
# patches headed upstream
|
||||
Patch12016: disable-i8042-check-on-apple-mac.patch
|
||||
@ -734,8 +728,6 @@ Patch21010: highbank-export-clock-functions.patch
|
||||
|
||||
Patch21070: ext4-Support-check-none-nocheck-mount-options.patch
|
||||
|
||||
Patch21092: udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch
|
||||
|
||||
Patch21094: power-x86-destdir.patch
|
||||
|
||||
#rhbz 788260
|
||||
@ -744,7 +736,6 @@ Patch21233: jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch
|
||||
#rhbz 754518
|
||||
Patch21235: scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
|
||||
|
||||
Patch21250: mcelog-rcu-splat.patch
|
||||
Patch21260: x86-Avoid-invoking-RCU-when-CPU-is-idle.patch
|
||||
|
||||
#rhbz 727865 730007
|
||||
@ -753,9 +744,6 @@ Patch21300: ACPICA-Fix-regression-in-FADT-revision-checks.patch
|
||||
#rhbz 728478
|
||||
Patch21302: sony-laptop-Enable-keyboard-backlight-by-default.patch
|
||||
|
||||
#rhbz 803809 CVE-2012-1179
|
||||
Patch21304: mm-thp-fix-pmd_bad-triggering.patch
|
||||
|
||||
#rhbz 804007
|
||||
Patch21305: mac80211-fix-possible-tid_rx-reorder_timer-use-after-free.patch
|
||||
|
||||
@ -766,8 +754,6 @@ Patch21400: unhandled-irqs-switch-to-polling.patch
|
||||
|
||||
Patch22000: weird-root-dentry-name-debug.patch
|
||||
|
||||
Patch23000: fix-dentry-hash.patch
|
||||
|
||||
%endif
|
||||
|
||||
BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
|
||||
@ -1344,9 +1330,6 @@ ApplyPatch ext4-fix-resize-when-resizing-within-single-group.patch
|
||||
# eCryptfs
|
||||
|
||||
# NFSv4
|
||||
ApplyPatch linux-3.3-newidmapper-01.patch
|
||||
ApplyPatch linux-3.3-newidmapper-02.patch
|
||||
ApplyPatch linux-3.3-newidmapper-03.patch
|
||||
|
||||
# USB
|
||||
|
||||
@ -1362,8 +1345,6 @@ ApplyPatch acpi-sony-nonvs-blacklist.patch
|
||||
#
|
||||
# PCI
|
||||
#
|
||||
# enable ASPM by default on hardware we expect to work
|
||||
ApplyPatch linux-2.6-defaults-aspm.patch
|
||||
|
||||
#
|
||||
# SCSI Bits.
|
||||
@ -1433,7 +1414,6 @@ ApplyOptionalPatch linux-2.6-v4l-dvb-experimental.patch
|
||||
|
||||
# Patches headed upstream
|
||||
ApplyPatch disable-i8042-check-on-apple-mac.patch
|
||||
ApplyPatch linux-3.3-virtio-scsi.patch
|
||||
|
||||
# rhbz#605888
|
||||
ApplyPatch dmar-disable-when-ricoh-multifunction.patch
|
||||
@ -1447,8 +1427,6 @@ ApplyPatch lis3-improve-handling-of-null-rate.patch
|
||||
|
||||
ApplyPatch ext4-Support-check-none-nocheck-mount-options.patch
|
||||
|
||||
ApplyPatch udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch
|
||||
|
||||
ApplyPatch power-x86-destdir.patch
|
||||
|
||||
#rhbz 788269
|
||||
@ -1457,8 +1435,6 @@ ApplyPatch jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch
|
||||
#rhbz 754518
|
||||
ApplyPatch scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch
|
||||
|
||||
ApplyPatch mcelog-rcu-splat.patch
|
||||
|
||||
#rhbz 727865 730007
|
||||
ApplyPatch ACPICA-Fix-regression-in-FADT-revision-checks.patch
|
||||
|
||||
@ -1475,11 +1451,6 @@ ApplyPatch unhandled-irqs-switch-to-polling.patch
|
||||
|
||||
ApplyPatch weird-root-dentry-name-debug.patch
|
||||
|
||||
ApplyPatch fix-dentry-hash.patch
|
||||
|
||||
#rhbz 803809 CVE-2012-1179
|
||||
ApplyPatch mm-thp-fix-pmd_bad-triggering.patch
|
||||
|
||||
#Highbank clock functions
|
||||
ApplyPatch highbank-export-clock-functions.patch
|
||||
|
||||
@ -2336,6 +2307,9 @@ fi
|
||||
# ||----w |
|
||||
# || ||
|
||||
%changelog
|
||||
* Mon Mar 26 2012 Justin M. Forbes <jforbes@redhat.com> - 3.4.0-0.rc0.git2.1
|
||||
- Linux v3.3-6972-ge22057c
|
||||
|
||||
* Thu Mar 22 2012 Dave Jones <davej@redhat.com> 3.4.0-0.rc0.git1.2
|
||||
- Fix occasional EBADMSG from signed modules. (rhbz 804345)
|
||||
|
||||
|
@ -1,12 +0,0 @@
|
||||
diff -up linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg linux-2.6.30.noarch/drivers/pci/pcie/aspm.c
|
||||
--- linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg 2009-07-16 22:01:11.000000000 +0100
|
||||
+++ linux-2.6.30.noarch/drivers/pci/pcie/aspm.c 2009-07-16 22:01:30.000000000 +0100
|
||||
@@ -65,7 +65,7 @@ static LIST_HEAD(link_list);
|
||||
#define POLICY_DEFAULT 0 /* BIOS default setting */
|
||||
#define POLICY_PERFORMANCE 1 /* high performance */
|
||||
#define POLICY_POWERSAVE 2 /* high power saving */
|
||||
-static int aspm_policy;
|
||||
+static int aspm_policy = POLICY_POWERSAVE;
|
||||
static const char *policy_str[] = {
|
||||
[POLICY_DEFAULT] = "default",
|
||||
[POLICY_PERFORMANCE] = "performance",
|
@ -1,217 +0,0 @@
|
||||
commit e6499c6f4b5f56a16f8b8ef60529c1da28b13aea
|
||||
Author: Bryan Schumaker <bjschuma@netapp.com>
|
||||
Date: Thu Jan 26 16:54:23 2012 -0500
|
||||
|
||||
NFS: Fall back on old idmapper if request_key() fails
|
||||
|
||||
This patch removes the CONFIG_NFS_USE_NEW_IDMAPPER compile option.
|
||||
First, the idmapper will attempt to map the id using /sbin/request-key
|
||||
and nfsidmap. If this fails (if /etc/request-key.conf is not configured
|
||||
properly) then the idmapper will call the legacy code to perform the
|
||||
mapping. I left a comment stating where the legacy code begins to make
|
||||
it easier for somebody to remove in the future.
|
||||
|
||||
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
|
||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
|
||||
diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c
|
||||
--- linux-3.2.noarch/fs/nfs/idmap.c.orig 2012-01-27 10:07:07.209851446 -0500
|
||||
+++ linux-3.2.noarch/fs/nfs/idmap.c 2012-01-27 10:15:42.914563082 -0500
|
||||
@@ -142,8 +142,6 @@ static int nfs_map_numeric_to_string(__u
|
||||
return snprintf(buf, buflen, "%u", id);
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
|
||||
-
|
||||
#include <linux/cred.h>
|
||||
#include <linux/sunrpc/sched.h>
|
||||
#include <linux/nfs4.h>
|
||||
@@ -328,43 +326,7 @@ static int nfs_idmap_lookup_id(const cha
|
||||
return ret;
|
||||
}
|
||||
|
||||
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
|
||||
-{
|
||||
- if (nfs_map_string_to_numeric(name, namelen, uid))
|
||||
- return 0;
|
||||
- return nfs_idmap_lookup_id(name, namelen, "uid", uid);
|
||||
-}
|
||||
-
|
||||
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
|
||||
-{
|
||||
- if (nfs_map_string_to_numeric(name, namelen, gid))
|
||||
- return 0;
|
||||
- return nfs_idmap_lookup_id(name, namelen, "gid", gid);
|
||||
-}
|
||||
-
|
||||
-int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
|
||||
-{
|
||||
- int ret = -EINVAL;
|
||||
-
|
||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
||||
- ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
|
||||
- if (ret < 0)
|
||||
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
|
||||
- return ret;
|
||||
-}
|
||||
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
|
||||
-{
|
||||
- int ret = -EINVAL;
|
||||
-
|
||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
||||
- ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
|
||||
- if (ret < 0)
|
||||
- ret = nfs_map_numeric_to_string(gid, buf, buflen);
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
-#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
|
||||
-
|
||||
+/* idmap classic begins here */
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/init.h>
|
||||
@@ -796,19 +758,27 @@ static unsigned int fnvhash32(const void
|
||||
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
|
||||
{
|
||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
||||
+ int ret = -EINVAL;
|
||||
|
||||
if (nfs_map_string_to_numeric(name, namelen, uid))
|
||||
return 0;
|
||||
- return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
|
||||
+ ret = nfs_idmap_lookup_id(name, namelen, "uid", uid);
|
||||
+ if (ret < 0)
|
||||
+ ret = nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
|
||||
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
|
||||
{
|
||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
||||
+ int ret = -EINVAL;
|
||||
|
||||
- if (nfs_map_string_to_numeric(name, namelen, uid))
|
||||
+ if (nfs_map_string_to_numeric(name, namelen, gid))
|
||||
return 0;
|
||||
- return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
|
||||
+ ret = nfs_idmap_lookup_id(name, namelen, "gid", gid);
|
||||
+ if (ret < 0)
|
||||
+ ret = nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, gid);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
|
||||
@@ -816,22 +786,26 @@ int nfs_map_uid_to_name(const struct nfs
|
||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
||||
int ret = -EINVAL;
|
||||
|
||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
||||
- ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
|
||||
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
|
||||
+ ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
|
||||
+ if (ret < 0)
|
||||
+ ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
|
||||
+ }
|
||||
if (ret < 0)
|
||||
ret = nfs_map_numeric_to_string(uid, buf, buflen);
|
||||
return ret;
|
||||
}
|
||||
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
|
||||
+int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
|
||||
{
|
||||
struct idmap *idmap = server->nfs_client->cl_idmap;
|
||||
int ret = -EINVAL;
|
||||
|
||||
- if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
|
||||
- ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
|
||||
+ if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
|
||||
+ ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
|
||||
+ if (ret < 0)
|
||||
+ ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, gid, buf);
|
||||
+ }
|
||||
if (ret < 0)
|
||||
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
|
||||
+ ret = nfs_map_numeric_to_string(gid, buf, buflen);
|
||||
return ret;
|
||||
}
|
||||
-
|
||||
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
|
||||
diff -up linux-3.2.noarch/fs/nfs/Kconfig.orig linux-3.2.noarch/fs/nfs/Kconfig
|
||||
--- linux-3.2.noarch/fs/nfs/Kconfig.orig 2012-01-04 18:55:44.000000000 -0500
|
||||
+++ linux-3.2.noarch/fs/nfs/Kconfig 2012-01-27 10:15:42.913562572 -0500
|
||||
@@ -132,14 +132,3 @@ config NFS_USE_KERNEL_DNS
|
||||
select DNS_RESOLVER
|
||||
select KEYS
|
||||
default y
|
||||
-
|
||||
-config NFS_USE_NEW_IDMAPPER
|
||||
- bool "Use the new idmapper upcall routine"
|
||||
- depends on NFS_V4 && KEYS
|
||||
- help
|
||||
- Say Y here if you want NFS to use the new idmapper upcall functions.
|
||||
- You will need /sbin/request-key (usually provided by the keyutils
|
||||
- package). For details, read
|
||||
- <file:Documentation/filesystems/nfs/idmapper.txt>.
|
||||
-
|
||||
- If you are unsure, say N.
|
||||
diff -up linux-3.2.noarch/fs/nfs/sysctl.c.orig linux-3.2.noarch/fs/nfs/sysctl.c
|
||||
--- linux-3.2.noarch/fs/nfs/sysctl.c.orig 2012-01-04 18:55:44.000000000 -0500
|
||||
+++ linux-3.2.noarch/fs/nfs/sysctl.c 2012-01-27 10:15:42.914563082 -0500
|
||||
@@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = {
|
||||
.extra1 = (int *)&nfs_set_port_min,
|
||||
.extra2 = (int *)&nfs_set_port_max,
|
||||
},
|
||||
-#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
|
||||
{
|
||||
.procname = "idmap_cache_timeout",
|
||||
.data = &nfs_idmap_cache_timeout,
|
||||
@@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_jiffies,
|
||||
},
|
||||
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
|
||||
#endif
|
||||
{
|
||||
.procname = "nfs_mountpoint_timeout",
|
||||
diff -up linux-3.2.noarch/include/linux/nfs_idmap.h.orig linux-3.2.noarch/include/linux/nfs_idmap.h
|
||||
--- linux-3.2.noarch/include/linux/nfs_idmap.h.orig 2012-01-27 10:06:46.783643915 -0500
|
||||
+++ linux-3.2.noarch/include/linux/nfs_idmap.h 2012-01-27 10:15:42.915563594 -0500
|
||||
@@ -69,36 +69,11 @@ struct nfs_server;
|
||||
struct nfs_fattr;
|
||||
struct nfs4_string;
|
||||
|
||||
-#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
|
||||
-
|
||||
int nfs_idmap_init(void);
|
||||
void nfs_idmap_quit(void);
|
||||
-
|
||||
-static inline int nfs_idmap_new(struct nfs_client *clp)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static inline void nfs_idmap_delete(struct nfs_client *clp)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
-#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */
|
||||
-
|
||||
-static inline int nfs_idmap_init(void)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static inline void nfs_idmap_quit(void)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
int nfs_idmap_new(struct nfs_client *);
|
||||
void nfs_idmap_delete(struct nfs_client *);
|
||||
|
||||
-#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
|
||||
-
|
||||
void nfs_fattr_init_names(struct nfs_fattr *fattr,
|
||||
struct nfs4_string *owner_name,
|
||||
struct nfs4_string *group_name);
|
@ -1,97 +0,0 @@
|
||||
commit 3cd0f37a2cc9e4d6188df10041a2441eaa41d991
|
||||
Author: Bryan Schumaker <bjschuma@netapp.com>
|
||||
Date: Thu Jan 26 16:54:24 2012 -0500
|
||||
|
||||
NFS: Keep idmapper include files in one place
|
||||
|
||||
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
|
||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
|
||||
diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c
|
||||
--- linux-3.2.noarch/fs/nfs/idmap.c.orig 2012-01-27 10:15:42.914563082 -0500
|
||||
+++ linux-3.2.noarch/fs/nfs/idmap.c 2012-01-27 10:19:22.711401559 -0500
|
||||
@@ -39,6 +39,36 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/nfs_idmap.h>
|
||||
#include <linux/nfs_fs.h>
|
||||
+#include <linux/cred.h>
|
||||
+#include <linux/sunrpc/sched.h>
|
||||
+#include <linux/nfs4.h>
|
||||
+#include <linux/nfs_fs_sb.h>
|
||||
+#include <linux/keyctl.h>
|
||||
+#include <linux/key-type.h>
|
||||
+#include <linux/rcupdate.h>
|
||||
+#include <linux/err.h>
|
||||
+#include <keys/user-type.h>
|
||||
+
|
||||
+/* include files needed by legacy idmapper */
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/mutex.h>
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/socket.h>
|
||||
+#include <linux/in.h>
|
||||
+#include <linux/sched.h>
|
||||
+#include <linux/sunrpc/clnt.h>
|
||||
+#include <linux/workqueue.h>
|
||||
+#include <linux/sunrpc/rpc_pipe_fs.h>
|
||||
+#include <linux/nfs_fs.h>
|
||||
+#include "nfs4_fs.h"
|
||||
+
|
||||
+#define NFS_UINT_MAXLEN 11
|
||||
+#define IDMAP_HASH_SZ 128
|
||||
+
|
||||
+/* Default cache timeout is 10 minutes */
|
||||
+unsigned int nfs_idmap_cache_timeout = 600 * HZ;
|
||||
+const struct cred *id_resolver_cache;
|
||||
+
|
||||
|
||||
/**
|
||||
* nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
|
||||
@@ -142,21 +172,6 @@ static int nfs_map_numeric_to_string(__u
|
||||
return snprintf(buf, buflen, "%u", id);
|
||||
}
|
||||
|
||||
-#include <linux/cred.h>
|
||||
-#include <linux/sunrpc/sched.h>
|
||||
-#include <linux/nfs4.h>
|
||||
-#include <linux/nfs_fs_sb.h>
|
||||
-#include <linux/keyctl.h>
|
||||
-#include <linux/key-type.h>
|
||||
-#include <linux/rcupdate.h>
|
||||
-#include <linux/err.h>
|
||||
-
|
||||
-#include <keys/user-type.h>
|
||||
-
|
||||
-#define NFS_UINT_MAXLEN 11
|
||||
-
|
||||
-const struct cred *id_resolver_cache;
|
||||
-
|
||||
struct key_type key_type_id_resolver = {
|
||||
.name = "id_resolver",
|
||||
.instantiate = user_instantiate,
|
||||
@@ -327,25 +342,6 @@ static int nfs_idmap_lookup_id(const cha
|
||||
}
|
||||
|
||||
/* idmap classic begins here */
|
||||
-#include <linux/module.h>
|
||||
-#include <linux/mutex.h>
|
||||
-#include <linux/init.h>
|
||||
-#include <linux/socket.h>
|
||||
-#include <linux/in.h>
|
||||
-#include <linux/sched.h>
|
||||
-#include <linux/sunrpc/clnt.h>
|
||||
-#include <linux/workqueue.h>
|
||||
-#include <linux/sunrpc/rpc_pipe_fs.h>
|
||||
-
|
||||
-#include <linux/nfs_fs.h>
|
||||
-
|
||||
-#include "nfs4_fs.h"
|
||||
-
|
||||
-#define IDMAP_HASH_SZ 128
|
||||
-
|
||||
-/* Default cache timeout is 10 minutes */
|
||||
-unsigned int nfs_idmap_cache_timeout = 600 * HZ;
|
||||
-
|
||||
static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
char *endp;
|
@ -1,40 +0,0 @@
|
||||
commit a602bea3e7ccc5ce3da61d2c18245c4058983926
|
||||
Author: Bryan Schumaker <bjschuma@netapp.com>
|
||||
Date: Thu Jan 26 16:54:25 2012 -0500
|
||||
|
||||
NFS: Update idmapper documentation
|
||||
|
||||
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
|
||||
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
|
||||
|
||||
diff -up linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt
|
||||
--- linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig 2012-01-04 18:55:44.000000000 -0500
|
||||
+++ linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt 2012-01-27 10:19:55.406740364 -0500
|
||||
@@ -4,13 +4,21 @@ ID Mapper
|
||||
=========
|
||||
Id mapper is used by NFS to translate user and group ids into names, and to
|
||||
translate user and group names into ids. Part of this translation involves
|
||||
-performing an upcall to userspace to request the information. Id mapper will
|
||||
-user request-key to perform this upcall and cache the result. The program
|
||||
-/usr/sbin/nfs.idmap should be called by request-key, and will perform the
|
||||
-translation and initialize a key with the resulting information.
|
||||
+performing an upcall to userspace to request the information. There are two
|
||||
+ways NFS could obtain this information: placing a call to /sbin/request-key
|
||||
+or by placing a call to the rpc.idmap daemon.
|
||||
+
|
||||
+NFS will attempt to call /sbin/request-key first. If this succeeds, the
|
||||
+result will be cached using the generic request-key cache. This call should
|
||||
+only fail if /etc/request-key.conf is not configured for the id_resolver key
|
||||
+type, see the "Configuring" section below if you wish to use the request-key
|
||||
+method.
|
||||
+
|
||||
+If the call to /sbin/request-key fails (if /etc/request-key.conf is not
|
||||
+configured with the id_resolver key type), then the idmapper will ask the
|
||||
+legacy rpc.idmap daemon for the id mapping. This result will be stored
|
||||
+in a custom NFS idmap cache.
|
||||
|
||||
- NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this
|
||||
- feature.
|
||||
|
||||
===========
|
||||
Configuring
|
@ -1,993 +0,0 @@
|
||||
From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri, 20 Jan 2012 17:27:20 +0100
|
||||
Cc: <linux-scsi@vger.kernel.org>
|
||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
||||
Cc: kvm@vger.kernel.org
|
||||
Cc: Pekka Enberg <penberg@kernel.org>
|
||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>
|
||||
Subject: [PATCH v5 0/3] virtio-scsi driver
|
||||
|
||||
This is the first implementation of the virtio-scsi driver, a virtual
|
||||
HBA that will be supported by KVM. It implements a subset of the spec,
|
||||
in particular it does not implement asynchronous notifications for either
|
||||
LUN reset/removal/addition or CD-ROM media events, but it is already
|
||||
functional and usable.
|
||||
|
||||
Other matching bits:
|
||||
|
||||
- spec at http://people.redhat.com/pbonzini/virtio-spec.pdf
|
||||
|
||||
- QEMU implementation at git://github.com/bonzini/qemu.git,
|
||||
branch virtio-scsi
|
||||
|
||||
Please review. Getting this in 3.3 is starting to look like wishful thinking,
|
||||
but the possibility of regressions is obviously zero so I'm still dreaming.
|
||||
Otherwise, that would be 3.4.
|
||||
|
||||
Paolo Bonzini (3):
|
||||
virtio-scsi: first version
|
||||
virtio-scsi: add error handling
|
||||
virtio-scsi: add power management support
|
||||
|
||||
v4->v5: change virtio id from 7 to 8
|
||||
|
||||
v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN;
|
||||
fixed 32-bit compilation; added power management support;
|
||||
adjusted calls to virtqueue_add_buf
|
||||
|
||||
drivers/scsi/Kconfig | 8 +
|
||||
drivers/scsi/Makefile | 1 +
|
||||
drivers/scsi/virtio_scsi.c | 594 +++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/virtio_ids.h | 1 +
|
||||
include/linux/virtio_scsi.h | 114 +++++++++
|
||||
5 files changed, 718 insertions(+), 0 deletions(-)
|
||||
create mode 100644 drivers/scsi/virtio_scsi.c
|
||||
create mode 100644 include/linux/virtio_scsi.h
|
||||
|
||||
From 84ad93b7215e18ab1755a625ede0fb00175e79bb Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Tue, 29 Nov 2011 16:31:09 +0100
|
||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org>
|
||||
Subject: [PATCH v5 1/3] virtio-scsi: first version
|
||||
|
||||
The virtio-scsi HBA is the basis of an alternative storage stack
|
||||
for QEMU-based virtual machines (including KVM). Compared to
|
||||
virtio-blk it is more scalable (because it supports many LUNs
|
||||
on a single PCI slot), more powerful (it more easily supports
|
||||
passthrough of host devices to the guest) and more easily
|
||||
extensible (new SCSI features implemented by QEMU should not
|
||||
require updating the driver in the guest).
|
||||
|
||||
Cc: linux-scsi <linux-scsi@vger.kernel.org>
|
||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
||||
Cc: kvm@vger.kernel.org
|
||||
Acked-by: Pekka Enberg <penberg@kernel.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
v4->v5: change virtio id from 7 to 8
|
||||
|
||||
v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN;
|
||||
fixed 32-bit compilation; adjust call to virtqueue_add_buf
|
||||
|
||||
v2->v3: added mempool, formatting fixes
|
||||
|
||||
v1->v2: use dev_dbg, sdev_printk, scmd_printk
|
||||
- renamed lock to vq_lock
|
||||
- renamed cmd_vq to req_vq (and other similar changes)
|
||||
- fixed missing break in VIRTIO_SCSI_S_OVERRUN
|
||||
- added VIRTIO_SCSI_S_BUSY
|
||||
- removed unused argument from virtscsi_map_cmd
|
||||
- fixed two tabs that had slipped in
|
||||
- moved max_sectors and cmd_per_lun from template to config space
|
||||
- __attribute__((packed)) -> __packed
|
||||
|
||||
drivers/scsi/Kconfig | 8 +
|
||||
drivers/scsi/Makefile | 1 +
|
||||
drivers/scsi/virtio_scsi.c | 503 +++++++++++++++++++++++++++++++++++++++++++
|
||||
include/linux/virtio_ids.h | 1 +
|
||||
include/linux/virtio_scsi.h | 114 ++++++++++
|
||||
5 files changed, 627 insertions(+), 0 deletions(-)
|
||||
create mode 100644 drivers/scsi/virtio_scsi.c
|
||||
create mode 100644 include/linux/virtio_scsi.h
|
||||
|
||||
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
|
||||
index 16570aa..827ebaf 100644
|
||||
--- a/drivers/scsi/Kconfig
|
||||
+++ b/drivers/scsi/Kconfig
|
||||
@@ -1897,6 +1897,14 @@ config SCSI_BFA_FC
|
||||
To compile this driver as a module, choose M here. The module will
|
||||
be called bfa.
|
||||
|
||||
+config SCSI_VIRTIO
|
||||
+ tristate "virtio-scsi support (EXPERIMENTAL)"
|
||||
+ depends on EXPERIMENTAL && VIRTIO
|
||||
+ help
|
||||
+ This is the virtual HBA driver for virtio. If the kernel will
|
||||
+ be used in a virtual machine, say Y or M.
|
||||
+
|
||||
+
|
||||
endif # SCSI_LOWLEVEL
|
||||
|
||||
source "drivers/scsi/pcmcia/Kconfig"
|
||||
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
|
||||
index e4c1a69..ad24e06 100644
|
||||
--- a/drivers/scsi/Makefile
|
||||
+++ b/drivers/scsi/Makefile
|
||||
@@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_CXGB4_ISCSI) += libiscsi.o libiscsi_tcp.o cxgbi/
|
||||
obj-$(CONFIG_SCSI_BNX2_ISCSI) += libiscsi.o bnx2i/
|
||||
obj-$(CONFIG_BE2ISCSI) += libiscsi.o be2iscsi/
|
||||
obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o
|
||||
+obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o
|
||||
obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o
|
||||
obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o
|
||||
|
||||
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
|
||||
new file mode 100644
|
||||
index 0000000..3f87ae0
|
||||
--- /dev/null
|
||||
+++ b/drivers/scsi/virtio_scsi.c
|
||||
@@ -0,0 +1,503 @@
|
||||
+/*
|
||||
+ * Virtio SCSI HBA driver
|
||||
+ *
|
||||
+ * Copyright IBM Corp. 2010
|
||||
+ * Copyright Red Hat, Inc. 2011
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
|
||||
+ * Paolo Bonzini <pbonzini@redhat.com>
|
||||
+ *
|
||||
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
+ * See the COPYING file in the top-level directory.
|
||||
+ *
|
||||
+ */
|
||||
+
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/slab.h>
|
||||
+#include <linux/mempool.h>
|
||||
+#include <linux/virtio.h>
|
||||
+#include <linux/virtio_ids.h>
|
||||
+#include <linux/virtio_config.h>
|
||||
+#include <linux/virtio_scsi.h>
|
||||
+#include <scsi/scsi_host.h>
|
||||
+#include <scsi/scsi_device.h>
|
||||
+#include <scsi/scsi_cmnd.h>
|
||||
+
|
||||
+#define VIRTIO_SCSI_MEMPOOL_SZ 64
|
||||
+
|
||||
+/* Command queue element */
|
||||
+struct virtio_scsi_cmd {
|
||||
+ struct scsi_cmnd *sc;
|
||||
+ union {
|
||||
+ struct virtio_scsi_cmd_req cmd;
|
||||
+ struct virtio_scsi_ctrl_tmf_req tmf;
|
||||
+ struct virtio_scsi_ctrl_an_req an;
|
||||
+ } req;
|
||||
+ union {
|
||||
+ struct virtio_scsi_cmd_resp cmd;
|
||||
+ struct virtio_scsi_ctrl_tmf_resp tmf;
|
||||
+ struct virtio_scsi_ctrl_an_resp an;
|
||||
+ struct virtio_scsi_event evt;
|
||||
+ } resp;
|
||||
+} ____cacheline_aligned_in_smp;
|
||||
+
|
||||
+/* Driver instance state */
|
||||
+struct virtio_scsi {
|
||||
+ /* Protects ctrl_vq, req_vq and sg[] */
|
||||
+ spinlock_t vq_lock;
|
||||
+
|
||||
+ struct virtio_device *vdev;
|
||||
+ struct virtqueue *ctrl_vq;
|
||||
+ struct virtqueue *event_vq;
|
||||
+ struct virtqueue *req_vq;
|
||||
+
|
||||
+ /* For sglist construction when adding commands to the virtqueue. */
|
||||
+ struct scatterlist sg[];
|
||||
+};
|
||||
+
|
||||
+static struct kmem_cache *virtscsi_cmd_cache;
|
||||
+static mempool_t *virtscsi_cmd_pool;
|
||||
+
|
||||
+static inline struct Scsi_Host *virtio_scsi_host(struct virtio_device *vdev)
|
||||
+{
|
||||
+ return vdev->priv;
|
||||
+}
|
||||
+
|
||||
+static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
|
||||
+{
|
||||
+ if (!resid)
|
||||
+ return;
|
||||
+
|
||||
+ if (!scsi_bidi_cmnd(sc)) {
|
||||
+ scsi_set_resid(sc, resid);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ scsi_in(sc)->resid = min(resid, scsi_in(sc)->length);
|
||||
+ scsi_out(sc)->resid = resid - scsi_in(sc)->resid;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * virtscsi_complete_cmd - finish a scsi_cmd and invoke scsi_done
|
||||
+ *
|
||||
+ * Called with vq_lock held.
|
||||
+ */
|
||||
+static void virtscsi_complete_cmd(void *buf)
|
||||
+{
|
||||
+ struct virtio_scsi_cmd *cmd = buf;
|
||||
+ struct scsi_cmnd *sc = cmd->sc;
|
||||
+ struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
|
||||
+
|
||||
+ dev_dbg(&sc->device->sdev_gendev,
|
||||
+ "cmd %p response %u status %#02x sense_len %u\n",
|
||||
+ sc, resp->response, resp->status, resp->sense_len);
|
||||
+
|
||||
+ sc->result = resp->status;
|
||||
+ virtscsi_compute_resid(sc, resp->resid);
|
||||
+ switch (resp->response) {
|
||||
+ case VIRTIO_SCSI_S_OK:
|
||||
+ set_host_byte(sc, DID_OK);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_OVERRUN:
|
||||
+ set_host_byte(sc, DID_ERROR);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_ABORTED:
|
||||
+ set_host_byte(sc, DID_ABORT);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_BAD_TARGET:
|
||||
+ set_host_byte(sc, DID_BAD_TARGET);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_RESET:
|
||||
+ set_host_byte(sc, DID_RESET);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_BUSY:
|
||||
+ set_host_byte(sc, DID_BUS_BUSY);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_TRANSPORT_FAILURE:
|
||||
+ set_host_byte(sc, DID_TRANSPORT_DISRUPTED);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_TARGET_FAILURE:
|
||||
+ set_host_byte(sc, DID_TARGET_FAILURE);
|
||||
+ break;
|
||||
+ case VIRTIO_SCSI_S_NEXUS_FAILURE:
|
||||
+ set_host_byte(sc, DID_NEXUS_FAILURE);
|
||||
+ break;
|
||||
+ default:
|
||||
+ scmd_printk(KERN_WARNING, sc, "Unknown response %d",
|
||||
+ resp->response);
|
||||
+ /* fall through */
|
||||
+ case VIRTIO_SCSI_S_FAILURE:
|
||||
+ set_host_byte(sc, DID_ERROR);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ WARN_ON(resp->sense_len > VIRTIO_SCSI_SENSE_SIZE);
|
||||
+ if (sc->sense_buffer) {
|
||||
+ memcpy(sc->sense_buffer, resp->sense,
|
||||
+ min_t(u32, resp->sense_len, VIRTIO_SCSI_SENSE_SIZE));
|
||||
+ if (resp->sense_len)
|
||||
+ set_driver_byte(sc, DRIVER_SENSE);
|
||||
+ }
|
||||
+
|
||||
+ mempool_free(cmd, virtscsi_cmd_pool);
|
||||
+ sc->scsi_done(sc);
|
||||
+}
|
||||
+
|
||||
+static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf))
|
||||
+{
|
||||
+ struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
||||
+ struct virtio_scsi *vscsi = shost_priv(sh);
|
||||
+ void *buf;
|
||||
+ unsigned long flags;
|
||||
+ unsigned int len;
|
||||
+
|
||||
+ spin_lock_irqsave(&vscsi->vq_lock, flags);
|
||||
+
|
||||
+ do {
|
||||
+ virtqueue_disable_cb(vq);
|
||||
+ while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
|
||||
+ fn(buf);
|
||||
+ } while (!virtqueue_enable_cb(vq));
|
||||
+
|
||||
+ spin_unlock_irqrestore(&vscsi->vq_lock, flags);
|
||||
+}
|
||||
+
|
||||
+static void virtscsi_req_done(struct virtqueue *vq)
|
||||
+{
|
||||
+ virtscsi_vq_done(vq, virtscsi_complete_cmd);
|
||||
+};
|
||||
+
|
||||
+/* These are still stubs. */
|
||||
+static void virtscsi_complete_free(void *buf)
|
||||
+{
|
||||
+ struct virtio_scsi_cmd *cmd = buf;
|
||||
+
|
||||
+ mempool_free(cmd, virtscsi_cmd_pool);
|
||||
+}
|
||||
+
|
||||
+static void virtscsi_ctrl_done(struct virtqueue *vq)
|
||||
+{
|
||||
+ virtscsi_vq_done(vq, virtscsi_complete_free);
|
||||
+};
|
||||
+
|
||||
+static void virtscsi_event_done(struct virtqueue *vq)
|
||||
+{
|
||||
+ virtscsi_vq_done(vq, virtscsi_complete_free);
|
||||
+};
|
||||
+
|
||||
+static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx,
|
||||
+ struct scsi_data_buffer *sdb)
|
||||
+{
|
||||
+ struct sg_table *table = &sdb->table;
|
||||
+ struct scatterlist *sg_elem;
|
||||
+ unsigned int idx = *p_idx;
|
||||
+ int i;
|
||||
+
|
||||
+ for_each_sg(table->sgl, sg_elem, table->nents, i)
|
||||
+ sg_set_buf(&sg[idx++], sg_virt(sg_elem), sg_elem->length);
|
||||
+
|
||||
+ *p_idx = idx;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * virtscsi_map_cmd - map a scsi_cmd to a virtqueue scatterlist
|
||||
+ * @vscsi : virtio_scsi state
|
||||
+ * @cmd : command structure
|
||||
+ * @out_num : number of read-only elements
|
||||
+ * @in_num : number of write-only elements
|
||||
+ * @req_size : size of the request buffer
|
||||
+ * @resp_size : size of the response buffer
|
||||
+ *
|
||||
+ * Called with vq_lock held.
|
||||
+ */
|
||||
+static void virtscsi_map_cmd(struct virtio_scsi *vscsi,
|
||||
+ struct virtio_scsi_cmd *cmd,
|
||||
+ unsigned *out_num, unsigned *in_num,
|
||||
+ size_t req_size, size_t resp_size)
|
||||
+{
|
||||
+ struct scsi_cmnd *sc = cmd->sc;
|
||||
+ struct scatterlist *sg = vscsi->sg;
|
||||
+ unsigned int idx = 0;
|
||||
+
|
||||
+ if (sc) {
|
||||
+ struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
|
||||
+ BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
|
||||
+
|
||||
+ /* TODO: check feature bit and fail if unsupported? */
|
||||
+ BUG_ON(sc->sc_data_direction == DMA_BIDIRECTIONAL);
|
||||
+ }
|
||||
+
|
||||
+ /* Request header. */
|
||||
+ sg_set_buf(&sg[idx++], &cmd->req, req_size);
|
||||
+
|
||||
+ /* Data-out buffer. */
|
||||
+ if (sc && sc->sc_data_direction != DMA_FROM_DEVICE)
|
||||
+ virtscsi_map_sgl(sg, &idx, scsi_out(sc));
|
||||
+
|
||||
+ *out_num = idx;
|
||||
+
|
||||
+ /* Response header. */
|
||||
+ sg_set_buf(&sg[idx++], &cmd->resp, resp_size);
|
||||
+
|
||||
+ /* Data-in buffer */
|
||||
+ if (sc && sc->sc_data_direction != DMA_TO_DEVICE)
|
||||
+ virtscsi_map_sgl(sg, &idx, scsi_in(sc));
|
||||
+
|
||||
+ *in_num = idx - *out_num;
|
||||
+}
|
||||
+
|
||||
+static int virtscsi_kick_cmd(struct virtio_scsi *vscsi, struct virtqueue *vq,
|
||||
+ struct virtio_scsi_cmd *cmd,
|
||||
+ size_t req_size, size_t resp_size, gfp_t gfp)
|
||||
+{
|
||||
+ unsigned int out_num, in_num;
|
||||
+ unsigned long flags;
|
||||
+ int ret;
|
||||
+
|
||||
+ spin_lock_irqsave(&vscsi->vq_lock, flags);
|
||||
+
|
||||
+ virtscsi_map_cmd(vscsi, cmd, &out_num, &in_num, req_size, resp_size);
|
||||
+
|
||||
+ ret = virtqueue_add_buf(vq, vscsi->sg, out_num, in_num, cmd, gfp);
|
||||
+ if (ret >= 0)
|
||||
+ virtqueue_kick(vq);
|
||||
+
|
||||
+ spin_unlock_irqrestore(&vscsi->vq_lock, flags);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
|
||||
+{
|
||||
+ struct virtio_scsi *vscsi = shost_priv(sh);
|
||||
+ struct virtio_scsi_cmd *cmd;
|
||||
+ int ret;
|
||||
+
|
||||
+ dev_dbg(&sc->device->sdev_gendev,
|
||||
+ "cmd %p CDB: %#02x\n", sc, sc->cmnd[0]);
|
||||
+
|
||||
+ ret = SCSI_MLQUEUE_HOST_BUSY;
|
||||
+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC);
|
||||
+ if (!cmd)
|
||||
+ goto out;
|
||||
+
|
||||
+ memset(cmd, 0, sizeof(*cmd));
|
||||
+ cmd->sc = sc;
|
||||
+ cmd->req.cmd = (struct virtio_scsi_cmd_req){
|
||||
+ .lun[0] = 1,
|
||||
+ .lun[1] = sc->device->id,
|
||||
+ .lun[2] = (sc->device->lun >> 8) | 0x40,
|
||||
+ .lun[3] = sc->device->lun & 0xff,
|
||||
+ .tag = (unsigned long)sc,
|
||||
+ .task_attr = VIRTIO_SCSI_S_SIMPLE,
|
||||
+ .prio = 0,
|
||||
+ .crn = 0,
|
||||
+ };
|
||||
+
|
||||
+ BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
|
||||
+ memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
|
||||
+
|
||||
+ if (virtscsi_kick_cmd(vscsi, vscsi->req_vq, cmd,
|
||||
+ sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
|
||||
+ GFP_ATOMIC) >= 0)
|
||||
+ ret = 0;
|
||||
+
|
||||
+out:
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static struct scsi_host_template virtscsi_host_template = {
|
||||
+ .module = THIS_MODULE,
|
||||
+ .name = "Virtio SCSI HBA",
|
||||
+ .proc_name = "virtio_scsi",
|
||||
+ .queuecommand = virtscsi_queuecommand,
|
||||
+ .this_id = -1,
|
||||
+
|
||||
+ .can_queue = 1024,
|
||||
+ .dma_boundary = UINT_MAX,
|
||||
+ .use_clustering = ENABLE_CLUSTERING,
|
||||
+};
|
||||
+
|
||||
+#define virtscsi_config_get(vdev, fld) \
|
||||
+ ({ \
|
||||
+ typeof(((struct virtio_scsi_config *)0)->fld) __val; \
|
||||
+ vdev->config->get(vdev, \
|
||||
+ offsetof(struct virtio_scsi_config, fld), \
|
||||
+ &__val, sizeof(__val)); \
|
||||
+ __val; \
|
||||
+ })
|
||||
+
|
||||
+#define virtscsi_config_set(vdev, fld, val) \
|
||||
+ (void)({ \
|
||||
+ typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \
|
||||
+ vdev->config->set(vdev, \
|
||||
+ offsetof(struct virtio_scsi_config, fld), \
|
||||
+ &__val, sizeof(__val)); \
|
||||
+ })
|
||||
+
|
||||
+static int __devinit virtscsi_init(struct virtio_device *vdev,
|
||||
+ struct virtio_scsi *vscsi)
|
||||
+{
|
||||
+ int err;
|
||||
+ struct virtqueue *vqs[3];
|
||||
+ vq_callback_t *callbacks[] = {
|
||||
+ virtscsi_ctrl_done,
|
||||
+ virtscsi_event_done,
|
||||
+ virtscsi_req_done
|
||||
+ };
|
||||
+ const char *names[] = {
|
||||
+ "control",
|
||||
+ "event",
|
||||
+ "request"
|
||||
+ };
|
||||
+
|
||||
+ /* Discover virtqueues and write information to configuration. */
|
||||
+ err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ vscsi->ctrl_vq = vqs[0];
|
||||
+ vscsi->event_vq = vqs[1];
|
||||
+ vscsi->req_vq = vqs[2];
|
||||
+
|
||||
+ virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
|
||||
+ virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int __devinit virtscsi_probe(struct virtio_device *vdev)
|
||||
+{
|
||||
+ struct Scsi_Host *shost;
|
||||
+ struct virtio_scsi *vscsi;
|
||||
+ int err;
|
||||
+ u32 sg_elems;
|
||||
+ u32 cmd_per_lun;
|
||||
+
|
||||
+ /* We need to know how many segments before we allocate.
|
||||
+ * We need an extra sg elements at head and tail.
|
||||
+ */
|
||||
+ sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
|
||||
+
|
||||
+ /* Allocate memory and link the structs together. */
|
||||
+ shost = scsi_host_alloc(&virtscsi_host_template,
|
||||
+ sizeof(*vscsi) + sizeof(vscsi->sg[0]) * (sg_elems + 2));
|
||||
+
|
||||
+ if (!shost)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ shost->sg_tablesize = sg_elems;
|
||||
+ vscsi = shost_priv(shost);
|
||||
+ vscsi->vdev = vdev;
|
||||
+ vdev->priv = shost;
|
||||
+
|
||||
+ /* Random initializations. */
|
||||
+ spin_lock_init(&vscsi->vq_lock);
|
||||
+ sg_init_table(vscsi->sg, sg_elems + 2);
|
||||
+
|
||||
+ err = virtscsi_init(vdev, vscsi);
|
||||
+ if (err)
|
||||
+ goto virtscsi_init_failed;
|
||||
+
|
||||
+ cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
|
||||
+ shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
|
||||
+ shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
|
||||
+ shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1;
|
||||
+ shost->max_id = virtscsi_config_get(vdev, max_target) + 1;
|
||||
+ shost->max_channel = 0;
|
||||
+ shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
|
||||
+ err = scsi_add_host(shost, &vdev->dev);
|
||||
+ if (err)
|
||||
+ goto scsi_add_host_failed;
|
||||
+
|
||||
+ scsi_scan_host(shost);
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+scsi_add_host_failed:
|
||||
+ vdev->config->del_vqs(vdev);
|
||||
+virtscsi_init_failed:
|
||||
+ scsi_host_put(shost);
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev)
|
||||
+{
|
||||
+ /* Stop all the virtqueues. */
|
||||
+ vdev->config->reset(vdev);
|
||||
+
|
||||
+ vdev->config->del_vqs(vdev);
|
||||
+}
|
||||
+
|
||||
+static void __devexit virtscsi_remove(struct virtio_device *vdev)
|
||||
+{
|
||||
+ struct Scsi_Host *shost = virtio_scsi_host(vdev);
|
||||
+
|
||||
+ scsi_remove_host(shost);
|
||||
+
|
||||
+ virtscsi_remove_vqs(vdev);
|
||||
+ scsi_host_put(shost);
|
||||
+}
|
||||
+
|
||||
+static struct virtio_device_id id_table[] = {
|
||||
+ { VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
|
||||
+ { 0 },
|
||||
+};
|
||||
+
|
||||
+static struct virtio_driver virtio_scsi_driver = {
|
||||
+ .driver.name = KBUILD_MODNAME,
|
||||
+ .driver.owner = THIS_MODULE,
|
||||
+ .id_table = id_table,
|
||||
+ .probe = virtscsi_probe,
|
||||
+ .remove = __devexit_p(virtscsi_remove),
|
||||
+};
|
||||
+
|
||||
+static int __init init(void)
|
||||
+{
|
||||
+ int ret = -ENOMEM;
|
||||
+
|
||||
+ virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
|
||||
+ if (!virtscsi_cmd_cache) {
|
||||
+ printk(KERN_ERR "kmem_cache_create() for "
|
||||
+ "virtscsi_cmd_cache failed\n");
|
||||
+ goto error;
|
||||
+ }
|
||||
+
|
||||
+
|
||||
+ virtscsi_cmd_pool =
|
||||
+ mempool_create_slab_pool(VIRTIO_SCSI_MEMPOOL_SZ,
|
||||
+ virtscsi_cmd_cache);
|
||||
+ if (!virtscsi_cmd_pool) {
|
||||
+ printk(KERN_ERR "mempool_create() for"
|
||||
+ "virtscsi_cmd_pool failed\n");
|
||||
+ goto error;
|
||||
+ }
|
||||
+ ret = register_virtio_driver(&virtio_scsi_driver);
|
||||
+ if (ret < 0)
|
||||
+ goto error;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+error:
|
||||
+ if (virtscsi_cmd_pool) {
|
||||
+ mempool_destroy(virtscsi_cmd_pool);
|
||||
+ virtscsi_cmd_pool = NULL;
|
||||
+ }
|
||||
+ if (virtscsi_cmd_cache) {
|
||||
+ kmem_cache_destroy(virtscsi_cmd_cache);
|
||||
+ virtscsi_cmd_cache = NULL;
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static void __exit fini(void)
|
||||
+{
|
||||
+ unregister_virtio_driver(&virtio_scsi_driver);
|
||||
+ mempool_destroy(virtscsi_cmd_pool);
|
||||
+ kmem_cache_destroy(virtscsi_cmd_cache);
|
||||
+}
|
||||
+module_init(init);
|
||||
+module_exit(fini);
|
||||
+
|
||||
+MODULE_DEVICE_TABLE(virtio, id_table);
|
||||
+MODULE_DESCRIPTION("Virtio SCSI HBA driver");
|
||||
+MODULE_LICENSE("GPL");
|
||||
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
|
||||
index 85bb0bb..d83ae52 100644
|
||||
--- a/include/linux/virtio_ids.h
|
||||
+++ b/include/linux/virtio_ids.h
|
||||
@@ -34,6 +34,7 @@
|
||||
#define VIRTIO_ID_CONSOLE 3 /* virtio console */
|
||||
#define VIRTIO_ID_RNG 4 /* virtio ring */
|
||||
#define VIRTIO_ID_BALLOON 5 /* virtio balloon */
|
||||
+#define VIRTIO_ID_SCSI 8 /* virtio scsi */
|
||||
#define VIRTIO_ID_9P 9 /* 9p virtio console */
|
||||
|
||||
#endif /* _LINUX_VIRTIO_IDS_H */
|
||||
diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
|
||||
new file mode 100644
|
||||
index 0000000..8ddeafd
|
||||
--- /dev/null
|
||||
+++ b/include/linux/virtio_scsi.h
|
||||
@@ -0,0 +1,114 @@
|
||||
+#ifndef _LINUX_VIRTIO_SCSI_H
|
||||
+#define _LINUX_VIRTIO_SCSI_H
|
||||
+/* This header is BSD licensed so anyone can use the definitions to implement
|
||||
+ * compatible drivers/servers. */
|
||||
+
|
||||
+#define VIRTIO_SCSI_CDB_SIZE 32
|
||||
+#define VIRTIO_SCSI_SENSE_SIZE 96
|
||||
+
|
||||
+/* SCSI command request, followed by data-out */
|
||||
+struct virtio_scsi_cmd_req {
|
||||
+ u8 lun[8]; /* Logical Unit Number */
|
||||
+ u64 tag; /* Command identifier */
|
||||
+ u8 task_attr; /* Task attribute */
|
||||
+ u8 prio;
|
||||
+ u8 crn;
|
||||
+ u8 cdb[VIRTIO_SCSI_CDB_SIZE];
|
||||
+} __packed;
|
||||
+
|
||||
+/* Response, followed by sense data and data-in */
|
||||
+struct virtio_scsi_cmd_resp {
|
||||
+ u32 sense_len; /* Sense data length */
|
||||
+ u32 resid; /* Residual bytes in data buffer */
|
||||
+ u16 status_qualifier; /* Status qualifier */
|
||||
+ u8 status; /* Command completion status */
|
||||
+ u8 response; /* Response values */
|
||||
+ u8 sense[VIRTIO_SCSI_SENSE_SIZE];
|
||||
+} __packed;
|
||||
+
|
||||
+/* Task Management Request */
|
||||
+struct virtio_scsi_ctrl_tmf_req {
|
||||
+ u32 type;
|
||||
+ u32 subtype;
|
||||
+ u8 lun[8];
|
||||
+ u64 tag;
|
||||
+} __packed;
|
||||
+
|
||||
+struct virtio_scsi_ctrl_tmf_resp {
|
||||
+ u8 response;
|
||||
+} __packed;
|
||||
+
|
||||
+/* Asynchronous notification query/subscription */
|
||||
+struct virtio_scsi_ctrl_an_req {
|
||||
+ u32 type;
|
||||
+ u8 lun[8];
|
||||
+ u32 event_requested;
|
||||
+} __packed;
|
||||
+
|
||||
+struct virtio_scsi_ctrl_an_resp {
|
||||
+ u32 event_actual;
|
||||
+ u8 response;
|
||||
+} __packed;
|
||||
+
|
||||
+struct virtio_scsi_event {
|
||||
+ u32 event;
|
||||
+ u8 lun[8];
|
||||
+ u32 reason;
|
||||
+} __packed;
|
||||
+
|
||||
+struct virtio_scsi_config {
|
||||
+ u32 num_queues;
|
||||
+ u32 seg_max;
|
||||
+ u32 max_sectors;
|
||||
+ u32 cmd_per_lun;
|
||||
+ u32 event_info_size;
|
||||
+ u32 sense_size;
|
||||
+ u32 cdb_size;
|
||||
+ u16 max_channel;
|
||||
+ u16 max_target;
|
||||
+ u32 max_lun;
|
||||
+} __packed;
|
||||
+
|
||||
+/* Response codes */
|
||||
+#define VIRTIO_SCSI_S_OK 0
|
||||
+#define VIRTIO_SCSI_S_OVERRUN 1
|
||||
+#define VIRTIO_SCSI_S_ABORTED 2
|
||||
+#define VIRTIO_SCSI_S_BAD_TARGET 3
|
||||
+#define VIRTIO_SCSI_S_RESET 4
|
||||
+#define VIRTIO_SCSI_S_BUSY 5
|
||||
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
|
||||
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
|
||||
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
|
||||
+#define VIRTIO_SCSI_S_FAILURE 9
|
||||
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
|
||||
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
|
||||
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
|
||||
+
|
||||
+/* Controlq type codes. */
|
||||
+#define VIRTIO_SCSI_T_TMF 0
|
||||
+#define VIRTIO_SCSI_T_AN_QUERY 1
|
||||
+#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2
|
||||
+
|
||||
+/* Valid TMF subtypes. */
|
||||
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
|
||||
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
|
||||
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
|
||||
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
|
||||
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
|
||||
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
|
||||
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
|
||||
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
|
||||
+
|
||||
+/* Events. */
|
||||
+#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000
|
||||
+#define VIRTIO_SCSI_T_NO_EVENT 0
|
||||
+#define VIRTIO_SCSI_T_TRANSPORT_RESET 1
|
||||
+#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2
|
||||
+
|
||||
+#define VIRTIO_SCSI_S_SIMPLE 0
|
||||
+#define VIRTIO_SCSI_S_ORDERED 1
|
||||
+#define VIRTIO_SCSI_S_HEAD 2
|
||||
+#define VIRTIO_SCSI_S_ACA 3
|
||||
+
|
||||
+
|
||||
+#endif /* _LINUX_VIRTIO_SCSI_H */
|
||||
--
|
||||
1.7.1
|
||||
|
||||
|
||||
From 3c0e8846ac0fc2175dd0e06f495b16a30b549762 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Tue, 29 Nov 2011 16:33:28 +0100
|
||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org>
|
||||
Subject: [PATCH v5 2/3] virtio-scsi: add error handling
|
||||
|
||||
This commit adds basic error handling to the virtio-scsi
|
||||
HBA device. Task management functions are sent synchronously
|
||||
via the control virtqueue.
|
||||
|
||||
Cc: linux-scsi <linux-scsi@vger.kernel.org>
|
||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
||||
Cc: kvm@vger.kernel.org
|
||||
Acked-by: Pekka Enberg <penberg@kernel.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
v3->v4: fixed 32-bit compilation; adjusted call to virtscsi_kick_cmd
|
||||
|
||||
v2->v3: added mempool, used GFP_NOIO instead of GFP_ATOMIC,
|
||||
formatting fixes
|
||||
|
||||
v1->v2: use scmd_printk
|
||||
|
||||
drivers/scsi/virtio_scsi.c | 73 +++++++++++++++++++++++++++++++++++++++++++-
|
||||
1 files changed, 72 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
|
||||
index 3f87ae0..68104cd 100644
|
||||
--- a/drivers/scsi/virtio_scsi.c
|
||||
+++ b/drivers/scsi/virtio_scsi.c
|
||||
@@ -29,6 +29,7 @@
|
||||
/* Command queue element */
|
||||
struct virtio_scsi_cmd {
|
||||
struct scsi_cmnd *sc;
|
||||
+ struct completion *comp;
|
||||
union {
|
||||
struct virtio_scsi_cmd_req cmd;
|
||||
struct virtio_scsi_ctrl_tmf_req tmf;
|
||||
@@ -168,11 +169,12 @@ static void virtscsi_req_done(struct virtqueue *vq)
|
||||
virtscsi_vq_done(vq, virtscsi_complete_cmd);
|
||||
};
|
||||
|
||||
-/* These are still stubs. */
|
||||
static void virtscsi_complete_free(void *buf)
|
||||
{
|
||||
struct virtio_scsi_cmd *cmd = buf;
|
||||
|
||||
+ if (cmd->comp)
|
||||
+ complete_all(cmd->comp);
|
||||
mempool_free(cmd, virtscsi_cmd_pool);
|
||||
}
|
||||
|
||||
@@ -306,12 +308,81 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
|
||||
+{
|
||||
+ DECLARE_COMPLETION_ONSTACK(comp);
|
||||
+ int ret;
|
||||
+
|
||||
+ cmd->comp = &comp;
|
||||
+ ret = virtscsi_kick_cmd(vscsi, vscsi->ctrl_vq, cmd,
|
||||
+ sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
|
||||
+ GFP_NOIO);
|
||||
+ if (ret < 0)
|
||||
+ return FAILED;
|
||||
+
|
||||
+ wait_for_completion(&comp);
|
||||
+ if (cmd->resp.tmf.response != VIRTIO_SCSI_S_OK &&
|
||||
+ cmd->resp.tmf.response != VIRTIO_SCSI_S_FUNCTION_SUCCEEDED)
|
||||
+ return FAILED;
|
||||
+
|
||||
+ return SUCCESS;
|
||||
+}
|
||||
+
|
||||
+static int virtscsi_device_reset(struct scsi_cmnd *sc)
|
||||
+{
|
||||
+ struct virtio_scsi *vscsi = shost_priv(sc->device->host);
|
||||
+ struct virtio_scsi_cmd *cmd;
|
||||
+
|
||||
+ sdev_printk(KERN_INFO, sc->device, "device reset\n");
|
||||
+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
|
||||
+ if (!cmd)
|
||||
+ return FAILED;
|
||||
+
|
||||
+ memset(cmd, 0, sizeof(*cmd));
|
||||
+ cmd->sc = sc;
|
||||
+ cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
|
||||
+ .type = VIRTIO_SCSI_T_TMF,
|
||||
+ .subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET,
|
||||
+ .lun[0] = 1,
|
||||
+ .lun[1] = sc->device->id,
|
||||
+ .lun[2] = (sc->device->lun >> 8) | 0x40,
|
||||
+ .lun[3] = sc->device->lun & 0xff,
|
||||
+ };
|
||||
+ return virtscsi_tmf(vscsi, cmd);
|
||||
+}
|
||||
+
|
||||
+static int virtscsi_abort(struct scsi_cmnd *sc)
|
||||
+{
|
||||
+ struct virtio_scsi *vscsi = shost_priv(sc->device->host);
|
||||
+ struct virtio_scsi_cmd *cmd;
|
||||
+
|
||||
+ scmd_printk(KERN_INFO, sc, "abort\n");
|
||||
+ cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
|
||||
+ if (!cmd)
|
||||
+ return FAILED;
|
||||
+
|
||||
+ memset(cmd, 0, sizeof(*cmd));
|
||||
+ cmd->sc = sc;
|
||||
+ cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
|
||||
+ .type = VIRTIO_SCSI_T_TMF,
|
||||
+ .subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
|
||||
+ .lun[0] = 1,
|
||||
+ .lun[1] = sc->device->id,
|
||||
+ .lun[2] = (sc->device->lun >> 8) | 0x40,
|
||||
+ .lun[3] = sc->device->lun & 0xff,
|
||||
+ .tag = (unsigned long)sc,
|
||||
+ };
|
||||
+ return virtscsi_tmf(vscsi, cmd);
|
||||
+}
|
||||
+
|
||||
static struct scsi_host_template virtscsi_host_template = {
|
||||
.module = THIS_MODULE,
|
||||
.name = "Virtio SCSI HBA",
|
||||
.proc_name = "virtio_scsi",
|
||||
.queuecommand = virtscsi_queuecommand,
|
||||
.this_id = -1,
|
||||
+ .eh_abort_handler = virtscsi_abort,
|
||||
+ .eh_device_reset_handler = virtscsi_device_reset,
|
||||
|
||||
.can_queue = 1024,
|
||||
.dma_boundary = UINT_MAX,
|
||||
--
|
||||
1.7.1
|
||||
|
||||
|
||||
From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Fri, 13 Jan 2012 15:30:08 +0100
|
||||
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org>
|
||||
Subject: [PATCH v5 3/3] virtio-scsi: add power management support
|
||||
|
||||
This patch adds freeze/restore handlers for the HBA. Block queues
|
||||
are managed independently by the disk devices.
|
||||
|
||||
Cc: linux-scsi <linux-scsi@vger.kernel.org>
|
||||
Cc: Rusty Russell <rusty@rustcorp.com.au>
|
||||
Cc: Michael S. Tsirkin <mst@redhat.com>
|
||||
Cc: kvm@vger.kernel.org
|
||||
Acked-by: Pekka Enberg <penberg@kernel.org>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
The feature has been merged in the virtio core for 3.3, so the patch
|
||||
is new in v4.
|
||||
|
||||
drivers/scsi/virtio_scsi.c | 26 +++++++++++++++++++++++---
|
||||
1 files changed, 23 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
|
||||
index 68104cd..efccd72 100644
|
||||
--- a/drivers/scsi/virtio_scsi.c
|
||||
+++ b/drivers/scsi/virtio_scsi.c
|
||||
@@ -406,8 +406,8 @@ static struct scsi_host_template virtscsi_host_template = {
|
||||
&__val, sizeof(__val)); \
|
||||
})
|
||||
|
||||
-static int __devinit virtscsi_init(struct virtio_device *vdev,
|
||||
- struct virtio_scsi *vscsi)
|
||||
+static int virtscsi_init(struct virtio_device *vdev,
|
||||
+ struct virtio_scsi *vscsi)
|
||||
{
|
||||
int err;
|
||||
struct virtqueue *vqs[3];
|
||||
@@ -491,7 +491,7 @@ virtscsi_init_failed:
|
||||
return err;
|
||||
}
|
||||
|
||||
-static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev)
|
||||
+static void virtscsi_remove_vqs(struct virtio_device *vdev)
|
||||
{
|
||||
/* Stop all the virtqueues. */
|
||||
vdev->config->reset(vdev);
|
||||
@@ -509,6 +509,22 @@ static void __devexit virtscsi_remove(struct virtio_device *vdev)
|
||||
scsi_host_put(shost);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PM
|
||||
+static int virtscsi_freeze(struct virtio_device *vdev)
|
||||
+{
|
||||
+ virtscsi_remove_vqs(vdev);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int virtscsi_restore(struct virtio_device *vdev)
|
||||
+{
|
||||
+ struct Scsi_Host *sh = virtio_scsi_host(vdev);
|
||||
+ struct virtio_scsi *vscsi = shost_priv(sh);
|
||||
+
|
||||
+ return virtscsi_init(vdev, vscsi);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static struct virtio_device_id id_table[] = {
|
||||
{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
|
||||
{ 0 },
|
||||
@@ -519,6 +535,10 @@ static struct virtio_driver virtio_scsi_driver = {
|
||||
.driver.owner = THIS_MODULE,
|
||||
.id_table = id_table,
|
||||
.probe = virtscsi_probe,
|
||||
+#ifdef CONFIG_PM
|
||||
+ .freeze = virtscsi_freeze,
|
||||
+ .restore = virtscsi_restore,
|
||||
+#endif
|
||||
.remove = __devexit_p(virtscsi_remove),
|
||||
};
|
||||
|
||||
--
|
||||
1.7.1
|
||||
|
@ -1,15 +0,0 @@
|
||||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
|
||||
index f22a9f7..f525f99 100644
|
||||
--- a/arch/x86/kernel/cpu/mcheck/mce.c
|
||||
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
|
||||
@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
|
||||
{
|
||||
unsigned int next, i, prev = 0;
|
||||
|
||||
- next = rcu_dereference_check_mce(mcelog.next);
|
||||
+ next = ACCESS_ONCE(mcelog.next);
|
||||
|
||||
do {
|
||||
struct mce *m;
|
||||
|
||||
|
@ -1,447 +0,0 @@
|
||||
In some cases it may happen that pmd_none_or_clear_bad() is called
|
||||
with the mmap_sem hold in read mode. In those cases the huge page
|
||||
faults can allocate hugepmds under pmd_none_or_clear_bad() and that
|
||||
can trigger a false positive from pmd_bad() that will not like to see
|
||||
a pmd materializing as trans huge.
|
||||
|
||||
It's not khugepaged the problem, khugepaged holds the mmap_sem in
|
||||
write mode (and all those sites must hold the mmap_sem in read mode to
|
||||
prevent pagetables to go away from under them, during code review it
|
||||
seems vm86 mode on 32bit kernels requires that too unless it's
|
||||
restricted to 1 thread per process or UP builds). The race is only
|
||||
with the huge pagefaults that can convert a pmd_none() into a
|
||||
pmd_trans_huge().
|
||||
|
||||
Effectively all these pmd_none_or_clear_bad() sites running with
|
||||
mmap_sem in read mode are somewhat speculative with the page faults,
|
||||
and the result is always undefined when they run simultaneously. This
|
||||
is probably why it wasn't common to run into this. For example if the
|
||||
madvise(MADV_DONTNEED) runs zap_page_range() shortly before the page
|
||||
fault, the hugepage will not be zapped, if the page fault runs first
|
||||
it will be zapped.
|
||||
|
||||
Altering pmd_bad() not to error out if it finds hugepmds won't be
|
||||
enough to fix this, because zap_pmd_range would then proceed to call
|
||||
zap_pte_range (which would be incorrect if the pmd become a
|
||||
pmd_trans_huge()).
|
||||
|
||||
The simplest way to fix this is to read the pmd in the local stack
|
||||
(regardless of what we read, no need of actual CPU barriers, only
|
||||
compiler barrier needed), and be sure it is not changing under the
|
||||
code that computes its value. Even if the real pmd is changing under
|
||||
the value we hold on the stack, we don't care. If we actually end up
|
||||
in zap_pte_range it means the pmd was not none already and it was not
|
||||
huge, and it can't become huge from under us (khugepaged locking
|
||||
explained above).
|
||||
|
||||
All we need is to enforce that there is no way anymore that in a code
|
||||
path like below, pmd_trans_huge can be false, but
|
||||
pmd_none_or_clear_bad can run into a hugepmd. The overhead of a
|
||||
barrier() is just a compiler tweak and should not be measurable (I
|
||||
only added it for THP builds). I don't exclude different compiler
|
||||
versions may have prevented the race too by caching the value of *pmd
|
||||
on the stack (that hasn't been verified, but it wouldn't be impossible
|
||||
considering pmd_none_or_clear_bad, pmd_bad, pmd_trans_huge, pmd_none
|
||||
are all inlines and there's no external function called in between
|
||||
pmd_trans_huge and pmd_none_or_clear_bad).
|
||||
|
||||
if (pmd_trans_huge(*pmd)) {
|
||||
if (next-addr != HPAGE_PMD_SIZE) {
|
||||
VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
|
||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
||||
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
|
||||
continue;
|
||||
/* fall through */
|
||||
}
|
||||
if (pmd_none_or_clear_bad(pmd))
|
||||
|
||||
Because this race condition could be exercised without special
|
||||
privileges this was reported in CVE-2012-1179.
|
||||
|
||||
The race was identified and fully explained by Ulrich who debugged it.
|
||||
I'm quoting his accurate explanation below, for reference.
|
||||
|
||||
====== start quote =======
|
||||
mapcount 0 page_mapcount 1
|
||||
kernel BUG at mm/huge_memory.c:1384!
|
||||
|
||||
At some point prior to the panic, a "bad pmd ..." message similar to the
|
||||
following is logged on the console:
|
||||
|
||||
mm/memory.c:145: bad pmd ffff8800376e1f98(80000000314000e7).
|
||||
|
||||
The "bad pmd ..." message is logged by pmd_clear_bad() before it clears
|
||||
the page's PMD table entry.
|
||||
|
||||
143 void pmd_clear_bad(pmd_t *pmd)
|
||||
144 {
|
||||
-> 145 pmd_ERROR(*pmd);
|
||||
146 pmd_clear(pmd);
|
||||
147 }
|
||||
|
||||
After the PMD table entry has been cleared, there is an inconsistency
|
||||
between the actual number of PMD table entries that are mapping the page
|
||||
and the page's map count (_mapcount field in struct page). When the page
|
||||
is subsequently reclaimed, __split_huge_page() detects this inconsistency.
|
||||
|
||||
1381 if (mapcount != page_mapcount(page))
|
||||
1382 printk(KERN_ERR "mapcount %d page_mapcount %d\n",
|
||||
1383 mapcount, page_mapcount(page));
|
||||
-> 1384 BUG_ON(mapcount != page_mapcount(page));
|
||||
|
||||
The root cause of the problem is a race of two threads in a multithreaded
|
||||
process. Thread B incurs a page fault on a virtual address that has never
|
||||
been accessed (PMD entry is zero) while Thread A is executing an madvise()
|
||||
system call on a virtual address within the same 2 MB (huge page) range.
|
||||
|
||||
virtual address space
|
||||
.---------------------.
|
||||
| |
|
||||
| |
|
||||
.-|---------------------|
|
||||
| | |
|
||||
| | |<-- B(fault)
|
||||
| | |
|
||||
2 MB | |/////////////////////|-.
|
||||
huge < |/////////////////////| > A(range)
|
||||
page | |/////////////////////|-'
|
||||
| | |
|
||||
| | |
|
||||
'-|---------------------|
|
||||
| |
|
||||
| |
|
||||
'---------------------'
|
||||
|
||||
- Thread A is executing an madvise(..., MADV_DONTNEED) system call
|
||||
on the virtual address range "A(range)" shown in the picture.
|
||||
|
||||
sys_madvise
|
||||
// Acquire the semaphore in shared mode.
|
||||
down_read(&current->mm->mmap_sem)
|
||||
...
|
||||
madvise_vma
|
||||
switch (behavior)
|
||||
case MADV_DONTNEED:
|
||||
madvise_dontneed
|
||||
zap_page_range
|
||||
unmap_vmas
|
||||
unmap_page_range
|
||||
zap_pud_range
|
||||
zap_pmd_range
|
||||
//
|
||||
// Assume that this huge page has never been accessed.
|
||||
// I.e. content of the PMD entry is zero (not mapped).
|
||||
//
|
||||
if (pmd_trans_huge(*pmd)) {
|
||||
// We don't get here due to the above assumption.
|
||||
}
|
||||
//
|
||||
// Assume that Thread B incurred a page fault and
|
||||
.---------> // sneaks in here as shown below.
|
||||
| //
|
||||
| if (pmd_none_or_clear_bad(pmd))
|
||||
| {
|
||||
| if (unlikely(pmd_bad(*pmd)))
|
||||
| pmd_clear_bad
|
||||
| {
|
||||
| pmd_ERROR
|
||||
| // Log "bad pmd ..." message here.
|
||||
| pmd_clear
|
||||
| // Clear the page's PMD entry.
|
||||
| // Thread B incremented the map count
|
||||
| // in page_add_new_anon_rmap(), but
|
||||
| // now the page is no longer mapped
|
||||
| // by a PMD entry (-> inconsistency).
|
||||
| }
|
||||
| }
|
||||
|
|
||||
v
|
||||
- Thread B is handling a page fault on virtual address "B(fault)" shown
|
||||
in the picture.
|
||||
|
||||
...
|
||||
do_page_fault
|
||||
__do_page_fault
|
||||
// Acquire the semaphore in shared mode.
|
||||
down_read_trylock(&mm->mmap_sem)
|
||||
...
|
||||
handle_mm_fault
|
||||
if (pmd_none(*pmd) && transparent_hugepage_enabled(vma))
|
||||
// We get here due to the above assumption (PMD entry is zero).
|
||||
do_huge_pmd_anonymous_page
|
||||
alloc_hugepage_vma
|
||||
// Allocate a new transparent huge page here.
|
||||
...
|
||||
__do_huge_pmd_anonymous_page
|
||||
...
|
||||
spin_lock(&mm->page_table_lock)
|
||||
...
|
||||
page_add_new_anon_rmap
|
||||
// Here we increment the page's map count (starts at -1).
|
||||
atomic_set(&page->_mapcount, 0)
|
||||
set_pmd_at
|
||||
// Here we set the page's PMD entry which will be cleared
|
||||
// when Thread A calls pmd_clear_bad().
|
||||
...
|
||||
spin_unlock(&mm->page_table_lock)
|
||||
|
||||
The mmap_sem does not prevent the race because both threads are acquiring
|
||||
it in shared mode (down_read). Thread B holds the page_table_lock while
|
||||
the page's map count and PMD table entry are updated. However, Thread A
|
||||
does not synchronize on that lock.
|
||||
====== end quote =======
|
||||
|
||||
Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
|
||||
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
|
||||
---
|
||||
arch/x86/kernel/vm86_32.c | 2 +
|
||||
fs/proc/task_mmu.c | 9 ++++++
|
||||
include/asm-generic/pgtable.h | 57 +++++++++++++++++++++++++++++++++++++++++
|
||||
mm/memcontrol.c | 4 +++
|
||||
mm/memory.c | 14 ++++++++--
|
||||
mm/mempolicy.c | 2 +-
|
||||
mm/mincore.c | 2 +-
|
||||
mm/pagewalk.c | 2 +-
|
||||
mm/swapfile.c | 4 +--
|
||||
9 files changed, 87 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
|
||||
index b466cab..328cb37 100644
|
||||
--- a/arch/x86/kernel/vm86_32.c
|
||||
+++ b/arch/x86/kernel/vm86_32.c
|
||||
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
||||
spinlock_t *ptl;
|
||||
int i;
|
||||
|
||||
+ down_write(&mm->mmap_sem);
|
||||
pgd = pgd_offset(mm, 0xA0000);
|
||||
if (pgd_none_or_clear_bad(pgd))
|
||||
goto out;
|
||||
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
|
||||
}
|
||||
pte_unmap_unlock(pte, ptl);
|
||||
out:
|
||||
+ up_write(&mm->mmap_sem);
|
||||
flush_tlb();
|
||||
}
|
||||
|
||||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
||||
index 7dcd2a2..3efa725 100644
|
||||
--- a/fs/proc/task_mmu.c
|
||||
+++ b/fs/proc/task_mmu.c
|
||||
@@ -409,6 +409,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||
} else {
|
||||
spin_unlock(&walk->mm->page_table_lock);
|
||||
}
|
||||
+
|
||||
+ if (pmd_trans_unstable(pmd))
|
||||
+ return 0;
|
||||
/*
|
||||
* The mmap_sem held all the way back in m_start() is what
|
||||
* keeps khugepaged out of here and from collapsing things
|
||||
@@ -507,6 +510,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
|
||||
struct page *page;
|
||||
|
||||
split_huge_page_pmd(walk->mm, pmd);
|
||||
+ if (pmd_trans_unstable(pmd))
|
||||
+ return 0;
|
||||
|
||||
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||
for (; addr != end; pte++, addr += PAGE_SIZE) {
|
||||
@@ -670,6 +675,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
|
||||
int err = 0;
|
||||
|
||||
split_huge_page_pmd(walk->mm, pmd);
|
||||
+ if (pmd_trans_unstable(pmd))
|
||||
+ return 0;
|
||||
|
||||
/* find the first VMA at or above 'addr' */
|
||||
vma = find_vma(walk->mm, addr);
|
||||
@@ -961,6 +968,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
|
||||
spin_unlock(&walk->mm->page_table_lock);
|
||||
}
|
||||
|
||||
+ if (pmd_trans_unstable(pmd))
|
||||
+ return 0;
|
||||
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
|
||||
do {
|
||||
struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
|
||||
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
|
||||
index 76bff2b..10f8291 100644
|
||||
--- a/include/asm-generic/pgtable.h
|
||||
+++ b/include/asm-generic/pgtable.h
|
||||
@@ -443,6 +443,63 @@ static inline int pmd_write(pmd_t pmd)
|
||||
#endif /* __HAVE_ARCH_PMD_WRITE */
|
||||
#endif
|
||||
|
||||
+/*
|
||||
+ * This function is meant to be used by sites walking pagetables with
|
||||
+ * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
|
||||
+ * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
|
||||
+ * into a null pmd and the transhuge page fault can convert a null pmd
|
||||
+ * into an hugepmd or into a regular pmd (if the hugepage allocation
|
||||
+ * fails). While holding the mmap_sem in read mode the pmd becomes
|
||||
+ * stable and stops changing under us only if it's not null and not a
|
||||
+ * transhuge pmd. When those races occurs and this function makes a
|
||||
+ * difference vs the standard pmd_none_or_clear_bad, the result is
|
||||
+ * undefined so behaving like if the pmd was none is safe (because it
|
||||
+ * can return none anyway). The compiler level barrier() is critically
|
||||
+ * important to compute the two checks atomically on the same pmdval.
|
||||
+ */
|
||||
+static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
|
||||
+{
|
||||
+ /* depend on compiler for an atomic pmd read */
|
||||
+ pmd_t pmdval = *pmd;
|
||||
+ /*
|
||||
+ * The barrier will stabilize the pmdval in a register or on
|
||||
+ * the stack so that it will stop changing under the code.
|
||||
+ */
|
||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+ barrier();
|
||||
+#endif
|
||||
+ if (pmd_none(pmdval))
|
||||
+ return 1;
|
||||
+ if (unlikely(pmd_bad(pmdval))) {
|
||||
+ if (!pmd_trans_huge(pmdval))
|
||||
+ pmd_clear_bad(pmd);
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * This is a noop if Transparent Hugepage Support is not built into
|
||||
+ * the kernel. Otherwise it is equivalent to
|
||||
+ * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
|
||||
+ * places that already verified the pmd is not none and they want to
|
||||
+ * walk ptes while holding the mmap sem in read mode (write mode don't
|
||||
+ * need this). If THP is not enabled, the pmd can't go away under the
|
||||
+ * code even if MADV_DONTNEED runs, but if THP is enabled we need to
|
||||
+ * run a pmd_trans_unstable before walking the ptes after
|
||||
+ * split_huge_page_pmd returns (because it may have run when the pmd
|
||||
+ * become null, but then a page fault can map in a THP and not a
|
||||
+ * regular page).
|
||||
+ */
|
||||
+static inline int pmd_trans_unstable(pmd_t *pmd)
|
||||
+{
|
||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+ return pmd_none_or_trans_huge_or_clear_bad(pmd);
|
||||
+#else
|
||||
+ return 0;
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_GENERIC_PGTABLE_H */
|
||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
||||
index d0e57a3..67b0578 100644
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -5193,6 +5193,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
|
||||
spinlock_t *ptl;
|
||||
|
||||
split_huge_page_pmd(walk->mm, pmd);
|
||||
+ if (pmd_trans_unstable(pmd))
|
||||
+ return 0;
|
||||
|
||||
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||
for (; addr != end; pte++, addr += PAGE_SIZE)
|
||||
@@ -5355,6 +5357,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
|
||||
spinlock_t *ptl;
|
||||
|
||||
split_huge_page_pmd(walk->mm, pmd);
|
||||
+ if (pmd_trans_unstable(pmd))
|
||||
+ return 0;
|
||||
retry:
|
||||
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
|
||||
for (; addr != end; addr += PAGE_SIZE) {
|
||||
diff --git a/mm/memory.c b/mm/memory.c
|
||||
index fa2f04e..e3090fc 100644
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -1251,12 +1251,20 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
|
||||
VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
|
||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
||||
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
|
||||
- continue;
|
||||
+ goto next;
|
||||
/* fall through */
|
||||
}
|
||||
- if (pmd_none_or_clear_bad(pmd))
|
||||
- continue;
|
||||
+ /*
|
||||
+ * Here there can be other concurrent MADV_DONTNEED or
|
||||
+ * trans huge page faults running, and if the pmd is
|
||||
+ * none or trans huge it can change under us. This is
|
||||
+ * because MADV_DONTNEED holds the mmap_sem in read
|
||||
+ * mode.
|
||||
+ */
|
||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||
+ goto next;
|
||||
next = zap_pte_range(tlb, vma, pmd, addr, next, details);
|
||||
+ next:
|
||||
cond_resched();
|
||||
} while (pmd++, addr = next, addr != end);
|
||||
|
||||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
|
||||
index 47296fe..0a37570 100644
|
||||
--- a/mm/mempolicy.c
|
||||
+++ b/mm/mempolicy.c
|
||||
@@ -512,7 +512,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
||||
do {
|
||||
next = pmd_addr_end(addr, end);
|
||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
||||
- if (pmd_none_or_clear_bad(pmd))
|
||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||
continue;
|
||||
if (check_pte_range(vma, pmd, addr, next, nodes,
|
||||
flags, private))
|
||||
diff --git a/mm/mincore.c b/mm/mincore.c
|
||||
index 636a868..936b4ce 100644
|
||||
--- a/mm/mincore.c
|
||||
+++ b/mm/mincore.c
|
||||
@@ -164,7 +164,7 @@ static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
||||
}
|
||||
/* fall through */
|
||||
}
|
||||
- if (pmd_none_or_clear_bad(pmd))
|
||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||
mincore_unmapped_range(vma, addr, next, vec);
|
||||
else
|
||||
mincore_pte_range(vma, pmd, addr, next, vec);
|
||||
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
|
||||
index 2f5cf10..aa9701e 100644
|
||||
--- a/mm/pagewalk.c
|
||||
+++ b/mm/pagewalk.c
|
||||
@@ -59,7 +59,7 @@ again:
|
||||
continue;
|
||||
|
||||
split_huge_page_pmd(walk->mm, pmd);
|
||||
- if (pmd_none_or_clear_bad(pmd))
|
||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||
goto again;
|
||||
err = walk_pte_range(pmd, addr, next, walk);
|
||||
if (err)
|
||||
diff --git a/mm/swapfile.c b/mm/swapfile.c
|
||||
index d999f09..f31b29d 100644
|
||||
--- a/mm/swapfile.c
|
||||
+++ b/mm/swapfile.c
|
||||
@@ -932,9 +932,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
||||
pmd = pmd_offset(pud, addr);
|
||||
do {
|
||||
next = pmd_addr_end(addr, end);
|
||||
- if (unlikely(pmd_trans_huge(*pmd)))
|
||||
- continue;
|
||||
- if (pmd_none_or_clear_bad(pmd))
|
||||
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||
continue;
|
||||
ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
|
||||
if (ret)
|
||||
|
||||
--
|
||||
To unsubscribe, send a message with 'unsubscribe linux-mm' in
|
||||
the body to majordomo@kvack.org. For more info on Linux MM,
|
||||
see: http://www.linux-mm.org/ .
|
||||
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
|
||||
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
|
2
sources
2
sources
@ -1,2 +1,2 @@
|
||||
7133f5a2086a7d7ef97abac610c094f5 linux-3.3.tar.xz
|
||||
fe8e2b8e93695cb876cc8394b3db83c4 patch-3.3-git1.xz
|
||||
72643cb2a29683201f2049d151564c56 patch-3.3-git2.xz
|
||||
|
@ -1,118 +0,0 @@
|
||||
From 92a9c19a89af2ca219fbb040a0059f414a4b7223 Mon Sep 17 00:00:00 2001
|
||||
From: Kay Sievers <kay.sievers@vrfy.org>
|
||||
Date: Sat, 28 Jan 2012 19:57:46 +0000
|
||||
Subject: [PATCH] udlfb: remove sysfs framebuffer device with USB
|
||||
.disconnect()
|
||||
|
||||
The USB graphics card driver delays the unregistering of the framebuffer
|
||||
device to a workqueue, which breaks the userspace visible remove uevent
|
||||
sequence. Recent userspace tools started to support USB graphics card
|
||||
hotplug out-of-the-box and rely on proper events sent by the kernel.
|
||||
|
||||
The framebuffer device is a direct child of the USB interface which is
|
||||
removed immediately after the USB .disconnect() callback. But the fb device
|
||||
in /sys stays around until its final cleanup, at a time where all the parent
|
||||
devices have been removed already.
|
||||
|
||||
To work around that, we remove the sysfs fb device directly in the USB
|
||||
.disconnect() callback and leave only the cleanup of the internal fb
|
||||
data to the delayed work.
|
||||
|
||||
Before:
|
||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb0 (graphics)
|
||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
||||
remove /2-1.2:1.0/graphics/fb0 (graphics)
|
||||
|
||||
After:
|
||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
||||
add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics)
|
||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics)
|
||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb)
|
||||
remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb)
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Tested-by: Bernie Thompson <bernie@plugable.com>
|
||||
Acked-by: Bernie Thompson <bernie@plugable.com>
|
||||
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
|
||||
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
|
||||
---
|
||||
drivers/video/fbmem.c | 18 +++++++++++++++++-
|
||||
drivers/video/udlfb.c | 2 +-
|
||||
include/linux/fb.h | 1 +
|
||||
3 files changed, 19 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
|
||||
index ac9141b..c6ce416 100644
|
||||
--- a/drivers/video/fbmem.c
|
||||
+++ b/drivers/video/fbmem.c
|
||||
@@ -1665,6 +1665,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
+ unlink_framebuffer(fb_info);
|
||||
if (fb_info->pixmap.addr &&
|
||||
(fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
|
||||
kfree(fb_info->pixmap.addr);
|
||||
@@ -1672,7 +1673,6 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
|
||||
registered_fb[i] = NULL;
|
||||
num_registered_fb--;
|
||||
fb_cleanup_device(fb_info);
|
||||
- device_destroy(fb_class, MKDEV(FB_MAJOR, i));
|
||||
event.info = fb_info;
|
||||
fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event);
|
||||
|
||||
@@ -1681,6 +1681,22 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int unlink_framebuffer(struct fb_info *fb_info)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ i = fb_info->node;
|
||||
+ if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ if (fb_info->dev) {
|
||||
+ device_destroy(fb_class, MKDEV(FB_MAJOR, i));
|
||||
+ fb_info->dev = NULL;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+EXPORT_SYMBOL(unlink_framebuffer);
|
||||
+
|
||||
void remove_conflicting_framebuffers(struct apertures_struct *a,
|
||||
const char *name, bool primary)
|
||||
{
|
||||
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
|
||||
index a197731..a40c05e 100644
|
||||
--- a/drivers/video/udlfb.c
|
||||
+++ b/drivers/video/udlfb.c
|
||||
@@ -1739,7 +1739,7 @@ static void dlfb_usb_disconnect(struct usb_interface *interface)
|
||||
for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++)
|
||||
device_remove_file(info->dev, &fb_device_attrs[i]);
|
||||
device_remove_bin_file(info->dev, &edid_attr);
|
||||
-
|
||||
+ unlink_framebuffer(info);
|
||||
usb_set_intfdata(interface, NULL);
|
||||
|
||||
/* if clients still have us open, will be freed on last close */
|
||||
diff --git a/include/linux/fb.h b/include/linux/fb.h
|
||||
index c18122f..a395b8c 100644
|
||||
--- a/include/linux/fb.h
|
||||
+++ b/include/linux/fb.h
|
||||
@@ -1003,6 +1003,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
|
||||
/* drivers/video/fbmem.c */
|
||||
extern int register_framebuffer(struct fb_info *fb_info);
|
||||
extern int unregister_framebuffer(struct fb_info *fb_info);
|
||||
+extern int unlink_framebuffer(struct fb_info *fb_info);
|
||||
extern void remove_conflicting_framebuffers(struct apertures_struct *a,
|
||||
const char *name, bool primary);
|
||||
extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
|
||||
--
|
||||
1.7.6.5
|
||||
|
Loading…
Reference in New Issue
Block a user