forked from rpms/kernel
		
	Linux v3.3-6972-ge22057c
This commit is contained in:
		
							parent
							
								
									d5a077e500
								
							
						
					
					
						commit
						62c169cbc3
					
				| @ -1,113 +0,0 @@ | ||||
| From davej  Thu Mar 22 16:38:38 2012 | ||||
| Return-Path: linux-kernel-owner@vger.kernel.org | ||||
| X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on | ||||
| 	gelk.kernelslacker.org | ||||
| X-Spam-Level:  | ||||
| X-Spam-Status: No, score=-1.2 required=5.0 tests=KB_DATE_CONTAINS_TAB, | ||||
| 	RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD,UNPARSEABLE_RELAY autolearn=unavailable | ||||
| 	version=3.3.2 | ||||
| Received: from mail.corp.redhat.com [10.5.5.51] | ||||
| 	by gelk.kernelslacker.org with IMAP (fetchmail-6.3.21) | ||||
| 	for <davej@localhost> (single-drop); Thu, 22 Mar 2012 16:38:38 -0400 (EDT) | ||||
| Received: from zmta02.collab.prod.int.phx2.redhat.com (LHLO | ||||
|  zmta02.collab.prod.int.phx2.redhat.com) (10.5.5.32) by | ||||
|  zmail11.collab.prod.int.phx2.redhat.com with LMTP; Thu, 22 Mar 2012 | ||||
|  16:37:12 -0400 (EDT) | ||||
| Received: from localhost (localhost.localdomain [127.0.0.1]) | ||||
| 	by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id BE4B31280F5; | ||||
| 	Thu, 22 Mar 2012 16:37:12 -0400 (EDT) | ||||
| X-Quarantine-ID: <rVyHUDnYJs0w> | ||||
| Received: from zmta02.collab.prod.int.phx2.redhat.com ([127.0.0.1]) | ||||
| 	by localhost (zmta02.collab.prod.int.phx2.redhat.com [127.0.0.1]) (amavisd-new, port 10024) | ||||
| 	with ESMTP id rVyHUDnYJs0w; Thu, 22 Mar 2012 16:37:12 -0400 (EDT) | ||||
| Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24]) | ||||
| 	by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id 34CCC1280EF; | ||||
| 	Thu, 22 Mar 2012 16:37:12 -0400 (EDT) | ||||
| Received: from mx1.redhat.com (ext-mx14.extmail.prod.ext.phx2.redhat.com [10.5.110.19]) | ||||
| 	by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id q2MKbBbO012811; | ||||
| 	Thu, 22 Mar 2012 16:37:11 -0400 | ||||
| Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) | ||||
| 	by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id q2MIJPCS018091; | ||||
| 	Thu, 22 Mar 2012 16:37:10 -0400 | ||||
| Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand | ||||
| 	id S1759738Ab2CVUhD (ORCPT <rfc822;agordeev@redhat.com> + 54 others); | ||||
| 	Thu, 22 Mar 2012 16:37:03 -0400 | ||||
| Received: from zeniv.linux.org.uk ([195.92.253.2]:35901 "EHLO | ||||
| 	ZenIV.linux.org.uk" rhost-flags-OK-OK-OK-OK) by vger.kernel.org | ||||
| 	with ESMTP id S1758619Ab2CVUg7 (ORCPT | ||||
| 	<rfc822;linux-kernel@vger.kernel.org>); | ||||
| 	Thu, 22 Mar 2012 16:36:59 -0400 | ||||
| Received: from viro by ZenIV.linux.org.uk with local (Exim 4.76 #1 (Red Hat Linux)) | ||||
| 	id 1SAokk-0008Fi-MR; Thu, 22 Mar 2012 20:36:58 +0000 | ||||
| Date:	Thu, 22 Mar 2012 20:36:58 +0000 | ||||
| From: Al Viro <viro@ZenIV.linux.org.uk> | ||||
| To: Linus Torvalds <torvalds@linux-foundation.org> | ||||
| Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xensource.com, | ||||
|         Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||||
| Subject: Re: Regression introduced by | ||||
|  bfcfaa77bdf0f775263e906015982a608df01c76 (vfs: use 'unsigned long' accesses | ||||
|  for dcache name comparison and hashing) | ||||
| Message-ID: <20120322203658.GC6589@ZenIV.linux.org.uk> | ||||
| References: <20120322183845.GA17264@phenom.dumpdata.com> | ||||
|  <20120322200918.GZ6589@ZenIV.linux.org.uk> | ||||
|  <20120322202445.GB6589@ZenIV.linux.org.uk> | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=us-ascii | ||||
| Content-Disposition: inline | ||||
| In-Reply-To: <20120322202445.GB6589@ZenIV.linux.org.uk> | ||||
| User-Agent: Mutt/1.5.21 (2010-09-15) | ||||
| Sender: linux-kernel-owner@vger.kernel.org | ||||
| Precedence: bulk | ||||
| List-ID: <linux-kernel.vger.kernel.org> | ||||
| X-Mailing-List:	linux-kernel@vger.kernel.org | ||||
| X-RedHat-Spam-Score: -5.01  (RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD) | ||||
| X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24 | ||||
| X-Scanned-By: MIMEDefang 2.68 on 10.5.110.19 | ||||
| Status: RO | ||||
| Content-Length: 1440 | ||||
| Lines: 43 | ||||
| 
 | ||||
| On Thu, Mar 22, 2012 at 08:24:45PM +0000, Al Viro wrote: | ||||
| > 
 | ||||
| > OK, full_name_hash()/hash_name() definitely have a mismatch and it's on the
 | ||||
| > names of length 8*n: trivial experiment shows that we have
 | ||||
| > name hash_name full_name_hash
 | ||||
| > a 61 61
 | ||||
| > ab 6261 6261
 | ||||
| > abc 636261 636261
 | ||||
| > abcd 64636261 64636261
 | ||||
| > abcdabc 64c6c4c2 64c6c4c2
 | ||||
| > abcdabcd efcead5 c8c6c4c2
 | ||||
| > abcdabcd9 efceb0e efceb0e
 | ||||
| > 
 | ||||
| > Linus, which way do you prefer to shift it?  Should hash_name() change to
 | ||||
| > match full_name_hash() or should it be the other way round?
 | ||||
| > 
 | ||||
| > What happens is that you get multiplication by 9 and adding 0 in the former,
 | ||||
| > after having added the last full word.  In the latter we add the last full
 | ||||
| > word, see that there's nothing left and bugger off.
 | ||||
| 
 | ||||
| Guys, could you check if this fixes it? | ||||
| 
 | ||||
| diff --git a/fs/namei.c b/fs/namei.c
 | ||||
| index 13e6a1f..7451d6f8 100644
 | ||||
| --- a/fs/namei.c
 | ||||
| +++ b/fs/namei.c
 | ||||
| @@ -1439,10 +1439,10 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 | ||||
|   | ||||
|  	for (;;) { | ||||
|  		a = *(unsigned long *)name; | ||||
| -		hash *= 9;
 | ||||
|  		if (len < sizeof(unsigned long)) | ||||
|  			break; | ||||
|  		hash += a; | ||||
| +		hash *= 9;
 | ||||
|  		name += sizeof(unsigned long); | ||||
|  		len -= sizeof(unsigned long); | ||||
|  		if (!len) | ||||
| --
 | ||||
| To unsubscribe from this list: send the line "unsubscribe linux-kernel" in | ||||
| the body of a message to majordomo@vger.kernel.org | ||||
| More majordomo info at  http://vger.kernel.org/majordomo-info.html | ||||
| Please read the FAQ at  http://www.tux.org/lkml/ | ||||
| 
 | ||||
							
								
								
									
										36
									
								
								kernel.spec
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								kernel.spec
									
									
									
									
									
								
							| @ -62,7 +62,7 @@ Summary: The Linux kernel | ||||
| # For non-released -rc kernels, this will be appended after the rcX and | ||||
| # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3" | ||||
| # | ||||
| %global baserelease 2 | ||||
| %global baserelease 1 | ||||
| %global fedora_build %{baserelease} | ||||
| 
 | ||||
| # base_sublevel is the kernel version we're starting with and patching | ||||
| @ -95,7 +95,7 @@ Summary: The Linux kernel | ||||
| # The rc snapshot level | ||||
| %define rcrev 0 | ||||
| # The git snapshot level | ||||
| %define gitrev 1 | ||||
| %define gitrev 2 | ||||
| # Set rpm version accordingly | ||||
| %define rpmversion 3.%{upstream_sublevel}.0 | ||||
| %endif | ||||
| @ -653,8 +653,6 @@ Patch100: taint-vbox.patch | ||||
| Patch160: linux-2.6-32bit-mmap-exec-randomization.patch | ||||
| Patch161: linux-2.6-i386-nx-emulation.patch | ||||
| 
 | ||||
| Patch383: linux-2.6-defaults-aspm.patch | ||||
| 
 | ||||
| Patch390: linux-2.6-defaults-acpi-video.patch | ||||
| Patch391: linux-2.6-acpi-video-dos.patch | ||||
| Patch394: linux-2.6-acpi-debug-infinite-loop.patch | ||||
| @ -682,7 +680,6 @@ Patch900: modsign-20111207.patch | ||||
| 
 | ||||
| # virt + ksm patches | ||||
| Patch1555: fix_xen_guest_on_old_EC2.patch | ||||
| Patch1556: linux-3.3-virtio-scsi.patch | ||||
| 
 | ||||
| # DRM | ||||
| #atch1700: drm-edid-try-harder-to-fix-up-broken-headers.patch | ||||
| @ -708,9 +705,6 @@ Patch2901: linux-2.6-v4l-dvb-experimental.patch | ||||
| Patch4000: ext4-fix-resize-when-resizing-within-single-group.patch | ||||
| 
 | ||||
| # NFSv4 | ||||
| Patch1102: linux-3.3-newidmapper-01.patch | ||||
| Patch1103: linux-3.3-newidmapper-02.patch | ||||
| Patch1104: linux-3.3-newidmapper-03.patch | ||||
| 
 | ||||
| # patches headed upstream | ||||
| Patch12016: disable-i8042-check-on-apple-mac.patch | ||||
| @ -734,8 +728,6 @@ Patch21010: highbank-export-clock-functions.patch | ||||
| 
 | ||||
| Patch21070: ext4-Support-check-none-nocheck-mount-options.patch | ||||
| 
 | ||||
| Patch21092: udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch | ||||
| 
 | ||||
| Patch21094: power-x86-destdir.patch | ||||
| 
 | ||||
| #rhbz 788260 | ||||
| @ -744,7 +736,6 @@ Patch21233: jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch | ||||
| #rhbz 754518 | ||||
| Patch21235: scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch | ||||
| 
 | ||||
| Patch21250: mcelog-rcu-splat.patch | ||||
| Patch21260: x86-Avoid-invoking-RCU-when-CPU-is-idle.patch | ||||
| 
 | ||||
| #rhbz 727865 730007 | ||||
| @ -753,9 +744,6 @@ Patch21300: ACPICA-Fix-regression-in-FADT-revision-checks.patch | ||||
| #rhbz 728478 | ||||
| Patch21302: sony-laptop-Enable-keyboard-backlight-by-default.patch | ||||
| 
 | ||||
| #rhbz 803809 CVE-2012-1179 | ||||
| Patch21304: mm-thp-fix-pmd_bad-triggering.patch | ||||
| 
 | ||||
| #rhbz 804007 | ||||
| Patch21305: mac80211-fix-possible-tid_rx-reorder_timer-use-after-free.patch | ||||
| 
 | ||||
| @ -766,8 +754,6 @@ Patch21400: unhandled-irqs-switch-to-polling.patch | ||||
| 
 | ||||
| Patch22000: weird-root-dentry-name-debug.patch | ||||
| 
 | ||||
| Patch23000: fix-dentry-hash.patch | ||||
| 
 | ||||
| %endif | ||||
| 
 | ||||
| BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root | ||||
| @ -1344,9 +1330,6 @@ ApplyPatch ext4-fix-resize-when-resizing-within-single-group.patch | ||||
| # eCryptfs | ||||
| 
 | ||||
| # NFSv4 | ||||
| ApplyPatch linux-3.3-newidmapper-01.patch | ||||
| ApplyPatch linux-3.3-newidmapper-02.patch | ||||
| ApplyPatch linux-3.3-newidmapper-03.patch | ||||
| 
 | ||||
| # USB | ||||
| 
 | ||||
| @ -1362,8 +1345,6 @@ ApplyPatch acpi-sony-nonvs-blacklist.patch | ||||
| # | ||||
| # PCI | ||||
| # | ||||
| # enable ASPM by default on hardware we expect to work | ||||
| ApplyPatch linux-2.6-defaults-aspm.patch | ||||
| 
 | ||||
| # | ||||
| # SCSI Bits. | ||||
| @ -1433,7 +1414,6 @@ ApplyOptionalPatch linux-2.6-v4l-dvb-experimental.patch | ||||
| 
 | ||||
| # Patches headed upstream | ||||
| ApplyPatch disable-i8042-check-on-apple-mac.patch | ||||
| ApplyPatch linux-3.3-virtio-scsi.patch | ||||
| 
 | ||||
| # rhbz#605888 | ||||
| ApplyPatch dmar-disable-when-ricoh-multifunction.patch | ||||
| @ -1447,8 +1427,6 @@ ApplyPatch lis3-improve-handling-of-null-rate.patch | ||||
| 
 | ||||
| ApplyPatch ext4-Support-check-none-nocheck-mount-options.patch | ||||
| 
 | ||||
| ApplyPatch udlfb-remove-sysfs-framebuffer-device-with-USB-disconnect.patch | ||||
| 
 | ||||
| ApplyPatch power-x86-destdir.patch | ||||
| 
 | ||||
| #rhbz 788269 | ||||
| @ -1457,8 +1435,6 @@ ApplyPatch jbd2-clear-BH_Delay-and-BH_Unwritten-in-journal_unmap_buf.patch | ||||
| #rhbz 754518 | ||||
| ApplyPatch scsi-sd_revalidate_disk-prevent-NULL-ptr-deref.patch | ||||
| 
 | ||||
| ApplyPatch mcelog-rcu-splat.patch | ||||
| 
 | ||||
| #rhbz 727865 730007 | ||||
| ApplyPatch ACPICA-Fix-regression-in-FADT-revision-checks.patch | ||||
| 
 | ||||
| @ -1475,11 +1451,6 @@ ApplyPatch unhandled-irqs-switch-to-polling.patch | ||||
| 
 | ||||
| ApplyPatch weird-root-dentry-name-debug.patch | ||||
| 
 | ||||
| ApplyPatch fix-dentry-hash.patch | ||||
| 
 | ||||
| #rhbz 803809 CVE-2012-1179 | ||||
| ApplyPatch mm-thp-fix-pmd_bad-triggering.patch | ||||
| 
 | ||||
| #Highbank clock functions | ||||
| ApplyPatch highbank-export-clock-functions.patch  | ||||
| 
 | ||||
| @ -2336,6 +2307,9 @@ fi | ||||
| #                 ||----w | | ||||
| #                 ||     || | ||||
| %changelog | ||||
| * Mon Mar 26 2012 Justin M. Forbes <jforbes@redhat.com> - 3.4.0-0.rc0.git2.1 | ||||
| - Linux v3.3-6972-ge22057c | ||||
| 
 | ||||
| * Thu Mar 22 2012 Dave Jones <davej@redhat.com> 3.4.0-0.rc0.git1.2 | ||||
| - Fix occasional EBADMSG from signed modules. (rhbz 804345) | ||||
| 
 | ||||
|  | ||||
| @ -1,12 +0,0 @@ | ||||
| diff -up linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg linux-2.6.30.noarch/drivers/pci/pcie/aspm.c
 | ||||
| --- linux-2.6.30.noarch/drivers/pci/pcie/aspm.c.mjg	2009-07-16 22:01:11.000000000 +0100
 | ||||
| +++ linux-2.6.30.noarch/drivers/pci/pcie/aspm.c	2009-07-16 22:01:30.000000000 +0100
 | ||||
| @@ -65,7 +65,7 @@ static LIST_HEAD(link_list);
 | ||||
|  #define POLICY_DEFAULT 0	/* BIOS default setting */ | ||||
|  #define POLICY_PERFORMANCE 1	/* high performance */ | ||||
|  #define POLICY_POWERSAVE 2	/* high power saving */ | ||||
| -static int aspm_policy;
 | ||||
| +static int aspm_policy = POLICY_POWERSAVE;
 | ||||
|  static const char *policy_str[] = { | ||||
|  	[POLICY_DEFAULT] = "default", | ||||
|  	[POLICY_PERFORMANCE] = "performance", | ||||
| @ -1,217 +0,0 @@ | ||||
| commit e6499c6f4b5f56a16f8b8ef60529c1da28b13aea | ||||
| Author: Bryan Schumaker <bjschuma@netapp.com> | ||||
| Date:   Thu Jan 26 16:54:23 2012 -0500 | ||||
| 
 | ||||
|     NFS: Fall back on old idmapper if request_key() fails | ||||
|      | ||||
|     This patch removes the CONFIG_NFS_USE_NEW_IDMAPPER compile option. | ||||
|     First, the idmapper will attempt to map the id using /sbin/request-key | ||||
|     and nfsidmap.  If this fails (if /etc/request-key.conf is not configured | ||||
|     properly) then the idmapper will call the legacy code to perform the | ||||
|     mapping.  I left a comment stating where the legacy code begins to make | ||||
|     it easier for somebody to remove in the future. | ||||
|      | ||||
|     Signed-off-by: Bryan Schumaker <bjschuma@netapp.com> | ||||
|     Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> | ||||
| 
 | ||||
| diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c
 | ||||
| --- linux-3.2.noarch/fs/nfs/idmap.c.orig	2012-01-27 10:07:07.209851446 -0500
 | ||||
| +++ linux-3.2.noarch/fs/nfs/idmap.c	2012-01-27 10:15:42.914563082 -0500
 | ||||
| @@ -142,8 +142,6 @@ static int nfs_map_numeric_to_string(__u
 | ||||
|  	return snprintf(buf, buflen, "%u", id); | ||||
|  } | ||||
|   | ||||
| -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
 | ||||
| -
 | ||||
|  #include <linux/cred.h> | ||||
|  #include <linux/sunrpc/sched.h> | ||||
|  #include <linux/nfs4.h> | ||||
| @@ -328,43 +326,7 @@ static int nfs_idmap_lookup_id(const cha
 | ||||
|  	return ret; | ||||
|  } | ||||
|   | ||||
| -int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
 | ||||
| -{
 | ||||
| -	if (nfs_map_string_to_numeric(name, namelen, uid))
 | ||||
| -		return 0;
 | ||||
| -	return nfs_idmap_lookup_id(name, namelen, "uid", uid);
 | ||||
| -}
 | ||||
| -
 | ||||
| -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
 | ||||
| -{
 | ||||
| -	if (nfs_map_string_to_numeric(name, namelen, gid))
 | ||||
| -		return 0;
 | ||||
| -	return nfs_idmap_lookup_id(name, namelen, "gid", gid);
 | ||||
| -}
 | ||||
| -
 | ||||
| -int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
 | ||||
| -{
 | ||||
| -	int ret = -EINVAL;
 | ||||
| -
 | ||||
| -	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
 | ||||
| -		ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
 | ||||
| -	if (ret < 0)
 | ||||
| -		ret = nfs_map_numeric_to_string(uid, buf, buflen);
 | ||||
| -	return ret;
 | ||||
| -}
 | ||||
| -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
 | ||||
| -{
 | ||||
| -	int ret = -EINVAL;
 | ||||
| -
 | ||||
| -	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
 | ||||
| -		ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
 | ||||
| -	if (ret < 0)
 | ||||
| -		ret = nfs_map_numeric_to_string(gid, buf, buflen);
 | ||||
| -	return ret;
 | ||||
| -}
 | ||||
| -
 | ||||
| -#else  /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
 | ||||
| -
 | ||||
| +/* idmap classic begins here */
 | ||||
|  #include <linux/module.h> | ||||
|  #include <linux/mutex.h> | ||||
|  #include <linux/init.h> | ||||
| @@ -796,19 +758,27 @@ static unsigned int fnvhash32(const void
 | ||||
|  int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) | ||||
|  { | ||||
|  	struct idmap *idmap = server->nfs_client->cl_idmap; | ||||
| +	int ret = -EINVAL;
 | ||||
|   | ||||
|  	if (nfs_map_string_to_numeric(name, namelen, uid)) | ||||
|  		return 0; | ||||
| -	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
 | ||||
| +	ret = nfs_idmap_lookup_id(name, namelen, "uid", uid);
 | ||||
| +	if (ret < 0)
 | ||||
| +		ret = nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
 | ||||
| +	return ret;
 | ||||
|  } | ||||
|   | ||||
| -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
 | ||||
| +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
 | ||||
|  { | ||||
|  	struct idmap *idmap = server->nfs_client->cl_idmap; | ||||
| +	int ret = -EINVAL;
 | ||||
|   | ||||
| -	if (nfs_map_string_to_numeric(name, namelen, uid))
 | ||||
| +	if (nfs_map_string_to_numeric(name, namelen, gid))
 | ||||
|  		return 0; | ||||
| -	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
 | ||||
| +	ret = nfs_idmap_lookup_id(name, namelen, "gid", gid);
 | ||||
| +	if (ret < 0)
 | ||||
| +		ret = nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, gid);
 | ||||
| +	return ret;
 | ||||
|  } | ||||
|   | ||||
|  int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) | ||||
| @@ -816,22 +786,26 @@ int nfs_map_uid_to_name(const struct nfs
 | ||||
|  	struct idmap *idmap = server->nfs_client->cl_idmap; | ||||
|  	int ret = -EINVAL; | ||||
|   | ||||
| -	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
 | ||||
| -		ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 | ||||
| +	if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
 | ||||
| +		ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
 | ||||
| +		if (ret < 0)
 | ||||
| +			ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 | ||||
| +	}
 | ||||
|  	if (ret < 0) | ||||
|  		ret = nfs_map_numeric_to_string(uid, buf, buflen); | ||||
|  	return ret; | ||||
|  } | ||||
| -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
 | ||||
| +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
 | ||||
|  { | ||||
|  	struct idmap *idmap = server->nfs_client->cl_idmap; | ||||
|  	int ret = -EINVAL; | ||||
|   | ||||
| -	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
 | ||||
| -		ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
 | ||||
| +	if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
 | ||||
| +		ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
 | ||||
| +		if (ret < 0)
 | ||||
| +			ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, gid, buf);
 | ||||
| +	}
 | ||||
|  	if (ret < 0) | ||||
| -		ret = nfs_map_numeric_to_string(uid, buf, buflen);
 | ||||
| +		ret = nfs_map_numeric_to_string(gid, buf, buflen);
 | ||||
|  	return ret; | ||||
|  } | ||||
| -
 | ||||
| -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 | ||||
| diff -up linux-3.2.noarch/fs/nfs/Kconfig.orig linux-3.2.noarch/fs/nfs/Kconfig
 | ||||
| --- linux-3.2.noarch/fs/nfs/Kconfig.orig	2012-01-04 18:55:44.000000000 -0500
 | ||||
| +++ linux-3.2.noarch/fs/nfs/Kconfig	2012-01-27 10:15:42.913562572 -0500
 | ||||
| @@ -132,14 +132,3 @@ config NFS_USE_KERNEL_DNS
 | ||||
|  	select DNS_RESOLVER | ||||
|  	select KEYS | ||||
|  	default y | ||||
| -
 | ||||
| -config NFS_USE_NEW_IDMAPPER
 | ||||
| -	bool "Use the new idmapper upcall routine"
 | ||||
| -	depends on NFS_V4 && KEYS
 | ||||
| -	help
 | ||||
| -	  Say Y here if you want NFS to use the new idmapper upcall functions.
 | ||||
| -	  You will need /sbin/request-key (usually provided by the keyutils
 | ||||
| -	  package).  For details, read
 | ||||
| -	  <file:Documentation/filesystems/nfs/idmapper.txt>.
 | ||||
| -
 | ||||
| -	  If you are unsure, say N.
 | ||||
| diff -up linux-3.2.noarch/fs/nfs/sysctl.c.orig linux-3.2.noarch/fs/nfs/sysctl.c
 | ||||
| --- linux-3.2.noarch/fs/nfs/sysctl.c.orig	2012-01-04 18:55:44.000000000 -0500
 | ||||
| +++ linux-3.2.noarch/fs/nfs/sysctl.c	2012-01-27 10:15:42.914563082 -0500
 | ||||
| @@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = {
 | ||||
|  		.extra1 = (int *)&nfs_set_port_min, | ||||
|  		.extra2 = (int *)&nfs_set_port_max, | ||||
|  	}, | ||||
| -#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
 | ||||
|  	{ | ||||
|  		.procname = "idmap_cache_timeout", | ||||
|  		.data = &nfs_idmap_cache_timeout, | ||||
| @@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = {
 | ||||
|  		.mode = 0644, | ||||
|  		.proc_handler = proc_dointvec_jiffies, | ||||
|  	}, | ||||
| -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 | ||||
|  #endif | ||||
|  	{ | ||||
|  		.procname	= "nfs_mountpoint_timeout", | ||||
| diff -up linux-3.2.noarch/include/linux/nfs_idmap.h.orig linux-3.2.noarch/include/linux/nfs_idmap.h
 | ||||
| --- linux-3.2.noarch/include/linux/nfs_idmap.h.orig	2012-01-27 10:06:46.783643915 -0500
 | ||||
| +++ linux-3.2.noarch/include/linux/nfs_idmap.h	2012-01-27 10:15:42.915563594 -0500
 | ||||
| @@ -69,36 +69,11 @@ struct nfs_server;
 | ||||
|  struct nfs_fattr; | ||||
|  struct nfs4_string; | ||||
|   | ||||
| -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
 | ||||
| -
 | ||||
|  int nfs_idmap_init(void); | ||||
|  void nfs_idmap_quit(void); | ||||
| -
 | ||||
| -static inline int nfs_idmap_new(struct nfs_client *clp)
 | ||||
| -{
 | ||||
| -	return 0;
 | ||||
| -}
 | ||||
| -
 | ||||
| -static inline void nfs_idmap_delete(struct nfs_client *clp)
 | ||||
| -{
 | ||||
| -}
 | ||||
| -
 | ||||
| -#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */
 | ||||
| -
 | ||||
| -static inline int nfs_idmap_init(void)
 | ||||
| -{
 | ||||
| -	return 0;
 | ||||
| -}
 | ||||
| -
 | ||||
| -static inline void nfs_idmap_quit(void)
 | ||||
| -{
 | ||||
| -}
 | ||||
| -
 | ||||
|  int nfs_idmap_new(struct nfs_client *); | ||||
|  void nfs_idmap_delete(struct nfs_client *); | ||||
|   | ||||
| -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 | ||||
| -
 | ||||
|  void nfs_fattr_init_names(struct nfs_fattr *fattr, | ||||
|  		struct nfs4_string *owner_name, | ||||
|  		struct nfs4_string *group_name); | ||||
| @ -1,97 +0,0 @@ | ||||
| commit 3cd0f37a2cc9e4d6188df10041a2441eaa41d991 | ||||
| Author: Bryan Schumaker <bjschuma@netapp.com> | ||||
| Date:   Thu Jan 26 16:54:24 2012 -0500 | ||||
| 
 | ||||
|     NFS: Keep idmapper include files in one place | ||||
|      | ||||
|     Signed-off-by: Bryan Schumaker <bjschuma@netapp.com> | ||||
|     Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> | ||||
| 
 | ||||
| diff -up linux-3.2.noarch/fs/nfs/idmap.c.orig linux-3.2.noarch/fs/nfs/idmap.c
 | ||||
| --- linux-3.2.noarch/fs/nfs/idmap.c.orig	2012-01-27 10:15:42.914563082 -0500
 | ||||
| +++ linux-3.2.noarch/fs/nfs/idmap.c	2012-01-27 10:19:22.711401559 -0500
 | ||||
| @@ -39,6 +39,36 @@
 | ||||
|  #include <linux/slab.h> | ||||
|  #include <linux/nfs_idmap.h> | ||||
|  #include <linux/nfs_fs.h> | ||||
| +#include <linux/cred.h>
 | ||||
| +#include <linux/sunrpc/sched.h>
 | ||||
| +#include <linux/nfs4.h>
 | ||||
| +#include <linux/nfs_fs_sb.h>
 | ||||
| +#include <linux/keyctl.h>
 | ||||
| +#include <linux/key-type.h>
 | ||||
| +#include <linux/rcupdate.h>
 | ||||
| +#include <linux/err.h>
 | ||||
| +#include <keys/user-type.h>
 | ||||
| +
 | ||||
| +/* include files needed by legacy idmapper */
 | ||||
| +#include <linux/module.h>
 | ||||
| +#include <linux/mutex.h>
 | ||||
| +#include <linux/init.h>
 | ||||
| +#include <linux/socket.h>
 | ||||
| +#include <linux/in.h>
 | ||||
| +#include <linux/sched.h>
 | ||||
| +#include <linux/sunrpc/clnt.h>
 | ||||
| +#include <linux/workqueue.h>
 | ||||
| +#include <linux/sunrpc/rpc_pipe_fs.h>
 | ||||
| +#include <linux/nfs_fs.h>
 | ||||
| +#include "nfs4_fs.h"
 | ||||
| +
 | ||||
| +#define NFS_UINT_MAXLEN 11
 | ||||
| +#define IDMAP_HASH_SZ          128
 | ||||
| +
 | ||||
| +/* Default cache timeout is 10 minutes */
 | ||||
| +unsigned int nfs_idmap_cache_timeout = 600 * HZ;
 | ||||
| +const struct cred *id_resolver_cache;
 | ||||
| +
 | ||||
|   | ||||
|  /** | ||||
|   * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields | ||||
| @@ -142,21 +172,6 @@ static int nfs_map_numeric_to_string(__u
 | ||||
|  	return snprintf(buf, buflen, "%u", id); | ||||
|  } | ||||
|   | ||||
| -#include <linux/cred.h>
 | ||||
| -#include <linux/sunrpc/sched.h>
 | ||||
| -#include <linux/nfs4.h>
 | ||||
| -#include <linux/nfs_fs_sb.h>
 | ||||
| -#include <linux/keyctl.h>
 | ||||
| -#include <linux/key-type.h>
 | ||||
| -#include <linux/rcupdate.h>
 | ||||
| -#include <linux/err.h>
 | ||||
| -
 | ||||
| -#include <keys/user-type.h>
 | ||||
| -
 | ||||
| -#define NFS_UINT_MAXLEN 11
 | ||||
| -
 | ||||
| -const struct cred *id_resolver_cache;
 | ||||
| -
 | ||||
|  struct key_type key_type_id_resolver = { | ||||
|  	.name		= "id_resolver", | ||||
|  	.instantiate	= user_instantiate, | ||||
| @@ -327,25 +342,6 @@ static int nfs_idmap_lookup_id(const cha
 | ||||
|  } | ||||
|   | ||||
|  /* idmap classic begins here */ | ||||
| -#include <linux/module.h>
 | ||||
| -#include <linux/mutex.h>
 | ||||
| -#include <linux/init.h>
 | ||||
| -#include <linux/socket.h>
 | ||||
| -#include <linux/in.h>
 | ||||
| -#include <linux/sched.h>
 | ||||
| -#include <linux/sunrpc/clnt.h>
 | ||||
| -#include <linux/workqueue.h>
 | ||||
| -#include <linux/sunrpc/rpc_pipe_fs.h>
 | ||||
| -
 | ||||
| -#include <linux/nfs_fs.h>
 | ||||
| -
 | ||||
| -#include "nfs4_fs.h"
 | ||||
| -
 | ||||
| -#define IDMAP_HASH_SZ          128
 | ||||
| -
 | ||||
| -/* Default cache timeout is 10 minutes */
 | ||||
| -unsigned int nfs_idmap_cache_timeout = 600 * HZ;
 | ||||
| -
 | ||||
|  static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) | ||||
|  { | ||||
|  	char *endp; | ||||
| @ -1,40 +0,0 @@ | ||||
| commit a602bea3e7ccc5ce3da61d2c18245c4058983926 | ||||
| Author: Bryan Schumaker <bjschuma@netapp.com> | ||||
| Date:   Thu Jan 26 16:54:25 2012 -0500 | ||||
| 
 | ||||
|     NFS: Update idmapper documentation | ||||
|      | ||||
|     Signed-off-by: Bryan Schumaker <bjschuma@netapp.com> | ||||
|     Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> | ||||
| 
 | ||||
| diff -up linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt
 | ||||
| --- linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt.orig	2012-01-04 18:55:44.000000000 -0500
 | ||||
| +++ linux-3.2.noarch/Documentation/filesystems/nfs/idmapper.txt	2012-01-27 10:19:55.406740364 -0500
 | ||||
| @@ -4,13 +4,21 @@ ID Mapper
 | ||||
|  ========= | ||||
|  Id mapper is used by NFS to translate user and group ids into names, and to | ||||
|  translate user and group names into ids.  Part of this translation involves | ||||
| -performing an upcall to userspace to request the information.  Id mapper will
 | ||||
| -user request-key to perform this upcall and cache the result.  The program
 | ||||
| -/usr/sbin/nfs.idmap should be called by request-key, and will perform the
 | ||||
| -translation and initialize a key with the resulting information.
 | ||||
| +performing an upcall to userspace to request the information.  There are two
 | ||||
| +ways NFS could obtain this information: placing a call to /sbin/request-key
 | ||||
| +or by placing a call to the rpc.idmap daemon.
 | ||||
| +
 | ||||
| +NFS will attempt to call /sbin/request-key first.  If this succeeds, the
 | ||||
| +result will be cached using the generic request-key cache.  This call should
 | ||||
| +only fail if /etc/request-key.conf is not configured for the id_resolver key
 | ||||
| +type, see the "Configuring" section below if you wish to use the request-key
 | ||||
| +method.
 | ||||
| +
 | ||||
| +If the call to /sbin/request-key fails (if /etc/request-key.conf is not
 | ||||
| +configured with the id_resolver key type), then the idmapper will ask the
 | ||||
| +legacy rpc.idmap daemon for the id mapping.  This result will be stored
 | ||||
| +in a custom NFS idmap cache.
 | ||||
|   | ||||
| - NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this
 | ||||
| - feature.
 | ||||
|   | ||||
|  =========== | ||||
|  Configuring | ||||
| @ -1,993 +0,0 @@ | ||||
| From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001 | ||||
| From: Paolo Bonzini <pbonzini@redhat.com> | ||||
| Date: Fri, 20 Jan 2012 17:27:20 +0100 | ||||
| Cc: <linux-scsi@vger.kernel.org> | ||||
| Cc: Rusty Russell <rusty@rustcorp.com.au> | ||||
| Cc: kvm@vger.kernel.org | ||||
| Cc: Pekka Enberg <penberg@kernel.org> | ||||
| Cc: Michael S. Tsirkin <mst@redhat.com> | ||||
| Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu> | ||||
| Subject: [PATCH v5 0/3] virtio-scsi driver | ||||
| 
 | ||||
| This is the first implementation of the virtio-scsi driver, a virtual | ||||
| HBA that will be supported by KVM.  It implements a subset of the spec, | ||||
| in particular it does not implement asynchronous notifications for either | ||||
| LUN reset/removal/addition or CD-ROM media events, but it is already | ||||
| functional and usable. | ||||
| 
 | ||||
| Other matching bits: | ||||
| 
 | ||||
| - spec at http://people.redhat.com/pbonzini/virtio-spec.pdf
 | ||||
| 
 | ||||
| - QEMU implementation at git://github.com/bonzini/qemu.git,
 | ||||
|   branch virtio-scsi | ||||
| 
 | ||||
| Please review.  Getting this in 3.3 is starting to look like wishful thinking, | ||||
| but the possibility of regressions is obviously zero so I'm still dreaming. | ||||
| Otherwise, that would be 3.4. | ||||
| 
 | ||||
| Paolo Bonzini (3): | ||||
|   virtio-scsi: first version | ||||
|   virtio-scsi: add error handling | ||||
|   virtio-scsi: add power management support | ||||
| 
 | ||||
| v4->v5: change virtio id from 7 to 8 | ||||
| 
 | ||||
| v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN; | ||||
|     fixed 32-bit compilation; added power management support; | ||||
|     adjusted calls to virtqueue_add_buf | ||||
| 
 | ||||
|  drivers/scsi/Kconfig        |    8 + | ||||
|  drivers/scsi/Makefile       |    1 + | ||||
|  drivers/scsi/virtio_scsi.c  |  594 +++++++++++++++++++++++++++++++++++++++++++ | ||||
|  include/linux/virtio_ids.h  |    1 + | ||||
|  include/linux/virtio_scsi.h |  114 +++++++++ | ||||
|  5 files changed, 718 insertions(+), 0 deletions(-) | ||||
|  create mode 100644 drivers/scsi/virtio_scsi.c | ||||
|  create mode 100644 include/linux/virtio_scsi.h | ||||
| 
 | ||||
| From 84ad93b7215e18ab1755a625ede0fb00175e79bb Mon Sep 17 00:00:00 2001 | ||||
| From: Paolo Bonzini <pbonzini@redhat.com> | ||||
| Date: Tue, 29 Nov 2011 16:31:09 +0100 | ||||
| Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org> | ||||
| Subject: [PATCH v5 1/3] virtio-scsi: first version | ||||
| 
 | ||||
| The virtio-scsi HBA is the basis of an alternative storage stack | ||||
| for QEMU-based virtual machines (including KVM).  Compared to | ||||
| virtio-blk it is more scalable, because it supports many LUNs | ||||
| on a single PCI slot), more powerful (it more easily supports | ||||
| passthrough of host devices to the guest) and more easily | ||||
| extensible (new SCSI features implemented by QEMU should not | ||||
| require updating the driver in the guest). | ||||
| 
 | ||||
| Cc: linux-scsi <linux-scsi@vger.kernel.org> | ||||
| Cc: Rusty Russell <rusty@rustcorp.com.au> | ||||
| Cc: Michael S. Tsirkin <mst@redhat.com> | ||||
| Cc: kvm@vger.kernel.org | ||||
| Acked-by: Pekka Enberg <penberg@kernel.org>  | ||||
| Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||||
| ---
 | ||||
| 	v4->v5: change virtio id from 7 to 8 | ||||
| 
 | ||||
| 	v3->v4: renamed VIRTIO_SCSI_S_UNDERRUN to VIRTIO_SCSI_S_OVERRUN; | ||||
| 	fixed 32-bit compilation; adjust call to virtqueue_add_buf | ||||
| 
 | ||||
| 	v2->v3: added mempool, formatting fixes | ||||
| 
 | ||||
| 	v1->v2: use dev_dbg, sdev_printk, scmd_printk | ||||
| 	   - renamed lock to vq_lock | ||||
| 	   - renamed cmd_vq to req_vq (and other similar changes) | ||||
| 	   - fixed missing break in VIRTIO_SCSI_S_OVERRUN | ||||
| 	   - added VIRTIO_SCSI_S_BUSY | ||||
| 	   - removed unused argument from virtscsi_map_cmd | ||||
| 	   - fixed two tabs that had slipped in | ||||
| 	   - moved max_sectors and cmd_per_lun from template to config space | ||||
| 	   - __attribute__((packed)) -> __packed | ||||
| 
 | ||||
|  drivers/scsi/Kconfig        |    8 + | ||||
|  drivers/scsi/Makefile       |    1 + | ||||
|  drivers/scsi/virtio_scsi.c  |  503 +++++++++++++++++++++++++++++++++++++++++++ | ||||
|  include/linux/virtio_ids.h  |    1 + | ||||
|  include/linux/virtio_scsi.h |  114 ++++++++++ | ||||
|  5 files changed, 627 insertions(+), 0 deletions(-) | ||||
|  create mode 100644 drivers/scsi/virtio_scsi.c | ||||
|  create mode 100644 include/linux/virtio_scsi.h | ||||
| 
 | ||||
| diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
 | ||||
| index 16570aa..827ebaf 100644
 | ||||
| --- a/drivers/scsi/Kconfig
 | ||||
| +++ b/drivers/scsi/Kconfig
 | ||||
| @@ -1897,6 +1897,14 @@ config SCSI_BFA_FC
 | ||||
|  	  To compile this driver as a module, choose M here. The module will | ||||
|  	  be called bfa. | ||||
|   | ||||
| +config SCSI_VIRTIO
 | ||||
| +	tristate "virtio-scsi support (EXPERIMENTAL)"
 | ||||
| +	depends on EXPERIMENTAL && VIRTIO
 | ||||
| +	help
 | ||||
| +          This is the virtual HBA driver for virtio.  If the kernel will
 | ||||
| +          be used in a virtual machine, say Y or M.
 | ||||
| +
 | ||||
| +
 | ||||
|  endif # SCSI_LOWLEVEL | ||||
|   | ||||
|  source "drivers/scsi/pcmcia/Kconfig" | ||||
| diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
 | ||||
| index e4c1a69..ad24e06 100644
 | ||||
| --- a/drivers/scsi/Makefile
 | ||||
| +++ b/drivers/scsi/Makefile
 | ||||
| @@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_CXGB4_ISCSI)	+= libiscsi.o libiscsi_tcp.o cxgbi/
 | ||||
|  obj-$(CONFIG_SCSI_BNX2_ISCSI)	+= libiscsi.o bnx2i/ | ||||
|  obj-$(CONFIG_BE2ISCSI)		+= libiscsi.o be2iscsi/ | ||||
|  obj-$(CONFIG_SCSI_PMCRAID)	+= pmcraid.o | ||||
| +obj-$(CONFIG_SCSI_VIRTIO)	+= virtio_scsi.o
 | ||||
|  obj-$(CONFIG_VMWARE_PVSCSI)	+= vmw_pvscsi.o | ||||
|  obj-$(CONFIG_HYPERV_STORAGE)	+= hv_storvsc.o | ||||
|   | ||||
| diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
 | ||||
| new file mode 100644 | ||||
| index 0000000..3f87ae0
 | ||||
| --- /dev/null
 | ||||
| +++ b/drivers/scsi/virtio_scsi.c
 | ||||
| @@ -0,0 +1,503 @@
 | ||||
| +/*
 | ||||
| + * Virtio SCSI HBA driver
 | ||||
| + *
 | ||||
| + * Copyright IBM Corp. 2010
 | ||||
| + * Copyright Red Hat, Inc. 2011
 | ||||
| + *
 | ||||
| + * Authors:
 | ||||
| + *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 | ||||
| + *  Paolo Bonzini   <pbonzini@redhat.com>
 | ||||
| + *
 | ||||
| + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 | ||||
| + * See the COPYING file in the top-level directory.
 | ||||
| + *
 | ||||
| + */
 | ||||
| +
 | ||||
| +#include <linux/module.h>
 | ||||
| +#include <linux/slab.h>
 | ||||
| +#include <linux/mempool.h>
 | ||||
| +#include <linux/virtio.h>
 | ||||
| +#include <linux/virtio_ids.h>
 | ||||
| +#include <linux/virtio_config.h>
 | ||||
| +#include <linux/virtio_scsi.h>
 | ||||
| +#include <scsi/scsi_host.h>
 | ||||
| +#include <scsi/scsi_device.h>
 | ||||
| +#include <scsi/scsi_cmnd.h>
 | ||||
| +
 | ||||
| +#define VIRTIO_SCSI_MEMPOOL_SZ 64
 | ||||
| +
 | ||||
| +/* Command queue element */
 | ||||
| +struct virtio_scsi_cmd {
 | ||||
| +	struct scsi_cmnd *sc;
 | ||||
| +	union {
 | ||||
| +		struct virtio_scsi_cmd_req       cmd;
 | ||||
| +		struct virtio_scsi_ctrl_tmf_req  tmf;
 | ||||
| +		struct virtio_scsi_ctrl_an_req   an;
 | ||||
| +	} req;
 | ||||
| +	union {
 | ||||
| +		struct virtio_scsi_cmd_resp      cmd;
 | ||||
| +		struct virtio_scsi_ctrl_tmf_resp tmf;
 | ||||
| +		struct virtio_scsi_ctrl_an_resp  an;
 | ||||
| +		struct virtio_scsi_event         evt;
 | ||||
| +	} resp;
 | ||||
| +} ____cacheline_aligned_in_smp;
 | ||||
| +
 | ||||
| +/* Driver instance state */
 | ||||
| +struct virtio_scsi {
 | ||||
| +	/* Protects ctrl_vq, req_vq and sg[] */
 | ||||
| +	spinlock_t vq_lock;
 | ||||
| +
 | ||||
| +	struct virtio_device *vdev;
 | ||||
| +	struct virtqueue *ctrl_vq;
 | ||||
| +	struct virtqueue *event_vq;
 | ||||
| +	struct virtqueue *req_vq;
 | ||||
| +
 | ||||
| +	/* For sglist construction when adding commands to the virtqueue.  */
 | ||||
| +	struct scatterlist sg[];
 | ||||
| +};
 | ||||
| +
 | ||||
| +static struct kmem_cache *virtscsi_cmd_cache;
 | ||||
| +static mempool_t *virtscsi_cmd_pool;
 | ||||
| +
 | ||||
| +static inline struct Scsi_Host *virtio_scsi_host(struct virtio_device *vdev)
 | ||||
| +{
 | ||||
| +	return vdev->priv;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
 | ||||
| +{
 | ||||
| +	if (!resid)
 | ||||
| +		return;
 | ||||
| +
 | ||||
| +	if (!scsi_bidi_cmnd(sc)) {
 | ||||
| +		scsi_set_resid(sc, resid);
 | ||||
| +		return;
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	scsi_in(sc)->resid = min(resid, scsi_in(sc)->length);
 | ||||
| +	scsi_out(sc)->resid = resid - scsi_in(sc)->resid;
 | ||||
| +}
 | ||||
| +
 | ||||
| +/**
 | ||||
| + * virtscsi_complete_cmd - finish a scsi_cmd and invoke scsi_done
 | ||||
| + *
 | ||||
| + * Called with vq_lock held.
 | ||||
| + */
 | ||||
| +static void virtscsi_complete_cmd(void *buf)
 | ||||
| +{
 | ||||
| +	struct virtio_scsi_cmd *cmd = buf;
 | ||||
| +	struct scsi_cmnd *sc = cmd->sc;
 | ||||
| +	struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
 | ||||
| +
 | ||||
| +	dev_dbg(&sc->device->sdev_gendev,
 | ||||
| +		"cmd %p response %u status %#02x sense_len %u\n",
 | ||||
| +		sc, resp->response, resp->status, resp->sense_len);
 | ||||
| +
 | ||||
| +	sc->result = resp->status;
 | ||||
| +	virtscsi_compute_resid(sc, resp->resid);
 | ||||
| +	switch (resp->response) {
 | ||||
| +	case VIRTIO_SCSI_S_OK:
 | ||||
| +		set_host_byte(sc, DID_OK);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_OVERRUN:
 | ||||
| +		set_host_byte(sc, DID_ERROR);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_ABORTED:
 | ||||
| +		set_host_byte(sc, DID_ABORT);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_BAD_TARGET:
 | ||||
| +		set_host_byte(sc, DID_BAD_TARGET);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_RESET:
 | ||||
| +		set_host_byte(sc, DID_RESET);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_BUSY:
 | ||||
| +		set_host_byte(sc, DID_BUS_BUSY);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_TRANSPORT_FAILURE:
 | ||||
| +		set_host_byte(sc, DID_TRANSPORT_DISRUPTED);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_TARGET_FAILURE:
 | ||||
| +		set_host_byte(sc, DID_TARGET_FAILURE);
 | ||||
| +		break;
 | ||||
| +	case VIRTIO_SCSI_S_NEXUS_FAILURE:
 | ||||
| +		set_host_byte(sc, DID_NEXUS_FAILURE);
 | ||||
| +		break;
 | ||||
| +	default:
 | ||||
| +		scmd_printk(KERN_WARNING, sc, "Unknown response %d",
 | ||||
| +			    resp->response);
 | ||||
| +		/* fall through */
 | ||||
| +	case VIRTIO_SCSI_S_FAILURE:
 | ||||
| +		set_host_byte(sc, DID_ERROR);
 | ||||
| +		break;
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	WARN_ON(resp->sense_len > VIRTIO_SCSI_SENSE_SIZE);
 | ||||
| +	if (sc->sense_buffer) {
 | ||||
| +		memcpy(sc->sense_buffer, resp->sense,
 | ||||
| +		       min_t(u32, resp->sense_len, VIRTIO_SCSI_SENSE_SIZE));
 | ||||
| +		if (resp->sense_len)
 | ||||
| +			set_driver_byte(sc, DRIVER_SENSE);
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	mempool_free(cmd, virtscsi_cmd_pool);
 | ||||
| +	sc->scsi_done(sc);
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf))
 | ||||
| +{
 | ||||
| +	struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
 | ||||
| +	struct virtio_scsi *vscsi = shost_priv(sh);
 | ||||
| +	void *buf;
 | ||||
| +	unsigned long flags;
 | ||||
| +	unsigned int len;
 | ||||
| +
 | ||||
| +	spin_lock_irqsave(&vscsi->vq_lock, flags);
 | ||||
| +
 | ||||
| +	do {
 | ||||
| +		virtqueue_disable_cb(vq);
 | ||||
| +		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 | ||||
| +			fn(buf);
 | ||||
| +	} while (!virtqueue_enable_cb(vq));
 | ||||
| +
 | ||||
| +	spin_unlock_irqrestore(&vscsi->vq_lock, flags);
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void virtscsi_req_done(struct virtqueue *vq)
 | ||||
| +{
 | ||||
| +	virtscsi_vq_done(vq, virtscsi_complete_cmd);
 | ||||
| +};
 | ||||
| +
 | ||||
| +/* These are still stubs.  */
 | ||||
| +static void virtscsi_complete_free(void *buf)
 | ||||
| +{
 | ||||
| +	struct virtio_scsi_cmd *cmd = buf;
 | ||||
| +
 | ||||
| +	mempool_free(cmd, virtscsi_cmd_pool);
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void virtscsi_ctrl_done(struct virtqueue *vq)
 | ||||
| +{
 | ||||
| +	virtscsi_vq_done(vq, virtscsi_complete_free);
 | ||||
| +};
 | ||||
| +
 | ||||
| +static void virtscsi_event_done(struct virtqueue *vq)
 | ||||
| +{
 | ||||
| +	virtscsi_vq_done(vq, virtscsi_complete_free);
 | ||||
| +};
 | ||||
| +
 | ||||
| +static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx,
 | ||||
| +			     struct scsi_data_buffer *sdb)
 | ||||
| +{
 | ||||
| +	struct sg_table *table = &sdb->table;
 | ||||
| +	struct scatterlist *sg_elem;
 | ||||
| +	unsigned int idx = *p_idx;
 | ||||
| +	int i;
 | ||||
| +
 | ||||
| +	for_each_sg(table->sgl, sg_elem, table->nents, i)
 | ||||
| +		sg_set_buf(&sg[idx++], sg_virt(sg_elem), sg_elem->length);
 | ||||
| +
 | ||||
| +	*p_idx = idx;
 | ||||
| +}
 | ||||
| +
 | ||||
| +/**
 | ||||
| + * virtscsi_map_cmd - map a scsi_cmd to a virtqueue scatterlist
 | ||||
| + * @vscsi	: virtio_scsi state
 | ||||
| + * @cmd		: command structure
 | ||||
| + * @out_num	: number of read-only elements
 | ||||
| + * @in_num	: number of write-only elements
 | ||||
| + * @req_size	: size of the request buffer
 | ||||
| + * @resp_size	: size of the response buffer
 | ||||
| + *
 | ||||
| + * Called with vq_lock held.
 | ||||
| + */
 | ||||
| +static void virtscsi_map_cmd(struct virtio_scsi *vscsi,
 | ||||
| +			     struct virtio_scsi_cmd *cmd,
 | ||||
| +			     unsigned *out_num, unsigned *in_num,
 | ||||
| +			     size_t req_size, size_t resp_size)
 | ||||
| +{
 | ||||
| +	struct scsi_cmnd *sc = cmd->sc;
 | ||||
| +	struct scatterlist *sg = vscsi->sg;
 | ||||
| +	unsigned int idx = 0;
 | ||||
| +
 | ||||
| +	if (sc) {
 | ||||
| +		struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
 | ||||
| +		BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
 | ||||
| +
 | ||||
| +		/* TODO: check feature bit and fail if unsupported?  */
 | ||||
| +		BUG_ON(sc->sc_data_direction == DMA_BIDIRECTIONAL);
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	/* Request header.  */
 | ||||
| +	sg_set_buf(&sg[idx++], &cmd->req, req_size);
 | ||||
| +
 | ||||
| +	/* Data-out buffer.  */
 | ||||
| +	if (sc && sc->sc_data_direction != DMA_FROM_DEVICE)
 | ||||
| +		virtscsi_map_sgl(sg, &idx, scsi_out(sc));
 | ||||
| +
 | ||||
| +	*out_num = idx;
 | ||||
| +
 | ||||
| +	/* Response header.  */
 | ||||
| +	sg_set_buf(&sg[idx++], &cmd->resp, resp_size);
 | ||||
| +
 | ||||
| +	/* Data-in buffer */
 | ||||
| +	if (sc && sc->sc_data_direction != DMA_TO_DEVICE)
 | ||||
| +		virtscsi_map_sgl(sg, &idx, scsi_in(sc));
 | ||||
| +
 | ||||
| +	*in_num = idx - *out_num;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static int virtscsi_kick_cmd(struct virtio_scsi *vscsi, struct virtqueue *vq,
 | ||||
| +			     struct virtio_scsi_cmd *cmd,
 | ||||
| +			     size_t req_size, size_t resp_size, gfp_t gfp)
 | ||||
| +{
 | ||||
| +	unsigned int out_num, in_num;
 | ||||
| +	unsigned long flags;
 | ||||
| +	int ret;
 | ||||
| +
 | ||||
| +	spin_lock_irqsave(&vscsi->vq_lock, flags);
 | ||||
| +
 | ||||
| +	virtscsi_map_cmd(vscsi, cmd, &out_num, &in_num, req_size, resp_size);
 | ||||
| +
 | ||||
| +	ret = virtqueue_add_buf(vq, vscsi->sg, out_num, in_num, cmd, gfp);
 | ||||
| +	if (ret >= 0)
 | ||||
| +		virtqueue_kick(vq);
 | ||||
| +
 | ||||
| +	spin_unlock_irqrestore(&vscsi->vq_lock, flags);
 | ||||
| +	return ret;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
 | ||||
| +{
 | ||||
| +	struct virtio_scsi *vscsi = shost_priv(sh);
 | ||||
| +	struct virtio_scsi_cmd *cmd;
 | ||||
| +	int ret;
 | ||||
| +
 | ||||
| +	dev_dbg(&sc->device->sdev_gendev,
 | ||||
| +		"cmd %p CDB: %#02x\n", sc, sc->cmnd[0]);
 | ||||
| +
 | ||||
| +	ret = SCSI_MLQUEUE_HOST_BUSY;
 | ||||
| +	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC);
 | ||||
| +	if (!cmd)
 | ||||
| +		goto out;
 | ||||
| +
 | ||||
| +	memset(cmd, 0, sizeof(*cmd));
 | ||||
| +	cmd->sc = sc;
 | ||||
| +	cmd->req.cmd = (struct virtio_scsi_cmd_req){
 | ||||
| +		.lun[0] = 1,
 | ||||
| +		.lun[1] = sc->device->id,
 | ||||
| +		.lun[2] = (sc->device->lun >> 8) | 0x40,
 | ||||
| +		.lun[3] = sc->device->lun & 0xff,
 | ||||
| +		.tag = (unsigned long)sc,
 | ||||
| +		.task_attr = VIRTIO_SCSI_S_SIMPLE,
 | ||||
| +		.prio = 0,
 | ||||
| +		.crn = 0,
 | ||||
| +	};
 | ||||
| +
 | ||||
| +	BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
 | ||||
| +	memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
 | ||||
| +
 | ||||
| +	if (virtscsi_kick_cmd(vscsi, vscsi->req_vq, cmd,
 | ||||
| +			      sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
 | ||||
| +			      GFP_ATOMIC) >= 0)
 | ||||
| +		ret = 0;
 | ||||
| +
 | ||||
| +out:
 | ||||
| +	return ret;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static struct scsi_host_template virtscsi_host_template = {
 | ||||
| +	.module = THIS_MODULE,
 | ||||
| +	.name = "Virtio SCSI HBA",
 | ||||
| +	.proc_name = "virtio_scsi",
 | ||||
| +	.queuecommand = virtscsi_queuecommand,
 | ||||
| +	.this_id = -1,
 | ||||
| +
 | ||||
| +	.can_queue = 1024,
 | ||||
| +	.dma_boundary = UINT_MAX,
 | ||||
| +	.use_clustering = ENABLE_CLUSTERING,
 | ||||
| +};
 | ||||
| +
 | ||||
| +#define virtscsi_config_get(vdev, fld) \
 | ||||
| +	({ \
 | ||||
| +		typeof(((struct virtio_scsi_config *)0)->fld) __val; \
 | ||||
| +		vdev->config->get(vdev, \
 | ||||
| +				  offsetof(struct virtio_scsi_config, fld), \
 | ||||
| +				  &__val, sizeof(__val)); \
 | ||||
| +		__val; \
 | ||||
| +	})
 | ||||
| +
 | ||||
| +#define virtscsi_config_set(vdev, fld, val) \
 | ||||
| +	(void)({ \
 | ||||
| +		typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \
 | ||||
| +		vdev->config->set(vdev, \
 | ||||
| +				  offsetof(struct virtio_scsi_config, fld), \
 | ||||
| +				  &__val, sizeof(__val)); \
 | ||||
| +	})
 | ||||
| +
 | ||||
| +static int __devinit virtscsi_init(struct virtio_device *vdev,
 | ||||
| +				   struct virtio_scsi *vscsi)
 | ||||
| +{
 | ||||
| +	int err;
 | ||||
| +	struct virtqueue *vqs[3];
 | ||||
| +	vq_callback_t *callbacks[] = {
 | ||||
| +		virtscsi_ctrl_done,
 | ||||
| +		virtscsi_event_done,
 | ||||
| +		virtscsi_req_done
 | ||||
| +	};
 | ||||
| +	const char *names[] = {
 | ||||
| +		"control",
 | ||||
| +		"event",
 | ||||
| +		"request"
 | ||||
| +	};
 | ||||
| +
 | ||||
| +	/* Discover virtqueues and write information to configuration.  */
 | ||||
| +	err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
 | ||||
| +	if (err)
 | ||||
| +		return err;
 | ||||
| +
 | ||||
| +	vscsi->ctrl_vq = vqs[0];
 | ||||
| +	vscsi->event_vq = vqs[1];
 | ||||
| +	vscsi->req_vq = vqs[2];
 | ||||
| +
 | ||||
| +	virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
 | ||||
| +	virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
 | ||||
| +	return 0;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static int __devinit virtscsi_probe(struct virtio_device *vdev)
 | ||||
| +{
 | ||||
| +	struct Scsi_Host *shost;
 | ||||
| +	struct virtio_scsi *vscsi;
 | ||||
| +	int err;
 | ||||
| +	u32 sg_elems;
 | ||||
| +	u32 cmd_per_lun;
 | ||||
| +
 | ||||
| +	/* We need to know how many segments before we allocate.
 | ||||
| +	 * We need an extra sg element at both head and tail.
 | ||||
| +	 */
 | ||||
| +	sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
 | ||||
| +
 | ||||
| +	/* Allocate memory and link the structs together.  */
 | ||||
| +	shost = scsi_host_alloc(&virtscsi_host_template,
 | ||||
| +		sizeof(*vscsi) + sizeof(vscsi->sg[0]) * (sg_elems + 2));
 | ||||
| +
 | ||||
| +	if (!shost)
 | ||||
| +		return -ENOMEM;
 | ||||
| +
 | ||||
| +	shost->sg_tablesize = sg_elems;
 | ||||
| +	vscsi = shost_priv(shost);
 | ||||
| +	vscsi->vdev = vdev;
 | ||||
| +	vdev->priv = shost;
 | ||||
| +
 | ||||
| +	/* Random initializations.  */
 | ||||
| +	spin_lock_init(&vscsi->vq_lock);
 | ||||
| +	sg_init_table(vscsi->sg, sg_elems + 2);
 | ||||
| +
 | ||||
| +	err = virtscsi_init(vdev, vscsi);
 | ||||
| +	if (err)
 | ||||
| +		goto virtscsi_init_failed;
 | ||||
| +
 | ||||
| +	cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
 | ||||
| +	shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
 | ||||
| +	shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
 | ||||
| +	shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1;
 | ||||
| +	shost->max_id = virtscsi_config_get(vdev, max_target) + 1;
 | ||||
| +	shost->max_channel = 0;
 | ||||
| +	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
 | ||||
| +	err = scsi_add_host(shost, &vdev->dev);
 | ||||
| +	if (err)
 | ||||
| +		goto scsi_add_host_failed;
 | ||||
| +
 | ||||
| +	scsi_scan_host(shost);
 | ||||
| +
 | ||||
| +	return 0;
 | ||||
| +
 | ||||
| +scsi_add_host_failed:
 | ||||
| +	vdev->config->del_vqs(vdev);
 | ||||
| +virtscsi_init_failed:
 | ||||
| +	scsi_host_put(shost);
 | ||||
| +	return err;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev)
 | ||||
| +{
 | ||||
| +	/* Stop all the virtqueues. */
 | ||||
| +	vdev->config->reset(vdev);
 | ||||
| +
 | ||||
| +	vdev->config->del_vqs(vdev);
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void __devexit virtscsi_remove(struct virtio_device *vdev)
 | ||||
| +{
 | ||||
| +	struct Scsi_Host *shost = virtio_scsi_host(vdev);
 | ||||
| +
 | ||||
| +	scsi_remove_host(shost);
 | ||||
| +
 | ||||
| +	virtscsi_remove_vqs(vdev);
 | ||||
| +	scsi_host_put(shost);
 | ||||
| +}
 | ||||
| +
 | ||||
| +static struct virtio_device_id id_table[] = {
 | ||||
| +	{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
 | ||||
| +	{ 0 },
 | ||||
| +};
 | ||||
| +
 | ||||
| +static struct virtio_driver virtio_scsi_driver = {
 | ||||
| +	.driver.name = KBUILD_MODNAME,
 | ||||
| +	.driver.owner = THIS_MODULE,
 | ||||
| +	.id_table = id_table,
 | ||||
| +	.probe = virtscsi_probe,
 | ||||
| +	.remove = __devexit_p(virtscsi_remove),
 | ||||
| +};
 | ||||
| +
 | ||||
| +static int __init init(void)
 | ||||
| +{
 | ||||
| +	int ret = -ENOMEM;
 | ||||
| +
 | ||||
| +	virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
 | ||||
| +	if (!virtscsi_cmd_cache) {
 | ||||
| +		printk(KERN_ERR "kmem_cache_create() for "
 | ||||
| +				"virtscsi_cmd_cache failed\n");
 | ||||
| +		goto error;
 | ||||
| +	}
 | ||||
| +
 | ||||
| +
 | ||||
| +	virtscsi_cmd_pool =
 | ||||
| +		mempool_create_slab_pool(VIRTIO_SCSI_MEMPOOL_SZ,
 | ||||
| +					 virtscsi_cmd_cache);
 | ||||
| +	if (!virtscsi_cmd_pool) {
 | ||||
| +		printk(KERN_ERR "mempool_create() for"
 | ||||
| +				"virtscsi_cmd_pool failed\n");
 | ||||
| +		goto error;
 | ||||
| +	}
 | ||||
| +	ret = register_virtio_driver(&virtio_scsi_driver);
 | ||||
| +	if (ret < 0)
 | ||||
| +		goto error;
 | ||||
| +
 | ||||
| +	return 0;
 | ||||
| +
 | ||||
| +error:
 | ||||
| +	if (virtscsi_cmd_pool) {
 | ||||
| +		mempool_destroy(virtscsi_cmd_pool);
 | ||||
| +		virtscsi_cmd_pool = NULL;
 | ||||
| +	}
 | ||||
| +	if (virtscsi_cmd_cache) {
 | ||||
| +		kmem_cache_destroy(virtscsi_cmd_cache);
 | ||||
| +		virtscsi_cmd_cache = NULL;
 | ||||
| +	}
 | ||||
| +	return ret;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void __exit fini(void)
 | ||||
| +{
 | ||||
| +	unregister_virtio_driver(&virtio_scsi_driver);
 | ||||
| +	mempool_destroy(virtscsi_cmd_pool);
 | ||||
| +	kmem_cache_destroy(virtscsi_cmd_cache);
 | ||||
| +}
 | ||||
| +module_init(init);
 | ||||
| +module_exit(fini);
 | ||||
| +
 | ||||
| +MODULE_DEVICE_TABLE(virtio, id_table);
 | ||||
| +MODULE_DESCRIPTION("Virtio SCSI HBA driver");
 | ||||
| +MODULE_LICENSE("GPL");
 | ||||
| diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
 | ||||
| index 85bb0bb..d83ae52 100644
 | ||||
| --- a/include/linux/virtio_ids.h
 | ||||
| +++ b/include/linux/virtio_ids.h
 | ||||
| @@ -34,6 +34,7 @@
 | ||||
|  #define VIRTIO_ID_CONSOLE	3 /* virtio console */ | ||||
|  #define VIRTIO_ID_RNG		4 /* virtio ring */ | ||||
|  #define VIRTIO_ID_BALLOON	5 /* virtio balloon */ | ||||
| +#define VIRTIO_ID_SCSI		8 /* virtio scsi */
 | ||||
|  #define VIRTIO_ID_9P		9 /* 9p virtio console */ | ||||
|   | ||||
|  #endif /* _LINUX_VIRTIO_IDS_H */ | ||||
| diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
 | ||||
| new file mode 100644 | ||||
| index 0000000..8ddeafd
 | ||||
| --- /dev/null
 | ||||
| +++ b/include/linux/virtio_scsi.h
 | ||||
| @@ -0,0 +1,114 @@
 | ||||
| +#ifndef _LINUX_VIRTIO_SCSI_H
 | ||||
| +#define _LINUX_VIRTIO_SCSI_H
 | ||||
| +/* This header is BSD licensed so anyone can use the definitions to implement
 | ||||
| + * compatible drivers/servers. */
 | ||||
| +
 | ||||
| +#define VIRTIO_SCSI_CDB_SIZE   32
 | ||||
| +#define VIRTIO_SCSI_SENSE_SIZE 96
 | ||||
| +
 | ||||
| +/* SCSI command request, followed by data-out */
 | ||||
| +struct virtio_scsi_cmd_req {
 | ||||
| +	u8 lun[8];		/* Logical Unit Number */
 | ||||
| +	u64 tag;		/* Command identifier */
 | ||||
| +	u8 task_attr;		/* Task attribute */
 | ||||
| +	u8 prio;
 | ||||
| +	u8 crn;
 | ||||
| +	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +/* Response, followed by sense data and data-in */
 | ||||
| +struct virtio_scsi_cmd_resp {
 | ||||
| +	u32 sense_len;		/* Sense data length */
 | ||||
| +	u32 resid;		/* Residual bytes in data buffer */
 | ||||
| +	u16 status_qualifier;	/* Status qualifier */
 | ||||
| +	u8 status;		/* Command completion status */
 | ||||
| +	u8 response;		/* Response values */
 | ||||
| +	u8 sense[VIRTIO_SCSI_SENSE_SIZE];
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +/* Task Management Request */
 | ||||
| +struct virtio_scsi_ctrl_tmf_req {
 | ||||
| +	u32 type;
 | ||||
| +	u32 subtype;
 | ||||
| +	u8 lun[8];
 | ||||
| +	u64 tag;
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +struct virtio_scsi_ctrl_tmf_resp {
 | ||||
| +	u8 response;
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +/* Asynchronous notification query/subscription */
 | ||||
| +struct virtio_scsi_ctrl_an_req {
 | ||||
| +	u32 type;
 | ||||
| +	u8 lun[8];
 | ||||
| +	u32 event_requested;
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +struct virtio_scsi_ctrl_an_resp {
 | ||||
| +	u32 event_actual;
 | ||||
| +	u8 response;
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +struct virtio_scsi_event {
 | ||||
| +	u32 event;
 | ||||
| +	u8 lun[8];
 | ||||
| +	u32 reason;
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +struct virtio_scsi_config {
 | ||||
| +	u32 num_queues;
 | ||||
| +	u32 seg_max;
 | ||||
| +	u32 max_sectors;
 | ||||
| +	u32 cmd_per_lun;
 | ||||
| +	u32 event_info_size;
 | ||||
| +	u32 sense_size;
 | ||||
| +	u32 cdb_size;
 | ||||
| +	u16 max_channel;
 | ||||
| +	u16 max_target;
 | ||||
| +	u32 max_lun;
 | ||||
| +} __packed;
 | ||||
| +
 | ||||
| +/* Response codes */
 | ||||
| +#define VIRTIO_SCSI_S_OK                       0
 | ||||
| +#define VIRTIO_SCSI_S_OVERRUN                  1
 | ||||
| +#define VIRTIO_SCSI_S_ABORTED                  2
 | ||||
| +#define VIRTIO_SCSI_S_BAD_TARGET               3
 | ||||
| +#define VIRTIO_SCSI_S_RESET                    4
 | ||||
| +#define VIRTIO_SCSI_S_BUSY                     5
 | ||||
| +#define VIRTIO_SCSI_S_TRANSPORT_FAILURE        6
 | ||||
| +#define VIRTIO_SCSI_S_TARGET_FAILURE           7
 | ||||
| +#define VIRTIO_SCSI_S_NEXUS_FAILURE            8
 | ||||
| +#define VIRTIO_SCSI_S_FAILURE                  9
 | ||||
| +#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED       10
 | ||||
| +#define VIRTIO_SCSI_S_FUNCTION_REJECTED        11
 | ||||
| +#define VIRTIO_SCSI_S_INCORRECT_LUN            12
 | ||||
| +
 | ||||
| +/* Controlq type codes.  */
 | ||||
| +#define VIRTIO_SCSI_T_TMF                      0
 | ||||
| +#define VIRTIO_SCSI_T_AN_QUERY                 1
 | ||||
| +#define VIRTIO_SCSI_T_AN_SUBSCRIBE             2
 | ||||
| +
 | ||||
| +/* Valid TMF subtypes.  */
 | ||||
| +#define VIRTIO_SCSI_T_TMF_ABORT_TASK           0
 | ||||
| +#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET       1
 | ||||
| +#define VIRTIO_SCSI_T_TMF_CLEAR_ACA            2
 | ||||
| +#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET       3
 | ||||
| +#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET      4
 | ||||
| +#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET   5
 | ||||
| +#define VIRTIO_SCSI_T_TMF_QUERY_TASK           6
 | ||||
| +#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET       7
 | ||||
| +
 | ||||
| +/* Events.  */
 | ||||
| +#define VIRTIO_SCSI_T_EVENTS_MISSED            0x80000000
 | ||||
| +#define VIRTIO_SCSI_T_NO_EVENT                 0
 | ||||
| +#define VIRTIO_SCSI_T_TRANSPORT_RESET          1
 | ||||
| +#define VIRTIO_SCSI_T_ASYNC_NOTIFY             2
 | ||||
| +
 | ||||
| +#define VIRTIO_SCSI_S_SIMPLE                   0
 | ||||
| +#define VIRTIO_SCSI_S_ORDERED                  1
 | ||||
| +#define VIRTIO_SCSI_S_HEAD                     2
 | ||||
| +#define VIRTIO_SCSI_S_ACA                      3
 | ||||
| +
 | ||||
| +
 | ||||
| +#endif /* _LINUX_VIRTIO_SCSI_H */
 | ||||
| -- 
 | ||||
| 1.7.1 | ||||
| 
 | ||||
| 
 | ||||
| From 3c0e8846ac0fc2175dd0e06f495b16a30b549762 Mon Sep 17 00:00:00 2001 | ||||
| From: Paolo Bonzini <pbonzini@redhat.com> | ||||
| Date: Tue, 29 Nov 2011 16:33:28 +0100 | ||||
| Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org> | ||||
| Subject: [PATCH v5 2/3] virtio-scsi: add error handling | ||||
| 
 | ||||
| This commit adds basic error handling to the virtio-scsi | ||||
| HBA device.  Task management functions are sent synchronously | ||||
| via the control virtqueue. | ||||
| 
 | ||||
| Cc: linux-scsi <linux-scsi@vger.kernel.org> | ||||
| Cc: Rusty Russell <rusty@rustcorp.com.au> | ||||
| Cc: Michael S. Tsirkin <mst@redhat.com> | ||||
| Cc: kvm@vger.kernel.org | ||||
| Acked-by: Pekka Enberg <penberg@kernel.org>  | ||||
| Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||||
| ---
 | ||||
| 	v3->v4: fixed 32-bit compilation; adjusted call to virtscsi_kick_cmd | ||||
| 
 | ||||
| 	v2->v3: added mempool, used GFP_NOIO instead of GFP_ATOMIC, | ||||
| 	formatting fixes | ||||
| 
 | ||||
| 	v1->v2: use scmd_printk | ||||
| 
 | ||||
|  drivers/scsi/virtio_scsi.c |   73 +++++++++++++++++++++++++++++++++++++++++++- | ||||
|  1 files changed, 72 insertions(+), 1 deletions(-) | ||||
| 
 | ||||
| diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
 | ||||
| index 3f87ae0..68104cd 100644
 | ||||
| --- a/drivers/scsi/virtio_scsi.c
 | ||||
| +++ b/drivers/scsi/virtio_scsi.c
 | ||||
| @@ -29,6 +29,7 @@
 | ||||
|  /* Command queue element */ | ||||
|  struct virtio_scsi_cmd { | ||||
|  	struct scsi_cmnd *sc; | ||||
| +	struct completion *comp;
 | ||||
|  	union { | ||||
|  		struct virtio_scsi_cmd_req       cmd; | ||||
|  		struct virtio_scsi_ctrl_tmf_req  tmf; | ||||
| @@ -168,11 +169,12 @@ static void virtscsi_req_done(struct virtqueue *vq)
 | ||||
|  	virtscsi_vq_done(vq, virtscsi_complete_cmd); | ||||
|  }; | ||||
|   | ||||
| -/* These are still stubs.  */
 | ||||
|  static void virtscsi_complete_free(void *buf) | ||||
|  { | ||||
|  	struct virtio_scsi_cmd *cmd = buf; | ||||
|   | ||||
| +	if (cmd->comp)
 | ||||
| +		complete_all(cmd->comp);
 | ||||
|  	mempool_free(cmd, virtscsi_cmd_pool); | ||||
|  } | ||||
|   | ||||
| @@ -306,12 +308,81 @@ out:
 | ||||
|  	return ret; | ||||
|  } | ||||
|   | ||||
| +static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
 | ||||
| +{
 | ||||
| +	DECLARE_COMPLETION_ONSTACK(comp);
 | ||||
| +	int ret;
 | ||||
| +
 | ||||
| +	cmd->comp = &comp;
 | ||||
| +	ret = virtscsi_kick_cmd(vscsi, vscsi->ctrl_vq, cmd,
 | ||||
| +			       sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
 | ||||
| +			       GFP_NOIO);
 | ||||
| +	if (ret < 0)
 | ||||
| +		return FAILED;
 | ||||
| +
 | ||||
| +	wait_for_completion(&comp);
 | ||||
| +	if (cmd->resp.tmf.response != VIRTIO_SCSI_S_OK &&
 | ||||
| +	    cmd->resp.tmf.response != VIRTIO_SCSI_S_FUNCTION_SUCCEEDED)
 | ||||
| +		return FAILED;
 | ||||
| +
 | ||||
| +	return SUCCESS;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static int virtscsi_device_reset(struct scsi_cmnd *sc)
 | ||||
| +{
 | ||||
| +	struct virtio_scsi *vscsi = shost_priv(sc->device->host);
 | ||||
| +	struct virtio_scsi_cmd *cmd;
 | ||||
| +
 | ||||
| +	sdev_printk(KERN_INFO, sc->device, "device reset\n");
 | ||||
| +	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
 | ||||
| +	if (!cmd)
 | ||||
| +		return FAILED;
 | ||||
| +
 | ||||
| +	memset(cmd, 0, sizeof(*cmd));
 | ||||
| +	cmd->sc = sc;
 | ||||
| +	cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
 | ||||
| +		.type = VIRTIO_SCSI_T_TMF,
 | ||||
| +		.subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET,
 | ||||
| +		.lun[0] = 1,
 | ||||
| +		.lun[1] = sc->device->id,
 | ||||
| +		.lun[2] = (sc->device->lun >> 8) | 0x40,
 | ||||
| +		.lun[3] = sc->device->lun & 0xff,
 | ||||
| +	};
 | ||||
| +	return virtscsi_tmf(vscsi, cmd);
 | ||||
| +}
 | ||||
| +
 | ||||
| +static int virtscsi_abort(struct scsi_cmnd *sc)
 | ||||
| +{
 | ||||
| +	struct virtio_scsi *vscsi = shost_priv(sc->device->host);
 | ||||
| +	struct virtio_scsi_cmd *cmd;
 | ||||
| +
 | ||||
| +	scmd_printk(KERN_INFO, sc, "abort\n");
 | ||||
| +	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_NOIO);
 | ||||
| +	if (!cmd)
 | ||||
| +		return FAILED;
 | ||||
| +
 | ||||
| +	memset(cmd, 0, sizeof(*cmd));
 | ||||
| +	cmd->sc = sc;
 | ||||
| +	cmd->req.tmf = (struct virtio_scsi_ctrl_tmf_req){
 | ||||
| +		.type = VIRTIO_SCSI_T_TMF,
 | ||||
| +		.subtype = VIRTIO_SCSI_T_TMF_ABORT_TASK,
 | ||||
| +		.lun[0] = 1,
 | ||||
| +		.lun[1] = sc->device->id,
 | ||||
| +		.lun[2] = (sc->device->lun >> 8) | 0x40,
 | ||||
| +		.lun[3] = sc->device->lun & 0xff,
 | ||||
| +		.tag = (unsigned long)sc,
 | ||||
| +	};
 | ||||
| +	return virtscsi_tmf(vscsi, cmd);
 | ||||
| +}
 | ||||
| +
 | ||||
|  static struct scsi_host_template virtscsi_host_template = { | ||||
|  	.module = THIS_MODULE, | ||||
|  	.name = "Virtio SCSI HBA", | ||||
|  	.proc_name = "virtio_scsi", | ||||
|  	.queuecommand = virtscsi_queuecommand, | ||||
|  	.this_id = -1, | ||||
| +	.eh_abort_handler = virtscsi_abort,
 | ||||
| +	.eh_device_reset_handler = virtscsi_device_reset,
 | ||||
|   | ||||
|  	.can_queue = 1024, | ||||
|  	.dma_boundary = UINT_MAX, | ||||
| -- 
 | ||||
| 1.7.1 | ||||
| 
 | ||||
| 
 | ||||
| From 43cf1b6a4ee31e69581042a0c85d1398f83dcedc Mon Sep 17 00:00:00 2001 | ||||
| From: Paolo Bonzini <pbonzini@redhat.com> | ||||
| Date: Fri, 13 Jan 2012 15:30:08 +0100 | ||||
| Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>, Mike Christie <michaelc@cs.wisc.edu>, Pekka Enberg <penberg@kernel.org> | ||||
| Subject: [PATCH v5 3/3] virtio-scsi: add power management support | ||||
| 
 | ||||
| This patch adds freeze/restore handlers for the HBA.  Block queues | ||||
| are managed independently by the disk devices. | ||||
| 
 | ||||
| Cc: linux-scsi <linux-scsi@vger.kernel.org> | ||||
| Cc: Rusty Russell <rusty@rustcorp.com.au> | ||||
| Cc: Michael S. Tsirkin <mst@redhat.com> | ||||
| Cc: kvm@vger.kernel.org | ||||
| Acked-by: Pekka Enberg <penberg@kernel.org>  | ||||
| Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||||
| ---
 | ||||
| 	The feature has been merged in the virtio core for 3.3, so the patch | ||||
| 	is new in v4. | ||||
| 
 | ||||
|  drivers/scsi/virtio_scsi.c |   26 +++++++++++++++++++++++--- | ||||
|  1 files changed, 23 insertions(+), 3 deletions(-) | ||||
| 
 | ||||
| diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
 | ||||
| index 68104cd..efccd72 100644
 | ||||
| --- a/drivers/scsi/virtio_scsi.c
 | ||||
| +++ b/drivers/scsi/virtio_scsi.c
 | ||||
| @@ -406,8 +406,8 @@ static struct scsi_host_template virtscsi_host_template = {
 | ||||
|  				  &__val, sizeof(__val)); \ | ||||
|  	}) | ||||
|   | ||||
| -static int __devinit virtscsi_init(struct virtio_device *vdev,
 | ||||
| -				   struct virtio_scsi *vscsi)
 | ||||
| +static int virtscsi_init(struct virtio_device *vdev,
 | ||||
| +			 struct virtio_scsi *vscsi)
 | ||||
|  { | ||||
|  	int err; | ||||
|  	struct virtqueue *vqs[3]; | ||||
| @@ -491,7 +491,7 @@ virtscsi_init_failed:
 | ||||
|  	return err; | ||||
|  } | ||||
|   | ||||
| -static void __devexit virtscsi_remove_vqs(struct virtio_device *vdev)
 | ||||
| +static void virtscsi_remove_vqs(struct virtio_device *vdev)
 | ||||
|  { | ||||
|  	/* Stop all the virtqueues. */ | ||||
|  	vdev->config->reset(vdev); | ||||
| @@ -509,6 +509,22 @@ static void __devexit virtscsi_remove(struct virtio_device *vdev)
 | ||||
|  	scsi_host_put(shost); | ||||
|  } | ||||
|   | ||||
| +#ifdef CONFIG_PM
 | ||||
| +static int virtscsi_freeze(struct virtio_device *vdev)
 | ||||
| +{
 | ||||
| +	virtscsi_remove_vqs(vdev);
 | ||||
| +	return 0;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static int virtscsi_restore(struct virtio_device *vdev)
 | ||||
| +{
 | ||||
| +	struct Scsi_Host *sh = virtio_scsi_host(vdev);
 | ||||
| +	struct virtio_scsi *vscsi = shost_priv(sh);
 | ||||
| +
 | ||||
| +	return virtscsi_init(vdev, vscsi);
 | ||||
| +}
 | ||||
| +#endif
 | ||||
| +
 | ||||
|  static struct virtio_device_id id_table[] = { | ||||
|  	{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID }, | ||||
|  	{ 0 }, | ||||
| @@ -519,6 +535,10 @@ static struct virtio_driver virtio_scsi_driver = {
 | ||||
|  	.driver.owner = THIS_MODULE, | ||||
|  	.id_table = id_table, | ||||
|  	.probe = virtscsi_probe, | ||||
| +#ifdef CONFIG_PM
 | ||||
| +	.freeze = virtscsi_freeze,
 | ||||
| +	.restore = virtscsi_restore,
 | ||||
| +#endif
 | ||||
|  	.remove = __devexit_p(virtscsi_remove), | ||||
|  }; | ||||
|   | ||||
| -- 
 | ||||
| 1.7.1 | ||||
| 
 | ||||
| @ -1,15 +0,0 @@ | ||||
| diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
 | ||||
| index f22a9f7..f525f99 100644
 | ||||
| --- a/arch/x86/kernel/cpu/mcheck/mce.c
 | ||||
| +++ b/arch/x86/kernel/cpu/mcheck/mce.c
 | ||||
| @@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
 | ||||
|  { | ||||
|  	unsigned int next, i, prev = 0; | ||||
|   | ||||
| -	next = rcu_dereference_check_mce(mcelog.next);
 | ||||
| +	next = ACCESS_ONCE(mcelog.next);
 | ||||
|   | ||||
|  	do { | ||||
|  		struct mce *m; | ||||
| 
 | ||||
|    | ||||
| @ -1,447 +0,0 @@ | ||||
| In some cases it may happen that pmd_none_or_clear_bad() is called | ||||
| with the mmap_sem held in read mode. In those cases the huge page | ||||
| faults can allocate hugepmds under pmd_none_or_clear_bad() and that | ||||
| can trigger a false positive from pmd_bad() that will not like to see | ||||
| a pmd materializing as trans huge. | ||||
| 
 | ||||
| The problem is not khugepaged: khugepaged holds the mmap_sem in | ||||
| write mode (and all those sites must hold the mmap_sem in read mode to | ||||
| prevent pagetables to go away from under them, during code review it | ||||
| seems vm86 mode on 32bit kernels requires that too unless it's | ||||
| restricted to 1 thread per process or UP builds). The race is only | ||||
| with the huge pagefaults that can convert a pmd_none() into a | ||||
| pmd_trans_huge(). | ||||
| 
 | ||||
| Effectively all these pmd_none_or_clear_bad() sites running with | ||||
| mmap_sem in read mode are somewhat speculative with the page faults, | ||||
| and the result is always undefined when they run simultaneously. This | ||||
| is probably why it wasn't common to run into this. For example if the | ||||
| madvise(MADV_DONTNEED) runs zap_page_range() shortly before the page | ||||
| fault, the hugepage will not be zapped, if the page fault runs first | ||||
| it will be zapped. | ||||
| 
 | ||||
| Altering pmd_bad() not to error out if it finds hugepmds won't be | ||||
| enough to fix this, because zap_pmd_range would then proceed to call | ||||
| zap_pte_range (which would be incorrect if the pmd becomes a | ||||
| pmd_trans_huge()). | ||||
| 
 | ||||
| The simplest way to fix this is to read the pmd in the local stack | ||||
| (regardless of what we read, no need of actual CPU barriers, only | ||||
| compiler barrier needed), and be sure it is not changing under the | ||||
| code that computes its value. Even if the real pmd is changing under | ||||
| the value we hold on the stack, we don't care. If we actually end up | ||||
| in zap_pte_range it means the pmd was not none already and it was not | ||||
| huge, and it can't become huge from under us (khugepaged locking | ||||
| explained above). | ||||
| 
 | ||||
| All we need is to enforce that there is no way anymore that in a code | ||||
| path like below, pmd_trans_huge can be false, but | ||||
| pmd_none_or_clear_bad can run into a hugepmd. The overhead of a | ||||
| barrier() is just a compiler tweak and should not be measurable (I | ||||
| only added it for THP builds). I don't exclude different compiler | ||||
| versions may have prevented the race too by caching the value of *pmd | ||||
| on the stack (that hasn't been verified, but it wouldn't be impossible | ||||
| considering pmd_none_or_clear_bad, pmd_bad, pmd_trans_huge, pmd_none | ||||
| are all inlines and there's no external function called in between | ||||
| pmd_trans_huge and pmd_none_or_clear_bad). | ||||
| 
 | ||||
| 		if (pmd_trans_huge(*pmd)) { | ||||
| 			if (next-addr != HPAGE_PMD_SIZE) { | ||||
| 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); | ||||
| 				split_huge_page_pmd(vma->vm_mm, pmd); | ||||
| 			} else if (zap_huge_pmd(tlb, vma, pmd, addr)) | ||||
| 				continue; | ||||
| 			/* fall through */ | ||||
| 		} | ||||
| 		if (pmd_none_or_clear_bad(pmd)) | ||||
| 
 | ||||
| Because this race condition could be exercised without special | ||||
| privileges this was reported in CVE-2012-1179. | ||||
| 
 | ||||
| The race was identified and fully explained by Ulrich who debugged it. | ||||
| I'm quoting his accurate explanation below, for reference. | ||||
| 
 | ||||
| ====== start quote =======
 | ||||
|   mapcount 0 page_mapcount 1 | ||||
|   kernel BUG at mm/huge_memory.c:1384! | ||||
| 
 | ||||
| At some point prior to the panic, a "bad pmd ..." message similar to the | ||||
| following is logged on the console: | ||||
| 
 | ||||
|   mm/memory.c:145: bad pmd ffff8800376e1f98(80000000314000e7). | ||||
| 
 | ||||
| The "bad pmd ..." message is logged by pmd_clear_bad() before it clears | ||||
| the page's PMD table entry. | ||||
| 
 | ||||
|     143 void pmd_clear_bad(pmd_t *pmd) | ||||
|     144 { | ||||
| ->  145         pmd_ERROR(*pmd);
 | ||||
|     146         pmd_clear(pmd); | ||||
|     147 } | ||||
| 
 | ||||
| After the PMD table entry has been cleared, there is an inconsistency | ||||
| between the actual number of PMD table entries that are mapping the page | ||||
| and the page's map count (_mapcount field in struct page). When the page | ||||
| is subsequently reclaimed, __split_huge_page() detects this inconsistency. | ||||
| 
 | ||||
|    1381         if (mapcount != page_mapcount(page)) | ||||
|    1382                 printk(KERN_ERR "mapcount %d page_mapcount %d\n", | ||||
|    1383                        mapcount, page_mapcount(page)); | ||||
| -> 1384         BUG_ON(mapcount != page_mapcount(page));
 | ||||
| 
 | ||||
| The root cause of the problem is a race of two threads in a multithreaded | ||||
| process. Thread B incurs a page fault on a virtual address that has never | ||||
| been accessed (PMD entry is zero) while Thread A is executing an madvise() | ||||
| system call on a virtual address within the same 2 MB (huge page) range. | ||||
| 
 | ||||
|            virtual address space | ||||
|           .---------------------. | ||||
|           |                     | | ||||
|           |                     | | ||||
|         .-|---------------------| | ||||
|         | |                     | | ||||
|         | |                     |<-- B(fault) | ||||
|         | |                     | | ||||
|   2 MB  | |/////////////////////|-. | ||||
|   huge <  |/////////////////////|  > A(range) | ||||
|   page  | |/////////////////////|-' | ||||
|         | |                     | | ||||
|         | |                     | | ||||
|         '-|---------------------| | ||||
|           |                     | | ||||
|           |                     | | ||||
|           '---------------------' | ||||
| 
 | ||||
| - Thread A is executing an madvise(..., MADV_DONTNEED) system call
 | ||||
|   on the virtual address range "A(range)" shown in the picture. | ||||
| 
 | ||||
| sys_madvise | ||||
|   // Acquire the semaphore in shared mode. | ||||
|   down_read(&current->mm->mmap_sem) | ||||
|   ... | ||||
|   madvise_vma | ||||
|     switch (behavior) | ||||
|     case MADV_DONTNEED: | ||||
|          madvise_dontneed | ||||
|            zap_page_range | ||||
|              unmap_vmas | ||||
|                unmap_page_range | ||||
|                  zap_pud_range | ||||
|                    zap_pmd_range | ||||
|                      // | ||||
|                      // Assume that this huge page has never been accessed. | ||||
|                      // I.e. content of the PMD entry is zero (not mapped). | ||||
|                      // | ||||
|                      if (pmd_trans_huge(*pmd)) { | ||||
|                          // We don't get here due to the above assumption. | ||||
|                      } | ||||
|                      // | ||||
|                      // Assume that Thread B incurred a page fault and | ||||
|          .---------> // sneaks in here as shown below. | ||||
|          |           // | ||||
|          |           if (pmd_none_or_clear_bad(pmd)) | ||||
|          |               { | ||||
|          |                 if (unlikely(pmd_bad(*pmd))) | ||||
|          |                     pmd_clear_bad | ||||
|          |                     { | ||||
|          |                       pmd_ERROR | ||||
|          |                         // Log "bad pmd ..." message here. | ||||
|          |                       pmd_clear | ||||
|          |                         // Clear the page's PMD entry. | ||||
|          |                         // Thread B incremented the map count | ||||
|          |                         // in page_add_new_anon_rmap(), but | ||||
|          |                         // now the page is no longer mapped | ||||
|          |                         // by a PMD entry (-> inconsistency). | ||||
|          |                     } | ||||
|          |               } | ||||
|          | | ||||
|          v | ||||
| - Thread B is handling a page fault on virtual address "B(fault)" shown
 | ||||
|   in the picture. | ||||
| 
 | ||||
| ... | ||||
| do_page_fault | ||||
|   __do_page_fault | ||||
|     // Acquire the semaphore in shared mode. | ||||
|     down_read_trylock(&mm->mmap_sem) | ||||
|     ... | ||||
|     handle_mm_fault | ||||
|       if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) | ||||
|           // We get here due to the above assumption (PMD entry is zero). | ||||
|           do_huge_pmd_anonymous_page | ||||
|             alloc_hugepage_vma | ||||
|               // Allocate a new transparent huge page here. | ||||
|             ... | ||||
|             __do_huge_pmd_anonymous_page | ||||
|               ... | ||||
|               spin_lock(&mm->page_table_lock) | ||||
|               ... | ||||
|               page_add_new_anon_rmap | ||||
|                 // Here we increment the page's map count (starts at -1). | ||||
|                 atomic_set(&page->_mapcount, 0) | ||||
|               set_pmd_at | ||||
|                 // Here we set the page's PMD entry which will be cleared | ||||
|                 // when Thread A calls pmd_clear_bad(). | ||||
|               ... | ||||
|               spin_unlock(&mm->page_table_lock) | ||||
| 
 | ||||
| The mmap_sem does not prevent the race because both threads are acquiring | ||||
| it in shared mode (down_read). Thread B holds the page_table_lock while | ||||
| the page's map count and PMD table entry are updated. However, Thread A | ||||
| does not synchronize on that lock. | ||||
| ====== end quote =======
 | ||||
| 
 | ||||
| Reported-by: Ulrich Obergfell <uobergfe@redhat.com> | ||||
| Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> | ||||
| ---
 | ||||
|  arch/x86/kernel/vm86_32.c     |    2 + | ||||
|  fs/proc/task_mmu.c            |    9 ++++++ | ||||
|  include/asm-generic/pgtable.h |   57 +++++++++++++++++++++++++++++++++++++++++ | ||||
|  mm/memcontrol.c               |    4 +++ | ||||
|  mm/memory.c                   |   14 ++++++++-- | ||||
|  mm/mempolicy.c                |    2 +- | ||||
|  mm/mincore.c                  |    2 +- | ||||
|  mm/pagewalk.c                 |    2 +- | ||||
|  mm/swapfile.c                 |    4 +-- | ||||
|  9 files changed, 87 insertions(+), 9 deletions(-) | ||||
| 
 | ||||
| diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
 | ||||
| index b466cab..328cb37 100644
 | ||||
| --- a/arch/x86/kernel/vm86_32.c
 | ||||
| +++ b/arch/x86/kernel/vm86_32.c
 | ||||
| @@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 | ||||
|  	spinlock_t *ptl; | ||||
|  	int i; | ||||
|   | ||||
| +	down_write(&mm->mmap_sem);
 | ||||
|  	pgd = pgd_offset(mm, 0xA0000); | ||||
|  	if (pgd_none_or_clear_bad(pgd)) | ||||
|  		goto out; | ||||
| @@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 | ||||
|  	} | ||||
|  	pte_unmap_unlock(pte, ptl); | ||||
|  out: | ||||
| +	up_write(&mm->mmap_sem);
 | ||||
|  	flush_tlb(); | ||||
|  } | ||||
|   | ||||
| diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
 | ||||
| index 7dcd2a2..3efa725 100644
 | ||||
| --- a/fs/proc/task_mmu.c
 | ||||
| +++ b/fs/proc/task_mmu.c
 | ||||
| @@ -409,6 +409,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 | ||||
|  	} else { | ||||
|  		spin_unlock(&walk->mm->page_table_lock); | ||||
|  	} | ||||
| +
 | ||||
| +	if (pmd_trans_unstable(pmd))
 | ||||
| +		return 0;
 | ||||
|  	/* | ||||
|  	 * The mmap_sem held all the way back in m_start() is what | ||||
|  	 * keeps khugepaged out of here and from collapsing things | ||||
| @@ -507,6 +510,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 | ||||
|  	struct page *page; | ||||
|   | ||||
|  	split_huge_page_pmd(walk->mm, pmd); | ||||
| +	if (pmd_trans_unstable(pmd))
 | ||||
| +		return 0;
 | ||||
|   | ||||
|  	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||||
|  	for (; addr != end; pte++, addr += PAGE_SIZE) { | ||||
| @@ -670,6 +675,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 | ||||
|  	int err = 0; | ||||
|   | ||||
|  	split_huge_page_pmd(walk->mm, pmd); | ||||
| +	if (pmd_trans_unstable(pmd))
 | ||||
| +		return 0;
 | ||||
|   | ||||
|  	/* find the first VMA at or above 'addr' */ | ||||
|  	vma = find_vma(walk->mm, addr); | ||||
| @@ -961,6 +968,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 | ||||
|  		spin_unlock(&walk->mm->page_table_lock); | ||||
|  	} | ||||
|   | ||||
| +	if (pmd_trans_unstable(pmd))
 | ||||
| +		return 0;
 | ||||
|  	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | ||||
|  	do { | ||||
|  		struct page *page = can_gather_numa_stats(*pte, md->vma, addr); | ||||
| diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
 | ||||
| index 76bff2b..10f8291 100644
 | ||||
| --- a/include/asm-generic/pgtable.h
 | ||||
| +++ b/include/asm-generic/pgtable.h
 | ||||
| @@ -443,6 +443,63 @@ static inline int pmd_write(pmd_t pmd)
 | ||||
|  #endif /* __HAVE_ARCH_PMD_WRITE */ | ||||
|  #endif | ||||
|   | ||||
| +/*
 | ||||
| + * This function is meant to be used by sites walking pagetables with
 | ||||
| + * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
 | ||||
| + * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
 | ||||
| + * into a null pmd and the transhuge page fault can convert a null pmd
 | ||||
| + * into an hugepmd or into a regular pmd (if the hugepage allocation
 | ||||
| + * fails). While holding the mmap_sem in read mode the pmd becomes
 | ||||
| + * stable and stops changing under us only if it's not null and not a
 | ||||
| + * transhuge pmd. When those races occurs and this function makes a
 | ||||
| + * difference vs the standard pmd_none_or_clear_bad, the result is
 | ||||
| + * undefined so behaving like if the pmd was none is safe (because it
 | ||||
| + * can return none anyway). The compiler level barrier() is critically
 | ||||
| + * important to compute the two checks atomically on the same pmdval.
 | ||||
| + */
 | ||||
| +static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
 | ||||
| +{
 | ||||
| +	/* depend on compiler for an atomic pmd read */
 | ||||
| +	pmd_t pmdval = *pmd;
 | ||||
| +	/*
 | ||||
| +	 * The barrier will stabilize the pmdval in a register or on
 | ||||
| +	 * the stack so that it will stop changing under the code.
 | ||||
| +	 */
 | ||||
| +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 | ||||
| +	barrier();
 | ||||
| +#endif
 | ||||
| +	if (pmd_none(pmdval))
 | ||||
| +		return 1;
 | ||||
| +	if (unlikely(pmd_bad(pmdval))) {
 | ||||
| +		if (!pmd_trans_huge(pmdval))
 | ||||
| +			pmd_clear_bad(pmd);
 | ||||
| +		return 1;
 | ||||
| +	}
 | ||||
| +	return 0;
 | ||||
| +}
 | ||||
| +
 | ||||
| +/*
 | ||||
| + * This is a noop if Transparent Hugepage Support is not built into
 | ||||
| + * the kernel. Otherwise it is equivalent to
 | ||||
| + * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 | ||||
| + * places that already verified the pmd is not none and they want to
 | ||||
| + * walk ptes while holding the mmap sem in read mode (write mode don't
 | ||||
| + * need this). If THP is not enabled, the pmd can't go away under the
 | ||||
| + * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 | ||||
| + * run a pmd_trans_unstable before walking the ptes after
 | ||||
| + * split_huge_page_pmd returns (because it may have run when the pmd
 | ||||
| + * become null, but then a page fault can map in a THP and not a
 | ||||
| + * regular page).
 | ||||
| + */
 | ||||
| +static inline int pmd_trans_unstable(pmd_t *pmd)
 | ||||
| +{
 | ||||
| +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 | ||||
| +	return pmd_none_or_trans_huge_or_clear_bad(pmd);
 | ||||
| +#else
 | ||||
| +	return 0;
 | ||||
| +#endif
 | ||||
| +}
 | ||||
| +
 | ||||
|  #endif /* !__ASSEMBLY__ */ | ||||
|   | ||||
|  #endif /* _ASM_GENERIC_PGTABLE_H */ | ||||
| diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 | ||||
| index d0e57a3..67b0578 100644
 | ||||
| --- a/mm/memcontrol.c
 | ||||
| +++ b/mm/memcontrol.c
 | ||||
| @@ -5193,6 +5193,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 | ||||
|  	spinlock_t *ptl; | ||||
|   | ||||
|  	split_huge_page_pmd(walk->mm, pmd); | ||||
| +	if (pmd_trans_unstable(pmd))
 | ||||
| +		return 0;
 | ||||
|   | ||||
|  	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||||
|  	for (; addr != end; pte++, addr += PAGE_SIZE) | ||||
| @@ -5355,6 +5357,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 | ||||
|  	spinlock_t *ptl; | ||||
|   | ||||
|  	split_huge_page_pmd(walk->mm, pmd); | ||||
| +	if (pmd_trans_unstable(pmd))
 | ||||
| +		return 0;
 | ||||
|  retry: | ||||
|  	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||||
|  	for (; addr != end; addr += PAGE_SIZE) { | ||||
| diff --git a/mm/memory.c b/mm/memory.c
 | ||||
| index fa2f04e..e3090fc 100644
 | ||||
| --- a/mm/memory.c
 | ||||
| +++ b/mm/memory.c
 | ||||
| @@ -1251,12 +1251,20 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 | ||||
|  				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); | ||||
|  				split_huge_page_pmd(vma->vm_mm, pmd); | ||||
|  			} else if (zap_huge_pmd(tlb, vma, pmd, addr)) | ||||
| -				continue;
 | ||||
| +				goto next;
 | ||||
|  			/* fall through */ | ||||
|  		} | ||||
| -		if (pmd_none_or_clear_bad(pmd))
 | ||||
| -			continue;
 | ||||
| +		/*
 | ||||
| +		 * Here there can be other concurrent MADV_DONTNEED or
 | ||||
| +		 * trans huge page faults running, and if the pmd is
 | ||||
| +		 * none or trans huge it can change under us. This is
 | ||||
| +		 * because MADV_DONTNEED holds the mmap_sem in read
 | ||||
| +		 * mode.
 | ||||
| +		 */
 | ||||
| +		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 | ||||
| +			goto next;
 | ||||
|  		next = zap_pte_range(tlb, vma, pmd, addr, next, details); | ||||
| +	next:
 | ||||
|  		cond_resched(); | ||||
|  	} while (pmd++, addr = next, addr != end); | ||||
|   | ||||
| diff --git a/mm/mempolicy.c b/mm/mempolicy.c
 | ||||
| index 47296fe..0a37570 100644
 | ||||
| --- a/mm/mempolicy.c
 | ||||
| +++ b/mm/mempolicy.c
 | ||||
| @@ -512,7 +512,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 | ||||
|  	do { | ||||
|  		next = pmd_addr_end(addr, end); | ||||
|  		split_huge_page_pmd(vma->vm_mm, pmd); | ||||
| -		if (pmd_none_or_clear_bad(pmd))
 | ||||
| +		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 | ||||
|  			continue; | ||||
|  		if (check_pte_range(vma, pmd, addr, next, nodes, | ||||
|  				    flags, private)) | ||||
| diff --git a/mm/mincore.c b/mm/mincore.c
 | ||||
| index 636a868..936b4ce 100644
 | ||||
| --- a/mm/mincore.c
 | ||||
| +++ b/mm/mincore.c
 | ||||
| @@ -164,7 +164,7 @@ static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 | ||||
|  			} | ||||
|  			/* fall through */ | ||||
|  		} | ||||
| -		if (pmd_none_or_clear_bad(pmd))
 | ||||
| +		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 | ||||
|  			mincore_unmapped_range(vma, addr, next, vec); | ||||
|  		else | ||||
|  			mincore_pte_range(vma, pmd, addr, next, vec); | ||||
| diff --git a/mm/pagewalk.c b/mm/pagewalk.c
 | ||||
| index 2f5cf10..aa9701e 100644
 | ||||
| --- a/mm/pagewalk.c
 | ||||
| +++ b/mm/pagewalk.c
 | ||||
| @@ -59,7 +59,7 @@ again:
 | ||||
|  			continue; | ||||
|   | ||||
|  		split_huge_page_pmd(walk->mm, pmd); | ||||
| -		if (pmd_none_or_clear_bad(pmd))
 | ||||
| +		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 | ||||
|  			goto again; | ||||
|  		err = walk_pte_range(pmd, addr, next, walk); | ||||
|  		if (err) | ||||
| diff --git a/mm/swapfile.c b/mm/swapfile.c
 | ||||
| index d999f09..f31b29d 100644
 | ||||
| --- a/mm/swapfile.c
 | ||||
| +++ b/mm/swapfile.c
 | ||||
| @@ -932,9 +932,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 | ||||
|  	pmd = pmd_offset(pud, addr); | ||||
|  	do { | ||||
|  		next = pmd_addr_end(addr, end); | ||||
| -		if (unlikely(pmd_trans_huge(*pmd)))
 | ||||
| -			continue;
 | ||||
| -		if (pmd_none_or_clear_bad(pmd))
 | ||||
| +		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 | ||||
|  			continue; | ||||
|  		ret = unuse_pte_range(vma, pmd, addr, next, entry, page); | ||||
|  		if (ret) | ||||
| 
 | ||||
| --
 | ||||
| To unsubscribe, send a message with 'unsubscribe linux-mm' in | ||||
| the body to majordomo@kvack.org.  For more info on Linux MM, | ||||
| see: http://www.linux-mm.org/ . | ||||
| Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ | ||||
| Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> | ||||
							
								
								
									
										2
									
								
								sources
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								sources
									
									
									
									
									
								
							| @ -1,2 +1,2 @@ | ||||
| 7133f5a2086a7d7ef97abac610c094f5  linux-3.3.tar.xz | ||||
| fe8e2b8e93695cb876cc8394b3db83c4  patch-3.3-git1.xz | ||||
| 72643cb2a29683201f2049d151564c56  patch-3.3-git2.xz | ||||
|  | ||||
| @ -1,118 +0,0 @@ | ||||
| From 92a9c19a89af2ca219fbb040a0059f414a4b7223 Mon Sep 17 00:00:00 2001 | ||||
| From: Kay Sievers <kay.sievers@vrfy.org> | ||||
| Date: Sat, 28 Jan 2012 19:57:46 +0000 | ||||
| Subject: [PATCH] udlfb: remove sysfs framebuffer device with USB | ||||
|  .disconnect() | ||||
| 
 | ||||
| The USB graphics card driver delays the unregistering of the framebuffer | ||||
| device to a workqueue, which breaks the userspace visible remove uevent | ||||
| sequence. Recent userspace tools started to support USB graphics card | ||||
| hotplug out-of-the-box and rely on proper events sent by the kernel. | ||||
| 
 | ||||
| The framebuffer device is a direct child of the USB interface which is | ||||
| removed immediately after the USB .disconnect() callback. But the fb device | ||||
| in /sys stays around until its final cleanup, at a time where all the parent | ||||
| devices have been removed already. | ||||
| 
 | ||||
| To work around that, we remove the sysfs fb device directly in the USB | ||||
| .disconnect() callback and leave only the cleanup of the internal fb | ||||
| data to the delayed work. | ||||
| 
 | ||||
| Before: | ||||
|  add      /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) | ||||
|  add      /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) | ||||
|  add      /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb0 (graphics) | ||||
|  remove   /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) | ||||
|  remove   /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) | ||||
|  remove   /2-1.2:1.0/graphics/fb0 (graphics) | ||||
| 
 | ||||
| After: | ||||
|  add      /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) | ||||
|  add      /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) | ||||
|  add      /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics) | ||||
|  remove   /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics) | ||||
|  remove   /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) | ||||
|  remove   /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) | ||||
| 
 | ||||
| Cc: stable@vger.kernel.org | ||||
| Tested-by: Bernie Thompson <bernie@plugable.com> | ||||
| Acked-by: Bernie Thompson <bernie@plugable.com> | ||||
| Signed-off-by: Kay Sievers <kay.sievers@vrfy.org> | ||||
| Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de> | ||||
| ---
 | ||||
|  drivers/video/fbmem.c |   18 +++++++++++++++++- | ||||
|  drivers/video/udlfb.c |    2 +- | ||||
|  include/linux/fb.h    |    1 + | ||||
|  3 files changed, 19 insertions(+), 2 deletions(-) | ||||
| 
 | ||||
| diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
 | ||||
| index ac9141b..c6ce416 100644
 | ||||
| --- a/drivers/video/fbmem.c
 | ||||
| +++ b/drivers/video/fbmem.c
 | ||||
| @@ -1665,6 +1665,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
 | ||||
|  	if (ret) | ||||
|  		return -EINVAL; | ||||
|   | ||||
| +	unlink_framebuffer(fb_info);
 | ||||
|  	if (fb_info->pixmap.addr && | ||||
|  	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) | ||||
|  		kfree(fb_info->pixmap.addr); | ||||
| @@ -1672,7 +1673,6 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
 | ||||
|  	registered_fb[i] = NULL; | ||||
|  	num_registered_fb--; | ||||
|  	fb_cleanup_device(fb_info); | ||||
| -	device_destroy(fb_class, MKDEV(FB_MAJOR, i));
 | ||||
|  	event.info = fb_info; | ||||
|  	fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); | ||||
|   | ||||
| @@ -1681,6 +1681,22 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
 | ||||
|  	return 0; | ||||
|  } | ||||
|   | ||||
| +int unlink_framebuffer(struct fb_info *fb_info)
 | ||||
| +{
 | ||||
| +	int i;
 | ||||
| +
 | ||||
| +	i = fb_info->node;
 | ||||
| +	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
 | ||||
| +		return -EINVAL;
 | ||||
| +
 | ||||
| +	if (fb_info->dev) {
 | ||||
| +		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
 | ||||
| +		fb_info->dev = NULL;
 | ||||
| +	}
 | ||||
| +	return 0;
 | ||||
| +}
 | ||||
| +EXPORT_SYMBOL(unlink_framebuffer);
 | ||||
| +
 | ||||
|  void remove_conflicting_framebuffers(struct apertures_struct *a, | ||||
|  				     const char *name, bool primary) | ||||
|  { | ||||
| diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
 | ||||
| index a197731..a40c05e 100644
 | ||||
| --- a/drivers/video/udlfb.c
 | ||||
| +++ b/drivers/video/udlfb.c
 | ||||
| @@ -1739,7 +1739,7 @@ static void dlfb_usb_disconnect(struct usb_interface *interface)
 | ||||
|  	for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++) | ||||
|  		device_remove_file(info->dev, &fb_device_attrs[i]); | ||||
|  	device_remove_bin_file(info->dev, &edid_attr); | ||||
| -
 | ||||
| +	unlink_framebuffer(info);
 | ||||
|  	usb_set_intfdata(interface, NULL); | ||||
|   | ||||
|  	/* if clients still have us open, will be freed on last close */ | ||||
| diff --git a/include/linux/fb.h b/include/linux/fb.h
 | ||||
| index c18122f..a395b8c 100644
 | ||||
| --- a/include/linux/fb.h
 | ||||
| +++ b/include/linux/fb.h
 | ||||
| @@ -1003,6 +1003,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
 | ||||
|  /* drivers/video/fbmem.c */ | ||||
|  extern int register_framebuffer(struct fb_info *fb_info); | ||||
|  extern int unregister_framebuffer(struct fb_info *fb_info); | ||||
| +extern int unlink_framebuffer(struct fb_info *fb_info);
 | ||||
|  extern void remove_conflicting_framebuffers(struct apertures_struct *a, | ||||
|  				const char *name, bool primary); | ||||
|  extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); | ||||
| -- 
 | ||||
| 1.7.6.5 | ||||
| 
 | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user