Import of kernel-5.14.0-611.9.1.el9_7

parent 4bb5e61054
commit d18cc6d0be
@@ -533,6 +533,7 @@ What: /sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/srbds
 		/sys/devices/system/cpu/vulnerabilities/tsa
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+		/sys/devices/system/cpu/vulnerabilities/vmscape
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
@@ -24,3 +24,4 @@ are configurable at compile, boot or run time.
    reg-file-data-sampling
    rsb
    indirect-target-selection
+   vmscape
Documentation/admin-guide/hw-vuln/vmscape.rst | 110 (new file)
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+VMSCAPE
+=======
+
+VMSCAPE is a vulnerability that may allow a guest to influence the branch
+prediction in host userspace. It particularly affects hypervisors like QEMU.
+
+Even if a hypervisor may not have any sensitive data like disk encryption keys,
+guest-userspace may be able to attack the guest-kernel using the hypervisor as
+a confused deputy.
+
+Affected processors
+-------------------
+
+The following CPU families are affected by VMSCAPE:
+
+**Intel processors:**
+  - Skylake generation (Parts without Enhanced-IBRS)
+  - Cascade Lake generation - (Parts affected by ITS guest/host separation)
+  - Alder Lake and newer (Parts affected by BHI)
+
+Note that, BHI affected parts that use BHB clearing software mitigation e.g.
+Icelake are not vulnerable to VMSCAPE.
+
+**AMD processors:**
+  - Zen series (families 0x17, 0x19, 0x1a)
+
+** Hygon processors:**
+  - Family 0x18
+
+Mitigation
+----------
+
+Conditional IBPB
+----------------
+
+Kernel tracks when a CPU has run a potentially malicious guest and issues an
+IBPB before the first exit to userspace after VM-exit. If userspace did not run
+between VM-exit and the next VM-entry, no IBPB is issued.
+
+Note that the existing userspace mitigation against Spectre-v2 is effective in
+protecting the userspace. They are insufficient to protect the userspace VMMs
+from a malicious guest. This is because Spectre-v2 mitigations are applied at
+context switch time, while the userspace VMM can run after a VM-exit without a
+context switch.
+
+Vulnerability enumeration and mitigation is not applied inside a guest. This is
+because nested hypervisors should already be deploying IBPB to isolate
+themselves from nested guests.
+
+SMT considerations
+------------------
+
+When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be
+vulnerable to cross-thread attacks. For complete protection against VMSCAPE
+attacks in SMT environments, STIBP should be enabled.
+
+The kernel will issue a warning if SMT is enabled without adequate STIBP
+protection. Warning is not issued when:
+
+- SMT is disabled
+- STIBP is enabled system-wide
+- Intel eIBRS is enabled (which implies STIBP protection)
+
+System information and options
+------------------------------
+
+The sysfs file showing VMSCAPE mitigation status is:
+
+  /sys/devices/system/cpu/vulnerabilities/vmscape
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+   The processor is not vulnerable to VMSCAPE attacks.
+
+ * 'Vulnerable':
+
+   The processor is vulnerable and no mitigation has been applied.
+
+ * 'Mitigation: IBPB before exit to userspace':
+
+   Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has
+   run a potentially malicious guest and issues an IBPB before the first
+   exit to userspace after VM-exit.
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+   IBPB is issued on every VM-exit. This occurs when other mitigations like
+   RETBLEED or SRSO are already issuing IBPB on VM-exit.
+
+Mitigation control on the kernel command line
+----------------------------------------------
+
+The mitigation can be controlled via the ``vmscape=`` command line parameter:
+
+ * ``vmscape=off``:
+
+   Disable the VMSCAPE mitigation.
+
+ * ``vmscape=ibpb``:
+
+   Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y).
+
+ * ``vmscape=force``:
+
+   Force vulnerability detection and mitigation even on processors that are
+   not known to be affected.
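The conditional-IBPB flow described in the new document can be pictured with a small stand-alone model. This is an illustrative sketch only, not kernel code: the flag and helper names (ibpb_pending, issue_ibpb, vm_exit, exit_to_userspace) are invented for this example; the real per-CPU flag and barrier appear in the later hunks of this commit.

/*
 * Model of the bookkeeping: a per-CPU flag is set after a guest has run,
 * and the expensive IBPB is issued at most once, on the first exit to
 * userspace that follows.
 */
#include <stdbool.h>
#include <stdio.h>

static bool ibpb_pending;      /* models the per-CPU "ran a guest" flag */

static void issue_ibpb(void)   /* stands in for the real barrier */
{
	printf("IBPB issued\n");
}

static void vm_exit(void)
{
	ibpb_pending = true;   /* guest code ran on this CPU */
}

static void exit_to_userspace(void)
{
	if (ibpb_pending) {    /* flush only if a guest ran since the last exit */
		issue_ibpb();
		ibpb_pending = false;
	}
}

int main(void)
{
	vm_exit();             /* guest ran */
	vm_exit();             /* guest ran again; still one pending flush */
	exit_to_userspace();   /* first exit to userspace: one IBPB */
	exit_to_userspace();   /* no guest ran in between: no IBPB */
	return 0;
}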
@@ -3435,6 +3435,7 @@
 				srbds=off [X86,INTEL]
 				ssbd=force-off [ARM64]
 				tsx_async_abort=off [X86]
+				vmscape=off [X86]

 				Exceptions:
 					This does not have any effect on
@@ -7152,6 +7153,16 @@
 	vmpoff=		[KNL,S390] Perform z/VM CP command after power off.
 			Format: <command>

+	vmscape=	[X86] Controls mitigation for VMscape attacks.
+			VMscape attacks can leak information from a userspace
+			hypervisor to a guest via speculative side-channels.
+
+			off	- disable the mitigation
+			ibpb	- use Indirect Branch Prediction Barrier
+				  (IBPB) mitigation (default)
+			force	- force vulnerability detection even on
+				  unaffected processors
+
 	vsyscall=	[X86-64]
 			Controls the behavior of vsyscalls (i.e. calls to
 			fixed addresses of 0xffffffffff600x00 from legacy
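As a usage illustration for the sysfs interface this parameter controls, a minimal user-space check could look like the sketch below. The file path and the example status string come from the documentation added in this commit; the program itself is not part of the commit.

/* Minimal sketch: print the current VMSCAPE mitigation status. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/vulnerabilities/vmscape";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		/* Older kernels do not expose this file at all. */
		perror(path);
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("vmscape: %s", line);  /* e.g. "Mitigation: IBPB before exit to userspace" */
	fclose(f);
	return 0;
}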
@@ -142,13 +142,6 @@ but depends on the BIOS to behave correctly.
 Note TDX works with CPU logical online/offline, thus the kernel still
 allows to offline logical CPU and online it again.

-Kexec()
-~~~~~~~
-
-TDX host support currently lacks the ability to handle kexec. For
-simplicity only one of them can be enabled in the Kconfig. This will be
-fixed in the future.
-
 Erratum
 ~~~~~~~

@@ -171,6 +164,13 @@ If the platform has such erratum, the kernel prints additional message in
 machine check handler to tell user the machine check may be caused by
 kernel bug on TDX private memory.

+Kexec
+~~~~~~~
+
+Currently kexec doesn't work on the TDX platforms with the aforementioned
+erratum. It fails when loading the kexec kernel image. Otherwise it
+works normally.
+
 Interaction vs S3 and deeper states
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -12,7 +12,7 @@ RHEL_MINOR = 7
 #
 # Use this spot to avoid future merge conflicts.
 # Do not trim this comment.
-RHEL_RELEASE = 611.5.1
+RHEL_RELEASE = 611.9.1

 #
 # ZSTREAM
@@ -53,17 +53,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
 	syscall_set_return_value(current, regs, 0, ret);

 	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * but not enough for arm64 stack utilization comfort. To keep
-	 * reasonable stack head room, reduce the maximum offset to 9 bits.
+	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+	 * bits. The actual entropy will be further reduced by the compiler
+	 * when applying stack alignment constraints: the AAPCS mandates a
+	 * 16-byte aligned SP at function boundaries, which will remove the
+	 * 4 low bits from any entropy chosen here.
 	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints: the AAPCS mandates a
-	 * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
-	 *
-	 * The resulting 5 bits of entropy is seen in SP[8:4].
+	 * The resulting 6 bits of entropy is seen in SP[9:4].
 	 */
-	choose_random_kstack_offset(get_random_int() & 0x1FF);
+	choose_random_kstack_offset(get_random_u16());
 }

 static inline bool has_syscall_work(unsigned long flags)
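The arithmetic in the rewritten comment can be checked with a short stand-alone program. This is an illustration under the assumptions stated in the comment (a 10-bit KSTACK_OFFSET_MAX() limit and 16-byte SP alignment), not kernel code; the constants below are chosen only to mirror that description.

/* Illustration: 10 usable bits, minus 4 low bits lost to alignment = 6 bits in [9:4]. */
#include <stdio.h>

int main(void)
{
	unsigned int sample  = 0xBEEF;             /* e.g. a get_random_u16() value */
	unsigned int capped  = sample & 0x3FF;     /* keep 10 bits (assumed limit) */
	unsigned int applied = capped & ~0xFU;     /* 16-byte aligned stack pointer */

	printf("sample=0x%04x capped=0x%03x applied=0x%03x (entropy in bits [9:4])\n",
	       sample, capped, applied);
	return 0;
}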
@@ -258,6 +258,7 @@
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING          0x1D8
 #define H_ILLAN_ATTRIBUTES      0x244
+#define H_ADD_LOGICAL_LAN_BUFFERS 0x248
 #define H_MODIFY_HEA_QP         0x250
 #define H_QUERY_HEA_QP          0x254
 #define H_QUERY_HEA             0x258
@@ -6,6 +6,7 @@
  * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
  */

+#include <linux/security.h>
 #include <linux/slab.h>
 #include "hypfs.h"

@@ -64,24 +65,29 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	long rc;

 	mutex_lock(&df->lock);
-	if (df->unlocked_ioctl)
-		rc = df->unlocked_ioctl(file, cmd, arg);
-	else
-		rc = -ENOTTY;
+	rc = df->unlocked_ioctl(file, cmd, arg);
 	mutex_unlock(&df->lock);
 	return rc;
 }

-static const struct file_operations dbfs_ops = {
+static const struct file_operations dbfs_ops_ioctl = {
 	.read = dbfs_read,
 	.llseek = no_llseek,
 	.unlocked_ioctl = dbfs_ioctl,
 };

+static const struct file_operations dbfs_ops = {
+	.read = dbfs_read,
+	.llseek = no_llseek,
+};
+
 void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
 {
-	df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
-					 &dbfs_ops);
+	const struct file_operations *fops = &dbfs_ops;
+
+	if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS))
+		fops = &dbfs_ops_ioctl;
+	df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops);
 	mutex_init(&df->lock);
 }
@@ -55,7 +55,7 @@ static __always_inline void arch_exit_to_user_mode(void)
 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						   unsigned long ti_work)
 {
-	choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+	choose_random_kstack_offset(get_tod_clock_fast());
 }

 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
@@ -106,6 +106,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
 	struct zpci_dev *zdev = to_zpci(pdev);
 	int rc;

+	/* The underlying device may have been disabled by the event */
+	if (!zdev_enabled(zdev))
+		return PCI_ERS_RESULT_NEED_RESET;
+
 	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
 	rc = zpci_reset_load_store_blocked(zdev);
 	if (rc) {
@@ -273,6 +277,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 	struct pci_dev *pdev = NULL;
 	pci_ers_result_t ers_res;
+	u32 fh = 0;
+	int rc;

 	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
 		 ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +287,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)

 	if (zdev) {
 		mutex_lock(&zdev->state_lock);
+		rc = clp_refresh_fh(zdev->fid, &fh);
+		if (rc)
+			goto no_pdev;
+		if (!fh || ccdf->fh != fh) {
+			/* Ignore events with stale handles */
+			zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+				 ccdf->fid, fh, ccdf->fh);
+			goto no_pdev;
+		}
 		zpci_update_fh(zdev, ccdf->fh);
 		if (zdev->zbus->bus)
 			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -1949,7 +1949,6 @@ config INTEL_TDX_HOST
 	depends on X86_X2APIC
 	select ARCH_KEEP_MEMBLOCK
 	depends on CONTIG_ALLOC
-	depends on !KEXEC_CORE
 	depends on X86_MCE
 	help
 	  Intel Trust Domain Extensions (TDX) protects guest VMs from malicious
@@ -2754,6 +2753,15 @@ config MITIGATION_TSA
 	  security vulnerability on AMD CPUs which can lead to forwarding of
 	  invalid info to subsequent instructions and thus can affect their
 	  timing and thereby cause a leakage.
+
+config MITIGATION_VMSCAPE
+	bool "Mitigate VMSCAPE"
+	depends on KVM
+	default y
+	help
+	  Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security
+	  vulnerability on Intel and AMD CPUs that may allow a guest to do
+	  Spectre v2 style attacks on userspace hypervisor.
 endif

 config ARCH_HAS_ADD_PAGES
@@ -491,6 +491,7 @@
 #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
 #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
 #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */

 /*
  * BUG word(s)
@@ -546,4 +547,5 @@
 #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
 #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
 #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
 #endif /* _ASM_X86_CPUFEATURES_H */
@@ -73,19 +73,23 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 #endif

 	/*
-	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
-	 * but not enough for x86 stack utilization comfort. To keep
-	 * reasonable stack head room, reduce the maximum offset to 8 bits.
-	 *
-	 * The actual entropy will be further reduced by the compiler when
-	 * applying stack alignment constraints (see cc_stack_align4/8 in
+	 * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+	 * bits. The actual entropy will be further reduced by the compiler
+	 * when applying stack alignment constraints (see cc_stack_align4/8 in
 	 * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
 	 * low bits from any entropy chosen here.
 	 *
-	 * Therefore, final stack offset entropy will be 5 (x86_64) or
-	 * 6 (ia32) bits.
+	 * Therefore, final stack offset entropy will be 7 (x86_64) or
+	 * 8 (ia32) bits.
 	 */
-	choose_random_kstack_offset(rdtsc() & 0xFF);
+	choose_random_kstack_offset(rdtsc());
+
+	/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
+	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
+	    this_cpu_read(x86_ibpb_exit_to_user)) {
+		indirect_branch_prediction_barrier();
+		this_cpu_write(x86_ibpb_exit_to_user, false);
+	}
 }
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
@@ -129,7 +129,7 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
 		unsigned long start_address,
 		unsigned int preserve_context,
-		unsigned int host_mem_enc_active);
+		unsigned int cache_incoherent);
 #endif

 #define ARCH_HAS_KIMAGE_ARCH
@@ -545,6 +545,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 		: "memory");
 }

+DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
+
 static inline void indirect_branch_prediction_barrier(void)
 {
 	asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
@@ -773,6 +773,8 @@ void __noreturn stop_this_cpu(void *dummy);
 void microcode_check(struct cpuinfo_x86 *prev_info);
 void store_cpu_caps(struct cpuinfo_x86 *info);

+DECLARE_PER_CPU(bool, cache_state_incoherent);
+
 enum l1tf_mitigations {
 	L1TF_MITIGATION_OFF,
 	L1TF_MITIGATION_FLUSH_NOWARN,
@@ -98,18 +98,41 @@ u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
 u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
 void tdx_init(void);

+#include <linux/preempt.h>
 #include <asm/archrandom.h>
+#include <asm/processor.h>

 typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);

-static inline u64 sc_retry(sc_func_t func, u64 fn,
+static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
+						  struct tdx_module_args *args)
+{
+	lockdep_assert_preemption_disabled();
+
+	/*
+	 * SEAMCALLs are made to the TDX module and can generate dirty
+	 * cachelines of TDX private memory. Mark cache state incoherent
+	 * so that the cache can be flushed during kexec.
+	 *
+	 * This needs to be done before actually making the SEAMCALL,
+	 * because kexec-ing CPU could send NMI to stop remote CPUs,
+	 * in which case even disabling IRQ won't help here.
+	 */
+	this_cpu_write(cache_state_incoherent, true);
+
+	return func(fn, args);
+}
+
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
			   struct tdx_module_args *args)
 {
 	int retry = RDRAND_RETRY_LOOPS;
 	u64 ret;

 	do {
-		ret = func(fn, args);
+		preempt_disable();
+		ret = __seamcall_dirty_cache(func, fn, args);
+		preempt_enable();
 	} while (ret == TDX_RND_NO_ENTROPY && --retry);

 	return ret;
@@ -199,5 +222,11 @@ static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
 static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
 #endif /* CONFIG_INTEL_TDX_HOST */

+#ifdef CONFIG_KEXEC_CORE
+void tdx_cpu_flush_cache_for_kexec(void);
+#else
+static inline void tdx_cpu_flush_cache_for_kexec(void) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_TDX_H */
@@ -530,6 +530,23 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 {
 	u64 msr;

+	/*
+	 * Mark using WBINVD is needed during kexec on processors that
+	 * support SME. This provides support for performing a successful
+	 * kexec when going from SME inactive to SME active (or vice-versa).
+	 *
+	 * The cache must be cleared so that if there are entries with the
+	 * same physical address, both with and without the encryption bit,
+	 * they don't race each other when flushed and potentially end up
+	 * with the wrong entry being committed to memory.
+	 *
+	 * Test the CPUID bit directly because with mem_encrypt=off the
+	 * BSP will clear the X86_FEATURE_SME bit and the APs will not
+	 * see it set after that.
+	 */
+	if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0)))
+		__this_cpu_write(cache_state_incoherent, true);
+
 	/*
 	 * BIOS support is required for SME and SEV.
 	 * For SME: If BIOS has enabled SME then adjust x86_phys_bits by
@@ -53,6 +53,9 @@ static void __init gds_select_mitigation(void);
 static void __init its_select_mitigation(void);
 static void __init tsa_select_mitigation(void);
 static void __init tsa_apply_mitigation(void);
+static void __init vmscape_select_mitigation(void);
+static void __init vmscape_update_mitigation(void);
+static void __init vmscape_apply_mitigation(void);

 /* The base value of the SPEC_CTRL MSR without task-specific bits set */
 u64 x86_spec_ctrl_base;
@@ -62,6 +65,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
 DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
 EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);

+/*
+ * Set when the CPU has run a potentially malicious guest. An IBPB will
+ * be needed to before running userspace. That IBPB will flush the branch
+ * predictor content.
+ */
+DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user);
+EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user);
+
 u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;

 static u64 __ro_after_init x86_arch_cap_msr;
@@ -197,6 +208,9 @@ void __init cpu_select_mitigations(void)
 	its_select_mitigation();
 	tsa_select_mitigation();
 	tsa_apply_mitigation();
+	vmscape_select_mitigation();
+	vmscape_update_mitigation();
+	vmscape_apply_mitigation();
 }

 /*
@@ -2173,88 +2187,6 @@ static void update_mds_branch_idle(void)
 	}
 }

-#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
-#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
-#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
-
-void cpu_bugs_smt_update(void)
-{
-	mutex_lock(&spec_ctrl_mutex);
-
-	if (sched_smt_active() && unprivileged_ebpf_enabled() &&
-	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
-		pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
-
-	switch (spectre_v2_user_stibp) {
-	case SPECTRE_V2_USER_NONE:
-		break;
-	case SPECTRE_V2_USER_STRICT:
-	case SPECTRE_V2_USER_STRICT_PREFERRED:
-		update_stibp_strict();
-		break;
-	case SPECTRE_V2_USER_PRCTL:
-	case SPECTRE_V2_USER_SECCOMP:
-		update_indir_branch_cond();
-		break;
-	}
-
-	switch (mds_mitigation) {
-	case MDS_MITIGATION_FULL:
-	case MDS_MITIGATION_AUTO:
-	case MDS_MITIGATION_VMWERV:
-		if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
-			pr_warn_once(MDS_MSG_SMT);
-		update_mds_branch_idle();
-		break;
-	case MDS_MITIGATION_OFF:
-		break;
-	}
-
-	switch (taa_mitigation) {
-	case TAA_MITIGATION_VERW:
-	case TAA_MITIGATION_AUTO:
-	case TAA_MITIGATION_UCODE_NEEDED:
-		if (sched_smt_active())
-			pr_warn_once(TAA_MSG_SMT);
-		break;
-	case TAA_MITIGATION_TSX_DISABLED:
-	case TAA_MITIGATION_OFF:
-		break;
-	}
-
-	switch (mmio_mitigation) {
-	case MMIO_MITIGATION_VERW:
-	case MMIO_MITIGATION_AUTO:
-	case MMIO_MITIGATION_UCODE_NEEDED:
-		if (sched_smt_active())
-			pr_warn_once(MMIO_MSG_SMT);
-		break;
-	case MMIO_MITIGATION_OFF:
-		break;
-	}
-
-	switch (tsa_mitigation) {
-	case TSA_MITIGATION_USER_KERNEL:
-	case TSA_MITIGATION_VM:
-	case TSA_MITIGATION_AUTO:
-	case TSA_MITIGATION_FULL:
-		/*
-		 * TSA-SQ can potentially lead to info leakage between
-		 * SMT threads.
-		 */
-		if (sched_smt_active())
-			static_branch_enable(&cpu_buf_idle_clear);
-		else
-			static_branch_disable(&cpu_buf_idle_clear);
-		break;
-	case TSA_MITIGATION_NONE:
-	case TSA_MITIGATION_UCODE_NEEDED:
-		break;
-	}
-
-	mutex_unlock(&spec_ctrl_mutex);
-}
-
 #ifdef CONFIG_DEBUG_FS
 /*
  * Provide a debugfs file to dump SPEC_CTRL MSRs of all the CPUs
@@ -3037,9 +2969,185 @@ out:
 	pr_info("%s\n", srso_strings[srso_mitigation]);
 }

+#undef pr_fmt
+#define pr_fmt(fmt)	"VMSCAPE: " fmt
+
+enum vmscape_mitigations {
+	VMSCAPE_MITIGATION_NONE,
+	VMSCAPE_MITIGATION_AUTO,
+	VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER,
+	VMSCAPE_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+static const char * const vmscape_strings[] = {
+	[VMSCAPE_MITIGATION_NONE]		= "Vulnerable",
+	/* [VMSCAPE_MITIGATION_AUTO] */
+	[VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER]	= "Mitigation: IBPB before exit to userspace",
+	[VMSCAPE_MITIGATION_IBPB_ON_VMEXIT]	= "Mitigation: IBPB on VMEXIT",
+};
+
+static enum vmscape_mitigations vmscape_mitigation __ro_after_init =
+	IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE;
+
+static int __init vmscape_parse_cmdline(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off")) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+	} else if (!strcmp(str, "ibpb")) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+	} else if (!strcmp(str, "force")) {
+		setup_force_cpu_bug(X86_BUG_VMSCAPE);
+		vmscape_mitigation = VMSCAPE_MITIGATION_AUTO;
+	} else {
+		pr_err("Ignoring unknown vmscape=%s option.\n", str);
+	}
+
+	return 0;
+}
+early_param("vmscape", vmscape_parse_cmdline);
+
+static void __init vmscape_select_mitigation(void)
+{
+	if (cpu_mitigations_off() ||
+	    !boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
+	    !boot_cpu_has(X86_FEATURE_IBPB)) {
+		vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+		return;
+	}
+
+	if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO)
+		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+}
+
+static void __init vmscape_update_mitigation(void)
+{
+	if (!boot_cpu_has_bug(X86_BUG_VMSCAPE))
+		return;
+
+	if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB ||
+	    srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT)
+		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT;
+
+	pr_info("%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
+static void __init vmscape_apply_mitigation(void)
+{
+	if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER)
+		setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER);
+}
+
 #undef pr_fmt
 #define pr_fmt(fmt)	fmt

+#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
+#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n"
+
+void cpu_bugs_smt_update(void)
+{
+	mutex_lock(&spec_ctrl_mutex);
+
+	if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+		pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
+	switch (spectre_v2_user_stibp) {
+	case SPECTRE_V2_USER_NONE:
+		break;
+	case SPECTRE_V2_USER_STRICT:
+	case SPECTRE_V2_USER_STRICT_PREFERRED:
+		update_stibp_strict();
+		break;
+	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
+		update_indir_branch_cond();
+		break;
+	}
+
+	switch (mds_mitigation) {
+	case MDS_MITIGATION_FULL:
+	case MDS_MITIGATION_AUTO:
+	case MDS_MITIGATION_VMWERV:
+		if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
+			pr_warn_once(MDS_MSG_SMT);
+		update_mds_branch_idle();
+		break;
+	case MDS_MITIGATION_OFF:
+		break;
+	}
+
+	switch (taa_mitigation) {
+	case TAA_MITIGATION_VERW:
+	case TAA_MITIGATION_AUTO:
+	case TAA_MITIGATION_UCODE_NEEDED:
+		if (sched_smt_active())
+			pr_warn_once(TAA_MSG_SMT);
+		break;
+	case TAA_MITIGATION_TSX_DISABLED:
+	case TAA_MITIGATION_OFF:
+		break;
+	}
+
+	switch (mmio_mitigation) {
+	case MMIO_MITIGATION_VERW:
+	case MMIO_MITIGATION_AUTO:
+	case MMIO_MITIGATION_UCODE_NEEDED:
+		if (sched_smt_active())
+			pr_warn_once(MMIO_MSG_SMT);
+		break;
+	case MMIO_MITIGATION_OFF:
+		break;
+	}
+
+	switch (tsa_mitigation) {
+	case TSA_MITIGATION_USER_KERNEL:
+	case TSA_MITIGATION_VM:
+	case TSA_MITIGATION_AUTO:
+	case TSA_MITIGATION_FULL:
+		/*
+		 * TSA-SQ can potentially lead to info leakage between
+		 * SMT threads.
+		 */
+		if (sched_smt_active())
+			static_branch_enable(&cpu_buf_idle_clear);
+		else
+			static_branch_disable(&cpu_buf_idle_clear);
+		break;
+	case TSA_MITIGATION_NONE:
+	case TSA_MITIGATION_UCODE_NEEDED:
+		break;
+	}
+
+	switch (vmscape_mitigation) {
+	case VMSCAPE_MITIGATION_NONE:
+	case VMSCAPE_MITIGATION_AUTO:
+		break;
+	case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT:
+	case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER:
+		/*
+		 * Hypervisors can be attacked across-threads, warn for SMT when
+		 * STIBP is not already enabled system-wide.
+		 *
+		 * Intel eIBRS (!AUTOIBRS) implies STIBP on.
+		 */
+		if (!sched_smt_active() ||
+		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+		    (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+		     !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
+			break;
+		pr_warn_once(VMSCAPE_MSG_SMT);
+		break;
+	}
+
+	mutex_unlock(&spec_ctrl_mutex);
+}
+
 #ifdef CONFIG_SYSFS

 #define L1TF_DEFAULT_MSG	"Mitigation: PTE Inversion"
@@ -3283,6 +3391,11 @@ static ssize_t tsa_show_state(char *buf)
 	return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
 }

+static ssize_t vmscape_show_state(char *buf)
+{
+	return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
			       char *buf, unsigned int bug)
 {
@@ -3347,6 +3460,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_TSA:
 		return tsa_show_state(buf);

+	case X86_BUG_VMSCAPE:
+		return vmscape_show_state(buf);
+
 	default:
 		break;
 	}
@@ -3436,6 +3552,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
 }
+
+ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE);
+}
 #endif

 void __warn_thunk(void)
@@ -1280,55 +1280,71 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 #define ITS_NATIVE_ONLY	BIT(9)
 /* CPU is affected by Transient Scheduler Attacks */
 #define TSA		BIT(10)
+/* CPU is affected by VMSCAPE */
+#define VMSCAPE		BIT(11)

 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
-	VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO),
-	VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO),
-	VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO),
-	VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS),
-	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS),
-	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
-	VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS),
-	VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
-	VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED),
+	VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE),
 	VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
 	VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
 	VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
-	VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
-	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS),
-	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+	VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
 	VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
 	VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
 	VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
 	VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
-	VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
-	VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
-	VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
-	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS),
-	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS),
-	VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS),
+	VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE),
+	VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE),
+	VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE),
 	VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
 	VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS),
 	VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
 	VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS),
 	VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS),
 	VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS),
+	VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE),

 	VULNBL_AMD(0x15, RETBLEED),
 	VULNBL_AMD(0x16, RETBLEED),
-	VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
-	VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
-	VULNBL_AMD(0x19, SRSO | TSA),
-	VULNBL_AMD(0x1a, SRSO),
+	VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+	VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+	VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE),
+	VULNBL_AMD(0x1a, SRSO | VMSCAPE),
 	{}
 };

@@ -1551,6 +1567,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		}
 	}

+	/*
+	 * Set the bug only on bare-metal. A nested hypervisor should already be
+	 * deploying IBPB to isolate itself from nested guests.
+	 */
+	if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) &&
+	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		setup_force_cpu_bug(X86_BUG_VMSCAPE);
+
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;
@@ -29,6 +29,7 @@
 #include <asm/set_memory.h>
 #include <asm/cpu.h>
 #include <asm/efi.h>
+#include <asm/processor.h>

 #ifdef CONFIG_ACPI
 /*
@@ -299,6 +300,22 @@ int machine_kexec_prepare(struct kimage *image)
 	unsigned long start_pgtable;
 	int result;

+	/*
+	 * Some early TDX-capable platforms have an erratum. A kernel
+	 * partial write (a write transaction of less than cacheline
+	 * lands at memory controller) to TDX private memory poisons that
+	 * memory, and a subsequent read triggers a machine check.
+	 *
+	 * On those platforms the old kernel must reset TDX private
+	 * memory before jumping to the new kernel otherwise the new
+	 * kernel may see unexpected machine check. For simplicity
+	 * just fail kexec/kdump on those platforms.
+	 */
+	if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) {
+		pr_info_once("Not allowed on platform with tdx_pw_mce bug\n");
+		return -EOPNOTSUPP;
+	}
+
 	/* Calculate the offsets */
 	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;

@@ -323,6 +340,7 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	unsigned int cache_incoherent;
 	int save_ftrace_enabled;

 #ifdef CONFIG_KEXEC_JUMP
@@ -362,6 +380,12 @@ void machine_kexec(struct kimage *image)
 	page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
				   << PAGE_SHIFT);

+	/*
+	 * This must be done before load_segments(), since it resets
+	 * GS to 0 and percpu data needs the correct GS to work.
+	 */
+	cache_incoherent = this_cpu_read(cache_state_incoherent);
+
 	/*
 	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part. Whenever the visible part is
@@ -371,6 +395,11 @@ void machine_kexec(struct kimage *image)
 	 *
 	 * I take advantage of this here by force loading the
 	 * segments, before I zap the gdt with an invalid value.
+	 *
+	 * load_segments() resets GS to 0. Don't make any function call
+	 * after here since call depth tracking uses percpu variables to
+	 * operate (relocate_kernel() is explicitly ignored by call depth
+	 * tracking).
 	 */
 	load_segments();
 	/*
@@ -84,6 +84,16 @@ EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 DEFINE_PER_CPU(bool, __tss_limit_invalid);
 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);

+/*
+ * The cache may be in an incoherent state and needs flushing during kexec.
+ * E.g., on SME/TDX platforms, dirty cacheline aliases with and without
+ * encryption bit(s) can coexist and the cache needs to be flushed before
+ * booting to the new kernel to avoid the silent memory corruption due to
+ * dirty cachelines with different encryption property being written back
+ * to the memory.
+ */
+DEFINE_PER_CPU(bool, cache_state_incoherent);
+
 /*
  * this gets called so that we can store lazy state into memory and copy the
  * current task into the new thread.
@@ -785,19 +795,7 @@ void __noreturn stop_this_cpu(void *dummy)
 	disable_local_APIC();
 	mcheck_cpu_clear(c);

-	/*
-	 * Use wbinvd on processors that support SME. This provides support
-	 * for performing a successful kexec when going from SME inactive
-	 * to SME active (or vice-versa). The cache must be cleared so that
-	 * if there are entries with the same physical address, both with and
-	 * without the encryption bit, they don't race each other when flushed
-	 * and potentially end up with the wrong entry being committed to
-	 * memory.
-	 *
-	 * Test the CPUID bit directly because the machine might've cleared
-	 * X86_FEATURE_SME due to cmdline options.
-	 */
-	if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0)))
+	if (this_cpu_read(cache_state_incoherent))
 		native_wbinvd();

 	/*
@@ -52,7 +52,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
	 * %rsi page_list
	 * %rdx start address
	 * %rcx preserve_context
-	 * %r8  host_mem_enc_active
+	 * %r8  cache_incoherent
	 */

	/* Save the CPU context, used for jumping back */
@@ -161,14 +161,21 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	movq	%r9, %cr3

	/*
+	 * If the memory cache is in incoherent state, e.g., due to
+	 * memory encryption, do WBINVD to flush cache.
+	 *
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
+	 *
+	 * Note SME sets this flag to true when the platform supports
+	 * SME, so the WBINVD is performed even SME is not activated
+	 * by the kernel. But this has no harm.
	 */
	testq	%r12, %r12
-	jz .Lsme_off
+	jz .Lnowbinvd
	wbinvd
-.Lsme_off:
+.Lnowbinvd:

	movq	%rcx, %r11
	call	swap_pages
@@ -1980,6 +1980,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 		if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
 			goto out_flush_all;

+		if (is_noncanonical_invlpg_address(entries[i], vcpu))
+			continue;
+
 		/*
 		 * Lower 12 bits of 'address' encode the number of additional
 		 * pages to flush.
@@ -442,6 +442,16 @@ void tdx_disable_virtualization_cpu(void)
		tdx_flush_vp(&arg);
	}
	local_irq_restore(flags);
+
+	/*
+	 * Flush cache now if kexec is possible: this is necessary to avoid
+	 * having dirty private memory cachelines when the new kernel boots,
+	 * but WBINVD is a relatively expensive operation and doing it during
+	 * kexec can exacerbate races in native_stop_other_cpus().  Do it
+	 * now, since this is a safe moment and there is going to be no more
+	 * TDX activity on this CPU from this point on.
+	 */
+	tdx_cpu_flush_cache_for_kexec();
 }
 
 #define TDX_SEAMCALL_RETRIES	10000
@@ -11160,6 +11160,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
	if (vcpu->arch.guest_fpu.xfd_err)
		wrmsrl(MSR_IA32_XFD_ERR, 0);
 
+	/*
+	 * Mark this CPU as needing a branch predictor flush before running
+	 * userspace. Must be done before enabling preemption to ensure it gets
+	 * set for the CPU that actually ran the guest, and not the CPU that it
+	 * may migrate to.
+	 */
+	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER))
+		this_cpu_write(x86_ibpb_exit_to_user, true);
+
	/*
	 * Consume any pending interrupts, including the possible source of
	 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
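Editor's illustration, not part of the imported diff: the hunk above flags the CPU as needing an IBPB before the next return to host userspace, which is the conditional-IBPB VMSCAPE mitigation described earlier in this import. Below is a hedged standalone C sketch of that deferred-flush pattern; `ibpb_pending_for_user`, `after_vm_exit()` and `exit_to_user()` are illustrative names only, and a real implementation uses per-CPU state and the actual IBPB command.

```c
#include <stdbool.h>
#include <stdio.h>

/* Per-CPU in the kernel; one global is enough for the sketch. */
static bool ibpb_pending_for_user;

static void issue_ibpb(void)
{
	puts("IBPB (flush indirect branch predictors)");
}

/* Runs right after a VM-exit, before preemption is re-enabled, so the
 * flag lands on the CPU that actually executed the guest. */
static void after_vm_exit(void)
{
	ibpb_pending_for_user = true;
}

/* Runs on the first return to host userspace after the guest ran.
 * If no userspace runs before the next VM-entry, no IBPB is issued. */
static void exit_to_user(void)
{
	if (ibpb_pending_for_user) {
		issue_ibpb();
		ibpb_pending_for_user = false;
	}
	puts("resume host userspace (e.g. the VMM)");
}

int main(void)
{
	after_vm_exit();   /* guest ran on this CPU */
	exit_to_user();    /* IBPB once, then run the VMM */
	exit_to_user();    /* nothing pending, no second IBPB */
	return 0;
}
```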
@@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err,
		args->r9, args->r10, args->r11);
 }
 
-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
-				 u64 fn, struct tdx_module_args *args)
+static __always_inline int sc_retry_prerr(sc_func_t func,
+					  sc_err_func_t err_func,
+					  u64 fn, struct tdx_module_args *args)
 {
	u64 sret = sc_retry(func, fn, args);
 

@@ -1288,7 +1289,7 @@ static bool paddr_is_tdx_private(unsigned long phys)
		return false;
 
	/* Get page type from the TDX module */
-	sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args);
+	sret = __seamcall_dirty_cache(__seamcall_ret, TDH_PHYMEM_PAGE_RDMD, &args);
 
	/*
	 * The SEAMCALL will not return success unless there is a

@@ -1544,7 +1545,7 @@ noinstr __flatten u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *ar
 {
	args->rcx = tdx_tdvpr_pa(td);
 
-	return __seamcall_saved_ret(TDH_VP_ENTER, args);
+	return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args);
 }
 EXPORT_SYMBOL_GPL(tdh_vp_enter);
 
@@ -1892,3 +1893,22 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
 }
 EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
+
+#ifdef CONFIG_KEXEC_CORE
+void tdx_cpu_flush_cache_for_kexec(void)
+{
+	lockdep_assert_preemption_disabled();
+
+	if (!this_cpu_read(cache_state_incoherent))
+		return;
+
+	/*
+	 * Private memory cachelines need to be clean at the time of
+	 * kexec. Write them back now, as the caller promises that
+	 * there should be no more SEAMCALLs on this CPU.
+	 */
+	wbinvd();
+	this_cpu_write(cache_state_incoherent, false);
+}
+EXPORT_SYMBOL_GPL(tdx_cpu_flush_cache_for_kexec);
+#endif
@@ -489,6 +489,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
 # CONFIG_X86_INTEL_TSX_MODE_ON is not set
 # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
 CONFIG_X86_SGX=y
+CONFIG_INTEL_TDX_HOST=y
 CONFIG_X86_USER_SHADOW_STACK=y
 CONFIG_EFI=y
 CONFIG_EFI_STUB=y
@@ -568,6 +569,7 @@ CONFIG_MITIGATION_SRBDS=y
 CONFIG_MITIGATION_SSB=y
 CONFIG_MITIGATION_ITS=y
 CONFIG_MITIGATION_TSA=y
+CONFIG_MITIGATION_VMSCAPE=y
 CONFIG_ARCH_HAS_ADD_PAGES=y
 
 #
@@ -776,6 +778,7 @@ CONFIG_KVM=m
 CONFIG_KVM_SW_PROTECTED_VM=y
 CONFIG_KVM_INTEL=m
 CONFIG_X86_SGX_KVM=y
+CONFIG_KVM_INTEL_TDX=y
 CONFIG_KVM_AMD=m
 CONFIG_KVM_AMD_SEV=y
 CONFIG_KVM_SMM=y
@@ -1115,6 +1118,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
 CONFIG_HAVE_FAST_GUP=y
+CONFIG_ARCH_KEEP_MEMBLOCK=y
 CONFIG_NUMA_KEEP_MEMINFO=y
 CONFIG_MEMORY_ISOLATION=y
 CONFIG_EXCLUSIVE_SYSTEM_RAM=y

@@ -490,6 +490,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
 # CONFIG_X86_INTEL_TSX_MODE_ON is not set
 # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
 CONFIG_X86_SGX=y
+CONFIG_INTEL_TDX_HOST=y
 CONFIG_X86_USER_SHADOW_STACK=y
 CONFIG_EFI=y
 CONFIG_EFI_STUB=y
@@ -570,6 +571,7 @@ CONFIG_MITIGATION_SRBDS=y
 CONFIG_MITIGATION_SSB=y
 CONFIG_MITIGATION_ITS=y
 CONFIG_MITIGATION_TSA=y
+CONFIG_MITIGATION_VMSCAPE=y
 CONFIG_ARCH_HAS_ADD_PAGES=y
 
 #
@@ -785,6 +787,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
 CONFIG_KVM_INTEL=m
 # CONFIG_KVM_INTEL_PROVE_VE is not set
 CONFIG_X86_SGX_KVM=y
+CONFIG_KVM_INTEL_TDX=y
 CONFIG_KVM_AMD=m
 CONFIG_KVM_AMD_SEV=y
 CONFIG_KVM_SMM=y
@@ -1123,6 +1126,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
 CONFIG_HAVE_FAST_GUP=y
+CONFIG_ARCH_KEEP_MEMBLOCK=y
 CONFIG_NUMA_KEEP_MEMINFO=y
 CONFIG_MEMORY_ISOLATION=y
 CONFIG_EXCLUSIVE_SYSTEM_RAM=y

@@ -490,6 +490,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
 # CONFIG_X86_INTEL_TSX_MODE_ON is not set
 # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
 CONFIG_X86_SGX=y
+CONFIG_INTEL_TDX_HOST=y
 CONFIG_X86_USER_SHADOW_STACK=y
 CONFIG_EFI=y
 CONFIG_EFI_STUB=y
@@ -570,6 +571,7 @@ CONFIG_MITIGATION_SRBDS=y
 CONFIG_MITIGATION_SSB=y
 CONFIG_MITIGATION_ITS=y
 CONFIG_MITIGATION_TSA=y
+CONFIG_MITIGATION_VMSCAPE=y
 CONFIG_ARCH_HAS_ADD_PAGES=y
 
 #
@@ -783,6 +785,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
 CONFIG_KVM_INTEL=m
 # CONFIG_KVM_INTEL_PROVE_VE is not set
 CONFIG_X86_SGX_KVM=y
+CONFIG_KVM_INTEL_TDX=y
 CONFIG_KVM_AMD=m
 CONFIG_KVM_AMD_SEV=y
 CONFIG_KVM_SMM=y
@@ -1120,6 +1123,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
 CONFIG_HAVE_FAST_GUP=y
+CONFIG_ARCH_KEEP_MEMBLOCK=y
 CONFIG_NUMA_KEEP_MEMINFO=y
 CONFIG_MEMORY_ISOLATION=y
 CONFIG_EXCLUSIVE_SYSTEM_RAM=y

@@ -486,6 +486,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
 # CONFIG_X86_INTEL_TSX_MODE_ON is not set
 # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
 CONFIG_X86_SGX=y
+CONFIG_INTEL_TDX_HOST=y
 CONFIG_X86_USER_SHADOW_STACK=y
 CONFIG_EFI=y
 CONFIG_EFI_STUB=y
@@ -566,6 +567,7 @@ CONFIG_MITIGATION_SRBDS=y
 CONFIG_MITIGATION_SSB=y
 CONFIG_MITIGATION_ITS=y
 CONFIG_MITIGATION_TSA=y
+CONFIG_MITIGATION_VMSCAPE=y
 CONFIG_ARCH_HAS_ADD_PAGES=y
 
 #
@@ -772,6 +774,7 @@ CONFIG_KVM=m
 CONFIG_KVM_SW_PROTECTED_VM=y
 CONFIG_KVM_INTEL=m
 CONFIG_X86_SGX_KVM=y
+CONFIG_KVM_INTEL_TDX=y
 CONFIG_KVM_AMD=m
 CONFIG_KVM_AMD_SEV=y
 CONFIG_KVM_SMM=y
@@ -1111,6 +1114,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
 CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
 CONFIG_HAVE_FAST_GUP=y
+CONFIG_ARCH_KEEP_MEMBLOCK=y
 CONFIG_NUMA_KEEP_MEMINFO=y
 CONFIG_MEMORY_ISOLATION=y
 CONFIG_EXCLUSIVE_SYSTEM_RAM=y
@@ -23,7 +23,7 @@ static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err)
	struct aead_request *subreq = aead_request_ctx(req);
	struct crypto_aead *geniv;
 
-	if (err == -EINPROGRESS)
+	if (err == -EINPROGRESS || err == -EBUSY)
		return;
 
	if (err)

@@ -203,12 +203,12 @@ static void xts_encrypt_done(struct crypto_async_request *areq, int err)
	if (!err) {
		struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 
-		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+		rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
		err = xts_xor_tweak_post(req, true);
 
		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
			err = xts_cts_final(req, crypto_skcipher_encrypt);
-			if (err == -EINPROGRESS)
+			if (err == -EINPROGRESS || err == -EBUSY)
				return;
		}
	}

@@ -223,12 +223,12 @@ static void xts_decrypt_done(struct crypto_async_request *areq, int err)
	if (!err) {
		struct xts_request_ctx *rctx = skcipher_request_ctx(req);
 
-		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+		rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
		err = xts_xor_tweak_post(req, false);
 
		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
			err = xts_cts_final(req, crypto_skcipher_decrypt);
-			if (err == -EINPROGRESS)
+			if (err == -EINPROGRESS || err == -EBUSY)
				return;
		}
	}
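Editor's illustration, not part of the imported diff: the crypto hunks above extend the "request still in flight" test from -EINPROGRESS to also cover -EBUSY, i.e. a backlogged asynchronous request that will still get a final completion later. A minimal standalone C sketch of a completion callback with that check follows; the function name is a hypothetical stand-in, not a kernel crypto API.

```c
#include <errno.h>
#include <stdio.h>

/* Completion callback for an asynchronous request.  Both -EINPROGRESS and
 * -EBUSY mean "not finished yet, another completion will follow", so the
 * callback must not tear the request down in either case. */
static void request_complete(int err)
{
	if (err == -EINPROGRESS || err == -EBUSY)
		return;

	if (err)
		printf("request failed: %d\n", err);
	else
		printf("request done\n");

	/* ...release request resources only on the final completion... */
}

int main(void)
{
	request_complete(-EINPROGRESS); /* ignored: still running */
	request_complete(-EBUSY);       /* ignored: queued on the backlog */
	request_complete(0);            /* final completion */
	return 0;
}
```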
@@ -581,6 +581,7 @@ CPU_SHOW_VULN_FALLBACK(gds);
 CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling);
 CPU_SHOW_VULN_FALLBACK(indirect_target_selection);
 CPU_SHOW_VULN_FALLBACK(tsa);
+CPU_SHOW_VULN_FALLBACK(vmscape);
 
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);

@@ -598,6 +599,7 @@ static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
 static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL);
 static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL);
 static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL);
+static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
	&dev_attr_meltdown.attr,

@@ -616,6 +618,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
	&dev_attr_reg_file_data_sampling.attr,
	&dev_attr_indirect_target_selection.attr,
	&dev_attr_tsa.attr,
+	&dev_attr_vmscape.attr,
	NULL
 };
 
@@ -815,7 +815,7 @@ static int scpi_init_versions(struct scpi_drvinfo *info)
		info->firmware_version = le32_to_cpu(caps.platform_version);
	}
	/* Ignore error if not implemented */
-	if (scpi_info->is_legacy && ret == -EOPNOTSUPP)
+	if (info->is_legacy && ret == -EOPNOTSUPP)
		return 0;
 
	return ret;

@@ -911,13 +911,14 @@ static int scpi_probe(struct platform_device *pdev)
	struct resource res;
	struct device *dev = &pdev->dev;
	struct device_node *np = dev->of_node;
+	struct scpi_drvinfo *scpi_drvinfo;
 
-	scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL);
-	if (!scpi_info)
+	scpi_drvinfo = devm_kzalloc(dev, sizeof(*scpi_drvinfo), GFP_KERNEL);
+	if (!scpi_drvinfo)
		return -ENOMEM;
 
	if (of_match_device(legacy_scpi_of_match, &pdev->dev))
-		scpi_info->is_legacy = true;
+		scpi_drvinfo->is_legacy = true;
 
	count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
	if (count < 0) {

@@ -925,19 +926,19 @@ static int scpi_probe(struct platform_device *pdev)
		return -ENODEV;
	}
 
-	scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan),
-					   GFP_KERNEL);
-	if (!scpi_info->channels)
+	scpi_drvinfo->channels =
+		devm_kcalloc(dev, count, sizeof(struct scpi_chan), GFP_KERNEL);
+	if (!scpi_drvinfo->channels)
		return -ENOMEM;
 
-	ret = devm_add_action(dev, scpi_free_channels, scpi_info);
+	ret = devm_add_action(dev, scpi_free_channels, scpi_drvinfo);
	if (ret)
		return ret;
 
-	for (; scpi_info->num_chans < count; scpi_info->num_chans++) {
+	for (; scpi_drvinfo->num_chans < count; scpi_drvinfo->num_chans++) {
		resource_size_t size;
-		int idx = scpi_info->num_chans;
-		struct scpi_chan *pchan = scpi_info->channels + idx;
+		int idx = scpi_drvinfo->num_chans;
+		struct scpi_chan *pchan = scpi_drvinfo->channels + idx;
		struct mbox_client *cl = &pchan->cl;
		struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
 

@@ -984,45 +985,53 @@ static int scpi_probe(struct platform_device *pdev)
			return ret;
	}
 
-	scpi_info->commands = scpi_std_commands;
+	scpi_drvinfo->commands = scpi_std_commands;
 
-	platform_set_drvdata(pdev, scpi_info);
+	platform_set_drvdata(pdev, scpi_drvinfo);
 
-	if (scpi_info->is_legacy) {
+	if (scpi_drvinfo->is_legacy) {
		/* Replace with legacy variants */
		scpi_ops.clk_set_val = legacy_scpi_clk_set_val;
-		scpi_info->commands = scpi_legacy_commands;
+		scpi_drvinfo->commands = scpi_legacy_commands;
 
		/* Fill priority bitmap */
		for (idx = 0; idx < ARRAY_SIZE(legacy_hpriority_cmds); idx++)
			set_bit(legacy_hpriority_cmds[idx],
-				scpi_info->cmd_priority);
+				scpi_drvinfo->cmd_priority);
	}
 
-	ret = scpi_init_versions(scpi_info);
+	scpi_info = scpi_drvinfo;
+
+	ret = scpi_init_versions(scpi_drvinfo);
	if (ret) {
		dev_err(dev, "incorrect or no SCP firmware found\n");
+		scpi_info = NULL;
		return ret;
	}
 
-	if (scpi_info->is_legacy && !scpi_info->protocol_version &&
-	    !scpi_info->firmware_version)
+	if (scpi_drvinfo->is_legacy && !scpi_drvinfo->protocol_version &&
+	    !scpi_drvinfo->firmware_version)
		dev_info(dev, "SCP Protocol legacy pre-1.0 firmware\n");
	else
		dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n",
			 FIELD_GET(PROTO_REV_MAJOR_MASK,
-				   scpi_info->protocol_version),
+				   scpi_drvinfo->protocol_version),
			 FIELD_GET(PROTO_REV_MINOR_MASK,
-				   scpi_info->protocol_version),
+				   scpi_drvinfo->protocol_version),
			 FIELD_GET(FW_REV_MAJOR_MASK,
-				   scpi_info->firmware_version),
+				   scpi_drvinfo->firmware_version),
			 FIELD_GET(FW_REV_MINOR_MASK,
-				   scpi_info->firmware_version),
+				   scpi_drvinfo->firmware_version),
			 FIELD_GET(FW_REV_PATCH_MASK,
-				   scpi_info->firmware_version));
-	scpi_info->scpi_ops = &scpi_ops;
+				   scpi_drvinfo->firmware_version));
 
-	return devm_of_platform_populate(dev);
+	scpi_drvinfo->scpi_ops = &scpi_ops;
+
+	ret = devm_of_platform_populate(dev);
+	if (ret)
+		scpi_info = NULL;
+
+	return ret;
 }
 
 static const struct of_device_id scpi_of_match[] = {
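Editor's illustration, not part of the imported diff: the scpi_probe hunks above build the driver state in a local `scpi_drvinfo`, publish it to the global `scpi_info` only once it is usable, and withdraw it again if a later step fails. A standalone C sketch of that publish-on-success shape follows; `drvinfo`, `g_info`, `init_versions()` and `probe()` are illustrative names under those assumptions.

```c
#include <stdio.h>
#include <stdlib.h>

struct drvinfo {
	int is_legacy;
	int version;
};

/* Global handle other code reads; it must never point at a dead object. */
static struct drvinfo *g_info;

static int init_versions(struct drvinfo *info)
{
	info->version = 1;
	return 0; /* return nonzero to simulate missing firmware */
}

static int probe(void)
{
	struct drvinfo *info = calloc(1, sizeof(*info));

	if (!info)
		return -1;

	/* Fill in the object locally first. */
	info->is_legacy = 0;

	/* Publish it once it can safely be used by callers... */
	g_info = info;

	if (init_versions(info)) {
		/* ...and withdraw it again if a later step fails. */
		g_info = NULL;
		free(info);
		return -1;
	}

	printf("probe ok, version %d\n", g_info->version);
	return 0;
}

int main(void)
{
	return probe() ? 1 : 0;
}
```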
@@ -34,6 +34,22 @@ static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
	[MANA_IB_CURRENT_RATE].name = "current_rate",
 };
 
+static const struct rdma_stat_desc mana_ib_device_stats_desc[] = {
+	[MANA_IB_SENT_CNPS].name = "sent_cnps",
+	[MANA_IB_RECEIVED_ECNS].name = "received_ecns",
+	[MANA_IB_RECEIVED_CNP_COUNT].name = "received_cnp_count",
+	[MANA_IB_QP_CONGESTED_EVENTS].name = "qp_congested_events",
+	[MANA_IB_QP_RECOVERED_EVENTS].name = "qp_recovered_events",
+	[MANA_IB_DEV_RATE_INC_EVENTS].name = "rate_inc_events",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+	return rdma_alloc_hw_stats_struct(mana_ib_device_stats_desc,
+					  ARRAY_SIZE(mana_ib_device_stats_desc),
+					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
 struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
						  u32 port_num)
 {

@@ -42,8 +58,39 @@ struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
-int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
-			 u32 port_num, int index)
+static int mana_ib_get_hw_device_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats)
+{
+	struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+						ib_dev);
+	struct mana_rnic_query_device_cntrs_resp resp = {};
+	struct mana_rnic_query_device_cntrs_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_DEVICE_COUNTERS,
+			     sizeof(req), sizeof(resp));
+	req.hdr.dev_id = mdev->gdma_dev->dev_id;
+	req.adapter = mdev->adapter_handle;
+
+	err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+				   sizeof(resp), &resp);
+	if (err) {
+		ibdev_err(&mdev->ib_dev, "Failed to query device counters err %d",
+			  err);
+		return err;
+	}
+
+	stats->value[MANA_IB_SENT_CNPS] = resp.sent_cnps;
+	stats->value[MANA_IB_RECEIVED_ECNS] = resp.received_ecns;
+	stats->value[MANA_IB_RECEIVED_CNP_COUNT] = resp.received_cnp_count;
+	stats->value[MANA_IB_QP_CONGESTED_EVENTS] = resp.qp_congested_events;
+	stats->value[MANA_IB_QP_RECOVERED_EVENTS] = resp.qp_recovered_events;
+	stats->value[MANA_IB_DEV_RATE_INC_EVENTS] = resp.rate_inc_events;
+
+	return ARRAY_SIZE(mana_ib_device_stats_desc);
+}
+
+static int mana_ib_get_hw_port_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+				     u32 port_num)
 {
	struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
						ib_dev);

@@ -103,3 +150,12 @@ int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
 
	return ARRAY_SIZE(mana_ib_port_stats_desc);
 }
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+			 u32 port_num, int index)
+{
+	if (!port_num)
+		return mana_ib_get_hw_device_stats(ibdev, stats);
+	else
+		return mana_ib_get_hw_port_stats(ibdev, stats, port_num);
+}
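Editor's illustration, not part of the imported diff: the final hunk above dispatches `get_hw_stats` on the port number, with port 0 meaning "device-wide counters" and any other value meaning "that port's counters". A trivial standalone C sketch of that dispatch follows; the counter values and helper names are hypothetical.

```c
#include <stdio.h>

/* Hypothetical stand-ins for the two counter groups in the hunk above. */
static int fill_device_stats(long long *vals)
{
	vals[0] = 42;       /* e.g. sent_cnps */
	return 1;           /* number of counters written */
}

static int fill_port_stats(long long *vals, unsigned int port)
{
	vals[0] = 7 + port; /* e.g. a per-port counter */
	return 1;
}

/* Mirrors the dispatch added by the patch: port 0 selects the whole
 * device, any other port number selects that port's counters. */
static int get_hw_stats(long long *vals, unsigned int port_num)
{
	if (!port_num)
		return fill_device_stats(vals);
	else
		return fill_port_stats(vals, port_num);
}

int main(void)
{
	long long v[4];

	printf("device counters: %d\n", get_hw_stats(v, 0));
	printf("port 1 counters: %d\n", get_hw_stats(v, 1));
	return 0;
}
```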
@@ -37,8 +37,18 @@ enum mana_ib_port_counters {
	MANA_IB_CURRENT_RATE,
 };
 
+enum mana_ib_device_counters {
+	MANA_IB_SENT_CNPS,
+	MANA_IB_RECEIVED_ECNS,
+	MANA_IB_RECEIVED_CNP_COUNT,
+	MANA_IB_QP_CONGESTED_EVENTS,
+	MANA_IB_QP_RECOVERED_EVENTS,
+	MANA_IB_DEV_RATE_INC_EVENTS,
+};
+
 struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
						  u32 port_num);
+struct rdma_hw_stats *mana_ib_alloc_hw_device_stats(struct ib_device *ibdev);
 int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
			 u32 port_num, int index);
 #endif /* _COUNTERS_H_ */

@@ -66,6 +66,10 @@ static const struct ib_device_ops mana_ib_stats_ops = {
	.get_hw_stats = mana_ib_get_hw_stats,
 };
 
+static const struct ib_device_ops mana_ib_device_stats_ops = {
+	.alloc_hw_device_stats = mana_ib_alloc_hw_device_stats,
+};
+
 static int mana_ib_netdev_event(struct notifier_block *this,
				unsigned long event, void *ptr)
 {

@@ -154,6 +158,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
	}
 
	ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+	if (dev->adapter_caps.feature_flags & MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT)
+		ib_set_device_ops(&dev->ib_dev, &mana_ib_device_stats_ops);
 
	ret = mana_ib_create_eqs(dev);
	if (ret) {

@@ -210,6 +210,7 @@ enum mana_ib_command_code {
	MANA_IB_DESTROY_RC_QP = 0x3000b,
	MANA_IB_SET_QP_STATE = 0x3000d,
	MANA_IB_QUERY_VF_COUNTERS = 0x30022,
+	MANA_IB_QUERY_DEVICE_COUNTERS = 0x30023,
 };
 
 struct mana_ib_query_adapter_caps_req {

@@ -218,6 +219,7 @@ struct mana_ib_query_adapter_caps_req {
 
 enum mana_ib_adapter_features {
	MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+	MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT = BIT(5),
 };
 
 struct mana_ib_query_adapter_caps_resp {

@@ -516,6 +518,23 @@ struct mana_rnic_query_vf_cntrs_resp {
	u64 current_rate;
 }; /* HW Data */
 
+struct mana_rnic_query_device_cntrs_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_device_cntrs_resp {
+	struct gdma_resp_hdr hdr;
+	u32 sent_cnps;
+	u32 received_ecns;
+	u32 reserved1;
+	u32 received_cnp_count;
+	u32 qp_congested_events;
+	u32 qp_recovered_events;
+	u32 rate_inc_events;
+	u32 reserved2;
+}; /* HW Data */
+
 static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
 {
	return mdev->gdma_dev->gdma_context;

@@ -772,7 +772,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
		req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
							  ibqp->qp_num, attr->dest_qp_num);
-		req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class;
+		req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2;
		req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
	}
 
@@ -39,8 +39,6 @@
 #include "ibmveth.h"
 
 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
-static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
-				       bool reuse);
 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
 
 static struct kobj_type ktype_veth_pool;
@@ -213,95 +211,170 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
 static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
					  struct ibmveth_buff_pool *pool)
 {
-	u32 i;
-	u32 count = pool->size - atomic_read(&pool->available);
-	u32 buffers_added = 0;
-	struct sk_buff *skb;
-	unsigned int free_index, index;
-	u64 correlator;
+	union ibmveth_buf_desc descs[IBMVETH_MAX_RX_PER_HCALL] = {0};
+	u32 remaining = pool->size - atomic_read(&pool->available);
+	u64 correlators[IBMVETH_MAX_RX_PER_HCALL] = {0};
	unsigned long lpar_rc;
+	u32 buffers_added = 0;
+	u32 i, filled, batch;
+	struct vio_dev *vdev;
	dma_addr_t dma_addr;
+	struct device *dev;
+	u32 index;
+
+	vdev = adapter->vdev;
+	dev = &vdev->dev;
 
	mb();
 
-	for (i = 0; i < count; ++i) {
-		union ibmveth_buf_desc desc;
+	batch = adapter->rx_buffers_per_hcall;
 
-		free_index = pool->consumer_index;
-		index = pool->free_map[free_index];
-		skb = NULL;
-
-		BUG_ON(index == IBM_VETH_INVALID_MAP);
-
-		/* are we allocating a new buffer or recycling an old one */
-		if (pool->skbuff[index])
-			goto reuse;
-
-		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
-
-		if (!skb) {
-			netdev_dbg(adapter->netdev,
-				   "replenish: unable to allocate skb\n");
-			adapter->replenish_no_mem++;
-			break;
-		}
-
-		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
-					  pool->buff_size, DMA_FROM_DEVICE);
-
-		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
-			goto failure;
-
-		pool->dma_addr[index] = dma_addr;
-		pool->skbuff[index] = skb;
-
-		if (rx_flush) {
-			unsigned int len = min(pool->buff_size,
-					       adapter->netdev->mtu +
-					       IBMVETH_BUFF_OH);
-			ibmveth_flush_buffer(skb->data, len);
-		}
-reuse:
-		dma_addr = pool->dma_addr[index];
-		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
-		desc.fields.address = dma_addr;
-
-		correlator = ((u64)pool->index << 32) | index;
-		*(u64 *)pool->skbuff[index]->data = correlator;
-
-		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
-						   desc.desc);
+	while (remaining > 0) {
+		unsigned int free_index = pool->consumer_index;
+
+		/* Fill a batch of descriptors */
+		for (filled = 0; filled < min(remaining, batch); filled++) {
+			index = pool->free_map[free_index];
+			if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
+				adapter->replenish_add_buff_failure++;
+				netdev_info(adapter->netdev,
+					    "Invalid map index %u, reset\n",
+					    index);
+				schedule_work(&adapter->work);
+				break;
+			}
+
+			if (!pool->skbuff[index]) {
+				struct sk_buff *skb = NULL;
+
+				skb = netdev_alloc_skb(adapter->netdev,
+						       pool->buff_size);
+				if (!skb) {
+					adapter->replenish_no_mem++;
+					adapter->replenish_add_buff_failure++;
+					break;
+				}
+
+				dma_addr = dma_map_single(dev, skb->data,
+							  pool->buff_size,
+							  DMA_FROM_DEVICE);
+				if (dma_mapping_error(dev, dma_addr)) {
+					dev_kfree_skb_any(skb);
+					adapter->replenish_add_buff_failure++;
+					break;
+				}
+
+				pool->dma_addr[index] = dma_addr;
+				pool->skbuff[index] = skb;
+			} else {
+				/* re-use case */
+				dma_addr = pool->dma_addr[index];
+			}
+
+			if (rx_flush) {
+				unsigned int len;
+
+				len = adapter->netdev->mtu + IBMVETH_BUFF_OH;
+				len = min(pool->buff_size, len);
+				ibmveth_flush_buffer(pool->skbuff[index]->data,
+						     len);
+			}
+
+			descs[filled].fields.flags_len = IBMVETH_BUF_VALID |
+							 pool->buff_size;
+			descs[filled].fields.address = dma_addr;
+
+			correlators[filled] = ((u64)pool->index << 32) | index;
+			*(u64 *)pool->skbuff[index]->data = correlators[filled];
+
+			free_index++;
+			if (free_index >= pool->size)
+				free_index = 0;
+		}
+
+		if (!filled)
+			break;
+
+		/* single buffer case*/
+		if (filled == 1)
+			lpar_rc = h_add_logical_lan_buffer(vdev->unit_address,
+							   descs[0].desc);
+		else
+			/* Multi-buffer hcall */
+			lpar_rc = h_add_logical_lan_buffers(vdev->unit_address,
+							    descs[0].desc,
+							    descs[1].desc,
+							    descs[2].desc,
+							    descs[3].desc,
+							    descs[4].desc,
+							    descs[5].desc,
+							    descs[6].desc,
+							    descs[7].desc);
		if (lpar_rc != H_SUCCESS) {
-			netdev_warn(adapter->netdev,
-				    "%sadd_logical_lan failed %lu\n",
-				    skb ? "" : "When recycling: ", lpar_rc);
-			goto failure;
+			dev_warn_ratelimited(dev,
+					     "RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n",
+					     filled, lpar_rc, batch);
+			goto hcall_failure;
		}
 
-		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
-		pool->consumer_index++;
-		if (pool->consumer_index >= pool->size)
-			pool->consumer_index = 0;
-
-		buffers_added++;
-		adapter->replenish_add_buff_success++;
+		/* Only update pool state after hcall succeeds */
+		for (i = 0; i < filled; i++) {
+			free_index = pool->consumer_index;
+			pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
+
+			pool->consumer_index++;
+			if (pool->consumer_index >= pool->size)
+				pool->consumer_index = 0;
+		}
+
+		buffers_added += filled;
+		adapter->replenish_add_buff_success += filled;
+		remaining -= filled;
+
+		memset(&descs, 0, sizeof(descs));
+		memset(&correlators, 0, sizeof(correlators));
+		continue;
+
+hcall_failure:
+		for (i = 0; i < filled; i++) {
+			index = correlators[i] & 0xffffffffUL;
+			dma_addr = pool->dma_addr[index];
+
+			if (pool->skbuff[index]) {
+				if (dma_addr &&
+				    !dma_mapping_error(dev, dma_addr))
+					dma_unmap_single(dev, dma_addr,
+							 pool->buff_size,
+							 DMA_FROM_DEVICE);
+
+				dev_kfree_skb_any(pool->skbuff[index]);
+				pool->skbuff[index] = NULL;
+			}
+		}
+		adapter->replenish_add_buff_failure += filled;
+
+		/*
+		 * If multi rx buffers hcall is no longer supported by FW
+		 * e.g. in the case of Live Parttion Migration
+		 */
+		if (batch > 1 && lpar_rc == H_FUNCTION) {
+			/*
+			 * Instead of retry submit single buffer individually
+			 * here just set the max rx buffer per hcall to 1
+			 * buffers will be respleshed next time
+			 * when ibmveth_replenish_buffer_pool() is called again
+			 * with single-buffer case
+			 */
+			netdev_info(adapter->netdev,
+				    "RX Multi buffers not supported by FW, rc=%lu\n",
+				    lpar_rc);
+			adapter->rx_buffers_per_hcall = 1;
+			netdev_info(adapter->netdev,
+				    "Next rx replesh will fall back to single-buffer hcall\n");
+		}
+		break;
	}
 
-	mb();
-	atomic_add(buffers_added, &(pool->available));
-	return;
-
-failure:
-
-	if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
-		dma_unmap_single(&adapter->vdev->dev,
-				 pool->dma_addr[index], pool->buff_size,
-				 DMA_FROM_DEVICE);
-	dev_kfree_skb_any(pool->skbuff[index]);
-	pool->skbuff[index] = NULL;
-	adapter->replenish_add_buff_failure++;
-
	mb();
	atomic_add(buffers_added, &(pool->available));
 }
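Editor's illustration, not part of the imported diff: the rewritten replenish path above submits up to eight RX buffers per hypervisor call and, if firmware reports the batched call is unsupported (H_FUNCTION, e.g. after a migration), permanently drops back to one buffer per call. A standalone C sketch of that "prefer batched submits, degrade to batch size 1 on a capability error" shape follows; `fw_submit()`, `batch_limit` and the error code are hypothetical stand-ins.

```c
#include <stdio.h>

#define MAX_BATCH       8
#define ERR_UNSUPPORTED (-38)   /* stand-in for H_FUNCTION */

/* Pretend firmware: accepts single submits, rejects batched ones. */
static int fw_submit(const int *bufs, unsigned int n)
{
	if (n > 1)
		return ERR_UNSUPPORTED;
	printf("submitted buffer %d\n", bufs[0]);
	return 0;
}

static unsigned int batch_limit = MAX_BATCH;

static void replenish(const int *bufs, unsigned int count)
{
	unsigned int i = 0;

	while (i < count) {
		unsigned int n = count - i;
		int rc;

		if (n > batch_limit)
			n = batch_limit;

		rc = fw_submit(bufs + i, n);
		if (rc) {
			if (rc == ERR_UNSUPPORTED && n > 1) {
				/* Like the patch: remember that batching is
				 * gone and let the next replenish pass use
				 * single-buffer submissions. */
				batch_limit = 1;
			}
			break;
		}
		i += n;
	}
}

int main(void)
{
	int bufs[] = { 1, 2, 3 };

	replenish(bufs, 3);  /* batched call fails, batch_limit drops to 1 */
	replenish(bufs, 3);  /* retried one buffer at a time */
	return 0;
}
```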
@@ -370,20 +443,36 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
	}
 }
 
-/* remove a buffer from a pool */
-static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
-					    u64 correlator, bool reuse)
+/**
+ * ibmveth_remove_buffer_from_pool - remove a buffer from a pool
+ * @adapter: adapter instance
+ * @correlator: identifies pool and index
+ * @reuse: whether to reuse buffer
+ *
+ * Return:
+ * * %0       - success
+ * * %-EINVAL - correlator maps to pool or index out of range
+ * * %-EFAULT - pool and index map to null skb
+ */
+static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
+					   u64 correlator, bool reuse)
 {
	unsigned int pool  = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
	unsigned int free_index;
	struct sk_buff *skb;
 
-	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
-	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
+	if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
+	    WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
+		schedule_work(&adapter->work);
+		return -EINVAL;
+	}
 
	skb = adapter->rx_buff_pool[pool].skbuff[index];
-	BUG_ON(skb == NULL);
+	if (WARN_ON(!skb)) {
+		schedule_work(&adapter->work);
+		return -EFAULT;
+	}
 
	/* if we are going to reuse the buffer then keep the pointers around
	 * but mark index as available. replenish will see the skb pointer and

@@ -411,6 +500,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
	mb();
 
	atomic_dec(&(adapter->rx_buff_pool[pool].available));
+
+	return 0;
 }
 
 /* get the current buffer on the rx queue */

@@ -420,24 +511,44 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada
	unsigned int pool = correlator >> 32;
	unsigned int index = correlator & 0xffffffffUL;
 
-	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
-	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
+	if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
+	    WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
+		schedule_work(&adapter->work);
+		return NULL;
+	}
 
	return adapter->rx_buff_pool[pool].skbuff[index];
 }
 
-static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
-				       bool reuse)
+/**
+ * ibmveth_rxq_harvest_buffer - Harvest buffer from pool
+ *
+ * @adapter: pointer to adapter
+ * @reuse: whether to reuse buffer
+ *
+ * Context: called from ibmveth_poll
+ *
+ * Return:
+ * * %0    - success
+ * * other - non-zero return from ibmveth_remove_buffer_from_pool
+ */
+static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
+				      bool reuse)
 {
	u64 cor;
+	int rc;
 
	cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
-	ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
+	rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
+	if (unlikely(rc))
+		return rc;
 
	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
		adapter->rx_queue.index = 0;
		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
	}
+
+	return 0;
 }
 
 static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
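Editor's illustration, not part of the imported diff: the hunks above and the `ibmveth_reset()` work item that follows replace BUG_ON() crashes with WARN_ON() plus `schedule_work(&adapter->work)`, so a corrupted correlator triggers a device reset instead of a kernel panic. A standalone C sketch of "validate, warn, queue a recovery action, return an error" follows; `schedule_reset()` and `remove_buffer()` are illustrative names.

```c
#include <errno.h>
#include <stdio.h>

#define POOL_SIZE 4

static int pool[POOL_SIZE];

/* Stand-in for schedule_work(&adapter->work) in the driver. */
static void schedule_reset(void)
{
	puts("reset work queued");
}

/* Instead of aborting on a corrupted index (the BUG_ON the patch removes),
 * warn, queue a device reset, and hand the caller an error so it can stop
 * processing the current batch. */
static int remove_buffer(unsigned int index)
{
	if (index >= POOL_SIZE) {
		fprintf(stderr, "WARN: bad buffer index %u\n", index);
		schedule_reset();
		return -EINVAL;
	}
	pool[index] = 0;
	return 0;
}

int main(void)
{
	if (remove_buffer(2))
		return 1;
	if (remove_buffer(99))      /* triggers the recovery path */
		puts("stopped batch, waiting for reset");
	return 0;
}
```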
@@ -709,6 +820,35 @@ static int ibmveth_close(struct net_device *netdev)
	return 0;
 }
 
+/**
+ * ibmveth_reset - Handle scheduled reset work
+ *
+ * @w: pointer to work_struct embedded in adapter structure
+ *
+ * Context: This routine acquires rtnl_mutex and disables its NAPI through
+ *          ibmveth_close. It can't be called directly in a context that has
+ *          already acquired rtnl_mutex or disabled its NAPI, or directly from
+ *          a poll routine.
+ *
+ * Return: void
+ */
+static void ibmveth_reset(struct work_struct *w)
+{
+	struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work);
+	struct net_device *netdev = adapter->netdev;
+
+	netdev_dbg(netdev, "reset starting\n");
+
+	rtnl_lock();
+
+	dev_close(adapter->netdev);
+	dev_open(adapter->netdev, NULL);
+
+	rtnl_unlock();
+
+	netdev_dbg(netdev, "reset complete\n");
+}
+
 static int ibmveth_set_link_ksettings(struct net_device *dev,
				      const struct ethtool_link_ksettings *cmd)
 {
@@ -1324,7 +1464,8 @@ restart_poll:
			wmb(); /* suggested by larson1 */
			adapter->rx_invalid_buffer++;
			netdev_dbg(netdev, "recycling invalid buffer\n");
-			ibmveth_rxq_harvest_buffer(adapter, true);
+			if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
+				break;
		} else {
			struct sk_buff *skb, *new_skb;
			int length = ibmveth_rxq_frame_length(adapter);

@@ -1334,6 +1475,8 @@ restart_poll:
			__sum16 iph_check = 0;
 
			skb = ibmveth_rxq_get_buffer(adapter);
+			if (unlikely(!skb))
+				break;
 
			/* if the large packet bit is set in the rx queue
			 * descriptor, the mss will be written by PHYP eight

@@ -1357,10 +1500,12 @@ restart_poll:
				if (rx_flush)
					ibmveth_flush_buffer(skb->data,
						length + offset);
-				ibmveth_rxq_harvest_buffer(adapter, true);
+				if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
+					break;
				skb = new_skb;
			} else {
-				ibmveth_rxq_harvest_buffer(adapter, false);
+				if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false)))
+					break;
				skb_reserve(skb, offset);
			}
 

@@ -1407,7 +1552,10 @@ restart_poll:
	 * then check once more to make sure we are done.
	 */
	lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
-	BUG_ON(lpar_rc != H_SUCCESS);
+	if (WARN_ON(lpar_rc != H_SUCCESS)) {
+		schedule_work(&adapter->work);
+		goto out;
+	}
 
	if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,

@@ -1428,7 +1576,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
	if (napi_schedule_prep(&adapter->napi)) {
		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
				       VIO_IRQ_DISABLE);
-		BUG_ON(lpar_rc != H_SUCCESS);
+		WARN_ON(lpar_rc != H_SUCCESS);
		__napi_schedule(&adapter->napi);
	}
	return IRQ_HANDLED;
@@ -1670,6 +1818,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
	adapter->vdev = dev;
	adapter->netdev = netdev;
+	INIT_WORK(&adapter->work, ibmveth_reset);
	adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
	ibmveth_init_link_settings(netdev);
 

@@ -1705,6 +1854,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
		netdev->features |= NETIF_F_FRAGLIST;
	}
 
+	if (ret == H_SUCCESS &&
+	    (ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT)) {
+		adapter->rx_buffers_per_hcall = IBMVETH_MAX_RX_PER_HCALL;
+		netdev_dbg(netdev,
+			   "RX Multi-buffer hcall supported by FW, batch set to %u\n",
+			   adapter->rx_buffers_per_hcall);
+	} else {
+		adapter->rx_buffers_per_hcall = 1;
+		netdev_dbg(netdev,
+			   "RX Single-buffer hcall mode, batch set to %u\n",
+			   adapter->rx_buffers_per_hcall);
+	}
+
	netdev->min_mtu = IBMVETH_MIN_MTU;
	netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
 

@@ -1762,6 +1924,8 @@ static void ibmveth_remove(struct vio_dev *dev)
	struct ibmveth_adapter *adapter = netdev_priv(netdev);
	int i;
 
+	cancel_work_sync(&adapter->work);
+
	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
		kobject_put(&adapter->rx_buff_pool[i].kobj);
 
@@ -28,6 +28,7 @@
 #define IbmVethMcastRemoveFilter     0x2UL
 #define IbmVethMcastClearFilterTable 0x3UL
 
+#define IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT	0x0000000000040000UL
 #define IBMVETH_ILLAN_LRG_SR_ENABLED	0x0000000000010000UL
 #define IBMVETH_ILLAN_LRG_SND_SUPPORT	0x0000000000008000UL
 #define IBMVETH_ILLAN_PADDED_PKT_CSUM	0x0000000000002000UL

@@ -46,6 +47,24 @@
 #define h_add_logical_lan_buffer(ua, buf) \
	plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
 
+static inline long h_add_logical_lan_buffers(unsigned long unit_address,
+					     unsigned long desc1,
+					     unsigned long desc2,
+					     unsigned long desc3,
+					     unsigned long desc4,
+					     unsigned long desc5,
+					     unsigned long desc6,
+					     unsigned long desc7,
+					     unsigned long desc8)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	return plpar_hcall9(H_ADD_LOGICAL_LAN_BUFFERS,
+			    retbuf, unit_address,
+			    desc1, desc2, desc3, desc4,
+			    desc5, desc6, desc7, desc8);
+}
+
 /* FW allows us to send 6 descriptors but we only use one so mark
  * the other 5 as unused (0)
  */

@@ -101,6 +120,7 @@ static inline long h_illan_attributes(unsigned long unit_address,
 #define IBMVETH_MAX_TX_BUF_SIZE	(1024 * 64)
 #define IBMVETH_MAX_QUEUES	16U
 #define IBMVETH_DEFAULT_QUEUES	8U
+#define IBMVETH_MAX_RX_PER_HCALL	8U
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
 static int pool_count[] = { 256, 512, 256, 256, 256 };

@@ -137,6 +157,7 @@ struct ibmveth_adapter {
	struct vio_dev *vdev;
	struct net_device *netdev;
	struct napi_struct napi;
+	struct work_struct work;
	unsigned int mcastFilterSize;
	void * buffer_list_addr;
	void * filter_list_addr;

@@ -150,6 +171,7 @@ struct ibmveth_adapter {
	int rx_csum;
	int large_send;
	bool is_active_trunk;
+	unsigned int rx_buffers_per_hcall;
 
	u64 fw_ipv6_csum_support;
	u64 fw_ipv4_csum_support;
@@ -547,6 +547,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
		adapter->rx_pool[i].active = 0;
 }
 
+static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter)
+{
+	if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) {
+		netdev_info(adapter->netdev,
+			    "set max ind descs from %u to safe limit %u\n",
+			    adapter->cur_max_ind_descs,
+			    IBMVNIC_SAFE_IND_DESC);
+		adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC;
+	}
+}
+
 static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
			      struct ibmvnic_rx_pool *pool)
 {

@@ -633,7 +644,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
		sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
 
		/* if send_subcrq_indirect queue is full, flush to VIOS */
-		if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+		if (ind_bufp->index == adapter->cur_max_ind_descs ||
		    i == count - 1) {
			lpar_rc =
				send_subcrq_indirect(adapter, handle,

@@ -652,6 +663,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 failure:
	if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
		dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
+
+	/* Detect platform limit H_PARAMETER */
+	if (lpar_rc == H_PARAMETER)
+		ibmvnic_set_safe_max_ind_descs(adapter);
+
+	/* For all error case, temporarily drop only this batch
+	 * Rely on TCP/IP retransmissions to retry and recover
+	 */
	for (i = ind_bufp->index - 1; i >= 0; --i) {
		struct ibmvnic_rx_buff *rx_buff;
 
@ -2103,9 +2122,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
|
|||||||
tx_pool->num_buffers - 1 :
|
tx_pool->num_buffers - 1 :
|
||||||
tx_pool->consumer_index - 1;
|
tx_pool->consumer_index - 1;
|
||||||
tx_buff = &tx_pool->tx_buff[index];
|
tx_buff = &tx_pool->tx_buff[index];
|
||||||
adapter->netdev->stats.tx_packets--;
|
adapter->tx_stats_buffers[queue_num].batched_packets--;
|
||||||
adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
|
|
||||||
adapter->tx_stats_buffers[queue_num].packets--;
|
|
||||||
adapter->tx_stats_buffers[queue_num].bytes -=
|
adapter->tx_stats_buffers[queue_num].bytes -=
|
||||||
tx_buff->skb->len;
|
tx_buff->skb->len;
|
||||||
dev_kfree_skb_any(tx_buff->skb);
|
dev_kfree_skb_any(tx_buff->skb);
|
||||||
@@ -2174,16 +2191,28 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
 		rc = send_subcrq_direct(adapter, handle,
 					(u64 *)ind_bufp->indir_arr);
 
-	if (rc)
+	if (rc) {
+		dev_err_ratelimited(&adapter->vdev->dev,
+				    "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n",
+				    rc, entries, &dma_addr, handle);
+		/* Detect platform limit H_PARAMETER */
+		if (rc == H_PARAMETER)
+			ibmvnic_set_safe_max_ind_descs(adapter);
+
+		/* For all error case, temporarily drop only this batch
+		 * Rely on TCP/IP retransmissions to retry and recover
+		 */
 		ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
-	else
+	} else {
 		ind_bufp->index = 0;
+	}
 	return rc;
 }
 
 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	u32 cur_max_ind_descs = adapter->cur_max_ind_descs;
 	int queue_num = skb_get_queue_mapping(skb);
 	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
 	struct device *dev = &adapter->vdev->dev;
@@ -2196,7 +2225,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	unsigned int tx_map_failed = 0;
 	union sub_crq indir_arr[16];
 	unsigned int tx_dropped = 0;
-	unsigned int tx_packets = 0;
+	unsigned int tx_dpackets = 0;
+	unsigned int tx_bpackets = 0;
 	unsigned int tx_bytes = 0;
 	dma_addr_t data_dma_addr;
 	struct netdev_queue *txq;
@@ -2370,6 +2400,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		if (lpar_rc != H_SUCCESS)
 			goto tx_err;
 
+		tx_dpackets++;
 		goto early_exit;
 	}
 
@@ -2379,7 +2410,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_crq.v1.n_crq_elem = num_entries;
 	tx_buff->num_entries = num_entries;
 	/* flush buffer if current entry can not fit */
-	if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
+	if (num_entries + ind_bufp->index > cur_max_ind_descs) {
 		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
 		if (lpar_rc != H_SUCCESS)
 			goto tx_flush_err;
@@ -2392,11 +2423,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	ind_bufp->index += num_entries;
 	if (__netdev_tx_sent_queue(txq, skb->len,
 				   netdev_xmit_more() &&
-				   ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
+				   ind_bufp->index < cur_max_ind_descs)) {
 		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
 		if (lpar_rc != H_SUCCESS)
 			goto tx_err;
 	}
+	tx_bpackets++;
 
 early_exit:
 	if (atomic_add_return(num_entries, &tx_scrq->used)
@@ -2405,7 +2437,6 @@ early_exit:
 		netif_stop_subqueue(netdev, queue_num);
 	}
 
-	tx_packets++;
 	tx_bytes += skblen;
 	txq_trans_cond_update(txq);
 	ret = NETDEV_TX_OK;
@@ -2433,12 +2464,10 @@ tx_err:
 	}
 out:
 	rcu_read_unlock();
-	netdev->stats.tx_dropped += tx_dropped;
-	netdev->stats.tx_bytes += tx_bytes;
-	netdev->stats.tx_packets += tx_packets;
 	adapter->tx_send_failed += tx_send_failed;
 	adapter->tx_map_failed += tx_map_failed;
-	adapter->tx_stats_buffers[queue_num].packets += tx_packets;
+	adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets;
+	adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets;
 	adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
 	adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
 
@@ -3237,6 +3266,25 @@ err:
 	return -ret;
 }
 
+static void ibmvnic_get_stats64(struct net_device *netdev,
+				struct rtnl_link_stats64 *stats)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		stats->rx_packets += adapter->rx_stats_buffers[i].packets;
+		stats->rx_bytes += adapter->rx_stats_buffers[i].bytes;
+	}
+
+	for (i = 0; i < adapter->req_tx_queues; i++) {
+		stats->tx_packets += adapter->tx_stats_buffers[i].batched_packets;
+		stats->tx_packets += adapter->tx_stats_buffers[i].direct_packets;
+		stats->tx_bytes += adapter->tx_stats_buffers[i].bytes;
+		stats->tx_dropped += adapter->tx_stats_buffers[i].dropped_packets;
+	}
+}
+
 static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
@@ -3352,8 +3400,6 @@ restart_poll:
 
 		length = skb->len;
 		napi_gro_receive(napi, skb); /* send it up */
-		netdev->stats.rx_packets++;
-		netdev->stats.rx_bytes += length;
 		adapter->rx_stats_buffers[scrq_num].packets++;
 		adapter->rx_stats_buffers[scrq_num].bytes += length;
 		frames_processed++;
@@ -3463,6 +3509,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
 	.ndo_set_rx_mode = ibmvnic_set_multi,
 	.ndo_set_mac_address = ibmvnic_set_mac,
 	.ndo_validate_addr = eth_validate_addr,
+	.ndo_get_stats64 = ibmvnic_get_stats64,
 	.ndo_tx_timeout = ibmvnic_tx_timeout,
 	.ndo_change_mtu = ibmvnic_change_mtu,
 	.ndo_features_check = ibmvnic_features_check,
@@ -3627,7 +3674,10 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 		memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
 
 	for (i = 0; i < adapter->req_tx_queues; i++) {
-		snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+		snprintf(data, ETH_GSTRING_LEN, "tx%d_batched_packets", i);
+		data += ETH_GSTRING_LEN;
+
+		snprintf(data, ETH_GSTRING_LEN, "tx%d_direct_packets", i);
 		data += ETH_GSTRING_LEN;
 
 		snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
@@ -3705,7 +3755,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
 				      (adapter, ibmvnic_stats[i].offset));
 
 	for (j = 0; j < adapter->req_tx_queues; j++) {
-		data[i] = adapter->tx_stats_buffers[j].packets;
+		data[i] = adapter->tx_stats_buffers[j].batched_packets;
+		i++;
+		data[i] = adapter->tx_stats_buffers[j].direct_packets;
 		i++;
 		data[i] = adapter->tx_stats_buffers[j].bytes;
 		i++;
@@ -3844,7 +3896,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
 	}
 
 	dma_free_coherent(dev,
-			  IBMVNIC_IND_ARR_SZ,
+			  IBMVNIC_IND_MAX_ARR_SZ,
 			  scrq->ind_buf.indir_arr,
 			  scrq->ind_buf.indir_dma);
 
@@ -3901,7 +3953,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	scrq->ind_buf.indir_arr =
 		dma_alloc_coherent(dev,
-				   IBMVNIC_IND_ARR_SZ,
+				   IBMVNIC_IND_MAX_ARR_SZ,
 				   &scrq->ind_buf.indir_dma,
 				   GFP_KERNEL);
 
@@ -6206,6 +6258,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
 			rc = reset_sub_crq_queues(adapter);
 		}
 	} else {
+		if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+			/* After an LPM, reset the max number of indirect
+			 * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT
+			 * hcall to the default max (e.g POWER8 -> POWER10)
+			 *
+			 * If the new destination platform does not support
+			 * the higher limit max (e.g. POWER10-> POWER8 LPM)
+			 * H_PARAMETER will trigger automatic fallback to the
+			 * safe minimum limit.
+			 */
+			adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
+		}
+
 		rc = init_sub_crqs(adapter);
 	}
 
@@ -6357,6 +6422,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	adapter->wait_for_reset = false;
 	adapter->last_reset_time = jiffies;
+	adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
 
 	rc = register_netdev(netdev);
 	if (rc) {
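The ibmvnic hunks above raise the indirect-descriptor batch size to the platform maximum and drop back to a known-safe value the first time the hypervisor rejects a batch with H_PARAMETER. A minimal standalone C sketch of that "start high, fall back on first rejection" pattern follows; the DEMO_* constants and names are illustrative only and are not part of the driver.

#include <stdio.h>

#define DEMO_MAX_IND_DESCS	128	/* preferred batch size */
#define DEMO_SAFE_IND_DESCS	16	/* known-good minimum */
#define DEMO_EINVAL		22	/* stand-in for H_PARAMETER */

static unsigned int cur_max_ind_descs = DEMO_MAX_IND_DESCS;

/* Pretend firmware call: only accepts small batches. */
static int demo_send_batch(unsigned int n_descs)
{
	return n_descs > DEMO_SAFE_IND_DESCS ? -DEMO_EINVAL : 0;
}

static void demo_set_safe_max(void)
{
	if (cur_max_ind_descs > DEMO_SAFE_IND_DESCS) {
		printf("dropping max ind descs from %u to %u\n",
		       cur_max_ind_descs, DEMO_SAFE_IND_DESCS);
		cur_max_ind_descs = DEMO_SAFE_IND_DESCS;
	}
}

int main(void)
{
	int rc = demo_send_batch(cur_max_ind_descs);

	if (rc == -DEMO_EINVAL)	/* platform limit hit: retry smaller next time */
		demo_set_safe_max();

	printf("current max: %u, last rc: %d\n", cur_max_ind_descs, rc);
	return 0;
}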
@@ -29,8 +29,9 @@
 #define IBMVNIC_BUFFS_PER_POOL 100
 #define IBMVNIC_MAX_QUEUES 16
 #define IBMVNIC_MAX_QUEUE_SZ 4096
-#define IBMVNIC_MAX_IND_DESCS 16
-#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
+#define IBMVNIC_MAX_IND_DESCS 128
+#define IBMVNIC_SAFE_IND_DESC 16
+#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
 
 #define IBMVNIC_TSO_BUF_SZ 65536
 #define IBMVNIC_TSO_BUFS 64
@@ -175,20 +176,25 @@ struct ibmvnic_statistics {
 	u8 reserved[72];
 } __packed __aligned(8);
 
-#define NUM_TX_STATS 3
 struct ibmvnic_tx_queue_stats {
-	u64 packets;
+	u64 batched_packets;
+	u64 direct_packets;
 	u64 bytes;
 	u64 dropped_packets;
 };
 
-#define NUM_RX_STATS 3
+#define NUM_TX_STATS \
+	(sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64))
+
 struct ibmvnic_rx_queue_stats {
 	u64 packets;
 	u64 bytes;
 	u64 interrupts;
 };
 
+#define NUM_RX_STATS \
+	(sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64))
+
 struct ibmvnic_acl_buffer {
 	__be32 len;
 	__be32 version;
@@ -885,6 +891,7 @@ struct ibmvnic_adapter {
 	dma_addr_t ip_offload_ctrl_tok;
 	u32 msg_enable;
 	u32 priv_flags;
+	u32 cur_max_ind_descs;
 
 	/* Vital Product Data (VPD) */
 	struct ibmvnic_vpd *vpd;
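The NUM_TX_STATS/NUM_RX_STATS change above derives the per-queue counter count from the struct layout instead of a hard-coded 3, so adding a member can no longer desynchronize the count. A standalone C sketch of the same sizeof-based counting technique; the demo_* names are illustrative, not the driver's.

#include <stdio.h>
#include <stdint.h>

struct demo_tx_queue_stats {
	uint64_t batched_packets;
	uint64_t direct_packets;
	uint64_t bytes;
	uint64_t dropped_packets;
};

/* Adding another u64 member above automatically grows the count. */
#define DEMO_NUM_TX_STATS \
	(sizeof(struct demo_tx_queue_stats) / sizeof(uint64_t))

int main(void)
{
	printf("tx stats per queue: %zu\n", DEMO_NUM_TX_STATS);
	return 0;
}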
@@ -448,7 +448,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 		      (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
 		      (pf_queue_id << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
 		      BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) |
-		      (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT);
+		      FIELD_PREP(I40E_QINT_RQCTL_ITR_INDX_MASK, itr_idx);
 		wr32(hw, reg_idx, reg);
 	}
 
@@ -653,6 +653,13 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
 
 	/* only set the required fields */
 	tx_ctx.base = info->dma_ring_addr / 128;
+
+	/* ring_len has to be multiple of 8 */
+	if (!IS_ALIGNED(info->ring_len, 8) ||
+	    info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+		ret = -EINVAL;
+		goto error_context;
+	}
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
@@ -716,6 +723,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
 
 	/* only set the required fields */
 	rx_ctx.base = info->dma_ring_addr / 128;
+
+	/* ring_len has to be multiple of 32 */
+	if (!IS_ALIGNED(info->ring_len, 32) ||
+	    info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+		ret = -EINVAL;
+		goto error_param;
+	}
 	rx_ctx.qlen = info->ring_len;
 
 	if (info->splithdr_enabled) {
@@ -1453,6 +1467,7 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
 	 * functions that may still be running at this point.
 	 */
 	clear_bit(I40E_VF_STATE_INIT, &vf->vf_states);
+	clear_bit(I40E_VF_STATE_RESOURCES_LOADED, &vf->vf_states);
 
 	/* In the case of a VFLR, the HW has already reset the VF and we
 	 * just need to clean up, so don't hit the VFRTRIG register.
@@ -2119,7 +2134,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	size_t len = 0;
 	int ret;
 
-	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) {
+	i40e_sync_vf_state(vf, I40E_VF_STATE_INIT);
+
+	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states) ||
+	    test_bit(I40E_VF_STATE_RESOURCES_LOADED, &vf->vf_states)) {
 		aq_ret = -EINVAL;
 		goto err;
 	}
@@ -2222,6 +2240,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 			vf->default_lan_addr.addr);
 	}
 	set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+	set_bit(I40E_VF_STATE_RESOURCES_LOADED, &vf->vf_states);
 
 err:
 	/* send the response back to the VF */
@@ -2384,7 +2403,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 		}
 
 		if (vf->adq_enabled) {
-			if (idx >= ARRAY_SIZE(vf->ch)) {
+			if (idx >= vf->num_tc) {
 				aq_ret = -ENODEV;
 				goto error_param;
 			}
@@ -2405,7 +2424,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 	 * to its appropriate VSIs based on TC mapping
 	 */
 	if (vf->adq_enabled) {
-		if (idx >= ARRAY_SIZE(vf->ch)) {
+		if (idx >= vf->num_tc) {
 			aq_ret = -ENODEV;
 			goto error_param;
 		}
@@ -2455,8 +2474,10 @@ static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
 	u16 vsi_queue_id, queue_id;
 
 	for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
-		if (vf->adq_enabled) {
-			vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+		u16 idx = vsi_queue_id / I40E_MAX_VF_VSI;
+
+		if (vf->adq_enabled && idx < vf->num_tc) {
+			vsi_id = vf->ch[idx].vsi_id;
 			queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
 		} else {
 			queue_id = vsi_queue_id;
@@ -3589,7 +3610,7 @@ static int i40e_validate_cloud_filter(struct i40e_vf *vf,
 
 	/* action_meta is TC number here to which the filter is applied */
 	if (!tc_filter->action_meta ||
-	    tc_filter->action_meta > vf->num_tc) {
+	    tc_filter->action_meta >= vf->num_tc) {
 		dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
 			 vf->vf_id, tc_filter->action_meta);
 		goto err;
@@ -3887,6 +3908,8 @@ err:
 				       aq_ret);
 }
 
+#define I40E_MAX_VF_CLOUD_FILTER 0xFF00
+
 /**
  * i40e_vc_add_cloud_filter
  * @vf: pointer to the VF info
@@ -3926,6 +3949,14 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
 		goto err_out;
 	}
 
+	if (vf->num_cloud_filters >= I40E_MAX_VF_CLOUD_FILTER) {
+		dev_warn(&pf->pdev->dev,
+			 "VF %d: Max number of filters reached, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = -ENOSPC;
+		goto err_out;
+	}
+
 	cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
 	if (!cfilter) {
 		aq_ret = -ENOMEM;
@@ -41,7 +41,8 @@ enum i40e_vf_states {
 	I40E_VF_STATE_MC_PROMISC,
 	I40E_VF_STATE_UC_PROMISC,
 	I40E_VF_STATE_PRE_ENABLE,
-	I40E_VF_STATE_RESETTING
+	I40E_VF_STATE_RESETTING,
+	I40E_VF_STATE_RESOURCES_LOADED,
 };
 
 /* VF capabilities */
@@ -3230,12 +3230,14 @@ static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data)
 	hw = &pf->hw;
 	tx = &pf->ptp.port.tx;
 	spin_lock_irqsave(&tx->lock, flags);
-	ice_ptp_complete_tx_single_tstamp(tx);
+	if (tx->init) {
+		ice_ptp_complete_tx_single_tstamp(tx);
 
-	idx = find_next_bit_wrap(tx->in_use, tx->len,
-				 tx->last_ll_ts_idx_read + 1);
-	if (idx != tx->len)
-		ice_ptp_req_tx_single_tstamp(tx, idx);
+		idx = find_next_bit_wrap(tx->in_use, tx->len,
+					 tx->last_ll_ts_idx_read + 1);
+		if (idx != tx->len)
+			ice_ptp_req_tx_single_tstamp(tx, idx);
+	}
 	spin_unlock_irqrestore(&tx->lock, flags);
 
 	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
@@ -2882,16 +2882,19 @@ irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf)
 	 */
 	if (hw->dev_caps.ts_dev_info.ts_ll_int_read) {
 		struct ice_ptp_tx *tx = &pf->ptp.port.tx;
-		u8 idx;
+		u8 idx, last;
 
 		if (!ice_pf_state_is_nominal(pf))
 			return IRQ_HANDLED;
 
 		spin_lock(&tx->lock);
-		idx = find_next_bit_wrap(tx->in_use, tx->len,
-					 tx->last_ll_ts_idx_read + 1);
-		if (idx != tx->len)
-			ice_ptp_req_tx_single_tstamp(tx, idx);
+		if (tx->init) {
+			last = tx->last_ll_ts_idx_read + 1;
+			idx = find_next_bit_wrap(tx->in_use, tx->len,
+						 last);
+			if (idx != tx->len)
+				ice_ptp_req_tx_single_tstamp(tx, idx);
+		}
 		spin_unlock(&tx->lock);
 
 		return IRQ_HANDLED;
@@ -865,10 +865,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
 				   rx_buf->page_offset, size);
 	sinfo->xdp_frags_size += size;
-	/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
-	 * can pop off frags but driver has to handle it on its own
-	 */
-	rx_ring->nr_frags = sinfo->nr_frags;
 
 	if (page_is_pfmemalloc(rx_buf->page))
 		xdp_buff_set_frag_pfmemalloc(xdp);
@@ -939,20 +935,20 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
 /**
  * ice_get_pgcnts - grab page_count() for gathered fragments
  * @rx_ring: Rx descriptor ring to store the page counts on
+ * @ntc: the next to clean element (not included in this frame!)
  *
  * This function is intended to be called right before running XDP
 * program so that the page recycling mechanism will be able to take
 * a correct decision regarding underlying pages; this is done in such
 * way as XDP program can change the refcount of page
 */
-static void ice_get_pgcnts(struct ice_rx_ring *rx_ring)
+static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc)
 {
-	u32 nr_frags = rx_ring->nr_frags + 1;
 	u32 idx = rx_ring->first_desc;
 	struct ice_rx_buf *rx_buf;
 	u32 cnt = rx_ring->count;
 
-	for (int i = 0; i < nr_frags; i++) {
+	while (idx != ntc) {
 		rx_buf = &rx_ring->rx_buf[idx];
 		rx_buf->pgcnt = page_count(rx_buf->page);
 
@@ -1125,62 +1121,51 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
 }
 
 /**
- * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags
+ * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame
  * @rx_ring: Rx ring with all the auxiliary data
  * @xdp: XDP buffer carrying linear + frags part
- * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
- * @ntc: a current next_to_clean value to be stored at rx_ring
+ * @ntc: the next to clean element (not included in this frame!)
  * @verdict: return code from XDP program execution
  *
- * Walk through gathered fragments and satisfy internal page
- * recycle mechanism; we take here an action related to verdict
- * returned by XDP program;
+ * Called after XDP program is completed, or on error with verdict set to
+ * ICE_XDP_CONSUMED.
+ *
+ * Walk through buffers from first_desc to the end of the frame, releasing
+ * buffers and satisfying internal page recycle mechanism. The action depends
+ * on verdict from XDP program.
 */
 static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
-			    u32 *xdp_xmit, u32 ntc, u32 verdict)
+			    u32 ntc, u32 verdict)
 {
-	u32 nr_frags = rx_ring->nr_frags + 1;
 	u32 idx = rx_ring->first_desc;
 	u32 cnt = rx_ring->count;
-	u32 post_xdp_frags = 1;
 	struct ice_rx_buf *buf;
-	int i;
+	u32 xdp_frags = 0;
+	int i = 0;
 
 	if (unlikely(xdp_buff_has_frags(xdp)))
-		post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags;
+		xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
 
-	for (i = 0; i < post_xdp_frags; i++) {
+	while (idx != ntc) {
 		buf = &rx_ring->rx_buf[idx];
+		if (++idx == cnt)
+			idx = 0;
 
-		if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+		/* An XDP program could release fragments from the end of the
+		 * buffer. For these, we need to keep the pagecnt_bias as-is.
+		 * To do this, only adjust pagecnt_bias for fragments up to
+		 * the total remaining after the XDP program has run.
+		 */
+		if (verdict != ICE_XDP_CONSUMED)
 			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
-			*xdp_xmit |= verdict;
-		} else if (verdict & ICE_XDP_CONSUMED) {
+		else if (i++ <= xdp_frags)
 			buf->pagecnt_bias++;
-		} else if (verdict == ICE_XDP_PASS) {
-			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
-		}
 
 		ice_put_rx_buf(rx_ring, buf);
-
-		if (++idx == cnt)
-			idx = 0;
-	}
-	/* handle buffers that represented frags released by XDP prog;
-	 * for these we keep pagecnt_bias as-is; refcount from struct page
-	 * has been decremented within XDP prog and we do not have to increase
-	 * the biased refcnt
-	 */
-	for (; i < nr_frags; i++) {
-		buf = &rx_ring->rx_buf[idx];
-		ice_put_rx_buf(rx_ring, buf);
-		if (++idx == cnt)
-			idx = 0;
 	}
 
 	xdp->data = NULL;
 	rx_ring->first_desc = ntc;
-	rx_ring->nr_frags = 0;
 }
 
 /**
@@ -1260,6 +1245,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		/* retrieve a buffer from the ring */
 		rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
 
+		/* Increment ntc before calls to ice_put_rx_mbuf() */
+		if (++ntc == cnt)
+			ntc = 0;
+
 		if (!xdp->data) {
 			void *hard_start;
 
@@ -1268,24 +1257,23 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 			xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
 			xdp_buff_clear_frags_flag(xdp);
 		} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
-			ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED);
+			ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED);
 			break;
 		}
-		if (++ntc == cnt)
-			ntc = 0;
 
 		/* skip if it is NOP desc */
 		if (ice_is_non_eop(rx_ring, rx_desc))
 			continue;
 
-		ice_get_pgcnts(rx_ring);
+		ice_get_pgcnts(rx_ring, ntc);
 		xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
 		if (xdp_verdict == ICE_XDP_PASS)
 			goto construct_skb;
 		total_rx_bytes += xdp_get_buff_len(xdp);
 		total_rx_pkts++;
 
-		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+		ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
+		xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR);
 
 		continue;
 construct_skb:
@@ -1298,7 +1286,7 @@ construct_skb:
 			rx_ring->ring_stats->rx_stats.alloc_page_failed++;
 			xdp_verdict = ICE_XDP_CONSUMED;
 		}
-		ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+		ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
 
 		if (!skb)
 			break;
@@ -358,7 +358,6 @@ struct ice_rx_ring {
 	struct ice_tx_ring *xdp_ring;
 	struct ice_rx_ring *next; /* pointer to next ring in q_vector */
 	struct xsk_buff_pool *xsk_pool;
-	u32 nr_frags;
 	u16 max_frame;
 	u16 rx_buf_len;
 	dma_addr_t dma; /* physical address of ring */
@@ -3094,7 +3094,7 @@ static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw,
 	if (err)
 		return err;
 
-	combo_ver = le32_to_cpu(civd.combo_ver);
+	combo_ver = get_unaligned_le32(&civd.combo_ver);
 
 	orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver);
 	orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver);
@@ -1136,7 +1136,7 @@ struct ixgbe_orom_civd_info {
 	__le32 combo_ver; /* Combo Image Version number */
 	u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */
 	__le16 combo_name[32]; /* Unicode string representing the Combo Image version */
-};
+} __packed;
 
 /* Function specific capabilities */
 struct ixgbe_hw_func_caps {
@@ -6,8 +6,10 @@
 #include <linux/pci.h>
 #include <linux/utsname.h>
 #include <linux/version.h>
+#include <linux/export.h>
 
 #include <net/mana/mana.h>
+#include <net/mana/hw_channel.h>
 
 #include <linux/cpu.h>
 struct dentry *mana_debugfs_root;
@@ -32,6 +34,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev)
 	gc->db_page_base = gc->bar0_va +
 		mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF);
 
+	gc->phys_db_page_base = gc->bar0_pa +
+		mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF);
+
 	sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF);
 
 	sriov_base_va = gc->bar0_va + sriov_base_off;
@@ -64,6 +69,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
 		mana_gd_init_vf_regs(pdev);
 }
 
+/* Suppress logging when we set timeout to zero */
+bool mana_need_log(struct gdma_context *gc, int err)
+{
+	struct hw_channel_context *hwc;
+
+	if (err != -ETIMEDOUT)
+		return true;
+
+	if (!gc)
+		return true;
+
+	hwc = gc->hwc.driver_data;
+	if (hwc && hwc->hwc_timeout == 0)
+		return false;
+
+	return true;
+}
+
 static int mana_gd_query_max_resources(struct pci_dev *pdev)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
@@ -267,8 +290,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
-			resp.hdr.status);
+		if (mana_need_log(gc, err))
+			dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
+				resp.hdr.status);
 		return err ? err : -EPROTO;
 	}
 
@@ -353,11 +377,113 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 }
 EXPORT_SYMBOL_NS(mana_gd_ring_cq, NET_MANA);
 
+#define MANA_SERVICE_PERIOD 10
+
+static void mana_serv_fpga(struct pci_dev *pdev)
+{
+	struct pci_bus *bus, *parent;
+
+	pci_lock_rescan_remove();
+
+	bus = pdev->bus;
+	if (!bus) {
+		dev_err(&pdev->dev, "MANA service: no bus\n");
+		goto out;
+	}
+
+	parent = bus->parent;
+	if (!parent) {
+		dev_err(&pdev->dev, "MANA service: no parent bus\n");
+		goto out;
+	}
+
+	pci_stop_and_remove_bus_device(bus->self);
+
+	msleep(MANA_SERVICE_PERIOD * 1000);
+
+	pci_rescan_bus(parent);
+
+out:
+	pci_unlock_rescan_remove();
+}
+
+static void mana_serv_reset(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct hw_channel_context *hwc;
+
+	if (!gc) {
+		dev_err(&pdev->dev, "MANA service: no GC\n");
+		return;
+	}
+
+	hwc = gc->hwc.driver_data;
+	if (!hwc) {
+		dev_err(&pdev->dev, "MANA service: no HWC\n");
+		goto out;
+	}
+
+	/* HWC is not responding in this case, so don't wait */
+	hwc->hwc_timeout = 0;
+
+	dev_info(&pdev->dev, "MANA reset cycle start\n");
+
+	mana_gd_suspend(pdev, PMSG_SUSPEND);
+
+	msleep(MANA_SERVICE_PERIOD * 1000);
+
+	mana_gd_resume(pdev);
+
+	dev_info(&pdev->dev, "MANA reset cycle completed\n");
+
+out:
+	gc->in_service = false;
+}
+
+struct mana_serv_work {
+	struct work_struct serv_work;
+	struct pci_dev *pdev;
+	enum gdma_eqe_type type;
+};
+
+static void mana_serv_func(struct work_struct *w)
+{
+	struct mana_serv_work *mns_wk;
+	struct pci_dev *pdev;
+
+	mns_wk = container_of(w, struct mana_serv_work, serv_work);
+	pdev = mns_wk->pdev;
+
+	if (!pdev)
+		goto out;
+
+	switch (mns_wk->type) {
+	case GDMA_EQE_HWC_FPGA_RECONFIG:
+		mana_serv_fpga(pdev);
+		break;
+
+	case GDMA_EQE_HWC_RESET_REQUEST:
+		mana_serv_reset(pdev);
+		break;
+
+	default:
+		dev_err(&pdev->dev, "MANA service: unknown type %d\n",
+			mns_wk->type);
+		break;
+	}
+
+out:
+	pci_dev_put(pdev);
+	kfree(mns_wk);
+	module_put(THIS_MODULE);
+}
+
 static void mana_gd_process_eqe(struct gdma_queue *eq)
 {
 	u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
 	struct gdma_context *gc = eq->gdma_dev->gdma_context;
 	struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
+	struct mana_serv_work *mns_wk;
 	union gdma_eqe_info eqe_info;
 	enum gdma_eqe_type type;
 	struct gdma_event event;
@@ -402,6 +528,35 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 			eq->eq.callback(eq->eq.context, eq, &event);
 		break;
 
+	case GDMA_EQE_HWC_FPGA_RECONFIG:
+	case GDMA_EQE_HWC_RESET_REQUEST:
+		dev_info(gc->dev, "Recv MANA service type:%d\n", type);
+
+		if (gc->in_service) {
+			dev_info(gc->dev, "Already in service\n");
+			break;
+		}
+
+		if (!try_module_get(THIS_MODULE)) {
+			dev_info(gc->dev, "Module is unloading\n");
+			break;
+		}
+
+		mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
+		if (!mns_wk) {
+			module_put(THIS_MODULE);
+			break;
+		}
+
+		dev_info(gc->dev, "Start MANA service type:%d\n", type);
+		gc->in_service = true;
+		mns_wk->pdev = to_pci_dev(gc->dev);
+		mns_wk->type = type;
+		pci_dev_get(mns_wk->pdev);
+		INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+		schedule_work(&mns_wk->serv_work);
+		break;
+
 	default:
 		break;
 	}
@@ -543,7 +698,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err) {
-		dev_err(dev, "test_eq failed: %d\n", err);
+		if (mana_need_log(gc, err))
+			dev_err(dev, "test_eq failed: %d\n", err);
 		goto out;
 	}
 
@@ -578,7 +734,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
 
 	if (flush_evenets) {
 		err = mana_gd_test_eq(gc, queue);
-		if (err)
+		if (err && mana_need_log(gc, err))
 			dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
 	}
 
@@ -724,8 +880,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
-			err, resp.hdr.status);
+		if (mana_need_log(gc, err))
+			dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
+				err, resp.hdr.status);
 		return -EPROTO;
 	}
 
@@ -1025,8 +1182,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
-			err, resp.hdr.status);
+		if (mana_need_log(gc, err))
+			dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
+				err, resp.hdr.status);
 		if (!err)
 			err = -EPROTO;
 	}
@@ -1642,7 +1800,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
 }
 
 /* The 'state' parameter is not used. */
-static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
+int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
 
@@ -1658,7 +1816,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
  * fail -- if this happens, it's safer to just report an error than try to undo
  * what has been done.
  */
-static int mana_gd_resume(struct pci_dev *pdev)
+int mana_gd_resume(struct pci_dev *pdev)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
 	int err;
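The mana_need_log() helper added above implements a simple policy: once the hardware-channel timeout has been deliberately set to zero (a service/reset cycle is in progress), -ETIMEDOUT results are expected and no longer worth an error message. A standalone sketch of that pattern, with demo_* names that are illustrative only and not the driver's:

#include <stdio.h>
#include <stdbool.h>

#define DEMO_ETIMEDOUT 110

struct demo_channel {
	unsigned int timeout_ms;	/* 0 means "do not wait / do not log" */
};

static bool demo_need_log(const struct demo_channel *ch, int err)
{
	if (err != -DEMO_ETIMEDOUT)
		return true;		/* real failures are always logged */
	if (!ch)
		return true;
	return ch->timeout_ms != 0;	/* expected timeout -> stay quiet */
}

int main(void)
{
	struct demo_channel ch = { .timeout_ms = 0 };
	int err = -DEMO_ETIMEDOUT;

	if (err && demo_need_log(&ch, err))
		fprintf(stderr, "request failed: %d\n", err);
	return 0;
}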
@@ -2,6 +2,7 @@
 /* Copyright (c) 2021, Microsoft Corporation. */
 
 #include <net/mana/gdma.h>
+#include <net/mana/mana.h>
 #include <net/mana/hw_channel.h>
 
 static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
@@ -878,7 +879,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 
 	if (!wait_for_completion_timeout(&ctx->comp_event,
 					 (msecs_to_jiffies(hwc->hwc_timeout)))) {
-		dev_err(hwc->dev, "HWC: Request timed out!\n");
+		if (hwc->hwc_timeout != 0)
+			dev_err(hwc->dev, "HWC: Request timed out!\n");
+
 		err = -ETIMEDOUT;
 		goto out;
 	}
@@ -889,8 +892,13 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 	}
 
 	if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) {
-		dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
-			ctx->status_code);
+		if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+		if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT)
+			dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
+				ctx->status_code);
 		err = -EPROTO;
 		goto out;
 	}
@@ -10,6 +10,7 @@
 #include <linux/filter.h>
 #include <linux/mm.h>
 #include <linux/pci.h>
+#include <linux/export.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -45,6 +46,15 @@ static const struct file_operations mana_dbg_q_fops = {
 	.read = mana_dbg_q_read,
 };
 
+static bool mana_en_need_log(struct mana_port_context *apc, int err)
+{
+	if (apc && apc->ac && apc->ac->gdma_dev &&
+	    apc->ac->gdma_dev->gdma_context)
+		return mana_need_log(apc->ac->gdma_dev->gdma_context, err);
+	else
+		return true;
+}
+
 /* Microsoft Azure Network Adapter (MANA) functions */
 
 static int mana_open(struct net_device *ndev)
@@ -249,10 +259,10 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct netdev_queue *net_txq;
 	struct mana_stats_tx *tx_stats;
 	struct gdma_queue *gdma_sq;
+	int err, len, num_gso_seg;
 	unsigned int csum_type;
 	struct mana_txq *txq;
 	struct mana_cq *cq;
-	int err, len;
 
 	if (unlikely(!apc->port_is_up))
 		goto tx_drop;
@@ -405,6 +415,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	skb_queue_tail(&txq->pending_skbs, skb);
 
 	len = skb->len;
+	num_gso_seg = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 	net_txq = netdev_get_tx_queue(ndev, txq_idx);
 
 	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
@@ -429,10 +440,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
 	skb = NULL;
 
+	/* Populated the packet and bytes counters based on post GSO packet
+	 * calculations
+	 */
 	tx_stats = &txq->stats;
 	u64_stats_update_begin(&tx_stats->syncp);
-	tx_stats->packets++;
-	tx_stats->bytes += len;
+	tx_stats->packets += num_gso_seg;
+	tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs);
 	u64_stats_update_end(&tx_stats->syncp);
 
 tx_busy:
@@ -772,8 +786,13 @@ static int mana_send_request(struct mana_context *ac, void *in_buf,
 	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
 				   out_buf);
 	if (err || resp->status) {
-		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
-			err, resp->status);
+		if (err == -EOPNOTSUPP)
+			return err;
+
+		if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
+		    mana_need_log(gc, err))
+			dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
+				err, resp->status);
 		return err ? err : -EPROTO;
 	}
 
@@ -848,8 +867,10 @@ static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
 				sizeof(resp));
 	if (err) {
-		netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
-			   err);
+		if (mana_en_need_log(apc, err))
+			netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
+				   err);
+
 		return;
 	}
 
@@ -904,8 +925,10 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc)
 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
 				sizeof(resp));
 	if (err) {
-		netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
-			   err);
+		if (mana_en_need_log(apc, err))
+			netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
+				   err);
+
 		return;
 	}
 
@@ -1135,7 +1158,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
 	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
 				sizeof(resp));
 	if (err) {
-		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+		if (mana_en_need_log(apc, err))
+			netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+
 		goto out;
 	}
 
@@ -1230,7 +1255,9 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
 	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
 				sizeof(resp));
 	if (err) {
-		netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
+		if (mana_en_need_log(apc, err))
+			netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
+
 		return;
 	}
 
@@ -2609,6 +2636,88 @@ void mana_query_gf_stats(struct mana_port_context *apc)
 	apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma;
 }
 
+void mana_query_phy_stats(struct mana_port_context *apc)
+{
+	struct mana_query_phy_stat_resp resp = {};
+	struct mana_query_phy_stat_req req = {};
+	struct net_device *ndev = apc->ndev;
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT,
+			     sizeof(req), sizeof(resp));
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err)
+		return;
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT,
+				   sizeof(resp));
+	if (err || resp.hdr.status) {
+		netdev_err(ndev,
+			   "Failed to query PHY stats: %d, resp:0x%x\n",
+			   err, resp.hdr.status);
+		return;
+	}
+
+	/* Aggregate drop counters */
+	apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy;
+	apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy;
+
+	/* Per TC traffic Counters */
+	apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy;
+	apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy;
+	apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy;
+	apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy;
+	apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy;
+	apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy;
+	apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy;
+	apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy;
+	apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy;
+	apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy;
+	apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy;
+	apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy;
+	apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy;
+	apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy;
+	apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy;
+	apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy;
+
+	/* Per TC byte Counters */
+	apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy;
+	apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy;
+	apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy;
+	apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy;
+	apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy;
+	apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy;
+	apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy;
+	apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy;
+	apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy;
+	apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy;
+	apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy;
+	apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy;
+	apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy;
+	apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy;
+	apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy;
+	apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy;
+
+	/* Per TC pause Counters */
+	apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy;
+	apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy;
+	apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy;
+	apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy;
+	apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy;
+	apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy;
+	apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy;
+	apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy;
+	apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy;
+	apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy;
+	apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy;
+	apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy;
+	apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy;
+	apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy;
+	apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy;
+	apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy;
+}
+
 static int mana_init_port(struct net_device *ndev)
 {
 	struct mana_port_context *apc = netdev_priv(ndev);
@@ -2803,11 +2912,10 @@ static int mana_dealloc_queues(struct net_device *ndev)
 
 	apc->rss_state = TRI_STATE_FALSE;
 	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
-	if (err) {
+	if (err && mana_en_need_log(apc, err))
 		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
-		return err;
-	}
 
+	/* Even in err case, still need to cleanup the vPort */
 	mana_destroy_vport(apc);
 
 	return 0;
@@ -7,10 +7,12 @@
 
 #include <net/mana/mana.h>
 
-static const struct {
+struct mana_stats_desc {
 char name[ETH_GSTRING_LEN];
 u16 offset;
-} mana_eth_stats[] = {
+};
+
+static const struct mana_stats_desc mana_eth_stats[] = {
 {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
 {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
 {"hc_rx_discards_no_wqe", offsetof(struct mana_ethtool_stats,
@@ -75,6 +77,59 @@ static const struct {
 rx_cqe_unknown_type)},
 };
 
+static const struct mana_stats_desc mana_phy_stats[] = {
+{ "hc_rx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_drop_phy) },
+{ "hc_tx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_drop_phy) },
+{ "hc_tc0_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc0_phy) },
+{ "hc_tc0_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc0_phy) },
+{ "hc_tc0_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc0_phy) },
+{ "hc_tc0_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc0_phy) },
+{ "hc_tc1_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc1_phy) },
+{ "hc_tc1_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc1_phy) },
+{ "hc_tc1_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc1_phy) },
+{ "hc_tc1_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc1_phy) },
+{ "hc_tc2_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc2_phy) },
+{ "hc_tc2_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc2_phy) },
+{ "hc_tc2_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc2_phy) },
+{ "hc_tc2_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc2_phy) },
+{ "hc_tc3_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc3_phy) },
+{ "hc_tc3_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc3_phy) },
+{ "hc_tc3_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc3_phy) },
+{ "hc_tc3_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc3_phy) },
+{ "hc_tc4_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc4_phy) },
+{ "hc_tc4_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc4_phy) },
+{ "hc_tc4_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc4_phy) },
+{ "hc_tc4_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc4_phy) },
+{ "hc_tc5_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc5_phy) },
+{ "hc_tc5_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc5_phy) },
+{ "hc_tc5_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc5_phy) },
+{ "hc_tc5_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc5_phy) },
+{ "hc_tc6_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc6_phy) },
+{ "hc_tc6_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc6_phy) },
+{ "hc_tc6_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc6_phy) },
+{ "hc_tc6_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc6_phy) },
+{ "hc_tc7_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc7_phy) },
+{ "hc_tc7_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc7_phy) },
+{ "hc_tc7_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc7_phy) },
+{ "hc_tc7_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc7_phy) },
+{ "hc_tc0_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc0_phy) },
+{ "hc_tc0_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc0_phy) },
+{ "hc_tc1_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc1_phy) },
+{ "hc_tc1_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc1_phy) },
+{ "hc_tc2_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc2_phy) },
+{ "hc_tc2_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc2_phy) },
+{ "hc_tc3_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc3_phy) },
+{ "hc_tc3_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc3_phy) },
+{ "hc_tc4_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc4_phy) },
+{ "hc_tc4_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc4_phy) },
+{ "hc_tc5_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc5_phy) },
+{ "hc_tc5_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc5_phy) },
+{ "hc_tc6_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc6_phy) },
+{ "hc_tc6_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc6_phy) },
+{ "hc_tc7_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc7_phy) },
+{ "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) },
+};
+
 static int mana_get_sset_count(struct net_device *ndev, int stringset)
 {
 struct mana_port_context *apc = netdev_priv(ndev);
@@ -83,8 +138,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
 if (stringset != ETH_SS_STATS)
 return -EINVAL;
 
-return ARRAY_SIZE(mana_eth_stats) + num_queues *
-(MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT);
+return ARRAY_SIZE(mana_eth_stats) + ARRAY_SIZE(mana_phy_stats) +
+num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT);
 }
 
 static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
@@ -99,6 +154,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++)
 ethtool_puts(&data, mana_eth_stats[i].name);
 
+for (i = 0; i < ARRAY_SIZE(mana_phy_stats); i++)
+ethtool_puts(&data, mana_phy_stats[i].name);
+
 for (i = 0; i < num_queues; i++) {
 ethtool_sprintf(&data, "rx_%d_packets", i);
 ethtool_sprintf(&data, "rx_%d_bytes", i);
@@ -128,6 +186,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
 struct mana_port_context *apc = netdev_priv(ndev);
 unsigned int num_queues = apc->num_queues;
 void *eth_stats = &apc->eth_stats;
+void *phy_stats = &apc->phy_stats;
 struct mana_stats_rx *rx_stats;
 struct mana_stats_tx *tx_stats;
 unsigned int start;
@@ -151,9 +210,18 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
 /* we call mana function to update stats from GDMA */
 mana_query_gf_stats(apc);
 
+/* We call this mana function to get the phy stats from GDMA and includes
+ * aggregate tx/rx drop counters, Per-TC(Traffic Channel) tx/rx and pause
+ * counters.
+ */
+mana_query_phy_stats(apc);
+
 for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++)
 data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset);
 
+for (q = 0; q < ARRAY_SIZE(mana_phy_stats); q++)
+data[i++] = *(u64 *)(phy_stats + mana_phy_stats[q].offset);
+
 for (q = 0; q < num_queues; q++) {
 rx_stats = &apc->rxqs[q]->stats;
 
@@ -1060,6 +1060,7 @@ struct net_device_context {
 struct net_device __rcu *vf_netdev;
 struct netvsc_vf_pcpu_stats __percpu *vf_stats;
 struct delayed_work vf_takeover;
+struct delayed_work vfns_work;
 
 /* 1: allocated, serial number is valid. 0: not allocated */
 u32 vf_alloc;
@@ -1074,6 +1075,8 @@ struct net_device_context {
 struct netvsc_device_info *saved_netvsc_dev_info;
 };
 
+void netvsc_vfns_work(struct work_struct *w);
+
 /* Azure hosts don't support non-TCP port numbers in hashing for fragmented
 * packets. We can use ethtool to change UDP hash level when necessary.
 */
@@ -2531,6 +2531,7 @@ static int netvsc_probe(struct hv_device *dev,
 spin_lock_init(&net_device_ctx->lock);
 INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work);
 
 net_device_ctx->vf_stats
 = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
@@ -2673,6 +2674,8 @@ static void netvsc_remove(struct hv_device *dev)
 cancel_delayed_work_sync(&ndev_ctx->dwork);
 
 rtnl_lock();
+cancel_delayed_work_sync(&ndev_ctx->vfns_work);
+
 nvdev = rtnl_dereference(ndev_ctx->nvdev);
 if (nvdev) {
 cancel_work_sync(&nvdev->subchan_work);
@@ -2714,6 +2717,7 @@ static int netvsc_suspend(struct hv_device *dev)
 cancel_delayed_work_sync(&ndev_ctx->dwork);
 
 rtnl_lock();
+cancel_delayed_work_sync(&ndev_ctx->vfns_work);
 
 nvdev = rtnl_dereference(ndev_ctx->nvdev);
 if (nvdev == NULL) {
@@ -2807,6 +2811,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev)
 }
 }
 
+void netvsc_vfns_work(struct work_struct *w)
+{
+struct net_device_context *ndev_ctx =
+container_of(w, struct net_device_context, vfns_work.work);
+struct net_device *ndev;
+
+if (!rtnl_trylock()) {
+schedule_delayed_work(&ndev_ctx->vfns_work, 1);
+return;
+}
+
+ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+if (!ndev)
+goto out;
+
+netvsc_event_set_vf_ns(ndev);
+
+out:
+rtnl_unlock();
+}
+
 /*
 * On Hyper-V, every VF interface is matched with a corresponding
 * synthetic interface. The synthetic interface is presented first
@@ -2817,10 +2842,12 @@ static int netvsc_netdev_event(struct notifier_block *this,
 unsigned long event, void *ptr)
 {
 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+struct net_device_context *ndev_ctx;
 int ret = 0;
 
 if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) {
-netvsc_event_set_vf_ns(event_dev);
+ndev_ctx = netdev_priv(event_dev);
+schedule_delayed_work(&ndev_ctx->vfns_work, 0);
 return NOTIFY_DONE;
 }
 
@@ -137,12 +137,14 @@ void nvme_mpath_start_request(struct request *rq)
 struct nvme_ns *ns = rq->q->queuedata;
 struct gendisk *disk = ns->head->disk;
 
-if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) {
+if ((READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) &&
+!(nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)) {
 atomic_inc(&ns->ctrl->nr_active);
 nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE;
 }
 
-if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
+if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq) ||
+(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
 return;
 
 nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
@@ -1245,7 +1245,7 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
 struct lpfc_nvmet_tgtport *tgtp;
 struct lpfc_async_xchg_ctx *ctxp =
 container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
-struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
+struct rqb_dmabuf *nvmebuf;
 struct lpfc_hba *phba = ctxp->phba;
 unsigned long iflag;
 
@@ -1253,13 +1253,18 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
 lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n",
 ctxp->oxid, ctxp->size, raw_smp_processor_id());
 
+spin_lock_irqsave(&ctxp->ctxlock, iflag);
+nvmebuf = ctxp->rqb_buffer;
 if (!nvmebuf) {
+spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
 lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
 "6425 Defer rcv: no buffer oxid x%x: "
 "flg %x ste %x\n",
 ctxp->oxid, ctxp->flag, ctxp->state);
 return;
 }
+ctxp->rqb_buffer = NULL;
+spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
 
 tgtp = phba->targetport->private;
 if (tgtp)
@@ -1267,9 +1272,6 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
 
 /* Free the nvmebuf since a new buffer already replaced it */
 nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
-spin_lock_irqsave(&ctxp->ctxlock, iflag);
-ctxp->rqb_buffer = NULL;
-spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
 }
 
 /**
@@ -90,6 +90,10 @@ static int efivarfs_d_compare(const struct dentry *dentry,
 {
 int guid = len - EFI_VARIABLE_GUID_LEN;
 
+/* Parallel lookups may produce a temporary invalid filename */
+if (guid <= 0)
+return 1;
+
 if (name->len != len)
 return 1;
 
@@ -193,8 +193,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 inode->i_wb_frn_history = 0;
 #endif
 
-if (security_inode_alloc(inode))
-goto out;
 spin_lock_init(&inode->i_lock);
 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
 
@@ -231,11 +229,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 inode->i_fsnotify_mask = 0;
 #endif
 inode->i_flctx = NULL;
+
+if (unlikely(security_inode_alloc(inode)))
+return -ENOMEM;
 this_cpu_inc(nr_inodes);
 
 return 0;
-out:
-return -ENOMEM;
 }
 EXPORT_SYMBOL(inode_init_always);
 
@@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
 !list_empty(&of->list));
 }
 
+/* Get active reference to kernfs node for an open file */
+static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
+{
+/* Skip if file was already released */
+if (unlikely(of->released))
+return NULL;
+
+if (!kernfs_get_active(of->kn))
+return NULL;
+
+return of;
+}
+
+static void kernfs_put_active_of(struct kernfs_open_file *of)
+{
+return kernfs_put_active(of->kn);
+}
+
 /**
 * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
 *
@@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
 
 if (ops->seq_stop)
 ops->seq_stop(sf, v);
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 }
 
 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
@@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
 * the ops aren't called concurrently for the same open file.
 */
 mutex_lock(&of->mutex);
-if (!kernfs_get_active(of->kn))
+if (!kernfs_get_active_of(of))
 return ERR_PTR(-ENODEV);
 
 ops = kernfs_ops(of->kn);
@@ -243,7 +261,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 * the ops aren't called concurrently for the same open file.
 */
 mutex_lock(&of->mutex);
-if (!kernfs_get_active(of->kn)) {
+if (!kernfs_get_active_of(of)) {
 len = -ENODEV;
 mutex_unlock(&of->mutex);
 goto out_free;
@@ -257,7 +275,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 else
 len = -EINVAL;
 
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 mutex_unlock(&of->mutex);
 
 if (len < 0)
@@ -328,7 +346,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 * the ops aren't called concurrently for the same open file.
 */
 mutex_lock(&of->mutex);
-if (!kernfs_get_active(of->kn)) {
+if (!kernfs_get_active_of(of)) {
 mutex_unlock(&of->mutex);
 len = -ENODEV;
 goto out_free;
@@ -340,7 +358,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 else
 len = -EINVAL;
 
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 mutex_unlock(&of->mutex);
 
 if (len > 0)
@@ -362,13 +380,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
 if (!of->vm_ops)
 return;
 
-if (!kernfs_get_active(of->kn))
+if (!kernfs_get_active_of(of))
 return;
 
 if (of->vm_ops->open)
 of->vm_ops->open(vma);
 
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 }
 
 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
@@ -380,14 +398,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
 if (!of->vm_ops)
 return VM_FAULT_SIGBUS;
 
-if (!kernfs_get_active(of->kn))
+if (!kernfs_get_active_of(of))
 return VM_FAULT_SIGBUS;
 
 ret = VM_FAULT_SIGBUS;
 if (of->vm_ops->fault)
 ret = of->vm_ops->fault(vmf);
 
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 return ret;
 }
 
@@ -400,7 +418,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
 if (!of->vm_ops)
 return VM_FAULT_SIGBUS;
 
-if (!kernfs_get_active(of->kn))
+if (!kernfs_get_active_of(of))
 return VM_FAULT_SIGBUS;
 
 ret = 0;
@@ -409,7 +427,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
 else
 file_update_time(file);
 
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 return ret;
 }
 
@@ -423,14 +441,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
 if (!of->vm_ops)
 return -EINVAL;
 
-if (!kernfs_get_active(of->kn))
+if (!kernfs_get_active_of(of))
 return -EINVAL;
 
 ret = -EINVAL;
 if (of->vm_ops->access)
 ret = of->vm_ops->access(vma, addr, buf, len, write);
 
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 return ret;
 }
 
@@ -460,7 +478,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
 mutex_lock(&of->mutex);
 
 rc = -ENODEV;
-if (!kernfs_get_active(of->kn))
+if (!kernfs_get_active_of(of))
 goto out_unlock;
 
 ops = kernfs_ops(of->kn);
@@ -493,7 +511,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
 of->vm_ops = vma->vm_ops;
 vma->vm_ops = &kernfs_vm_ops;
 out_put:
-kernfs_put_active(of->kn);
+kernfs_put_active_of(of);
 out_unlock:
 mutex_unlock(&of->mutex);
 
@@ -847,7 +865,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
 struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
 __poll_t ret;
 
-if (!kernfs_get_active(kn))
+if (!kernfs_get_active_of(of))
 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
 
 if (kn->attr.ops->poll)
@@ -855,7 +873,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
 else
 ret = kernfs_generic_poll(of, wait);
 
-kernfs_put_active(kn);
+kernfs_put_active_of(of);
 return ret;
 }
 
@@ -2294,6 +2294,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
 return attach_recursive_mnt(mnt, p, mp, false);
 }
 
+static int may_change_propagation(const struct mount *m)
+{
+struct mnt_namespace *ns = m->mnt_ns;
+
+// it must be mounted in some namespace
+if (IS_ERR_OR_NULL(ns)) // is_mounted()
+return -EINVAL;
+// and the caller must be admin in userns of that namespace
+if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+return -EPERM;
+return 0;
+}
+
 /*
 * Sanity check the flags to change_mnt_propagation.
 */
@@ -2330,6 +2343,10 @@ static int do_change_type(struct path *path, int ms_flags)
 return -EINVAL;
 
 namespace_lock();
+err = may_change_propagation(mnt);
+if (err)
+goto out_unlock;
+
 if (type == MS_SHARED) {
 err = invent_group_ids(mnt, recurse);
 if (err)
@@ -1840,9 +1840,7 @@ static void block_revalidate(struct dentry *dentry)
 
 static void unblock_revalidate(struct dentry *dentry)
 {
-/* store_release ensures wait_var_event() sees the update */
-smp_store_release(&dentry->d_fsdata, NULL);
-wake_up_var(&dentry->d_fsdata);
+store_release_wake_up(&dentry->d_fsdata, NULL);
 }
 
 /*
@@ -7815,10 +7815,10 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
 return err;
 do {
 err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
-if (err != -NFS4ERR_DELAY)
+if (err != -NFS4ERR_DELAY && err != -NFS4ERR_GRACE)
 break;
 ssleep(1);
-} while (err == -NFS4ERR_DELAY);
+} while (err == -NFS4ERR_DELAY || err == -NFSERR_GRACE);
 return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
 }
 
@@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req)
 nfs_page_clear_headlock(req);
 }
 
-/*
-* nfs_page_group_sync_on_bit_locked
+/**
+* nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
+* @req: request in page group
+* @bit: PG_* bit that is used to sync page group
 *
 * must be called with page group lock held
 */
-static bool
-nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
 {
 struct nfs_page *head = req->wb_head;
 struct nfs_page *tmp;
@@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
 }
 }
 
-static int
-nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
 {
-int ret;
-
-if (!test_bit(PG_REMOVE, &req->wb_flags))
-return 0;
-ret = nfs_page_group_lock(req);
-if (ret)
-return ret;
 if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
 nfs_page_set_inode_ref(req, inode);
-nfs_page_group_unlock(req);
-return 0;
 }
 
 /**
@@ -584,19 +574,18 @@ retry:
 return ERR_PTR(ret);
 }
 
+ret = nfs_page_group_lock(head);
+if (ret < 0)
+goto out_unlock;
+
 /* Ensure that nobody removed the request before we locked it */
 if (head != folio->private) {
+nfs_page_group_unlock(head);
 nfs_unlock_and_release_request(head);
 goto retry;
 }
 
-ret = nfs_cancel_remove_inode(head, inode);
-if (ret < 0)
-goto out_unlock;
-
-ret = nfs_page_group_lock(head);
-if (ret < 0)
-goto out_unlock;
+nfs_cancel_remove_inode(head, inode);
 
 /* lock each request in the page group */
 for (subreq = head->wb_this_page;
@@ -801,7 +790,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 {
 struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
 
-if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+nfs_page_group_lock(req);
+if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
 struct folio *folio = nfs_page_to_folio(req->wb_head);
 struct address_space *mapping = folio->mapping;
 
@@ -813,6 +803,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 }
 spin_unlock(&mapping->private_lock);
 }
+nfs_page_group_unlock(req);
 
 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
 atomic_long_dec(&nfsi->nrequests);
@@ -48,6 +48,21 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
 switch (nfserr) {
 case nfs_ok:
 return 0;
+case nfserr_jukebox:
+/* this error can indicate a presence of a conflicting
+* delegation to an NLM lock request. Options are:
+* (1) For now, drop this request and make the client
+* retry. When delegation is returned, client's lock retry
+* will complete.
+* (2) NLM4_DENIED as per "spec" signals to the client
+* that the lock is unavailable now but client can retry.
+* Linux client implementation does not. It treats
+* NLM4_DENIED same as NLM4_FAILED and errors the request.
+* (3) For the future, treat this as blocked lock and try
+* to callback when the delegation is returned but might
+* not have a proper lock request to block on.
+*/
+fallthrough;
 case nfserr_dropit:
 return nlm_drop_reply;
 case nfserr_stale:
@@ -95,10 +95,10 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
 fattr->ncf_cb_fsize = 0;
 if (bitmap[0] & FATTR4_WORD0_CHANGE)
 if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
-return -NFSERR_BAD_XDR;
+return -EIO;
 if (bitmap[0] & FATTR4_WORD0_SIZE)
 if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
-return -NFSERR_BAD_XDR;
+return -EIO;
 return 0;
 }
 
@@ -605,14 +605,14 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
 return status;
 
 status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
-if (status)
+if (unlikely(status || cb->cb_status))
 return status;
 if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
-return -NFSERR_BAD_XDR;
+return -EIO;
 if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
-return -NFSERR_BAD_XDR;
+return -EIO;
 if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
-return -NFSERR_BAD_XDR;
+return -EIO;
 status = decode_cb_fattr4(xdr, bitmap, ncf);
 return status;
 }
@@ -514,7 +514,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
 sig ^= PERSISTENT_RAM_SIG;
 
 if (prz->buffer->sig == sig) {
-if (buffer_size(prz) == 0) {
+if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
 pr_debug("found existing empty buffer\n");
 return 0;
 }
@@ -87,7 +87,7 @@
 #define SMB_INTERFACE_POLL_INTERVAL 600
 
 /* maximum number of PDUs in one compound */
-#define MAX_COMPOUND 7
+#define MAX_COMPOUND 10
 
 /*
 * Default number of credits to keep available for SMB3.
@@ -1938,9 +1938,12 @@ static inline bool is_replayable_error(int error)
 
 
 /* cifs_get_writable_file() flags */
-#define FIND_WR_ANY 0
-#define FIND_WR_FSUID_ONLY 1
-#define FIND_WR_WITH_DELETE 2
+enum cifs_writable_file_flags {
+FIND_WR_ANY = 0U,
+FIND_WR_FSUID_ONLY = (1U << 0),
+FIND_WR_WITH_DELETE = (1U << 1),
+FIND_WR_NO_PENDING_DELETE = (1U << 2),
+};
 
 #define MID_FREE 0
 #define MID_REQUEST_ALLOCATED 1
@@ -2374,6 +2377,8 @@ struct smb2_compound_vars {
 struct kvec qi_iov;
 struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
 struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
+struct kvec unlink_iov[SMB2_SET_INFO_IOV_SIZE];
+struct kvec rename_iov[SMB2_SET_INFO_IOV_SIZE];
 struct kvec close_iov;
 struct smb2_file_rename_info_hdr rename_info;
 struct smb2_file_link_info_hdr link_info;
@@ -297,8 +297,8 @@ extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
 
 extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
 
-extern void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon,
-const char *path);
+void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon,
+struct dentry *dentry);
 
 extern void cifs_mark_open_handles_for_deleted_file(struct inode *inode,
 const char *path);
@@ -681,7 +681,10 @@ int cifs_open(struct inode *inode, struct file *file)
 
 /* Get the cached handle as SMB2 close is deferred */
 if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) {
-rc = cifs_get_writable_path(tcon, full_path, FIND_WR_FSUID_ONLY, &cfile);
+rc = cifs_get_writable_path(tcon, full_path,
+FIND_WR_FSUID_ONLY |
+FIND_WR_NO_PENDING_DELETE,
+&cfile);
 } else {
 rc = cifs_get_readable_path(tcon, full_path, &cfile);
 }
@@ -2286,6 +2289,9 @@ refind_writable:
 continue;
 if (with_delete && !(open_file->fid.access & DELETE))
 continue;
+if ((flags & FIND_WR_NO_PENDING_DELETE) &&
+open_file->status_file_deleted)
+continue;
 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
 if (!open_file->invalidHandle) {
 /* found a good writable file */
@@ -2403,6 +2409,16 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
 spin_unlock(&tcon->open_file_lock);
 free_dentry_path(page);
 *ret_file = find_readable_file(cinode, 0);
+if (*ret_file) {
+spin_lock(&cinode->open_file_lock);
+if ((*ret_file)->status_file_deleted) {
+spin_unlock(&cinode->open_file_lock);
+cifsFileInfo_put(*ret_file);
+*ret_file = NULL;
+} else {
+spin_unlock(&cinode->open_file_lock);
+}
+}
 return *ret_file ? 0 : -ENOENT;
 }
 
@@ -1912,7 +1912,7 @@ cifs_drop_nlink(struct inode *inode)
 * but will return the EACCES to the caller. Note that the VFS does not call
 * unlink on negative dentries currently.
 */
-int cifs_unlink(struct inode *dir, struct dentry *dentry)
+static int __cifs_unlink(struct inode *dir, struct dentry *dentry, bool sillyrename)
 {
 int rc = 0;
 unsigned int xid;
@@ -1964,7 +1964,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 goto unlink_out;
 }
 
-cifs_close_deferred_file_under_dentry(tcon, full_path);
+cifs_close_deferred_file_under_dentry(tcon, dentry);
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
@@ -1983,7 +1983,24 @@ retry_std_delete:
 goto psx_del_no_retry;
 }
 
-rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry);
+/* For SMB2+, if the file is open, we always perform a silly rename.
+*
+* We check for d_count() right after calling
+* cifs_close_deferred_file_under_dentry() to make sure that the
+* dentry's refcount gets dropped in case the file had any deferred
+* close.
+*/
+if (!sillyrename && server->vals->protocol_id > SMB10_PROT_ID) {
+spin_lock(&dentry->d_lock);
+if (d_count(dentry) > 1)
+sillyrename = true;
+spin_unlock(&dentry->d_lock);
+}
+
+if (sillyrename)
+rc = -EBUSY;
+else
+rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry);
 
 psx_del_no_retry:
 if (!rc) {
@@ -2051,6 +2068,11 @@ unlink_out:
 return rc;
 }
 
+int cifs_unlink(struct inode *dir, struct dentry *dentry)
+{
+return __cifs_unlink(dir, dentry, false);
+}
+
 static int
 cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
 const char *full_path, struct cifs_sb_info *cifs_sb,
@@ -2338,14 +2360,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb);
 cifs_put_tlink(tlink);
 
+cifsInode = CIFS_I(d_inode(direntry));
+
 if (!rc) {
+set_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags);
 spin_lock(&d_inode(direntry)->i_lock);
 i_size_write(d_inode(direntry), 0);
 clear_nlink(d_inode(direntry));
 spin_unlock(&d_inode(direntry)->i_lock);
 }
 
-cifsInode = CIFS_I(d_inode(direntry));
 /* force revalidate to go get info when needed */
 cifsInode->time = 0;
 
@@ -2438,8 +2462,11 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
 }
 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
 do_rename_exit:
-if (rc == 0)
+if (rc == 0) {
 d_move(from_dentry, to_dentry);
+/* Force a new lookup */
+d_drop(from_dentry);
+}
 cifs_put_tlink(tlink);
 return rc;
 }
@@ -2450,6 +2477,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
 struct dentry *target_dentry, unsigned int flags)
 {
 const char *from_name, *to_name;
+struct TCP_Server_Info *server;
 void *page1, *page2;
 struct cifs_sb_info *cifs_sb;
 struct tcon_link *tlink;
@@ -2485,6 +2513,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
 if (IS_ERR(tlink))
 return PTR_ERR(tlink);
 tcon = tlink_tcon(tlink);
+server = tcon->ses->server;
 
 page1 = alloc_dentry_path();
 page2 = alloc_dentry_path();
@@ -2502,9 +2531,9 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
 goto cifs_rename_exit;
 }
 
-cifs_close_deferred_file_under_dentry(tcon, from_name);
+cifs_close_deferred_file_under_dentry(tcon, source_dentry);
 if (d_inode(target_dentry) != NULL)
-cifs_close_deferred_file_under_dentry(tcon, to_name);
+cifs_close_deferred_file_under_dentry(tcon, target_dentry);
 
 rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
 to_name);
@@ -2569,19 +2598,52 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
 
 unlink_target:
 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
-/* Try unlinking the target dentry if it's not negative */
-if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
-if (d_is_dir(target_dentry))
-tmprc = cifs_rmdir(target_dir, target_dentry);
-else
-tmprc = cifs_unlink(target_dir, target_dentry);
-if (tmprc)
-goto cifs_rename_exit;
-rc = cifs_do_rename(xid, source_dentry, from_name,
-target_dentry, to_name);
-if (!rc)
-rehash = false;
+if (d_really_is_positive(target_dentry)) {
+if (!rc) {
+struct inode *inode = d_inode(target_dentry);
+/*
+* Samba and ksmbd servers allow renaming a target
+* directory that is open, so make sure to update
+* ->i_nlink and then mark it as delete pending.
+*/
+if (S_ISDIR(inode->i_mode)) {
+drop_cached_dir_by_name(xid, tcon, to_name, cifs_sb);
+spin_lock(&inode->i_lock);
+i_size_write(inode, 0);
+clear_nlink(inode);
+spin_unlock(&inode->i_lock);
+set_bit(CIFS_INO_DELETE_PENDING, &CIFS_I(inode)->flags);
+CIFS_I(inode)->time = 0; /* force reval */
+inode->i_mtime = inode_set_ctime_current(inode);
+}
+} else if (rc == -EACCES || rc == -EEXIST) {
+/*
+* Rename failed, possibly due to a busy target.
+* Retry it by unliking the target first.
+*/
+if (d_is_dir(target_dentry)) {
+tmprc = cifs_rmdir(target_dir, target_dentry);
+} else {
+tmprc = __cifs_unlink(target_dir, target_dentry,
+server->vals->protocol_id > SMB10_PROT_ID);
+}
+if (tmprc) {
+/*
+* Some servers will return STATUS_ACCESS_DENIED
+* or STATUS_DIRECTORY_NOT_EMPTY when failing to
+* rename a non-empty directory. Make sure to
+* propagate the appropriate error back to
+* userspace.
+*/
+if (tmprc == -EEXIST || tmprc == -ENOTEMPTY)
+rc = tmprc;
+goto cifs_rename_exit;
+}
+rc = cifs_do_rename(xid, source_dentry, from_name,
+target_dentry, to_name);
+if (!rc)
+rehash = false;
+}
 }
 
 /* force revalidate to go get info when needed */
@@ -2610,6 +2672,8 @@ cifs_dentry_needs_reval(struct dentry *dentry)
 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 struct cached_fid *cfid = NULL;
 
+if (test_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags))
+return false;
 if (cifs_i->time == 0)
 return true;
 
@@ -832,33 +832,28 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
 kfree(tmp_list);
 }
 }
-void
-cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
+
+void cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon,
+struct dentry *dentry)
 {
-struct cifsFileInfo *cfile;
 struct file_list *tmp_list, *tmp_next_list;
-void *page;
+struct cifsFileInfo *cfile;
-const char *full_path;
 LIST_HEAD(file_head);
 
-page = alloc_dentry_path();
 spin_lock(&tcon->open_file_lock);
 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
-full_path = build_path_from_dentry(cfile->dentry, page);
-if (strstr(full_path, path)) {
-if (delayed_work_pending(&cfile->deferred)) {
-if (cancel_delayed_work(&cfile->deferred)) {
+if ((cfile->dentry == dentry) &&
+delayed_work_pending(&cfile->deferred) &&
+cancel_delayed_work(&cfile->deferred)) {
 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 cifs_del_deferred_close(cfile);
 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 
 tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
 if (tmp_list == NULL)
 break;
 tmp_list->cfile = cfile;
 list_add_tail(&tmp_list->list, &file_head);
-}
-}
 }
 }
 spin_unlock(&tcon->open_file_lock);
@@ -868,7 +863,6 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
 list_del(&tmp_list->list);
 kfree(tmp_list);
 }
-free_dentry_path(page);
 }
 
 /*
@@ -30,10 +30,9 @@ enum smb2_compound_ops {
 SMB2_OP_QUERY_DIR,
 SMB2_OP_MKDIR,
 SMB2_OP_RENAME,
-SMB2_OP_DELETE,
 SMB2_OP_HARDLINK,
 SMB2_OP_SET_EOF,
-SMB2_OP_RMDIR,
+SMB2_OP_UNLINK,
 SMB2_OP_POSIX_QUERY_INFO,
 SMB2_OP_SET_REPARSE,
 SMB2_OP_GET_REPARSE,
@ -207,8 +207,10 @@ replay_again:
|
|||||||
server = cifs_pick_channel(ses);
|
server = cifs_pick_channel(ses);
|
||||||
|
|
||||||
vars = kzalloc(sizeof(*vars), GFP_ATOMIC);
|
vars = kzalloc(sizeof(*vars), GFP_ATOMIC);
|
||||||
if (vars == NULL)
|
if (vars == NULL) {
|
||||||
return -ENOMEM;
|
rc = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
rqst = &vars->rqst[0];
|
rqst = &vars->rqst[0];
|
||||||
rsp_iov = &vars->rsp_iov[0];
|
rsp_iov = &vars->rsp_iov[0];
|
||||||
|
|
||||||
@ -344,9 +346,6 @@ replay_again:
|
|||||||
trace_smb3_posix_query_info_compound_enter(xid, tcon->tid,
|
trace_smb3_posix_query_info_compound_enter(xid, tcon->tid,
|
||||||
ses->Suid, full_path);
|
ses->Suid, full_path);
|
||||||
break;
|
break;
|
||||||
case SMB2_OP_DELETE:
|
|
||||||
trace_smb3_delete_enter(xid, tcon->tid, ses->Suid, full_path);
|
|
||||||
break;
|
|
||||||
case SMB2_OP_MKDIR:
|
case SMB2_OP_MKDIR:
|
||||||
/*
|
/*
|
||||||
* Directories are created through parameters in the
|
* Directories are created through parameters in the
|
||||||
@ -354,23 +353,40 @@ replay_again:
|
|||||||
*/
|
*/
|
||||||
trace_smb3_mkdir_enter(xid, tcon->tid, ses->Suid, full_path);
|
trace_smb3_mkdir_enter(xid, tcon->tid, ses->Suid, full_path);
|
||||||
break;
|
break;
|
||||||
case SMB2_OP_RMDIR:
|
case SMB2_OP_UNLINK:
|
||||||
rqst[num_rqst].rq_iov = &vars->si_iov[0];
|
rqst[num_rqst].rq_iov = vars->unlink_iov;
|
||||||
rqst[num_rqst].rq_nvec = 1;
|
rqst[num_rqst].rq_nvec = 1;
|
||||||
|
|
||||||
size[0] = 1; /* sizeof __u8 See MS-FSCC section 2.4.11 */
|
size[0] = 1; /* sizeof __u8 See MS-FSCC section 2.4.11 */
|
||||||
data[0] = &delete_pending[0];
|
data[0] = &delete_pending[0];
|
||||||
|
|
||||||
rc = SMB2_set_info_init(tcon, server,
|
if (cfile) {
|
||||||
&rqst[num_rqst], COMPOUND_FID,
|
rc = SMB2_set_info_init(tcon, server,
|
||||||
COMPOUND_FID, current->tgid,
|
&rqst[num_rqst],
|
||||||
FILE_DISPOSITION_INFORMATION,
|
cfile->fid.persistent_fid,
|
||||||
SMB2_O_INFO_FILE, 0, data, size);
|
cfile->fid.volatile_fid,
|
||||||
if (rc)
|
current->tgid,
|
||||||
|
FILE_DISPOSITION_INFORMATION,
|
||||||
|
SMB2_O_INFO_FILE, 0,
|
||||||
|
data, size);
|
||||||
|
} else {
|
||||||
|
rc = SMB2_set_info_init(tcon, server,
|
||||||
|
&rqst[num_rqst],
|
||||||
|
COMPOUND_FID,
|
||||||
|
COMPOUND_FID,
|
||||||
|
current->tgid,
|
||||||
|
FILE_DISPOSITION_INFORMATION,
|
||||||
|
SMB2_O_INFO_FILE, 0,
|
||||||
|
data, size);
|
||||||
|
}
|
||||||
|
if (!rc && (!cfile || num_rqst > 1)) {
|
||||||
|
smb2_set_next_command(tcon, &rqst[num_rqst]);
|
||||||
|
smb2_set_related(&rqst[num_rqst]);
|
||||||
|
} else if (rc) {
|
||||||
goto finished;
|
goto finished;
|
||||||
smb2_set_next_command(tcon, &rqst[num_rqst]);
|
}
|
||||||
smb2_set_related(&rqst[num_rqst++]);
|
num_rqst++;
|
||||||
trace_smb3_rmdir_enter(xid, tcon->tid, ses->Suid, full_path);
|
trace_smb3_unlink_enter(xid, tcon->tid, ses->Suid, full_path);
|
||||||
break;
|
break;
|
||||||
case SMB2_OP_SET_EOF:
|
case SMB2_OP_SET_EOF:
|
||||||
rqst[num_rqst].rq_iov = &vars->si_iov[0];
|
rqst[num_rqst].rq_iov = &vars->si_iov[0];
|
||||||
@@ -440,7 +456,7 @@ replay_again:
 ses->Suid, full_path);
 break;
 case SMB2_OP_RENAME:
-rqst[num_rqst].rq_iov = &vars->si_iov[0];
+rqst[num_rqst].rq_iov = vars->rename_iov;
 rqst[num_rqst].rq_nvec = 2;
 
 len = in_iov[i].iov_len;
@@ -671,7 +687,7 @@ finished:
 }
 
 for (i = 0; i < num_cmds; i++) {
-char *buf = rsp_iov[i + i].iov_base;
+char *buf = rsp_iov[i + 1].iov_base;
 
 if (buf && resp_buftype[i + 1] != CIFS_NO_BUFFER)
 rc = server->ops->map_error(buf, false);
@@ -730,19 +746,6 @@ finished:
 trace_smb3_posix_query_info_compound_done(xid, tcon->tid,
 ses->Suid);
 break;
-case SMB2_OP_DELETE:
-if (rc)
-trace_smb3_delete_err(xid, tcon->tid, ses->Suid, rc);
-else {
-/*
-* If dentry (hence, inode) is NULL, lease break is going to
-* take care of degrading leases on handles for deleted files.
-*/
-if (inode)
-cifs_mark_open_handles_for_deleted_file(inode, full_path);
-trace_smb3_delete_done(xid, tcon->tid, ses->Suid);
-}
-break;
 case SMB2_OP_MKDIR:
 if (rc)
 trace_smb3_mkdir_err(xid, tcon->tid, ses->Suid, rc);
@@ -763,11 +766,11 @@ finished:
 trace_smb3_rename_done(xid, tcon->tid, ses->Suid);
 SMB2_set_info_free(&rqst[num_rqst++]);
 break;
-case SMB2_OP_RMDIR:
-if (rc)
-trace_smb3_rmdir_err(xid, tcon->tid, ses->Suid, rc);
+case SMB2_OP_UNLINK:
+if (!rc)
+trace_smb3_unlink_done(xid, tcon->tid, ses->Suid);
 else
-trace_smb3_rmdir_done(xid, tcon->tid, ses->Suid);
+trace_smb3_unlink_err(xid, tcon->tid, ses->Suid, rc);
 SMB2_set_info_free(&rqst[num_rqst++]);
 break;
 case SMB2_OP_SET_EOF:
@@ -864,6 +867,7 @@ finished:
 smb2_should_replay(tcon, &retries, &cur_sleep))
 goto replay_again;
 
+out:
 if (cfile)
 cifsFileInfo_put(cfile);
 
@@ -1163,7 +1167,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE);
 return smb2_compound_op(xid, tcon, cifs_sb,
 name, &oparms, NULL,
-&(int){SMB2_OP_RMDIR}, 1,
+&(int){SMB2_OP_UNLINK}, 1,
 NULL, NULL, NULL, NULL);
 }
 
@@ -1171,21 +1175,107 @@ int
 smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 struct cifs_sb_info *cifs_sb, struct dentry *dentry)
 {
+struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+__le16 *utf16_path __free(kfree) = NULL;
+int retries = 0, cur_sleep = 1;
+struct TCP_Server_Info *server;
 struct cifs_open_parms oparms;
+struct smb2_create_req *creq;
+struct inode *inode = NULL;
+struct smb_rqst rqst[2];
+struct kvec rsp_iov[2];
+struct kvec close_iov;
+int resp_buftype[2];
+struct cifs_fid fid;
+int flags = 0;
+__u8 oplock;
+int rc;
 
-oparms = CIFS_OPARMS(cifs_sb, tcon, name,
-DELETE, FILE_OPEN,
-CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
-ACL_NO_MODE);
-int rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
-NULL, &(int){SMB2_OP_DELETE}, 1,
-NULL, NULL, NULL, dentry);
-if (rc == -EINVAL) {
-cifs_dbg(FYI, "invalid lease key, resending request without lease");
-rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
-NULL, &(int){SMB2_OP_DELETE}, 1,
-NULL, NULL, NULL, NULL);
+utf16_path = cifs_convert_path_to_utf16(name, cifs_sb);
+if (!utf16_path)
+return -ENOMEM;
+
+if (smb3_encryption_required(tcon))
+flags |= CIFS_TRANSFORM_REQ;
+again:
+oplock = SMB2_OPLOCK_LEVEL_NONE;
+server = cifs_pick_channel(tcon->ses);
+
+memset(rqst, 0, sizeof(rqst));
+memset(resp_buftype, 0, sizeof(resp_buftype));
+memset(rsp_iov, 0, sizeof(rsp_iov));
+
+rqst[0].rq_iov = open_iov;
+rqst[0].rq_nvec = ARRAY_SIZE(open_iov);
+
+oparms = CIFS_OPARMS(cifs_sb, tcon, name, DELETE | FILE_READ_ATTRIBUTES,
+FILE_OPEN, CREATE_DELETE_ON_CLOSE |
+OPEN_REPARSE_POINT, ACL_NO_MODE);
+oparms.fid = &fid;
+
+if (dentry) {
+inode = d_inode(dentry);
+if (CIFS_I(inode)->lease_granted && server->ops->get_lease_key) {
+oplock = SMB2_OPLOCK_LEVEL_LEASE;
+server->ops->get_lease_key(inode, &fid);
+}
 }
+
+rc = SMB2_open_init(tcon, server,
+&rqst[0], &oplock, &oparms, utf16_path);
+if (rc)
+goto err_free;
+smb2_set_next_command(tcon, &rqst[0]);
+creq = rqst[0].rq_iov[0].iov_base;
+creq->ShareAccess = FILE_SHARE_DELETE_LE;
+
+rqst[1].rq_iov = &close_iov;
+rqst[1].rq_nvec = 1;
+
+rc = SMB2_close_init(tcon, server, &rqst[1],
+COMPOUND_FID, COMPOUND_FID, false);
+smb2_set_related(&rqst[1]);
+if (rc)
+goto err_free;
+
+if (retries) {
+for (int i = 0; i < ARRAY_SIZE(rqst); i++)
+smb2_set_replay(server, &rqst[i]);
+}
+
+rc = compound_send_recv(xid, tcon->ses, server, flags,
+ARRAY_SIZE(rqst), rqst,
+resp_buftype, rsp_iov);
+SMB2_open_free(&rqst[0]);
+SMB2_close_free(&rqst[1]);
+free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+
+if (is_replayable_error(rc) &&
+smb2_should_replay(tcon, &retries, &cur_sleep))
+goto again;
+
+/* Retry compound request without lease */
+if (rc == -EINVAL && dentry) {
+dentry = NULL;
+retries = 0;
+cur_sleep = 1;
+goto again;
+}
+/*
+* If dentry (hence, inode) is NULL, lease break is going to
+* take care of degrading leases on handles for deleted files.
+*/
+if (!rc && inode)
+cifs_mark_open_handles_for_deleted_file(inode, name);
+
+return rc;
+
+err_free:
+SMB2_open_free(&rqst[0]);
+SMB2_close_free(&rqst[1]);
+free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
 return rc;
 }
 
@@ -1438,3 +1528,113 @@ out:
 cifs_free_open_info(&data);
 return rc;
 }
+
+static inline __le16 *utf16_smb2_path(struct cifs_sb_info *cifs_sb,
+const char *name, size_t namelen)
+{
+int len;
+
+if (*name == '\\' ||
+(cifs_sb_master_tlink(cifs_sb) &&
+cifs_sb_master_tcon(cifs_sb)->posix_extensions && *name == '/'))
+name++;
+return cifs_strndup_to_utf16(name, namelen, &len,
+cifs_sb->local_nls,
+cifs_remap(cifs_sb));
+}
+
+int smb2_rename_pending_delete(const char *full_path,
+struct dentry *dentry,
+const unsigned int xid)
+{
+struct cifs_sb_info *cifs_sb = CIFS_SB(d_inode(dentry)->i_sb);
+struct cifsInodeInfo *cinode = CIFS_I(d_inode(dentry));
+__le16 *utf16_path __free(kfree) = NULL;
+__u32 co = file_create_options(dentry);
+int cmds[] = {
+SMB2_OP_SET_INFO,
+SMB2_OP_RENAME,
+SMB2_OP_UNLINK,
+};
+const int num_cmds = ARRAY_SIZE(cmds);
+char *to_name __free(kfree) = NULL;
+__u32 attrs = cinode->cifsAttrs;
+struct cifs_open_parms oparms;
+static atomic_t sillycounter;
+struct cifsFileInfo *cfile;
+struct tcon_link *tlink;
+struct cifs_tcon *tcon;
+struct kvec iov[2];
+const char *ppath;
+void *page;
+size_t len;
+int rc;
+
+tlink = cifs_sb_tlink(cifs_sb);
+if (IS_ERR(tlink))
+return PTR_ERR(tlink);
+tcon = tlink_tcon(tlink);
+
+page = alloc_dentry_path();
+
+ppath = build_path_from_dentry(dentry->d_parent, page);
+if (IS_ERR(ppath)) {
+rc = PTR_ERR(ppath);
+goto out;
+}
+
+len = strlen(ppath) + strlen("/.__smb1234") + 1;
+to_name = kmalloc(len, GFP_KERNEL);
+if (!to_name) {
+rc = -ENOMEM;
+goto out;
+}
+
+scnprintf(to_name, len, "%s%c.__smb%04X", ppath, CIFS_DIR_SEP(cifs_sb),
+atomic_inc_return(&sillycounter) & 0xffff);
+
+utf16_path = utf16_smb2_path(cifs_sb, to_name, len);
+if (!utf16_path) {
+rc = -ENOMEM;
+goto out;
+}
+
+drop_cached_dir_by_name(xid, tcon, full_path, cifs_sb);
+oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
+DELETE | FILE_WRITE_ATTRIBUTES,
+FILE_OPEN, co, ACL_NO_MODE);
+
+attrs &= ~ATTR_READONLY;
+if (!attrs)
+attrs = ATTR_NORMAL;
+if (d_inode(dentry)->i_nlink <= 1)
+attrs |= ATTR_HIDDEN;
+iov[0].iov_base = &(FILE_BASIC_INFO) {
+.Attributes = cpu_to_le32(attrs),
+};
+iov[0].iov_len = sizeof(FILE_BASIC_INFO);
+iov[1].iov_base = utf16_path;
+iov[1].iov_len = sizeof(*utf16_path) * UniStrlen((wchar_t *)utf16_path);
+
+cifs_get_writable_path(tcon, full_path, FIND_WR_WITH_DELETE, &cfile);
+rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov,
+cmds, num_cmds, cfile, NULL, NULL, dentry);
+if (rc == -EINVAL) {
+cifs_dbg(FYI, "invalid lease key, resending request without lease\n");
+cifs_get_writable_path(tcon, full_path,
+FIND_WR_WITH_DELETE, &cfile);
+rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov,
+cmds, num_cmds, cfile, NULL, NULL, NULL);
+}
+if (!rc) {
+set_bit(CIFS_INO_DELETE_PENDING, &cinode->flags);
+} else {
+cifs_tcon_dbg(FYI, "%s: failed to rename '%s' to '%s': %d\n",
+__func__, full_path, to_name, rc);
+rc = -EIO;
+}
+out:
+cifs_put_tlink(tlink);
+free_dentry_path(page);
+return rc;
+}
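For illustration, a standalone user-space C sketch of how the silly-rename target name above is formed from the parent path and a 16-bit counter; the parent path, buffer size and separator are made up for the example and are not from this patch.

/* Sketch only: mirrors the scnprintf() pattern in smb2_rename_pending_delete(). */
#include <stdio.h>

int main(void)
{
	const char *parent = "/share/dir";   /* hypothetical parent path */
	static unsigned int sillycounter;    /* the kernel uses an atomic_t */
	char to_name[64];

	sillycounter++;
	snprintf(to_name, sizeof(to_name), "%s%c.__smb%04X",
		 parent, '/', sillycounter & 0xffff);
	printf("%s\n", to_name);             /* e.g. /share/dir/.__smb0001 */
	return 0;
}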
@@ -2596,13 +2596,35 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst)
 }
 
 /* SMB headers in a compound are 8 byte aligned. */
-if (!IS_ALIGNED(len, 8)) {
-num_padding = 8 - (len & 7);
+if (IS_ALIGNED(len, 8))
+goto out;
+
+num_padding = 8 - (len & 7);
+if (smb3_encryption_required(tcon)) {
+int i;
+
+/*
+* Flatten request into a single buffer with required padding as
+* the encryption layer can't handle the padding iovs.
+*/
+for (i = 1; i < rqst->rq_nvec; i++) {
+memcpy(rqst->rq_iov[0].iov_base +
+rqst->rq_iov[0].iov_len,
+rqst->rq_iov[i].iov_base,
+rqst->rq_iov[i].iov_len);
+rqst->rq_iov[0].iov_len += rqst->rq_iov[i].iov_len;
+}
+memset(rqst->rq_iov[0].iov_base + rqst->rq_iov[0].iov_len,
+0, num_padding);
+rqst->rq_iov[0].iov_len += num_padding;
+rqst->rq_nvec = 1;
+} else {
 rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding;
 rqst->rq_iov[rqst->rq_nvec].iov_len = num_padding;
 rqst->rq_nvec++;
-len += num_padding;
 }
+len += num_padding;
+out:
 shdr->NextCommand = cpu_to_le32(len);
 }
 
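For illustration, a standalone user-space C sketch of the 8-byte padding arithmetic and iov flattening that the hunk above performs before handing the frame to the encryption layer; the buffer sizes and contents are invented for the example.

/* Sketch only: pad a length to the next 8-byte boundary and fold a second
 * iovec plus the zero padding into the first buffer. */
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

int main(void)
{
	char buf0[64] = "HDR+BODY";            /* first iov, with spare room */
	char buf1[] = "TRAILER";               /* second iov to be folded in */
	struct iovec iov[2] = {
		{ .iov_base = buf0, .iov_len = 8 },
		{ .iov_base = buf1, .iov_len = 7 },
	};
	size_t len = iov[0].iov_len + iov[1].iov_len;        /* 15 */
	size_t num_padding = (len & 7) ? 8 - (len & 7) : 0;  /* 1 */

	/* flatten: append iov[1] and then the zero padding to iov[0] */
	memcpy((char *)iov[0].iov_base + iov[0].iov_len,
	       iov[1].iov_base, iov[1].iov_len);
	iov[0].iov_len += iov[1].iov_len;
	memset((char *)iov[0].iov_base + iov[0].iov_len, 0, num_padding);
	iov[0].iov_len += num_padding;

	printf("len=%zu padding=%zu flattened=%zu\n",
	       len, num_padding, iov[0].iov_len);            /* 15 1 16 */
	return 0;
}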
@@ -5320,6 +5342,7 @@ struct smb_version_operations smb20_operations = {
 .llseek = smb3_llseek,
 .is_status_io_timeout = smb2_is_status_io_timeout,
 .is_network_name_deleted = smb2_is_network_name_deleted,
+.rename_pending_delete = smb2_rename_pending_delete,
 };
 #endif /* CIFS_ALLOW_INSECURE_LEGACY */
 
@@ -5425,6 +5448,7 @@ struct smb_version_operations smb21_operations = {
 .llseek = smb3_llseek,
 .is_status_io_timeout = smb2_is_status_io_timeout,
 .is_network_name_deleted = smb2_is_network_name_deleted,
+.rename_pending_delete = smb2_rename_pending_delete,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -5541,6 +5565,7 @@ struct smb_version_operations smb30_operations = {
 .llseek = smb3_llseek,
 .is_status_io_timeout = smb2_is_status_io_timeout,
 .is_network_name_deleted = smb2_is_network_name_deleted,
+.rename_pending_delete = smb2_rename_pending_delete,
 };
 
 struct smb_version_operations smb311_operations = {
@@ -5657,6 +5682,7 @@ struct smb_version_operations smb311_operations = {
 .llseek = smb3_llseek,
 .is_status_io_timeout = smb2_is_status_io_timeout,
 .is_network_name_deleted = smb2_is_network_name_deleted,
+.rename_pending_delete = smb2_rename_pending_delete,
 };
 
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
@@ -318,5 +318,8 @@ int posix_info_sid_size(const void *beg, const void *end);
 int smb2_make_nfs_node(unsigned int xid, struct inode *inode,
 struct dentry *dentry, struct cifs_tcon *tcon,
 const char *full_path, umode_t mode, dev_t dev);
+int smb2_rename_pending_delete(const char *full_path,
+struct dentry *dentry,
+const unsigned int xid);
 
 #endif /* _SMB2PROTO_H */
@@ -544,13 +544,12 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_info_compound_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(posix_query_info_compound_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(hardlink_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rename_enter);
-DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rmdir_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(unlink_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_eof_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_info_compound_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_reparse_compound_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_wsl_ea_compound_enter);
-DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
 DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter);
@@ -585,13 +584,12 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_info_compound_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(posix_query_info_compound_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(hardlink_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rename_done);
-DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rmdir_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(unlink_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_eof_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_info_compound_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_reparse_compound_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(get_reparse_compound_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done);
-DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
 DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done);
@@ -631,14 +629,13 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_info_compound_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(posix_query_info_compound_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(hardlink_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rename_err);
-DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rmdir_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(unlink_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_eof_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_info_compound_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_reparse_compound_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(get_reparse_compound_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
-DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
 DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err);
 
@@ -435,6 +435,13 @@ xfs_attr_rmtval_get(
 0, &bp, &xfs_attr3_rmt_buf_ops);
 if (xfs_metadata_is_sick(error))
 xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK);
+/*
+* ENODATA from disk implies a disk medium failure;
+* ENODATA for xattrs means attribute not found, so
+* disambiguate that here.
+*/
+if (error == -ENODATA)
+error = -EIO;
 if (error)
 return error;
 
@@ -2833,6 +2833,12 @@ xfs_da_read_buf(
 &bp, ops);
 if (xfs_metadata_is_sick(error))
 xfs_dirattr_mark_sick(dp, whichfork);
+/*
+* ENODATA from disk implies a disk medium failure; ENODATA for
+* xattrs means attribute not found, so disambiguate that here.
+*/
+if (error == -ENODATA && whichfork == XFS_ATTR_FORK)
+error = -EIO;
 if (error)
 goto out_free;
 
@@ -80,6 +80,7 @@ extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
 extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
 struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head,
 extern int nfs_page_group_lock(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
 extern int nfs_page_set_headlock(struct nfs_page *req);
 extern void nfs_page_clear_headlock(struct nfs_page *req);
 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
@@ -6,9 +6,10 @@
 * Linux wait-bit related types and methods:
 */
 #include <linux/wait.h>
+#include <linux/rh_kabi.h>
 
 struct wait_bit_key {
-void *flags;
+RH_KABI_REPLACE(void *flags, unsigned long *flags)
 int bit_nr;
 unsigned long timeout;
 };
@@ -23,14 +24,14 @@ struct wait_bit_queue_entry {
 
 typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
 
-void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit);
 int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
 int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
-void wake_up_bit(void *word, int bit);
-int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
-int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
-int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
-struct wait_queue_head *bit_waitqueue(void *word, int bit);
+void wake_up_bit(unsigned long *word, int bit);
+int out_of_line_wait_on_bit(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode);
+struct wait_queue_head *bit_waitqueue(unsigned long *word, int bit);
 extern void __init wait_bit_init(void);
 
 int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
@@ -52,19 +53,21 @@ extern int bit_wait_timeout(struct wait_bit_key *key, int mode);
 
 /**
 * wait_on_bit - wait for a bit to be cleared
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* @word: the address containing the bit being waited on
+* @bit: the bit at that address being waited on
 * @mode: the task state to sleep in
 *
-* There is a standard hashed waitqueue table for generic use. This
-* is the part of the hashtable's accessor API that waits on a bit.
-* For instance, if one were to have waiters on a bitflag, one would
-* call wait_on_bit() in threads waiting for the bit to clear.
-* One uses wait_on_bit() where one is waiting for the bit to clear,
-* but has no intention of setting it.
-* Returned value will be zero if the bit was cleared, or non-zero
-* if the process received a signal and the mode permitted wakeup
-* on that signal.
+* Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+* to be cleared. The clearing of the bit must be signalled with
+* wake_up_bit(), often as clear_and_wake_up_bit().
+*
+* The process will wait on a waitqueue selected by hash from a shared
+* pool. It will only be woken on a wake_up for the target bit, even
+* if other processes on the same queue are waiting for other bits.
+*
+* Returned value will be zero if the bit was cleared in which case the
+* call has ACQUIRE semantics, or %-EINTR if the process received a
+* signal and the mode permitted wake up on that signal.
 */
 static inline int
 wait_on_bit(unsigned long *word, int bit, unsigned mode)
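A minimal kernel-C sketch of how a waiter and a waker pair up with the helpers documented above; the flags word and bit number are hypothetical and not part of this patch.

/* Sketch only: one task waits for MY_BUSY_BIT to clear, another clears it. */
#include <linux/wait_bit.h>
#include <linux/sched.h>

#define MY_BUSY_BIT 0

static unsigned long my_flags;

static int waiter(void)
{
	/* returns 0 once MY_BUSY_BIT is clear, -EINTR on a signal */
	return wait_on_bit(&my_flags, MY_BUSY_BIT, TASK_INTERRUPTIBLE);
}

static void waker(void)
{
	/* clear the bit with RELEASE semantics and wake any waiters */
	clear_and_wake_up_bit(MY_BUSY_BIT, &my_flags);
}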
@@ -79,17 +82,20 @@ wait_on_bit(unsigned long *word, int bit, unsigned mode)
 
 /**
 * wait_on_bit_io - wait for a bit to be cleared
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* @word: the address containing the bit being waited on
+* @bit: the bit at that address being waited on
 * @mode: the task state to sleep in
 *
-* Use the standard hashed waitqueue table to wait for a bit
-* to be cleared. This is similar to wait_on_bit(), but calls
-* io_schedule() instead of schedule() for the actual waiting.
+* Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+* to be cleared. The clearing of the bit must be signalled with
+* wake_up_bit(), often as clear_and_wake_up_bit().
 *
-* Returned value will be zero if the bit was cleared, or non-zero
-* if the process received a signal and the mode permitted wakeup
-* on that signal.
+* This is similar to wait_on_bit(), but calls io_schedule() instead of
+* schedule() for the actual waiting.
+*
+* Returned value will be zero if the bit was cleared in which case the
+* call has ACQUIRE semantics, or %-EINTR if the process received a
+* signal and the mode permitted wake up on that signal.
 */
 static inline int
 wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
@@ -103,19 +109,24 @@ wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
 }
 
 /**
-* wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* wait_on_bit_timeout - wait for a bit to be cleared or a timeout to elapse
+* @word: the address containing the bit being waited on
+* @bit: the bit at that address being waited on
 * @mode: the task state to sleep in
 * @timeout: timeout, in jiffies
 *
-* Use the standard hashed waitqueue table to wait for a bit
-* to be cleared. This is similar to wait_on_bit(), except also takes a
-* timeout parameter.
+* Wait for the given bit in an unsigned long or bitmap (see
+* DECLARE_BITMAP()) to be cleared, or for a timeout to expire. The
+* clearing of the bit must be signalled with wake_up_bit(), often as
+* clear_and_wake_up_bit().
 *
-* Returned value will be zero if the bit was cleared before the
-* @timeout elapsed, or non-zero if the @timeout elapsed or process
-* received a signal and the mode permitted wakeup on that signal.
+* This is similar to wait_on_bit(), except it also takes a timeout
+* parameter.
+*
+* Returned value will be zero if the bit was cleared in which case the
+* call has ACQUIRE semantics, or %-EINTR if the process received a
+* signal and the mode permitted wake up on that signal, or %-EAGAIN if the
+* timeout elapsed.
 */
 static inline int
 wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
@@ -131,19 +142,21 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
 
 /**
 * wait_on_bit_action - wait for a bit to be cleared
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* @word: the address containing the bit waited on
+* @bit: the bit at that address being waited on
 * @action: the function used to sleep, which may take special actions
 * @mode: the task state to sleep in
 *
-* Use the standard hashed waitqueue table to wait for a bit
-* to be cleared, and allow the waiting action to be specified.
-* This is like wait_on_bit() but allows fine control of how the waiting
-* is done.
+* Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+* to be cleared. The clearing of the bit must be signalled with
+* wake_up_bit(), often as clear_and_wake_up_bit().
 *
-* Returned value will be zero if the bit was cleared, or non-zero
-* if the process received a signal and the mode permitted wakeup
-* on that signal.
+* This is similar to wait_on_bit(), but calls @action() instead of
+* schedule() for the actual waiting.
+*
+* Returned value will be zero if the bit was cleared in which case the
+* call has ACQUIRE semantics, or the error code returned by @action if
+* that call returned non-zero.
 */
 static inline int
 wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
@@ -156,23 +169,22 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
 }
 
 /**
-* wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* wait_on_bit_lock - wait for a bit to be cleared, then set it
+* @word: the address containing the bit being waited on
+* @bit: the bit of the word being waited on and set
 * @mode: the task state to sleep in
 *
-* There is a standard hashed waitqueue table for generic use. This
-* is the part of the hashtable's accessor API that waits on a bit
-* when one intends to set it, for instance, trying to lock bitflags.
-* For instance, if one were to have waiters trying to set bitflag
-* and waiting for it to clear before setting it, one would call
-* wait_on_bit() in threads waiting to be able to set the bit.
-* One uses wait_on_bit_lock() where one is waiting for the bit to
-* clear with the intention of setting it, and when done, clearing it.
+* Wait for the given bit in an unsigned long or bitmap (see
+* DECLARE_BITMAP()) to be cleared. The clearing of the bit must be
+* signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As
+* soon as it is clear, atomically set it and return.
 *
-* Returns zero if the bit was (eventually) found to be clear and was
-* set. Returns non-zero if a signal was delivered to the process and
-* the @mode allows that signal to wake the process.
+* This is similar to wait_on_bit(), but sets the bit before returning.
+*
+* Returned value will be zero if the bit was successfully set in which
+* case the call has the same memory sequencing semantics as
+* test_and_clear_bit(), or %-EINTR if the process received a signal and
+* the mode permitted wake up on that signal.
 */
 static inline int
 wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
@@ -184,15 +196,18 @@ wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
 }
 
 /**
-* wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* wait_on_bit_lock_io - wait for a bit to be cleared, then set it
+* @word: the address containing the bit being waited on
+* @bit: the bit of the word being waited on and set
 * @mode: the task state to sleep in
 *
-* Use the standard hashed waitqueue table to wait for a bit
-* to be cleared and then to atomically set it. This is similar
-* to wait_on_bit(), but calls io_schedule() instead of schedule()
-* for the actual waiting.
+* Wait for the given bit in an unsigned long or bitmap (see
+* DECLARE_BITMAP()) to be cleared. The clearing of the bit must be
+* signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As
+* soon as it is clear, atomically set it and return.
+*
+* This is similar to wait_on_bit_lock(), but calls io_schedule() instead
+* of schedule().
 *
 * Returns zero if the bit was (eventually) found to be clear and was
 * set. Returns non-zero if a signal was delivered to the process and
@@ -208,21 +223,19 @@ wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
 }
 
 /**
-* wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
-* @word: the word being waited on, a kernel virtual address
-* @bit: the bit of the word being waited on
+* wait_on_bit_lock_action - wait for a bit to be cleared, then set it
+* @word: the address containing the bit being waited on
+* @bit: the bit of the word being waited on and set
 * @action: the function used to sleep, which may take special actions
 * @mode: the task state to sleep in
 *
-* Use the standard hashed waitqueue table to wait for a bit
-* to be cleared and then to set it, and allow the waiting action
-* to be specified.
-* This is like wait_on_bit() but allows fine control of how the waiting
-* is done.
+* This is similar to wait_on_bit_lock(), but calls @action() instead of
+* schedule() for the actual waiting.
 *
-* Returns zero if the bit was (eventually) found to be clear and was
-* set. Returns non-zero if a signal was delivered to the process and
-* the @mode allows that signal to wake the process.
+* Returned value will be zero if the bit was successfully set in which
+* case the call has the same memory sequencing semantics as
+* test_and_clear_bit(), or the error code returned by @action if that
+* call returned non-zero.
 */
 static inline int
 wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
@@ -269,6 +282,22 @@ __out: __ret; \
 ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
 schedule())
 
+/**
+* wait_var_event - wait for a variable to be updated and notified
+* @var: the address of variable being waited on
+* @condition: the condition to wait for
+*
+* Wait for a @condition to be true, only re-checking when a wake up is
+* received for the given @var (an arbitrary kernel address which need
+* not be directly related to the given condition, but usually is).
+*
+* The process will wait on a waitqueue selected by hash from a shared
+* pool. It will only be woken on a wake_up for the given address.
+*
+* The condition should normally use smp_load_acquire() or a similarly
+* ordered access to ensure that any changes to memory made before the
+* condition became true will be visible after the wait completes.
+*/
 #define wait_var_event(var, condition) \
 do { \
 might_sleep(); \
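A minimal kernel-C sketch of the wait_var_event() pairing described above, with the waker following the store-release-then-wake pattern that store_release_wake_up() (added further down in this file) encapsulates; the ready flag is hypothetical.

/* Sketch only: waiter blocks until 'ready' becomes non-zero. */
#include <linux/wait_bit.h>

static int ready;

static void waiter(void)
{
	/* condition re-checked only when a wake up arrives for &ready */
	wait_var_event(&ready, smp_load_acquire(&ready));
}

static void waker(void)
{
	smp_store_release(&ready, 1);
	smp_mb();		/* order the store before the waiter check */
	wake_up_var(&ready);
}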
@@ -281,6 +310,24 @@ do { \
 ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \
 schedule())
 
+/**
+* wait_var_event_killable - wait for a variable to be updated and notified
+* @var: the address of variable being waited on
+* @condition: the condition to wait for
+*
+* Wait for a @condition to be true or a fatal signal to be received,
+* only re-checking the condition when a wake up is received for the given
+* @var (an arbitrary kernel address which need not be directly related
+* to the given condition, but usually is).
+*
+* This is similar to wait_var_event() but returns a value which is
+* 0 if the condition became true, or %-ERESTARTSYS if a fatal signal
+* was received.
+*
+* The condition should normally use smp_load_acquire() or a similarly
+* ordered access to ensure that any changes to memory made before the
+* condition became true will be visible after the wait completes.
+*/
 #define wait_var_event_killable(var, condition) \
 ({ \
 int __ret = 0; \
@@ -295,6 +342,26 @@ do { \
 TASK_UNINTERRUPTIBLE, 0, timeout, \
 __ret = schedule_timeout(__ret))
 
+/**
+* wait_var_event_timeout - wait for a variable to be updated or a timeout to expire
+* @var: the address of variable being waited on
+* @condition: the condition to wait for
+* @timeout: maximum time to wait in jiffies
+*
+* Wait for a @condition to be true or a timeout to expire, only
+* re-checking the condition when a wake up is received for the given
+* @var (an arbitrary kernel address which need not be directly related
+* to the given condition, but usually is).
+*
+* This is similar to wait_var_event() but returns a value which is 0 if
+* the timeout expired and the condition was still false, or the
+* remaining time left in the timeout (but at least 1) if the condition
+* was found to be true.
+*
+* The condition should normally use smp_load_acquire() or a similarly
+* ordered access to ensure that any changes to memory made before the
+* condition became true will be visible after the wait completes.
+*/
 #define wait_var_event_timeout(var, condition, timeout) \
 ({ \
 long __ret = timeout; \
@@ -308,6 +375,23 @@ do { \
 ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \
 schedule())
 
+/**
+* wait_var_event_killable - wait for a variable to be updated and notified
+* @var: the address of variable being waited on
+* @condition: the condition to wait for
+*
+* Wait for a @condition to be true or a signal to be received, only
+* re-checking the condition when a wake up is received for the given
+* @var (an arbitrary kernel address which need not be directly related
+* to the given condition, but usually is).
+*
+* This is similar to wait_var_event() but returns a value which is 0 if
+* the condition became true, or %-ERESTARTSYS if a signal was received.
+*
+* The condition should normally use smp_load_acquire() or a similarly
+* ordered access to ensure that any changes to memory made before the
+* condition became true will be visible after the wait completes.
+*/
 #define wait_var_event_interruptible(var, condition) \
 ({ \
 int __ret = 0; \
@@ -318,15 +402,122 @@ do { \
 })
 
 /**
-* clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
+* wait_var_event_any_lock - wait for a variable to be updated under a lock
+* @var: the address of the variable being waited on
+* @condition: condition to wait for
+* @lock: the object that is locked to protect updates to the variable
+* @type: prefix on lock and unlock operations
+* @state: waiting state, %TASK_UNINTERRUPTIBLE etc.
 *
-* @bit: the bit of the word being waited on
-* @word: the word being waited on, a kernel virtual address
+* Wait for a condition which can only be reliably tested while holding
+* a lock. The variables assessed in the condition will normal be updated
+* under the same lock, and the wake up should be signalled with
+* wake_up_var_locked() under the same lock.
 *
-* You can use this helper if bitflags are manipulated atomically rather than
-* non-atomically under a lock.
+* This is similar to wait_var_event(), but assumes a lock is held
+* while calling this function and while updating the variable.
+*
+* This must be called while the given lock is held and the lock will be
+* dropped when schedule() is called to wait for a wake up, and will be
+* reclaimed before testing the condition again. The functions used to
+* unlock and lock the object are constructed by appending _unlock and _lock
+* to @type.
+*
+* Return %-ERESTARTSYS if a signal arrives which is allowed to interrupt
+* the wait according to @state.
 */
-static inline void clear_and_wake_up_bit(int bit, void *word)
+#define wait_var_event_any_lock(var, condition, lock, type, state) \
+({ \
+int __ret = 0; \
+if (!(condition)) \
+__ret = ___wait_var_event(var, condition, state, 0, 0, \
+type ## _unlock(lock); \
+schedule(); \
+type ## _lock(lock)); \
+__ret; \
+})
+
+/**
+* wait_var_event_spinlock - wait for a variable to be updated under a spinlock
+* @var: the address of the variable being waited on
+* @condition: condition to wait for
+* @lock: the spinlock which protects updates to the variable
+*
+* Wait for a condition which can only be reliably tested while holding
+* a spinlock. The variables assessed in the condition will normal be updated
+* under the same spinlock, and the wake up should be signalled with
+* wake_up_var_locked() under the same spinlock.
+*
+* This is similar to wait_var_event(), but assumes a spinlock is held
+* while calling this function and while updating the variable.
+*
+* This must be called while the given lock is held and the lock will be
+* dropped when schedule() is called to wait for a wake up, and will be
+* reclaimed before testing the condition again.
+*/
+#define wait_var_event_spinlock(var, condition, lock) \
+wait_var_event_any_lock(var, condition, lock, spin, TASK_UNINTERRUPTIBLE)
+
+/**
+* wait_var_event_mutex - wait for a variable to be updated under a mutex
+* @var: the address of the variable being waited on
+* @condition: condition to wait for
+* @mutex: the mutex which protects updates to the variable
+*
+* Wait for a condition which can only be reliably tested while holding
+* a mutex. The variables assessed in the condition will normal be
+* updated under the same mutex, and the wake up should be signalled
+* with wake_up_var_locked() under the same mutex.
+*
+* This is similar to wait_var_event(), but assumes a mutex is held
+* while calling this function and while updating the variable.
+*
+* This must be called while the given mutex is held and the mutex will be
+* dropped when schedule() is called to wait for a wake up, and will be
+* reclaimed before testing the condition again.
+*/
+#define wait_var_event_mutex(var, condition, lock) \
+wait_var_event_any_lock(var, condition, lock, mutex, TASK_UNINTERRUPTIBLE)
+
+/**
+* wake_up_var_protected - wake up waiters for a variable asserting that it is safe
+* @var: the address of the variable being waited on
+* @cond: the condition which afirms this is safe
+*
+* When waking waiters which use wait_var_event_any_lock() the waker must be
+* holding the reelvant lock to avoid races. This version of wake_up_var()
+* asserts that the relevant lock is held and so no barrier is needed.
+* The @cond is only tested when CONFIG_LOCKDEP is enabled.
+*/
+#define wake_up_var_protected(var, cond) \
+do { \
+lockdep_assert(cond); \
+wake_up_var(var); \
+} while (0)
+
+/**
+* wake_up_var_locked - wake up waiters for a variable while holding a spinlock or mutex
+* @var: the address of the variable being waited on
+* @lock: The spinlock or mutex what protects the variable
+*
+* Send a wake up for the given variable which should be waited for with
+* wait_var_event_spinlock() or wait_var_event_mutex(). Unlike wake_up_var(),
+* no extra barriers are needed as the locking provides sufficient sequencing.
+*/
+#define wake_up_var_locked(var, lock) \
+wake_up_var_protected(var, lockdep_is_held(lock))
+
+/**
+* clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
+* @bit: the bit of the word being waited on
+* @word: the address containing the bit being waited on
+*
+* The designated bit is cleared and any tasks waiting in wait_on_bit()
+* or similar will be woken. This call has RELEASE semantics so that
+* any changes to memory made before this call are guaranteed to be visible
+* after the corresponding wait_on_bit() completes.
+*/
+static inline void clear_and_wake_up_bit(int bit, unsigned long *word)
 {
 clear_bit_unlock(bit, word);
 /* See wake_up_bit() for which memory barrier you need to use. */
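A minimal kernel-C sketch of the lock-protected variants added above; the structure, spinlock and pending counter are hypothetical.

/* Sketch only: wait for a counter protected by a spinlock to reach zero. */
#include <linux/wait_bit.h>
#include <linux/spinlock.h>

struct widget {
	spinlock_t lock;
	int pending;
};

static void widget_wait_idle(struct widget *w)
{
	spin_lock(&w->lock);
	/* the lock is dropped around schedule() and re-taken before re-checking */
	wait_var_event_spinlock(&w->pending, w->pending == 0, &w->lock);
	spin_unlock(&w->lock);
}

static void widget_put(struct widget *w)
{
	spin_lock(&w->lock);
	if (--w->pending == 0)
		wake_up_var_locked(&w->pending, &w->lock);
	spin_unlock(&w->lock);
}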
@ -334,4 +525,64 @@ static inline void clear_and_wake_up_bit(int bit, void *word)
|
|||||||
wake_up_bit(word, bit);
|
wake_up_bit(word, bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test_and_clear_wake_up_bit - clear a bit if it was set: wake up anyone waiting on that bit
|
||||||
|
* @bit: the bit of the word being waited on
|
||||||
|
* @word: the address of memory containing that bit
|
||||||
|
*
|
||||||
|
* If the bit is set and can be atomically cleared, any tasks waiting in
|
||||||
|
* wait_on_bit() or similar will be woken. This call has the same
|
||||||
|
* complete ordering semantics as test_and_clear_bit(). Any changes to
|
||||||
|
* memory made before this call are guaranteed to be visible after the
|
||||||
|
* corresponding wait_on_bit() completes.
|
||||||
|
*
|
||||||
|
* Returns %true if the bit was successfully set and the wake up was sent.
|
||||||
|
*/
|
||||||
|
static inline bool test_and_clear_wake_up_bit(int bit, unsigned long *word)
|
||||||
|
{
|
||||||
|
if (!test_and_clear_bit(bit, word))
|
||||||
|
return false;
|
||||||
|
/* no extra barrier required */
|
||||||
|
wake_up_bit(word, bit);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* atomic_dec_and_wake_up - decrement an atomic_t and if zero, wake up waiters
|
||||||
|
* @var: the variable to dec and test
|
||||||
|
*
|
||||||
|
* Decrements the atomic variable and if it reaches zero, send a wake_up to any
|
||||||
|
* processes waiting on the variable.
|
||||||
|
*
|
||||||
|
* This function has the same complete ordering semantics as atomic_dec_and_test.
|
||||||
|
*
|
||||||
|
* Returns %true is the variable reaches zero and the wake up was sent.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static inline bool atomic_dec_and_wake_up(atomic_t *var)
|
||||||
|
{
|
||||||
|
if (!atomic_dec_and_test(var))
|
||||||
|
return false;
|
||||||
|
/* No extra barrier required */
|
||||||
|
wake_up_var(var);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* store_release_wake_up - update a variable and send a wake_up
|
||||||
|
* @var: the address of the variable to be updated and woken
|
||||||
|
* @val: the value to store in the variable.
|
||||||
|
*
|
||||||
|
* Store the given value in the variable send a wake up to any tasks
|
||||||
|
* waiting on the variable. All necessary barriers are included to ensure
|
||||||
|
* the task calling wait_var_event() sees the new value and all values
|
||||||
|
* written to memory before this call.
|
||||||
|
*/
|
||||||
|
#define store_release_wake_up(var, val) \
|
||||||
|
do { \
|
||||||
|
smp_store_release(var, val); \
|
||||||
|
smp_mb(); \
|
||||||
|
wake_up_var(var); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#endif /* _LINUX_WAIT_BIT_H */
|
#endif /* _LINUX_WAIT_BIT_H */
|
||||||
|
|||||||
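
The helpers above bundle a state change with the matching wake-up. The sketch below is a usage illustration only (the demo_* names are invented, not part of this patch): it drains an illustrative in-flight counter with atomic_dec_and_wake_up()/wait_var_event(), then publishes a state change with store_release_wake_up().

#include <linux/wait_bit.h>
#include <linux/atomic.h>
#include <linux/printk.h>

/* Illustrative only: a device with outstanding requests counted in an atomic_t. */
struct demo_dev {
        atomic_t inflight;      /* requests still outstanding */
        int state;              /* published with store_release_wake_up() */
};

static void demo_complete_one(struct demo_dev *d)
{
        /* Fully ordered, so the waiter needs no extra barrier before re-checking. */
        if (atomic_dec_and_wake_up(&d->inflight))
                pr_debug("all requests drained\n");
}

static void demo_quiesce(struct demo_dev *d)
{
        /* Sleep until every outstanding request has completed. */
        wait_var_event(&d->inflight, atomic_read(&d->inflight) == 0);

        /* Publish the new state and wake anyone in wait_var_event(&d->state, ...). */
        store_release_wake_up(&d->state, 1);
}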

include/net/bluetooth/hci_core.h

@@ -1199,6 +1199,27 @@ static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev,
 	return NULL;
 }
 
+static inline struct hci_conn *hci_conn_hash_lookup_role(struct hci_dev *hdev,
+							  __u8 type, __u8 role,
+							  bdaddr_t *ba)
+{
+	struct hci_conn_hash *h = &hdev->conn_hash;
+	struct hci_conn *c;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(c, &h->list, list) {
+		if (c->type == type && c->role == role && !bacmp(&c->dst, ba)) {
+			rcu_read_unlock();
+			return c;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return NULL;
+}
+
 static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev,
 							bdaddr_t *ba,
 							__u8 ba_type)

include/net/mana/gdma.h

@@ -10,6 +10,7 @@
 #include "shm_channel.h"
 
 #define GDMA_STATUS_MORE_ENTRIES	0x00000105
+#define GDMA_STATUS_CMD_UNSUPPORTED	0xffffffff
 
 /* Structures labeled with "HW DATA" are exchanged with the hardware. All of
  * them are naturally aligned and hence don't need __packed.
@@ -58,9 +59,10 @@ enum gdma_eqe_type {
 	GDMA_EQE_HWC_INIT_EQ_ID_DB	= 129,
 	GDMA_EQE_HWC_INIT_DATA		= 130,
 	GDMA_EQE_HWC_INIT_DONE		= 131,
-	GDMA_EQE_HWC_SOC_RECONFIG	= 132,
+	GDMA_EQE_HWC_FPGA_RECONFIG	= 132,
 	GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
 	GDMA_EQE_HWC_SOC_SERVICE	= 134,
+	GDMA_EQE_HWC_RESET_REQUEST	= 135,
 	GDMA_EQE_RNIC_QP_FATAL		= 176,
 };
 
@@ -403,6 +405,8 @@ struct gdma_context {
 	u32 test_event_eq_id;
 
 	bool is_pf;
+	bool in_service;
+
 	phys_addr_t bar0_pa;
 	void __iomem *bar0_va;
 	void __iomem *shm_base;
@@ -578,12 +582,20 @@ enum {
 /* Driver can handle holes (zeros) in the device list */
 #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
 
+/* Driver can self reset on EQE notification */
+#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
+
+/* Driver can self reset on FPGA Reconfig EQE notification */
+#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
 	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
 	 GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
-	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
+	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
+	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
+	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
@@ -910,4 +922,9 @@ void mana_unregister_debugfs(void);
 
 int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
 
+int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state);
+int mana_gd_resume(struct pci_dev *pdev);
+
+bool mana_need_log(struct gdma_context *gc, int err);
+
 #endif /* _GDMA_H */

include/net/mana/mana.h

@@ -402,6 +402,65 @@ struct mana_ethtool_stats {
 	u64 rx_cqe_unknown_type;
 };
 
+struct mana_ethtool_phy_stats {
+	/* Drop Counters */
+	u64 rx_pkt_drop_phy;
+	u64 tx_pkt_drop_phy;
+
+	/* Per TC traffic Counters */
+	u64 rx_pkt_tc0_phy;
+	u64 tx_pkt_tc0_phy;
+	u64 rx_pkt_tc1_phy;
+	u64 tx_pkt_tc1_phy;
+	u64 rx_pkt_tc2_phy;
+	u64 tx_pkt_tc2_phy;
+	u64 rx_pkt_tc3_phy;
+	u64 tx_pkt_tc3_phy;
+	u64 rx_pkt_tc4_phy;
+	u64 tx_pkt_tc4_phy;
+	u64 rx_pkt_tc5_phy;
+	u64 tx_pkt_tc5_phy;
+	u64 rx_pkt_tc6_phy;
+	u64 tx_pkt_tc6_phy;
+	u64 rx_pkt_tc7_phy;
+	u64 tx_pkt_tc7_phy;
+
+	u64 rx_byte_tc0_phy;
+	u64 tx_byte_tc0_phy;
+	u64 rx_byte_tc1_phy;
+	u64 tx_byte_tc1_phy;
+	u64 rx_byte_tc2_phy;
+	u64 tx_byte_tc2_phy;
+	u64 rx_byte_tc3_phy;
+	u64 tx_byte_tc3_phy;
+	u64 rx_byte_tc4_phy;
+	u64 tx_byte_tc4_phy;
+	u64 rx_byte_tc5_phy;
+	u64 tx_byte_tc5_phy;
+	u64 rx_byte_tc6_phy;
+	u64 tx_byte_tc6_phy;
+	u64 rx_byte_tc7_phy;
+	u64 tx_byte_tc7_phy;
+
+	/* Per TC pause Counters */
+	u64 rx_pause_tc0_phy;
+	u64 tx_pause_tc0_phy;
+	u64 rx_pause_tc1_phy;
+	u64 tx_pause_tc1_phy;
+	u64 rx_pause_tc2_phy;
+	u64 tx_pause_tc2_phy;
+	u64 rx_pause_tc3_phy;
+	u64 tx_pause_tc3_phy;
+	u64 rx_pause_tc4_phy;
+	u64 tx_pause_tc4_phy;
+	u64 rx_pause_tc5_phy;
+	u64 tx_pause_tc5_phy;
+	u64 rx_pause_tc6_phy;
+	u64 tx_pause_tc6_phy;
+	u64 rx_pause_tc7_phy;
+	u64 tx_pause_tc7_phy;
+};
+
 struct mana_context {
 	struct gdma_dev *gdma_dev;
 
@@ -472,6 +531,8 @@ struct mana_port_context {
 
 	struct mana_ethtool_stats eth_stats;
 
+	struct mana_ethtool_phy_stats phy_stats;
+
 	/* Debugfs */
 	struct dentry *mana_port_debugfs;
 };
@@ -499,6 +560,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
 void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
 int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
 void mana_query_gf_stats(struct mana_port_context *apc);
+void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
 
@@ -525,6 +587,7 @@ enum mana_command_code {
 	MANA_FENCE_RQ		= 0x20006,
 	MANA_CONFIG_VPORT_RX	= 0x20007,
 	MANA_QUERY_VPORT_CONFIG	= 0x20008,
+	MANA_QUERY_PHY_STAT	= 0x2000c,
 
 	/* Privileged commands for the PF mode */
 	MANA_REGISTER_FILTER	= 0x28000,
@@ -687,6 +750,74 @@ struct mana_query_gf_stat_resp {
 	u64 tx_err_gdma;
 }; /* HW DATA */
 
+/* Query phy stats */
+struct mana_query_phy_stat_req {
+	struct gdma_req_hdr hdr;
+	u64 req_stats;
+}; /* HW DATA */
+
+struct mana_query_phy_stat_resp {
+	struct gdma_resp_hdr hdr;
+	u64 reported_stats;
+
+	/* Aggregate Drop Counters */
+	u64 rx_pkt_drop_phy;
+	u64 tx_pkt_drop_phy;
+
+	/* Per TC(Traffic class) traffic Counters */
+	u64 rx_pkt_tc0_phy;
+	u64 tx_pkt_tc0_phy;
+	u64 rx_pkt_tc1_phy;
+	u64 tx_pkt_tc1_phy;
+	u64 rx_pkt_tc2_phy;
+	u64 tx_pkt_tc2_phy;
+	u64 rx_pkt_tc3_phy;
+	u64 tx_pkt_tc3_phy;
+	u64 rx_pkt_tc4_phy;
+	u64 tx_pkt_tc4_phy;
+	u64 rx_pkt_tc5_phy;
+	u64 tx_pkt_tc5_phy;
+	u64 rx_pkt_tc6_phy;
+	u64 tx_pkt_tc6_phy;
+	u64 rx_pkt_tc7_phy;
+	u64 tx_pkt_tc7_phy;
+
+	u64 rx_byte_tc0_phy;
+	u64 tx_byte_tc0_phy;
+	u64 rx_byte_tc1_phy;
+	u64 tx_byte_tc1_phy;
+	u64 rx_byte_tc2_phy;
+	u64 tx_byte_tc2_phy;
+	u64 rx_byte_tc3_phy;
+	u64 tx_byte_tc3_phy;
+	u64 rx_byte_tc4_phy;
+	u64 tx_byte_tc4_phy;
+	u64 rx_byte_tc5_phy;
+	u64 tx_byte_tc5_phy;
+	u64 rx_byte_tc6_phy;
+	u64 tx_byte_tc6_phy;
+	u64 rx_byte_tc7_phy;
+	u64 tx_byte_tc7_phy;
+
+	/* Per TC(Traffic Class) pause Counters */
+	u64 rx_pause_tc0_phy;
+	u64 tx_pause_tc0_phy;
+	u64 rx_pause_tc1_phy;
+	u64 tx_pause_tc1_phy;
+	u64 rx_pause_tc2_phy;
+	u64 tx_pause_tc2_phy;
+	u64 rx_pause_tc3_phy;
+	u64 tx_pause_tc3_phy;
+	u64 rx_pause_tc4_phy;
+	u64 tx_pause_tc4_phy;
+	u64 rx_pause_tc5_phy;
+	u64 tx_pause_tc5_phy;
+	u64 rx_pause_tc6_phy;
+	u64 tx_pause_tc6_phy;
+	u64 rx_pause_tc7_phy;
+	u64 tx_pause_tc7_phy;
+}; /* HW DATA */
+
 /* Configure vPort Rx Steering */
 struct mana_cfg_rx_steer_req_v2 {
 	struct gdma_req_hdr hdr;

io_uring/waitid.c

@@ -272,13 +272,14 @@ static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
 	if (!pid_child_should_wake(wo, p))
 		return 0;
 
+	list_del_init(&wait->entry);
+
 	/* cancel is in progress */
 	if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK)
 		return 1;
 
 	req->io_task_work.func = io_waitid_cb;
 	io_req_task_work_add(req);
-	list_del_init(&wait->entry);
 	return 1;
 }
 

kernel/sched/wait_bit.c

@@ -9,7 +9,7 @@
 
 static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
 
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
+wait_queue_head_t *bit_waitqueue(unsigned long *word, int bit)
 {
 	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
 	unsigned long val = (unsigned long)word << shift | bit;
@@ -55,7 +55,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
 }
 EXPORT_SYMBOL(__wait_on_bit);
 
-int __sched out_of_line_wait_on_bit(void *word, int bit,
+int __sched out_of_line_wait_on_bit(unsigned long *word, int bit,
 				    wait_bit_action_f *action, unsigned mode)
 {
 	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
@@ -66,7 +66,7 @@ int __sched out_of_line_wait_on_bit(void *word, int bit,
 EXPORT_SYMBOL(out_of_line_wait_on_bit);
 
 int __sched out_of_line_wait_on_bit_timeout(
-	void *word, int bit, wait_bit_action_f *action,
+	unsigned long *word, int bit, wait_bit_action_f *action,
 	unsigned mode, unsigned long timeout)
 {
 	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
@@ -108,7 +108,7 @@ __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry
 }
 EXPORT_SYMBOL(__wait_on_bit_lock);
 
-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
+int __sched out_of_line_wait_on_bit_lock(unsigned long *word, int bit,
 					 wait_bit_action_f *action, unsigned mode)
 {
 	struct wait_queue_head *wq_head = bit_waitqueue(word, bit);
@@ -118,7 +118,7 @@ int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
 }
 EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
 
-void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
+void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit)
 {
 	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
 
@@ -128,23 +128,31 @@ void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
 EXPORT_SYMBOL(__wake_up_bit);
 
 /**
- * wake_up_bit - wake up a waiter on a bit
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wake_up_bit - wake up waiters on a bit
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
  *
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hash-table's accessor API that wakes up waiters
- * on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
+ * Wake up any process waiting in wait_on_bit() or similar for the
+ * given bit to be cleared.
  *
- * In order for this to function properly, as it uses waitqueue_active()
- * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_atomic(), but in some
- * cases where bitflags are manipulated non-atomically under a lock, one
- * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
- * because spin_unlock() does not guarantee a memory barrier.
+ * The wake-up is sent to tasks in a waitqueue selected by hash from a
+ * shared pool.  Only those tasks on that queue which have requested
+ * wake_up on this specific address and bit will be woken, and only if the
+ * bit is clear.
+ *
+ * In order for this to function properly there must be a full memory
+ * barrier after the bit is cleared and before this function is called.
+ * If the bit was cleared atomically, such as by clear_bit(), then
+ * smp_mb__after_atomic() can be used; otherwise smp_mb() is needed.
+ * If the bit was cleared with a fully-ordered operation, no further
+ * barrier is required.
+ *
+ * Normally the bit should be cleared by an operation with RELEASE
+ * semantics so that any changes to memory made before the bit is
+ * cleared are guaranteed to be visible after the matching wait_on_bit()
+ * completes.
  */
-void wake_up_bit(void *word, int bit)
+void wake_up_bit(unsigned long *word, int bit)
 {
 	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
 }
@@ -188,6 +196,36 @@ void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int
 }
 EXPORT_SYMBOL(init_wait_var_entry);
 
+/**
+ * wake_up_var - wake up waiters on a variable (kernel address)
+ * @var: the address of the variable being waited on
+ *
+ * Wake up any process waiting in wait_var_event() or similar for the
+ * given variable to change.  wait_var_event() can be waiting for an
+ * arbitrary condition to be true and associates that condition with an
+ * address.  Calling wake_up_var() suggests that the condition has been
+ * made true, but does not strictly require the condition to use the
+ * address given.
+ *
+ * The wake-up is sent to tasks in a waitqueue selected by hash from a
+ * shared pool.  Only those tasks on that queue which have requested
+ * wake_up on this specific address will be woken.
+ *
+ * In order for this to function properly there must be a full memory
+ * barrier after the variable is updated (or more accurately, after the
+ * condition waited on has been made to be true) and before this function
+ * is called.  If the variable was updated atomically, such as by
+ * atomic_dec(), then smp_mb__after_atomic() can be used.  If the
+ * variable was updated by a fully ordered operation such as
+ * atomic_dec_and_test() then no extra barrier is required.  Otherwise
+ * smp_mb() is needed.
+ *
+ * Normally the variable should be updated (the condition should be made
+ * to be true) by an operation with RELEASE semantics such as
+ * smp_store_release() so that any changes to memory made before the
+ * variable was updated are guaranteed to be visible after the matching
+ * wait_var_event() completes.
+ */
 void wake_up_var(void *var)
 {
 	__wake_up_bit(__var_waitqueue(var), var, -1);
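
The rewritten kernel-doc above spells out the waker/waiter barrier contract in prose. The following sketch is purely illustrative (the demo_* names are not from this patch) and shows the two common patterns: an atomic clear_bit() followed by an explicit barrier before wake_up_bit(), versus the RELEASE-semantics helper that needs no extra barrier.

#include <linux/wait_bit.h>
#include <linux/bitops.h>
#include <linux/atomic.h>
#include <linux/sched.h>

#define DEMO_BUSY	0		/* illustrative bit number */

static unsigned long demo_flags;	/* bit 0 == "busy" */

static void demo_finish_plain(void)
{
	clear_bit(DEMO_BUSY, &demo_flags);
	/* clear_bit() is atomic but unordered: a barrier is required before waking. */
	smp_mb__after_atomic();
	wake_up_bit(&demo_flags, DEMO_BUSY);
}

static void demo_finish_release(void)
{
	/* clear_and_wake_up_bit() clears with RELEASE semantics; no explicit barrier. */
	clear_and_wake_up_bit(DEMO_BUSY, &demo_flags);
}

static int demo_wait_idle(void)
{
	/* Sleeps until bit 0 is clear; returns -EINTR if interrupted. */
	return wait_on_bit(&demo_flags, DEMO_BUSY, TASK_INTERRUPTIBLE);
}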

mm/slub.c

@@ -897,19 +897,19 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 }
 
 #ifdef CONFIG_STACKDEPOT
-static noinline depot_stack_handle_t set_track_prepare(void)
+static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
 {
 	depot_stack_handle_t handle;
 	unsigned long entries[TRACK_ADDRS_COUNT];
 	unsigned int nr_entries;
 
 	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
-	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+	handle = stack_depot_save(entries, nr_entries, gfp_flags);
 
 	return handle;
 }
 #else
-static inline depot_stack_handle_t set_track_prepare(void)
+static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
 {
 	return 0;
 }
@@ -931,9 +931,9 @@ static void set_track_update(struct kmem_cache *s, void *object,
 }
 
 static __always_inline void set_track(struct kmem_cache *s, void *object,
-				      enum track_item alloc, unsigned long addr)
+				      enum track_item alloc, unsigned long addr, gfp_t gfp_flags)
 {
-	depot_stack_handle_t handle = set_track_prepare();
+	depot_stack_handle_t handle = set_track_prepare(gfp_flags);
 
 	set_track_update(s, object, alloc, addr, handle);
 }
@@ -1826,9 +1826,9 @@ static inline bool free_debug_processing(struct kmem_cache *s,
 static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
 static inline int check_object(struct kmem_cache *s, struct slab *slab,
 			void *object, u8 val) { return 1; }
-static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
+static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; }
 static inline void set_track(struct kmem_cache *s, void *object,
-			     enum track_item alloc, unsigned long addr) {}
+			     enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {}
 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 			    struct slab *slab) {}
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -3514,8 +3514,26 @@ new_objects:
 	pc.slab = &slab;
 	pc.orig_size = orig_size;
 	freelist = get_partial(s, node, &pc);
-	if (freelist)
-		goto check_new_slab;
+	if (freelist) {
+		if (kmem_cache_debug(s)) {
+			/*
+			 * For debug caches here we had to go through
+			 * alloc_single_from_partial() so just store the
+			 * tracking info and return the object.
+			 *
+			 * Due to disabled preemption we need to disallow
+			 * blocking. The flags are further adjusted by
+			 * gfp_nested_mask() in stack_depot itself.
+			 */
+			if (s->flags & SLAB_STORE_USER)
+				set_track(s, freelist, TRACK_ALLOC, addr,
+					  gfpflags & ~(__GFP_DIRECT_RECLAIM));
+
+			return freelist;
+		}
+
+		goto retry_load_slab;
+	}
 
 	slub_put_cpu_ptr(s->cpu_slab);
 	slab = new_slab(s, gfpflags, node);
@@ -3535,7 +3553,8 @@ new_objects:
 		goto new_objects;
 
 	if (s->flags & SLAB_STORE_USER)
-		set_track(s, freelist, TRACK_ALLOC, addr);
+		set_track(s, freelist, TRACK_ALLOC, addr,
+			  gfpflags & ~(__GFP_DIRECT_RECLAIM));
 
 	return freelist;
 }
@@ -3551,20 +3570,6 @@ new_objects:
 
 	inc_slabs_node(s, slab_nid(slab), slab->objects);
 
-check_new_slab:
-
-	if (kmem_cache_debug(s)) {
-		/*
-		 * For debug caches here we had to go through
-		 * alloc_single_from_partial() so just store the tracking info
-		 * and return the object
-		 */
-		if (s->flags & SLAB_STORE_USER)
-			set_track(s, freelist, TRACK_ALLOC, addr);
-
-		return freelist;
-	}
-
 	if (unlikely(!pfmemalloc_match(slab, gfpflags))) {
 		/*
 		 * For !pfmemalloc_match() case we don't load freelist so that
@@ -4027,8 +4032,12 @@ static noinline void free_to_partial_list(
 	unsigned long flags;
 	depot_stack_handle_t handle = 0;
 
+	/*
+	 * We cannot use GFP_NOWAIT as there are callsites where waking up
+	 * kswapd could deadlock
+	 */
 	if (s->flags & SLAB_STORE_USER)
-		handle = set_track_prepare();
+		handle = set_track_prepare(__GFP_NOWARN);
 
 	spin_lock_irqsave(&n->list_lock, flags);
 
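
The slub hunks above stop hard-coding GFP_NOWAIT for stack-depot recording: the allocation path passes the caller's mask with direct reclaim stripped, and the free path passes bare __GFP_NOWARN because, per the new comment, even a kswapd wake-up can deadlock there. A standalone illustration of that flag surgery (the demo_* helpers are not kernel code):

#include <linux/gfp.h>

/*
 * Allocation-path idea: keep the caller's context (__GFP_NOWARN, zone
 * modifiers, ...) but strip __GFP_DIRECT_RECLAIM so recording the stack
 * trace can never sleep while preemption is disabled.
 */
static inline gfp_t demo_track_alloc_gfp(gfp_t alloc_gfp)
{
	return alloc_gfp & ~(__GFP_DIRECT_RECLAIM);
}

/*
 * Free-path idea: pass only __GFP_NOWARN, i.e. no reclaim bits at all,
 * because some callers cannot even tolerate waking kswapd.
 */
static inline gfp_t demo_track_free_gfp(void)
{
	return __GFP_NOWARN;
}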

net/bluetooth/hci_event.c

@@ -3115,8 +3115,18 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
 
 	hci_dev_lock(hdev);
 
+	/* Check for existing connection:
+	 *
+	 * 1. If it doesn't exist then it must be receiver/slave role.
+	 * 2. If it does exist confirm that it is connecting/BT_CONNECT in case
+	 *    of initiator/master role since there could be a collision where
+	 *    either side is attempting to connect or something like a fuzzing
+	 *    test is trying to play tricks to destroy the hcon object before
+	 *    it even attempts to connect (e.g. hcon->state == BT_OPEN).
+	 */
 	conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
-	if (!conn) {
+	if (!conn ||
+	    (conn->role == HCI_ROLE_MASTER && conn->state != BT_CONNECT)) {
 		/* In case of error status and there is no connection pending
 		 * just unlock as there is nothing to cleanup.
 		 */
@@ -4422,6 +4432,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
 
 	bt_dev_dbg(hdev, "num %d", ev->num);
 
+	hci_dev_lock(hdev);
+
 	for (i = 0; i < ev->num; i++) {
 		struct hci_comp_pkts_info *info = &ev->handles[i];
 		struct hci_conn *conn;
@@ -4487,6 +4499,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
 	}
 
 	queue_work(hdev->workqueue, &hdev->tx_work);
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_mode_change_evt(struct hci_dev *hdev, void *data,
@@ -5649,8 +5663,18 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 	 */
 	hci_dev_clear_flag(hdev, HCI_LE_ADV);
 
-	conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr);
-	if (!conn) {
+	/* Check for existing connection:
+	 *
+	 * 1. If it doesn't exist then use the role to create a new object.
+	 * 2. If it does exist confirm that it is connecting/BT_CONNECT in case
+	 *    of initiator/master role since there could be a collision where
+	 *    either side is attempting to connect or something like a fuzzing
+	 *    test is trying to play tricks to destroy the hcon object before
+	 *    it even attempts to connect (e.g. hcon->state == BT_OPEN).
+	 */
+	conn = hci_conn_hash_lookup_role(hdev, LE_LINK, role, bdaddr);
+	if (!conn ||
+	    (conn->role == HCI_ROLE_MASTER && conn->state != BT_CONNECT)) {
 		/* In case of error status and there is no connection pending
 		 * just unlock as there is nothing to cleanup.
 		 */

net/core/filter.c

@@ -2256,6 +2256,7 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
 		if (IS_ERR(dst))
 			goto out_drop;
 
+		skb_dst_drop(skb);
 		skb_dst_set(skb, dst);
 	} else if (nh->nh_family != AF_INET6) {
 		goto out_drop;
@@ -2363,6 +2364,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
 			goto out_drop;
 		}
 
+		skb_dst_drop(skb);
 		skb_dst_set(skb, &rt->dst);
 	}
 

net/ipv4/ip_tunnel_core.c

@@ -204,6 +204,9 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
 	if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
 		return -EINVAL;
 
+	if (skb_is_gso(skb))
+		skb_gso_reset(skb);
+
 	skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
 	pskb_pull(skb, ETH_HLEN);
 	skb_reset_network_header(skb);
@@ -298,6 +301,9 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
 	if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
 		return -EINVAL;
 
+	if (skb_is_gso(skb))
+		skb_gso_reset(skb);
+
 	skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
 	pskb_pull(skb, ETH_HLEN);
 	skb_reset_network_header(skb);

net/ipv6/seg6_hmac.c

@@ -35,6 +35,7 @@
 #include <net/xfrm.h>
 
 #include <crypto/hash.h>
+#include <crypto/algapi.h>
 #include <net/seg6.h>
 #include <net/genetlink.h>
 #include <net/seg6_hmac.h>
@@ -271,7 +272,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
 	if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output))
 		return false;
 
-	if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0)
+	if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN))
 		return false;
 
 	return true;
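
The seg6 change above replaces memcmp() with crypto_memneq() for the HMAC check. memcmp() may return as soon as one byte differs, so its run time leaks how many leading bytes of the digest matched; crypto_memneq() always processes the full length. A minimal sketch of the pattern (illustrative, not the patched function; the demo_* names and length are assumptions):

#include <crypto/algapi.h>	/* crypto_memneq() */
#include <linux/types.h>

#define DEMO_MAC_LEN	32	/* illustrative digest length */

/* Constant-time check: true only if the full digest matches. */
static bool demo_mac_ok(const u8 *computed, const u8 *received)
{
	return !crypto_memneq(computed, received, DEMO_MAC_LEN);
}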

net/sunrpc/svcsock.c

@@ -255,20 +255,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
 }
 
 static int
-svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
+svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags)
 {
 	union {
 		struct cmsghdr	cmsg;
 		u8		buf[CMSG_SPACE(sizeof(u8))];
 	} u;
-	struct socket *sock = svsk->sk_sock;
+	u8 alert[2];
+	struct kvec alert_kvec = {
+		.iov_base = alert,
+		.iov_len = sizeof(alert),
+	};
+	struct msghdr msg = {
+		.msg_flags = *msg_flags,
+		.msg_control = &u,
+		.msg_controllen = sizeof(u),
+	};
 	int ret;
 
-	msg->msg_control = &u;
-	msg->msg_controllen = sizeof(u);
+	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
+		      alert_kvec.iov_len);
+	ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
+	if (ret > 0 &&
+	    tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) {
+		iov_iter_revert(&msg.msg_iter, ret);
+		ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN);
+	}
+	return ret;
+}
+
+static int
+svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg)
+{
+	int ret;
+	struct socket *sock = svsk->sk_sock;
+
 	ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
-	if (unlikely(msg->msg_controllen != sizeof(u)))
-		ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret);
+	if (msg->msg_flags & MSG_CTRUNC) {
+		msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
+		if (ret == 0 || ret == -EIO)
+			ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags);
+	}
 	return ret;
 }
 
@@ -322,7 +349,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
 		iov_iter_advance(&msg.msg_iter, seek);
 		buflen -= seek;
 	}
-	len = svc_tcp_sock_recv_cmsg(svsk, &msg);
+	len = svc_tcp_sock_recvmsg(svsk, &msg);
 	if (len > 0)
 		svc_flush_bvec(bvec, len, seek);
 
@@ -1018,7 +1045,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
 		iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
 		iov.iov_len = want;
 		iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want);
-		len = svc_tcp_sock_recv_cmsg(svsk, &msg);
+		len = svc_tcp_sock_recvmsg(svsk, &msg);
 		if (len < 0)
 			return len;
 		svsk->sk_tcplen += len;
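
Both sunrpc changes (here and in xprtsock.c below) adopt the same lazy scheme: the data path calls sock_recvmsg() with no control buffer, and only when the kernel TLS layer flags a truncated control message (MSG_CTRUNC) is a second, cmsg-aware read issued to fetch the record type and a possible alert. The userspace sketch below is illustrative only (not from this patch); it shows the underlying kTLS convention that a non-data record's type arrives as a TLS_GET_RECORD_TYPE control message, and that it is reported via MSG_CTRUNC when no cmsg space was supplied.

/* Userspace illustration of the kTLS control-message convention. */
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/tls.h>
#include <stdio.h>

#ifndef SOL_TLS
#define SOL_TLS 282
#endif

static ssize_t recv_tls_record(int fd, void *buf, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(unsigned char))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	ssize_t n = recvmsg(fd, &msg, 0);
	struct cmsghdr *c;

	/* Without msg_control space, a non-data record would instead set MSG_CTRUNC. */
	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
		if (c->cmsg_level == SOL_TLS && c->cmsg_type == TLS_GET_RECORD_TYPE)
			printf("TLS record type %u\n", *(unsigned char *)CMSG_DATA(c));
	return n;
}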

net/sunrpc/xprtsock.c

@@ -359,7 +359,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
 
 static int
 xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
-		     struct cmsghdr *cmsg, int ret)
+		     unsigned int *msg_flags, struct cmsghdr *cmsg, int ret)
 {
 	u8 content_type = tls_get_record_type(sock->sk, cmsg);
 	u8 level, description;
@@ -372,7 +372,7 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
 		 * record, even though there might be more frames
 		 * waiting to be decrypted.
 		 */
-		msg->msg_flags &= ~MSG_EOR;
+		*msg_flags &= ~MSG_EOR;
 		break;
 	case TLS_RECORD_TYPE_ALERT:
 		tls_alert_recv(sock->sk, msg, &level, &description);
@@ -387,19 +387,33 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
 }
 
 static int
-xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags)
+xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags)
 {
 	union {
 		struct cmsghdr	cmsg;
 		u8		buf[CMSG_SPACE(sizeof(u8))];
 	} u;
+	u8 alert[2];
+	struct kvec alert_kvec = {
+		.iov_base = alert,
+		.iov_len = sizeof(alert),
+	};
+	struct msghdr msg = {
+		.msg_flags = *msg_flags,
+		.msg_control = &u,
+		.msg_controllen = sizeof(u),
+	};
 	int ret;
 
-	msg->msg_control = &u;
-	msg->msg_controllen = sizeof(u);
-	ret = sock_recvmsg(sock, msg, flags);
-	if (msg->msg_controllen != sizeof(u))
-		ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret);
+	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
+		      alert_kvec.iov_len);
+	ret = sock_recvmsg(sock, &msg, flags);
+	if (ret > 0) {
+		if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT)
+			iov_iter_revert(&msg.msg_iter, ret);
+		ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg,
+					   -EAGAIN);
+	}
 	return ret;
 }
 
@@ -409,7 +423,13 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
 	ssize_t ret;
 	if (seek != 0)
 		iov_iter_advance(&msg->msg_iter, seek);
-	ret = xs_sock_recv_cmsg(sock, msg, flags);
+	ret = sock_recvmsg(sock, msg, flags);
+	/* Handle TLS inband control message lazily */
+	if (msg->msg_flags & MSG_CTRUNC) {
+		msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
+		if (ret == 0 || ret == -EIO)
+			ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags);
+	}
 	return ret > 0 ? ret + seek : ret;
 }
 
@@ -435,7 +455,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
 	       size_t count)
 {
 	iov_iter_discard(&msg->msg_iter, READ, count);
-	return xs_sock_recv_cmsg(sock, msg, flags);
+	return xs_sock_recvmsg(sock, msg, flags, 0);
 }
 
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE

Some files were not shown because too many files have changed in this diff.