Import of kernel-6.12.0-124.13.1.el10_1

parent 8e33fb9082
commit 13d687fe64
@@ -525,6 +525,7 @@ What:  /sys/devices/system/cpu/vulnerabilities
       /sys/devices/system/cpu/vulnerabilities/srbds
       /sys/devices/system/cpu/vulnerabilities/tsa
       /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
       /sys/devices/system/cpu/vulnerabilities/vmscape
Date:          January 2018
Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:   Information about CPU vulnerabilities
@@ -24,3 +24,4 @@ are configurable at compile, boot or run time.
   reg-file-data-sampling
   rsb
   indirect-target-selection
   vmscape
Documentation/admin-guide/hw-vuln/vmscape.rst (new file, 110 lines)
@@ -0,0 +1,110 @@
.. SPDX-License-Identifier: GPL-2.0

VMSCAPE
=======

VMSCAPE is a vulnerability that may allow a guest to influence the branch
prediction in host userspace. It particularly affects hypervisors like QEMU.

Even if a hypervisor has no sensitive data of its own, such as disk encryption
keys, guest userspace may be able to attack the guest kernel using the
hypervisor as a confused deputy.

Affected processors
-------------------

The following CPU families are affected by VMSCAPE:

**Intel processors:**
  - Skylake generation (parts without Enhanced IBRS)
  - Cascade Lake generation (parts affected by ITS guest/host separation)
  - Alder Lake and newer (parts affected by BHI)

Note that BHI-affected parts that use the BHB-clearing software mitigation,
e.g. Icelake, are not vulnerable to VMSCAPE.
**AMD processors:**
  - Zen series (families 0x17, 0x19, 0x1a)

**Hygon processors:**
  - Family 0x18

Mitigation
----------

Conditional IBPB
----------------

The kernel tracks when a CPU has run a potentially malicious guest and issues
an IBPB before the first exit to userspace after a VM-exit. If userspace did
not run between the VM-exit and the next VM-entry, no IBPB is issued.
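
As a rough sketch of that bookkeeping, built from the per-CPU flag
``x86_ibpb_exit_to_user`` and the ``X86_FEATURE_IBPB_EXIT_TO_USER`` feature bit
added by this change (the VM-exit side is paraphrased here rather than quoted
from the KVM code):

.. code-block:: c

	/* VM-exit path (paraphrased): remember that this CPU may now hold
	 * branch predictor state trained by the guest. */
	this_cpu_write(x86_ibpb_exit_to_user, true);

	/* First exit to userspace afterwards, see
	 * arch_exit_to_user_mode_prepare(): flush once, then clear the flag. */
	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
	    this_cpu_read(x86_ibpb_exit_to_user)) {
		indirect_branch_prediction_barrier();
		this_cpu_write(x86_ibpb_exit_to_user, false);
	}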

Note that the existing userspace mitigations against Spectre-v2 are effective
in protecting userspace tasks from each other. They are insufficient to
protect a userspace VMM from a malicious guest, because Spectre-v2 mitigations
are applied at context-switch time, while the userspace VMM can run after a
VM-exit without a context switch.

Vulnerability enumeration and mitigation are not applied inside a guest,
because nested hypervisors should already be deploying IBPB to isolate
themselves from nested guests.

SMT considerations
------------------

When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be
vulnerable to cross-thread attacks. For complete protection against VMSCAPE
attacks in SMT environments, STIBP should be enabled.

The kernel issues a warning if SMT is enabled without adequate STIBP
protection. The warning is not issued when any of the following holds (see the
condensed check after this list):

  - SMT is disabled
  - STIBP is enabled system-wide
  - Intel eIBRS is enabled (which implies STIBP protection)
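
The exact check lives in ``cpu_bugs_smt_update()``, added elsewhere in this
commit; condensed, the warning condition reads roughly as follows (all names
are the ones used in arch/x86/kernel/cpu/bugs.c):

.. code-block:: c

	/* STIBP is already guaranteed system-wide in strict mode, or when
	 * Intel eIBRS (without AUTOIBRS) is in use, which implies STIBP. */
	bool stibp_ok = spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
			spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
			(spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
			 !boot_cpu_has(X86_FEATURE_AUTOIBRS));

	/* Warn only when a VMSCAPE IBPB mitigation is active and SMT is on. */
	bool ibpb_mitigation_on =
		vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER ||
		vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_ON_VMEXIT;

	if (ibpb_mitigation_on && sched_smt_active() && !stibp_ok)
		pr_warn_once(VMSCAPE_MSG_SMT);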

System information and options
------------------------------

The sysfs file showing VMSCAPE mitigation status is:

  /sys/devices/system/cpu/vulnerabilities/vmscape

The possible values in this file are:

 * 'Not affected':

   The processor is not vulnerable to VMSCAPE attacks.

 * 'Vulnerable':

   The processor is vulnerable and no mitigation has been applied.

 * 'Mitigation: IBPB before exit to userspace':

   Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has
   run a potentially malicious guest and issues an IBPB before the first
   exit to userspace after VM-exit.

 * 'Mitigation: IBPB on VMEXIT':

   IBPB is issued on every VM-exit. This occurs when other mitigations like
   RETBLEED or SRSO are already issuing IBPB on VM-exit.
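
A userspace check of this interface only needs to read that file; for example
(a minimal standalone sketch, not part of the kernel sources):

.. code-block:: c

	#include <stdio.h>

	int main(void)
	{
		char status[256];
		FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/vmscape", "r");

		if (!f) {
			/* The file only exists on kernels with this change. */
			perror("vmscape");
			return 1;
		}
		if (fgets(status, sizeof(status), f))
			printf("VMSCAPE: %s", status); /* value already ends in '\n' */
		fclose(f);
		return 0;
	}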

Mitigation control on the kernel command line
----------------------------------------------

The mitigation can be controlled via the ``vmscape=`` command line parameter:

 * ``vmscape=off``:

   Disable the VMSCAPE mitigation.

 * ``vmscape=ibpb``:

   Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y).

 * ``vmscape=force``:

   Force vulnerability detection and mitigation even on processors that are
   not known to be affected.
@@ -3563,6 +3563,7 @@
				srbds=off [X86,INTEL]
				ssbd=force-off [ARM64]
				tsx_async_abort=off [X86]
				vmscape=off [X86]

			Exceptions:
				This does not have any effect on
@@ -7500,6 +7501,16 @@
	vmpoff=		[KNL,S390] Perform z/VM CP command after power off.
			Format: <command>

	vmscape=	[X86] Controls mitigation for VMscape attacks.
			VMscape attacks can leak information from a userspace
			hypervisor to a guest via speculative side-channels.

			off    - disable the mitigation
			ibpb   - use Indirect Branch Prediction Barrier
				 (IBPB) mitigation (default)
			force  - force vulnerability detection even on
				 unaffected processors

	vsyscall=	[X86-64,EARLY]
			Controls the behavior of vsyscalls (i.e. calls to
			fixed addresses of 0xffffffffff600x00 from legacy
@@ -142,13 +142,6 @@ but depends on the BIOS to behave correctly.
Note TDX works with CPU logical online/offline, thus the kernel still
allows to offline logical CPU and online it again.

Kexec()
~~~~~~~

TDX host support currently lacks the ability to handle kexec. For
simplicity only one of them can be enabled in the Kconfig. This will be
fixed in the future.

Erratum
~~~~~~~

@@ -171,6 +164,13 @@ If the platform has such erratum, the kernel prints additional message in
machine check handler to tell user the machine check may be caused by
kernel bug on TDX private memory.

Kexec
~~~~~~~

Currently kexec doesn't work on the TDX platforms with the aforementioned
erratum. It fails when loading the kexec kernel image. Otherwise it
works normally.

Interaction vs S3 and deeper states
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -12,7 +12,7 @@ RHEL_MINOR = 1
 #
 # Use this spot to avoid future merge conflicts.
 # Do not trim this comment.
-RHEL_RELEASE = 124.8.1
+RHEL_RELEASE = 124.13.1

 #
 # RHEL_REBASE_NUM
@ -270,6 +270,7 @@
|
||||
#define H_QUERY_INT_STATE 0x1E4
|
||||
#define H_POLL_PENDING 0x1D8
|
||||
#define H_ILLAN_ATTRIBUTES 0x244
|
||||
#define H_ADD_LOGICAL_LAN_BUFFERS 0x248
|
||||
#define H_MODIFY_HEA_QP 0x250
|
||||
#define H_QUERY_HEA_QP 0x254
|
||||
#define H_QUERY_HEA 0x258
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/security.h>
|
||||
#include <linux/slab.h>
|
||||
#include "hypfs.h"
|
||||
|
||||
@ -66,23 +67,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
long rc;
|
||||
|
||||
mutex_lock(&df->lock);
|
||||
if (df->unlocked_ioctl)
|
||||
rc = df->unlocked_ioctl(file, cmd, arg);
|
||||
else
|
||||
rc = -ENOTTY;
|
||||
mutex_unlock(&df->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static const struct file_operations dbfs_ops = {
|
||||
static const struct file_operations dbfs_ops_ioctl = {
|
||||
.read = dbfs_read,
|
||||
.unlocked_ioctl = dbfs_ioctl,
|
||||
};
|
||||
|
||||
static const struct file_operations dbfs_ops = {
|
||||
.read = dbfs_read,
|
||||
};
|
||||
|
||||
void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
|
||||
{
|
||||
df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
|
||||
&dbfs_ops);
|
||||
const struct file_operations *fops = &dbfs_ops;
|
||||
|
||||
if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS))
|
||||
fops = &dbfs_ops_ioctl;
|
||||
df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops);
|
||||
mutex_init(&df->lock);
|
||||
}
|
||||
|
||||
|
||||
@ -1982,7 +1982,6 @@ config INTEL_TDX_HOST
|
||||
depends on X86_X2APIC
|
||||
select ARCH_KEEP_MEMBLOCK
|
||||
depends on CONTIG_ALLOC
|
||||
depends on !KEXEC_CORE
|
||||
depends on X86_MCE
|
||||
help
|
||||
Intel Trust Domain Extensions (TDX) protects guest VMs from malicious
|
||||
@ -2772,6 +2771,15 @@ config MITIGATION_TSA
|
||||
security vulnerability on AMD CPUs which can lead to forwarding of
|
||||
invalid info to subsequent instructions and thus can affect their
|
||||
timing and thereby cause a leakage.
|
||||
|
||||
config MITIGATION_VMSCAPE
|
||||
bool "Mitigate VMSCAPE"
|
||||
depends on KVM
|
||||
default y
|
||||
help
|
||||
Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security
|
||||
vulnerability on Intel and AMD CPUs that may allow a guest to do
|
||||
Spectre v2 style attacks on userspace hypervisor.
|
||||
endif
|
||||
|
||||
config ARCH_HAS_ADD_PAGES
|
||||
|
||||
@ -492,6 +492,7 @@
|
||||
#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
|
||||
#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
|
||||
#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
|
||||
#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
@ -547,4 +548,5 @@
|
||||
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
|
||||
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
|
||||
#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
|
||||
#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
||||
@ -92,6 +92,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
* 8 (ia32) bits.
|
||||
*/
|
||||
choose_random_kstack_offset(rdtsc());
|
||||
|
||||
/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
|
||||
if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
|
||||
this_cpu_read(x86_ibpb_exit_to_user)) {
|
||||
indirect_branch_prediction_barrier();
|
||||
this_cpu_write(x86_ibpb_exit_to_user, false);
|
||||
}
|
||||
}
|
||||
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
|
||||
|
||||
|
||||
@ -8,14 +8,18 @@
|
||||
# define PA_PGD 2
|
||||
# define PA_SWAP_PAGE 3
|
||||
# define PAGES_NR 4
|
||||
#else
|
||||
# define PA_CONTROL_PAGE 0
|
||||
# define VA_CONTROL_PAGE 1
|
||||
# define PA_TABLE_PAGE 2
|
||||
# define PA_SWAP_PAGE 3
|
||||
# define PAGES_NR 4
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#include <linux/bits.h>
|
||||
|
||||
#define RELOC_KERNEL_PRESERVE_CONTEXT BIT(0)
|
||||
#define RELOC_KERNEL_CACHE_INCOHERENT BIT(1)
|
||||
|
||||
#endif
|
||||
|
||||
# define KEXEC_CONTROL_PAGE_SIZE 4096
|
||||
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
@ -43,7 +47,6 @@ struct kimage;
|
||||
/* Maximum address we can use for the control code buffer */
|
||||
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
|
||||
|
||||
# define KEXEC_CONTROL_PAGE_SIZE 4096
|
||||
|
||||
/* The native architecture */
|
||||
# define KEXEC_ARCH KEXEC_ARCH_386
|
||||
@ -58,11 +61,12 @@ struct kimage;
|
||||
/* Maximum address we can use for the control pages */
|
||||
# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
|
||||
|
||||
/* Allocate one page for the pdp and the second for the code */
|
||||
# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
|
||||
|
||||
/* The native architecture */
|
||||
# define KEXEC_ARCH KEXEC_ARCH_X86_64
|
||||
|
||||
extern unsigned long kexec_va_control_page;
|
||||
extern unsigned long kexec_pa_table_page;
|
||||
extern unsigned long kexec_pa_swap_page;
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -116,21 +120,20 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
asmlinkage unsigned long
|
||||
relocate_kernel(unsigned long indirection_page,
|
||||
typedef asmlinkage unsigned long
|
||||
relocate_kernel_fn(unsigned long indirection_page,
|
||||
unsigned long control_page,
|
||||
unsigned long start_address,
|
||||
unsigned int has_pae,
|
||||
unsigned int preserve_context);
|
||||
#else
|
||||
unsigned long
|
||||
relocate_kernel(unsigned long indirection_page,
|
||||
unsigned long page_list,
|
||||
typedef unsigned long
|
||||
relocate_kernel_fn(unsigned long indirection_page,
|
||||
unsigned long pa_control_page,
|
||||
unsigned long start_address,
|
||||
unsigned int preserve_context,
|
||||
unsigned int host_mem_enc_active);
|
||||
unsigned int flags);
|
||||
#endif
|
||||
|
||||
extern relocate_kernel_fn relocate_kernel;
|
||||
#define ARCH_HAS_KIMAGE_ARCH
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -145,6 +148,19 @@ struct kimage_arch {
|
||||
};
|
||||
#else
|
||||
struct kimage_arch {
|
||||
/*
|
||||
* This is a kimage control page, as it must not overlap with either
|
||||
* source or destination address ranges.
|
||||
*/
|
||||
pgd_t *pgd;
|
||||
/*
|
||||
* The virtual mapping of the control code page itself is used only
|
||||
* during the transition, while the current kernel's pages are all
|
||||
* in place. Thus the intermediate page table pages used to map it
|
||||
* are not control pages, but instead just normal pages obtained
|
||||
* with get_zeroed_page(). And have to be tracked (below) so that
|
||||
* they can be freed.
|
||||
*/
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
@ -527,6 +527,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
|
||||
|
||||
static inline void indirect_branch_prediction_barrier(void)
|
||||
{
|
||||
asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
|
||||
|
||||
@ -180,13 +180,6 @@ static inline void halt(void)
|
||||
PVOP_VCALL0(irq.halt);
|
||||
}
|
||||
|
||||
extern noinstr void pv_native_wbinvd(void);
|
||||
|
||||
static __always_inline void wbinvd(void)
|
||||
{
|
||||
PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
|
||||
}
|
||||
|
||||
static inline u64 paravirt_read_msr(unsigned msr)
|
||||
{
|
||||
return PVOP_CALL1(u64, cpu.read_msr, msr);
|
||||
|
||||
@ -86,8 +86,6 @@ struct pv_cpu_ops {
|
||||
void (*update_io_bitmap)(void);
|
||||
#endif
|
||||
|
||||
void (*wbinvd)(void);
|
||||
|
||||
/* cpuid emulation, mostly so that caps bits can be disabled */
|
||||
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
|
||||
unsigned int *ecx, unsigned int *edx);
|
||||
|
||||
@ -744,6 +744,8 @@ void __noreturn stop_this_cpu(void *dummy);
|
||||
void microcode_check(struct cpuinfo_x86 *prev_info);
|
||||
void store_cpu_caps(struct cpuinfo_x86 *info);
|
||||
|
||||
DECLARE_PER_CPU(bool, cache_state_incoherent);
|
||||
|
||||
enum l1tf_mitigations {
|
||||
L1TF_MITIGATION_OFF,
|
||||
L1TF_MITIGATION_AUTO,
|
||||
|
||||
@ -5,6 +5,7 @@
|
||||
#include <asm-generic/sections.h>
|
||||
#include <asm/extable.h>
|
||||
|
||||
extern char __relocate_kernel_start[], __relocate_kernel_end[];
|
||||
extern char __brk_base[], __brk_limit[];
|
||||
extern char __end_rodata_aligned[];
|
||||
|
||||
|
||||
@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru)
|
||||
}
|
||||
#endif
|
||||
|
||||
static __always_inline void native_wbinvd(void)
|
||||
static __always_inline void wbinvd(void)
|
||||
{
|
||||
asm volatile("wbinvd": : :"memory");
|
||||
}
|
||||
@ -167,12 +167,6 @@ static inline void __write_cr4(unsigned long x)
|
||||
{
|
||||
native_write_cr4(x);
|
||||
}
|
||||
|
||||
static __always_inline void wbinvd(void)
|
||||
{
|
||||
native_wbinvd();
|
||||
}
|
||||
|
||||
#endif /* CONFIG_PARAVIRT_XXL */
|
||||
|
||||
static __always_inline void clflush(volatile void *__p)
|
||||
|
||||
@ -97,10 +97,31 @@ u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
|
||||
u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
|
||||
void tdx_init(void);
|
||||
|
||||
#include <linux/preempt.h>
|
||||
#include <asm/archrandom.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
|
||||
|
||||
static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
|
||||
struct tdx_module_args *args)
|
||||
{
|
||||
lockdep_assert_preemption_disabled();
|
||||
|
||||
/*
|
||||
* SEAMCALLs are made to the TDX module and can generate dirty
|
||||
* cachelines of TDX private memory. Mark cache state incoherent
|
||||
* so that the cache can be flushed during kexec.
|
||||
*
|
||||
* This needs to be done before actually making the SEAMCALL,
|
||||
* because kexec-ing CPU could send NMI to stop remote CPUs,
|
||||
* in which case even disabling IRQ won't help here.
|
||||
*/
|
||||
this_cpu_write(cache_state_incoherent, true);
|
||||
|
||||
return func(fn, args);
|
||||
}
|
||||
|
||||
static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
|
||||
struct tdx_module_args *args)
|
||||
{
|
||||
@ -108,7 +129,9 @@ static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
|
||||
u64 ret;
|
||||
|
||||
do {
|
||||
ret = func(fn, args);
|
||||
preempt_disable();
|
||||
ret = __seamcall_dirty_cache(func, fn, args);
|
||||
preempt_enable();
|
||||
} while (ret == TDX_RND_NO_ENTROPY && --retry);
|
||||
|
||||
return ret;
|
||||
@ -198,5 +221,11 @@ static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
|
||||
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
|
||||
#endif /* CONFIG_INTEL_TDX_HOST */
|
||||
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
void tdx_cpu_flush_cache_for_kexec(void);
|
||||
#else
|
||||
static inline void tdx_cpu_flush_cache_for_kexec(void) { }
|
||||
#endif
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* _ASM_X86_TDX_H */
|
||||
|
||||
@ -139,9 +139,15 @@ static bool skip_addr(void *dest)
|
||||
return true;
|
||||
#endif
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
# ifdef CONFIG_X86_64
|
||||
if (dest >= (void *)__relocate_kernel_start &&
|
||||
dest < (void *)__relocate_kernel_end)
|
||||
return true;
|
||||
# else
|
||||
if (dest >= (void *)relocate_kernel &&
|
||||
dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
|
||||
return true;
|
||||
# endif
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -530,6 +530,23 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u64 msr;
|
||||
|
||||
/*
|
||||
* Mark using WBINVD is needed during kexec on processors that
|
||||
* support SME. This provides support for performing a successful
|
||||
* kexec when going from SME inactive to SME active (or vice-versa).
|
||||
*
|
||||
* The cache must be cleared so that if there are entries with the
|
||||
* same physical address, both with and without the encryption bit,
|
||||
* they don't race each other when flushed and potentially end up
|
||||
* with the wrong entry being committed to memory.
|
||||
*
|
||||
* Test the CPUID bit directly because with mem_encrypt=off the
|
||||
* BSP will clear the X86_FEATURE_SME bit and the APs will not
|
||||
* see it set after that.
|
||||
*/
|
||||
if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0)))
|
||||
__this_cpu_write(cache_state_incoherent, true);
|
||||
|
||||
/*
|
||||
* BIOS support is required for SME and SEV.
|
||||
* For SME: If BIOS has enabled SME then adjust x86_phys_bits by
|
||||
|
||||
@ -96,6 +96,9 @@ static void __init its_update_mitigation(void);
|
||||
static void __init its_apply_mitigation(void);
|
||||
static void __init tsa_select_mitigation(void);
|
||||
static void __init tsa_apply_mitigation(void);
|
||||
static void __init vmscape_select_mitigation(void);
|
||||
static void __init vmscape_update_mitigation(void);
|
||||
static void __init vmscape_apply_mitigation(void);
|
||||
|
||||
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
|
||||
u64 x86_spec_ctrl_base;
|
||||
@ -105,6 +108,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
|
||||
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
|
||||
|
||||
/*
|
||||
* Set when the CPU has run a potentially malicious guest. An IBPB will
|
||||
* be needed to before running userspace. That IBPB will flush the branch
|
||||
* predictor content.
|
||||
*/
|
||||
DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user);
|
||||
|
||||
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
|
||||
|
||||
static u64 __ro_after_init x86_arch_cap_msr;
|
||||
@ -228,6 +239,7 @@ void __init cpu_select_mitigations(void)
|
||||
its_select_mitigation();
|
||||
bhi_select_mitigation();
|
||||
tsa_select_mitigation();
|
||||
vmscape_select_mitigation();
|
||||
|
||||
/*
|
||||
* After mitigations are selected, some may need to update their
|
||||
@ -259,6 +271,7 @@ void __init cpu_select_mitigations(void)
|
||||
bhi_update_mitigation();
|
||||
/* srso_update_mitigation() depends on retbleed_update_mitigation(). */
|
||||
srso_update_mitigation();
|
||||
vmscape_update_mitigation();
|
||||
|
||||
spectre_v1_apply_mitigation();
|
||||
spectre_v2_apply_mitigation();
|
||||
@ -276,6 +289,7 @@ void __init cpu_select_mitigations(void)
|
||||
its_apply_mitigation();
|
||||
bhi_apply_mitigation();
|
||||
tsa_apply_mitigation();
|
||||
vmscape_apply_mitigation();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2348,88 +2362,6 @@ static void update_mds_branch_idle(void)
|
||||
}
|
||||
}
|
||||
|
||||
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
|
||||
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
|
||||
#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
|
||||
|
||||
void cpu_bugs_smt_update(void)
|
||||
{
|
||||
mutex_lock(&spec_ctrl_mutex);
|
||||
|
||||
if (sched_smt_active() && unprivileged_ebpf_enabled() &&
|
||||
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
|
||||
pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
|
||||
|
||||
switch (spectre_v2_user_stibp) {
|
||||
case SPECTRE_V2_USER_NONE:
|
||||
break;
|
||||
case SPECTRE_V2_USER_STRICT:
|
||||
case SPECTRE_V2_USER_STRICT_PREFERRED:
|
||||
update_stibp_strict();
|
||||
break;
|
||||
case SPECTRE_V2_USER_PRCTL:
|
||||
case SPECTRE_V2_USER_SECCOMP:
|
||||
update_indir_branch_cond();
|
||||
break;
|
||||
}
|
||||
|
||||
switch (mds_mitigation) {
|
||||
case MDS_MITIGATION_FULL:
|
||||
case MDS_MITIGATION_AUTO:
|
||||
case MDS_MITIGATION_VMWERV:
|
||||
if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
|
||||
pr_warn_once(MDS_MSG_SMT);
|
||||
update_mds_branch_idle();
|
||||
break;
|
||||
case MDS_MITIGATION_OFF:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (taa_mitigation) {
|
||||
case TAA_MITIGATION_VERW:
|
||||
case TAA_MITIGATION_AUTO:
|
||||
case TAA_MITIGATION_UCODE_NEEDED:
|
||||
if (sched_smt_active())
|
||||
pr_warn_once(TAA_MSG_SMT);
|
||||
break;
|
||||
case TAA_MITIGATION_TSX_DISABLED:
|
||||
case TAA_MITIGATION_OFF:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (mmio_mitigation) {
|
||||
case MMIO_MITIGATION_VERW:
|
||||
case MMIO_MITIGATION_AUTO:
|
||||
case MMIO_MITIGATION_UCODE_NEEDED:
|
||||
if (sched_smt_active())
|
||||
pr_warn_once(MMIO_MSG_SMT);
|
||||
break;
|
||||
case MMIO_MITIGATION_OFF:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (tsa_mitigation) {
|
||||
case TSA_MITIGATION_USER_KERNEL:
|
||||
case TSA_MITIGATION_VM:
|
||||
case TSA_MITIGATION_AUTO:
|
||||
case TSA_MITIGATION_FULL:
|
||||
/*
|
||||
* TSA-SQ can potentially lead to info leakage between
|
||||
* SMT threads.
|
||||
*/
|
||||
if (sched_smt_active())
|
||||
static_branch_enable(&cpu_buf_idle_clear);
|
||||
else
|
||||
static_branch_disable(&cpu_buf_idle_clear);
|
||||
break;
|
||||
case TSA_MITIGATION_NONE:
|
||||
case TSA_MITIGATION_UCODE_NEEDED:
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_unlock(&spec_ctrl_mutex);
|
||||
}
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
|
||||
|
||||
@ -3130,9 +3062,185 @@ static void __init srso_apply_mitigation(void)
|
||||
}
|
||||
}
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "VMSCAPE: " fmt
|
||||
|
||||
enum vmscape_mitigations {
|
||||
VMSCAPE_MITIGATION_NONE,
|
||||
VMSCAPE_MITIGATION_AUTO,
|
||||
VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER,
|
||||
VMSCAPE_MITIGATION_IBPB_ON_VMEXIT,
|
||||
};
|
||||
|
||||
static const char * const vmscape_strings[] = {
|
||||
[VMSCAPE_MITIGATION_NONE] = "Vulnerable",
|
||||
/* [VMSCAPE_MITIGATION_AUTO] */
|
||||
[VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace",
|
||||
[VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT",
|
||||
};
|
||||
|
||||
static enum vmscape_mitigations vmscape_mitigation __ro_after_init =
|
||||
IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE;
|
||||
|
||||
static int __init vmscape_parse_cmdline(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(str, "off")) {
|
||||
vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
|
||||
} else if (!strcmp(str, "ibpb")) {
|
||||
vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
|
||||
} else if (!strcmp(str, "force")) {
|
||||
setup_force_cpu_bug(X86_BUG_VMSCAPE);
|
||||
vmscape_mitigation = VMSCAPE_MITIGATION_AUTO;
|
||||
} else {
|
||||
pr_err("Ignoring unknown vmscape=%s option.\n", str);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("vmscape", vmscape_parse_cmdline);
|
||||
|
||||
static void __init vmscape_select_mitigation(void)
|
||||
{
|
||||
if (cpu_mitigations_off() ||
|
||||
!boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
|
||||
!boot_cpu_has(X86_FEATURE_IBPB)) {
|
||||
vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
|
||||
return;
|
||||
}
|
||||
|
||||
if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO)
|
||||
vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
|
||||
}
|
||||
|
||||
static void __init vmscape_update_mitigation(void)
|
||||
{
|
||||
if (!boot_cpu_has_bug(X86_BUG_VMSCAPE))
|
||||
return;
|
||||
|
||||
if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB ||
|
||||
srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT)
|
||||
vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT;
|
||||
|
||||
pr_info("%s\n", vmscape_strings[vmscape_mitigation]);
|
||||
}
|
||||
|
||||
static void __init vmscape_apply_mitigation(void)
|
||||
{
|
||||
if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER)
|
||||
setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER);
|
||||
}
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) fmt
|
||||
|
||||
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
|
||||
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
|
||||
#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
|
||||
#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n"
|
||||
|
||||
void cpu_bugs_smt_update(void)
|
||||
{
|
||||
mutex_lock(&spec_ctrl_mutex);
|
||||
|
||||
if (sched_smt_active() && unprivileged_ebpf_enabled() &&
|
||||
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
|
||||
pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
|
||||
|
||||
switch (spectre_v2_user_stibp) {
|
||||
case SPECTRE_V2_USER_NONE:
|
||||
break;
|
||||
case SPECTRE_V2_USER_STRICT:
|
||||
case SPECTRE_V2_USER_STRICT_PREFERRED:
|
||||
update_stibp_strict();
|
||||
break;
|
||||
case SPECTRE_V2_USER_PRCTL:
|
||||
case SPECTRE_V2_USER_SECCOMP:
|
||||
update_indir_branch_cond();
|
||||
break;
|
||||
}
|
||||
|
||||
switch (mds_mitigation) {
|
||||
case MDS_MITIGATION_FULL:
|
||||
case MDS_MITIGATION_AUTO:
|
||||
case MDS_MITIGATION_VMWERV:
|
||||
if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
|
||||
pr_warn_once(MDS_MSG_SMT);
|
||||
update_mds_branch_idle();
|
||||
break;
|
||||
case MDS_MITIGATION_OFF:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (taa_mitigation) {
|
||||
case TAA_MITIGATION_VERW:
|
||||
case TAA_MITIGATION_AUTO:
|
||||
case TAA_MITIGATION_UCODE_NEEDED:
|
||||
if (sched_smt_active())
|
||||
pr_warn_once(TAA_MSG_SMT);
|
||||
break;
|
||||
case TAA_MITIGATION_TSX_DISABLED:
|
||||
case TAA_MITIGATION_OFF:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (mmio_mitigation) {
|
||||
case MMIO_MITIGATION_VERW:
|
||||
case MMIO_MITIGATION_AUTO:
|
||||
case MMIO_MITIGATION_UCODE_NEEDED:
|
||||
if (sched_smt_active())
|
||||
pr_warn_once(MMIO_MSG_SMT);
|
||||
break;
|
||||
case MMIO_MITIGATION_OFF:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (tsa_mitigation) {
|
||||
case TSA_MITIGATION_USER_KERNEL:
|
||||
case TSA_MITIGATION_VM:
|
||||
case TSA_MITIGATION_AUTO:
|
||||
case TSA_MITIGATION_FULL:
|
||||
/*
|
||||
* TSA-SQ can potentially lead to info leakage between
|
||||
* SMT threads.
|
||||
*/
|
||||
if (sched_smt_active())
|
||||
static_branch_enable(&cpu_buf_idle_clear);
|
||||
else
|
||||
static_branch_disable(&cpu_buf_idle_clear);
|
||||
break;
|
||||
case TSA_MITIGATION_NONE:
|
||||
case TSA_MITIGATION_UCODE_NEEDED:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (vmscape_mitigation) {
|
||||
case VMSCAPE_MITIGATION_NONE:
|
||||
case VMSCAPE_MITIGATION_AUTO:
|
||||
break;
|
||||
case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT:
|
||||
case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER:
|
||||
/*
|
||||
* Hypervisors can be attacked across-threads, warn for SMT when
|
||||
* STIBP is not already enabled system-wide.
|
||||
*
|
||||
* Intel eIBRS (!AUTOIBRS) implies STIBP on.
|
||||
*/
|
||||
if (!sched_smt_active() ||
|
||||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
|
||||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
|
||||
(spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
|
||||
!boot_cpu_has(X86_FEATURE_AUTOIBRS)))
|
||||
break;
|
||||
pr_warn_once(VMSCAPE_MSG_SMT);
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_unlock(&spec_ctrl_mutex);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
|
||||
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
|
||||
@ -3373,6 +3481,11 @@ static ssize_t tsa_show_state(char *buf)
|
||||
return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
|
||||
}
|
||||
|
||||
static ssize_t vmscape_show_state(char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]);
|
||||
}
|
||||
|
||||
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
|
||||
char *buf, unsigned int bug)
|
||||
{
|
||||
@ -3436,6 +3549,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
|
||||
case X86_BUG_TSA:
|
||||
return tsa_show_state(buf);
|
||||
|
||||
case X86_BUG_VMSCAPE:
|
||||
return vmscape_show_state(buf);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -3522,6 +3638,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu
|
||||
{
|
||||
return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
|
||||
}
|
||||
|
||||
ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE);
|
||||
}
|
||||
#endif
|
||||
|
||||
void __warn_thunk(void)
|
||||
|
||||
@ -1237,55 +1237,71 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
|
||||
#define ITS_NATIVE_ONLY BIT(9)
|
||||
/* CPU is affected by Transient Scheduler Attacks */
|
||||
#define TSA BIT(10)
|
||||
/* CPU is affected by VMSCAPE */
|
||||
#define VMSCAPE BIT(11)
|
||||
|
||||
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
|
||||
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
|
||||
VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED),
|
||||
VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
|
||||
VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
|
||||
VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
|
||||
VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
|
||||
VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS),
|
||||
VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
|
||||
VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
|
||||
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
|
||||
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
|
||||
VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
|
||||
VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE),
|
||||
VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS),
|
||||
VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE),
|
||||
|
||||
VULNBL_AMD(0x15, RETBLEED),
|
||||
VULNBL_AMD(0x16, RETBLEED),
|
||||
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
|
||||
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
|
||||
VULNBL_AMD(0x19, SRSO | TSA),
|
||||
VULNBL_AMD(0x1a, SRSO),
|
||||
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
|
||||
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
|
||||
VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE),
|
||||
VULNBL_AMD(0x1a, SRSO | VMSCAPE),
|
||||
{}
|
||||
};
|
||||
|
||||
@ -1502,6 +1518,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the bug only on bare-metal. A nested hypervisor should already be
|
||||
* deploying IBPB to isolate itself from nested guests.
|
||||
*/
|
||||
if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) &&
|
||||
!boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
setup_force_cpu_bug(X86_BUG_VMSCAPE);
|
||||
|
||||
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
|
||||
return;
|
||||
|
||||
|
||||
@ -323,7 +323,7 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
|
||||
* Writeback and invalidate caches before updating microcode to avoid
|
||||
* internal issues depending on what the microcode is updating.
|
||||
*/
|
||||
native_wbinvd();
|
||||
wbinvd();
|
||||
|
||||
/* write microcode via MSR 0x79 */
|
||||
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
|
||||
|
||||
@ -459,7 +459,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
|
||||
* increase likelihood that allocated cache portion will be filled
|
||||
* with associated memory.
|
||||
*/
|
||||
native_wbinvd();
|
||||
wbinvd();
|
||||
|
||||
/*
|
||||
* Always called with interrupts enabled. By disabling interrupts
|
||||
|
||||
@ -160,15 +160,10 @@ void machine_kexec_cleanup(struct kimage *image)
|
||||
*/
|
||||
void machine_kexec(struct kimage *image)
|
||||
{
|
||||
relocate_kernel_fn *relocate_kernel_ptr;
|
||||
unsigned long page_list[PAGES_NR];
|
||||
void *control_page;
|
||||
int save_ftrace_enabled;
|
||||
asmlinkage unsigned long
|
||||
(*relocate_kernel_ptr)(unsigned long indirection_page,
|
||||
unsigned long control_page,
|
||||
unsigned long start_address,
|
||||
unsigned int has_pae,
|
||||
unsigned int preserve_context);
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (image->preserve_context)
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/efi.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
/*
|
||||
@ -146,7 +147,8 @@ static void free_transition_pgtable(struct kimage *image)
|
||||
image->arch.pte = NULL;
|
||||
}
|
||||
|
||||
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
||||
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd,
|
||||
unsigned long control_page)
|
||||
{
|
||||
pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
|
||||
unsigned long vaddr, paddr;
|
||||
@ -156,8 +158,13 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
|
||||
vaddr = (unsigned long)relocate_kernel;
|
||||
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
|
||||
/*
|
||||
* For the transition to the identity mapped page tables, the control
|
||||
* code page also needs to be mapped at the virtual address it starts
|
||||
* off running from.
|
||||
*/
|
||||
vaddr = (unsigned long)__va(control_page);
|
||||
paddr = control_page;
|
||||
pgd += pgd_index(vaddr);
|
||||
if (!pgd_present(*pgd)) {
|
||||
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
|
||||
@ -216,7 +223,7 @@ static void *alloc_pgt_page(void *data)
|
||||
return p;
|
||||
}
|
||||
|
||||
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
static int init_pgtable(struct kimage *image, unsigned long control_page)
|
||||
{
|
||||
struct x86_mapping_info info = {
|
||||
.alloc_pgt_page = alloc_pgt_page,
|
||||
@ -225,12 +232,12 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
.kernpg_flag = _KERNPG_TABLE_NOENC,
|
||||
};
|
||||
unsigned long mstart, mend;
|
||||
pgd_t *level4p;
|
||||
int result;
|
||||
int i;
|
||||
|
||||
level4p = (pgd_t *)__va(start_pgtable);
|
||||
clear_page(level4p);
|
||||
image->arch.pgd = alloc_pgt_page(image);
|
||||
if (!image->arch.pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
|
||||
info.page_flag |= _PAGE_ENC;
|
||||
@ -244,8 +251,8 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
mstart = pfn_mapped[i].start << PAGE_SHIFT;
|
||||
mend = pfn_mapped[i].end << PAGE_SHIFT;
|
||||
|
||||
result = kernel_ident_mapping_init(&info,
|
||||
level4p, mstart, mend);
|
||||
result = kernel_ident_mapping_init(&info, image->arch.pgd,
|
||||
mstart, mend);
|
||||
if (result)
|
||||
return result;
|
||||
}
|
||||
@ -260,8 +267,8 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
mstart = image->segment[i].mem;
|
||||
mend = mstart + image->segment[i].memsz;
|
||||
|
||||
result = kernel_ident_mapping_init(&info,
|
||||
level4p, mstart, mend);
|
||||
result = kernel_ident_mapping_init(&info, image->arch.pgd,
|
||||
mstart, mend);
|
||||
|
||||
if (result)
|
||||
return result;
|
||||
@ -271,15 +278,19 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
* Prepare EFI systab and ACPI tables for kexec kernel since they are
|
||||
* not covered by pfn_mapped.
|
||||
*/
|
||||
result = map_efi_systab(&info, level4p);
|
||||
result = map_efi_systab(&info, image->arch.pgd);
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
result = map_acpi_tables(&info, level4p);
|
||||
result = map_acpi_tables(&info, image->arch.pgd);
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
return init_transition_pgtable(image, level4p);
|
||||
/*
|
||||
* This must be last because the intermediate page table pages it
|
||||
* allocates will not be control pages and may overlap the image.
|
||||
*/
|
||||
return init_transition_pgtable(image, image->arch.pgd, control_page);
|
||||
}
|
||||
|
||||
static void load_segments(void)
|
||||
@ -296,22 +307,51 @@ static void load_segments(void)
|
||||
|
||||
int machine_kexec_prepare(struct kimage *image)
|
||||
{
|
||||
unsigned long start_pgtable;
|
||||
void *control_page = page_address(image->control_code_page);
|
||||
unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
|
||||
unsigned long reloc_end = (unsigned long)__relocate_kernel_end;
|
||||
int result;
|
||||
|
||||
/* Calculate the offsets */
|
||||
start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
|
||||
/*
|
||||
* Some early TDX-capable platforms have an erratum. A kernel
|
||||
* partial write (a write transaction of less than cacheline
|
||||
* lands at memory controller) to TDX private memory poisons that
|
||||
* memory, and a subsequent read triggers a machine check.
|
||||
*
|
||||
* On those platforms the old kernel must reset TDX private
|
||||
* memory before jumping to the new kernel otherwise the new
|
||||
* kernel may see unexpected machine check. For simplicity
|
||||
* just fail kexec/kdump on those platforms.
|
||||
*/
|
||||
if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) {
|
||||
pr_info_once("Not allowed on platform with tdx_pw_mce bug\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Setup the identity mapped 64bit page table */
|
||||
result = init_pgtable(image, start_pgtable);
|
||||
result = init_pgtable(image, __pa(control_page));
|
||||
if (result)
|
||||
return result;
|
||||
kexec_va_control_page = (unsigned long)control_page;
|
||||
kexec_pa_table_page = (unsigned long)__pa(image->arch.pgd);
|
||||
|
||||
if (image->type == KEXEC_TYPE_DEFAULT)
|
||||
kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;
|
||||
|
||||
__memcpy(control_page, __relocate_kernel_start, reloc_end - reloc_start);
|
||||
|
||||
set_memory_rox((unsigned long)control_page, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void machine_kexec_cleanup(struct kimage *image)
|
||||
{
|
||||
void *control_page = page_address(image->control_code_page);
|
||||
|
||||
set_memory_nx((unsigned long)control_page, 1);
|
||||
set_memory_rw((unsigned long)control_page, 1);
|
||||
|
||||
free_transition_pgtable(image);
|
||||
}
|
||||
|
||||
@ -319,19 +359,14 @@ void machine_kexec_cleanup(struct kimage *image)
|
||||
* Do not allocate memory (or fail in any way) in machine_kexec().
|
||||
* We are past the point of no return, committed to rebooting now.
|
||||
*/
|
||||
void machine_kexec(struct kimage *image)
|
||||
void __nocfi machine_kexec(struct kimage *image)
|
||||
{
|
||||
unsigned long page_list[PAGES_NR];
|
||||
unsigned int host_mem_enc_active;
|
||||
unsigned long reloc_start = (unsigned long)__relocate_kernel_start;
|
||||
relocate_kernel_fn *relocate_kernel_ptr;
|
||||
unsigned int relocate_kernel_flags;
|
||||
int save_ftrace_enabled;
|
||||
void *control_page;
|
||||
|
||||
/*
|
||||
* This must be done before load_segments() since if call depth tracking
|
||||
* is used then GS must be valid to make any function calls.
|
||||
*/
|
||||
host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (image->preserve_context)
|
||||
save_processor_state();
|
||||
@ -357,17 +392,24 @@ void machine_kexec(struct kimage *image)
|
||||
#endif
|
||||
}
|
||||
|
||||
control_page = page_address(image->control_code_page) + PAGE_SIZE;
|
||||
__memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
|
||||
control_page = page_address(image->control_code_page);
|
||||
|
||||
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
|
||||
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
|
||||
page_list[PA_TABLE_PAGE] =
|
||||
(unsigned long)__pa(page_address(image->control_code_page));
|
||||
/*
|
||||
* Allow for the possibility that relocate_kernel might not be at
|
||||
* the very start of the page.
|
||||
*/
|
||||
relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - reloc_start;
|
||||
|
||||
if (image->type == KEXEC_TYPE_DEFAULT)
|
||||
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
|
||||
<< PAGE_SHIFT);
|
||||
relocate_kernel_flags = 0;
|
||||
if (image->preserve_context)
|
||||
relocate_kernel_flags |= RELOC_KERNEL_PRESERVE_CONTEXT;
|
||||
|
||||
/*
|
||||
* This must be done before load_segments() since it resets
|
||||
* GS to 0 and percpu data needs the correct GS to work.
|
||||
*/
|
||||
if (this_cpu_read(cache_state_incoherent))
|
||||
relocate_kernel_flags |= RELOC_KERNEL_CACHE_INCOHERENT;
|
||||
|
||||
/*
|
||||
* The segment registers are funny things, they have both a
|
||||
@ -378,6 +420,11 @@ void machine_kexec(struct kimage *image)
|
||||
*
|
||||
* I take advantage of this here by force loading the
|
||||
* segments, before I zap the gdt with an invalid value.
|
||||
*
|
||||
* load_segments() resets GS to 0. Don't make any function call
|
||||
* after here since call depth tracking uses percpu variables to
|
||||
* operate (relocate_kernel() is explicitly ignored by call depth
|
||||
* tracking).
|
||||
*/
|
||||
load_segments();
|
||||
/*
|
||||
@ -388,11 +435,10 @@ void machine_kexec(struct kimage *image)
|
||||
native_gdt_invalidate();
|
||||
|
||||
/* now call it */
|
||||
image->start = relocate_kernel((unsigned long)image->head,
|
||||
(unsigned long)page_list,
|
||||
image->start = relocate_kernel_ptr((unsigned long)image->head,
|
||||
virt_to_phys(control_page),
|
||||
image->start,
|
||||
image->preserve_context,
|
||||
host_mem_enc_active);
|
||||
relocate_kernel_flags);
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (image->preserve_context)
|
||||
@ -573,8 +619,7 @@ static void kexec_mark_crashkres(bool protect)
|
||||
|
||||
/* Don't touch the control code page used in crash_kexec().*/
|
||||
control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page));
|
||||
/* Control code page is located in the 2nd page. */
|
||||
kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect);
|
||||
kexec_mark_range(crashk_res.start, control - 1, protect);
|
||||
control += KEXEC_CONTROL_PAGE_SIZE;
|
||||
kexec_mark_range(control, crashk_res.end, protect);
|
||||
}
|
||||
|
||||
@ -123,11 +123,6 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
|
||||
native_set_debugreg(regno, val);
|
||||
}
|
||||
|
||||
noinstr void pv_native_wbinvd(void)
|
||||
{
|
||||
native_wbinvd();
|
||||
}
|
||||
|
||||
static noinstr void pv_native_safe_halt(void)
|
||||
{
|
||||
native_safe_halt();
|
||||
@ -155,7 +150,6 @@ struct paravirt_patch_template pv_ops = {
|
||||
.cpu.read_cr0 = native_read_cr0,
|
||||
.cpu.write_cr0 = native_write_cr0,
|
||||
.cpu.write_cr4 = native_write_cr4,
|
||||
.cpu.wbinvd = pv_native_wbinvd,
|
||||
.cpu.read_msr = native_read_msr,
|
||||
.cpu.write_msr = native_write_msr,
|
||||
.cpu.read_msr_safe = native_read_msr_safe,
|
||||
|
||||
@ -87,6 +87,16 @@ EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
|
||||
DEFINE_PER_CPU(bool, __tss_limit_invalid);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
|
||||
|
||||
/*
|
||||
* The cache may be in an incoherent state and needs flushing during kexec.
|
||||
* E.g., on SME/TDX platforms, dirty cacheline aliases with and without
|
||||
* encryption bit(s) can coexist and the cache needs to be flushed before
|
||||
* booting to the new kernel to avoid the silent memory corruption due to
|
||||
* dirty cachelines with different encryption property being written back
|
||||
* to the memory.
|
||||
*/
|
||||
DEFINE_PER_CPU(bool, cache_state_incoherent);
|
||||
|
||||
/*
|
||||
* this gets called so that we can store lazy state into memory and copy the
|
||||
* current task into the new thread.
|
||||
@ -813,20 +823,8 @@ void __noreturn stop_this_cpu(void *dummy)
|
||||
disable_local_APIC();
|
||||
mcheck_cpu_clear(c);
|
||||
|
||||
/*
|
||||
* Use wbinvd on processors that support SME. This provides support
|
||||
* for performing a successful kexec when going from SME inactive
|
||||
* to SME active (or vice-versa). The cache must be cleared so that
|
||||
* if there are entries with the same physical address, both with and
|
||||
* without the encryption bit, they don't race each other when flushed
|
||||
* and potentially end up with the wrong entry being committed to
|
||||
* memory.
|
||||
*
|
||||
* Test the CPUID bit directly because the machine might've cleared
|
||||
* X86_FEATURE_SME due to cmdline options.
|
||||
*/
|
||||
if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0)))
|
||||
native_wbinvd();
|
||||
if (this_cpu_read(cache_state_incoherent))
|
||||
wbinvd();
|
||||
|
||||
/*
|
||||
* This brings a cache line back and dirties it, but
|
||||
@ -847,7 +845,7 @@ void __noreturn stop_this_cpu(void *dummy)
|
||||
/*
|
||||
* Use native_halt() so that memory contents don't change
|
||||
* (stack usage and variables) after possibly issuing the
|
||||
* native_wbinvd() above.
|
||||
* wbinvd() above.
|
||||
*/
|
||||
native_halt();
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/unwind_hints.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
/*
|
||||
* Must be relocatable PIC code callable as a C function, in particular
|
||||
@ -23,36 +24,32 @@
|
||||
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
|
||||
|
||||
/*
|
||||
* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
|
||||
* ~ control_page + PAGE_SIZE are used as data storage and stack for
|
||||
* jumping back
|
||||
* The .text..relocate_kernel and .data..relocate_kernel sections are copied
|
||||
* into the control page, and the remainder of the page is used as the stack.
|
||||
*/
|
||||
#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
|
||||
|
||||
.section .data..relocate_kernel,"a";
|
||||
/* Minimal CPU state */
|
||||
#define RSP DATA(0x0)
|
||||
#define CR0 DATA(0x8)
|
||||
#define CR3 DATA(0x10)
|
||||
#define CR4 DATA(0x18)
|
||||
|
||||
SYM_DATA_LOCAL(saved_rsp, .quad 0)
|
||||
SYM_DATA_LOCAL(saved_cr0, .quad 0)
|
||||
SYM_DATA_LOCAL(saved_cr3, .quad 0)
|
||||
SYM_DATA_LOCAL(saved_cr4, .quad 0)
|
||||
/* other data */
|
||||
#define CP_PA_TABLE_PAGE DATA(0x20)
|
||||
#define CP_PA_SWAP_PAGE DATA(0x28)
|
||||
#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
|
||||
SYM_DATA(kexec_va_control_page, .quad 0)
|
||||
SYM_DATA(kexec_pa_table_page, .quad 0)
|
||||
SYM_DATA(kexec_pa_swap_page, .quad 0)
|
||||
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
|
||||
|
||||
.text
|
||||
.align PAGE_SIZE
|
||||
.section .text..relocate_kernel,"ax";
|
||||
.code64
|
||||
SYM_CODE_START_NOALIGN(relocate_range)
|
||||
SYM_CODE_START_NOALIGN(relocate_kernel)
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
ANNOTATE_NOENDBR
|
||||
/*
|
||||
* %rdi indirection_page
|
||||
* %rsi page_list
|
||||
* %rsi pa_control_page
|
||||
* %rdx start address
|
||||
* %rcx preserve_context
|
||||
* %r8 host_mem_enc_active
|
||||
* %rcx flags: RELOC_KERNEL_*
|
||||
*/
|
||||
|
||||
/* Save the CPU context, used for jumping back */
|
||||
@ -64,60 +61,56 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
|
||||
pushq %r15
|
||||
pushf
|
||||
|
||||
movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
|
||||
movq %rsp, RSP(%r11)
|
||||
movq %cr0, %rax
|
||||
movq %rax, CR0(%r11)
|
||||
movq %cr3, %rax
|
||||
movq %rax, CR3(%r11)
|
||||
movq %cr4, %rax
|
||||
movq %rax, CR4(%r11)
|
||||
|
||||
/* Save CR4. Required to enable the right paging mode later. */
|
||||
movq %rax, %r13
|
||||
|
||||
/* zero out flags, and disable interrupts */
|
||||
pushq $0
|
||||
popfq
|
||||
|
||||
/* Save SME active flag */
|
||||
movq %r8, %r12
|
||||
|
||||
/*
|
||||
* get physical address of control page now
|
||||
* this is impossible after page table switch
|
||||
*/
|
||||
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
|
||||
|
||||
/* get physical address of page table now too */
|
||||
movq PTR(PA_TABLE_PAGE)(%rsi), %r9
|
||||
|
||||
/* get physical address of swap page now */
|
||||
movq PTR(PA_SWAP_PAGE)(%rsi), %r10
|
||||
|
||||
/* save some information for jumping back */
|
||||
movq %r9, CP_PA_TABLE_PAGE(%r11)
|
||||
movq %r10, CP_PA_SWAP_PAGE(%r11)
|
||||
movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
|
||||
|
||||
/* Switch to the identity mapped page tables */
|
||||
movq %cr3, %rax
|
||||
movq kexec_pa_table_page(%rip), %r9
|
||||
movq %r9, %cr3
|
||||
|
||||
/* Leave CR4 in %r13 to enable the right paging mode later. */
|
||||
movq %cr4, %r13
|
||||
|
||||
/* Disable global pages immediately to ensure this mapping is RWX */
|
||||
movq %r13, %r12
|
||||
andq $~(X86_CR4_PGE), %r12
|
||||
movq %r12, %cr4
|
||||
|
||||
/* Save %rsp and CRs. */
|
||||
movq %r13, saved_cr4(%rip)
|
||||
movq %rsp, saved_rsp(%rip)
|
||||
movq %rax, saved_cr3(%rip)
|
||||
movq %cr0, %rax
|
||||
movq %rax, saved_cr0(%rip)
|
||||
|
||||
/* save indirection list for jumping back */
|
||||
movq %rdi, pa_backup_pages_map(%rip)
|
||||
|
||||
/* Save the flags to %r11 as swap_pages clobbers %rcx. */
|
||||
movq %rcx, %r11
|
||||
|
||||
/* setup a new stack at the end of the physical control page */
|
||||
lea PAGE_SIZE(%r8), %rsp
|
||||
lea PAGE_SIZE(%rsi), %rsp
|
||||
|
||||
/* jump to identity mapped page */
|
||||
addq $(identity_mapped - relocate_kernel), %r8
|
||||
pushq %r8
|
||||
ANNOTATE_UNRET_SAFE
|
||||
ret
|
||||
int3
|
||||
0: addq $identity_mapped - 0b, %rsi
|
||||
subq $__relocate_kernel_start - 0b, %rsi
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
jmp *%rsi
|
||||
SYM_CODE_END(relocate_kernel)
|
||||
|
||||
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
/* set return address to 0 if not preserving context */
|
||||
pushq $0
|
||||
/*
|
||||
* %rdi indirection page
|
||||
* %rdx start address
|
||||
* %r9 page table page
|
||||
* %r11 flags: RELOC_KERNEL_*
|
||||
* %r13 original CR4 when relocate_kernel() was invoked
|
||||
*/
|
||||
|
||||
/* store the start address on the stack */
|
||||
pushq %rdx
|
||||
|
||||
@ -161,17 +154,22 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
movq %r9, %cr3
|
||||
|
||||
/*
|
||||
* If the memory cache is in incoherent state, e.g., due to
|
||||
* memory encryption, do WBINVD to flush cache.
|
||||
*
|
||||
* If SME is active, there could be old encrypted cache line
|
||||
* entries that will conflict with the now unencrypted memory
|
||||
* used by kexec. Flush the caches before copying the kernel.
|
||||
*
|
||||
* Note SME sets this flag to true when the platform supports
|
||||
* SME, so the WBINVD is performed even if SME is not activated
|
||||
* by the kernel, but this is harmless.
|
||||
*/
|
||||
testq %r12, %r12
|
||||
jz .Lsme_off
|
||||
testb $RELOC_KERNEL_CACHE_INCOHERENT, %r11b
|
||||
jz .Lnowbinvd
|
||||
wbinvd
|
||||
.Lsme_off:
|
||||
.Lnowbinvd:
|
||||
|
||||
/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
|
||||
movq %rcx, %r11
|
||||
call swap_pages
|
||||
|
||||
/*
|
||||
@ -183,13 +181,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
movq %cr3, %rax
|
||||
movq %rax, %cr3
|
||||
|
||||
testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b
|
||||
jnz .Lrelocate
|
||||
|
||||
/*
|
||||
* set all of the registers to known values
|
||||
* leave %rsp alone
|
||||
*/
|
||||
|
||||
testq %r11, %r11
|
||||
jnz .Lrelocate
|
||||
xorl %eax, %eax
|
||||
xorl %ebx, %ebx
|
||||
xorl %ecx, %ecx
|
||||
@ -212,20 +211,40 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
|
||||
|
||||
.Lrelocate:
|
||||
popq %rdx
|
||||
|
||||
/* Use the swap page for the callee's stack */
|
||||
movq kexec_pa_swap_page(%rip), %r10
|
||||
leaq PAGE_SIZE(%r10), %rsp
|
||||
|
||||
/* push the existing entry point onto the callee's stack */
|
||||
pushq %rdx
|
||||
|
||||
ANNOTATE_RETPOLINE_SAFE
|
||||
call *%rdx
|
||||
|
||||
/* get the re-entry point of the peer system */
|
||||
movq 0(%rsp), %rbp
|
||||
leaq relocate_kernel(%rip), %r8
|
||||
movq CP_PA_SWAP_PAGE(%r8), %r10
|
||||
movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
|
||||
movq CP_PA_TABLE_PAGE(%r8), %rax
|
||||
popq %rbp
|
||||
movq kexec_pa_swap_page(%rip), %r10
|
||||
movq pa_backup_pages_map(%rip), %rdi
|
||||
movq kexec_pa_table_page(%rip), %rax
|
||||
movq %rax, %cr3
|
||||
|
||||
/* Find start (and end) of this physical mapping of control page */
|
||||
leaq (%rip), %r8
|
||||
ANNOTATE_NOENDBR
|
||||
andq $PAGE_MASK, %r8
|
||||
lea PAGE_SIZE(%r8), %rsp
|
||||
/*
|
||||
* Ensure RELOC_KERNEL_PRESERVE_CONTEXT flag is set so that
|
||||
* swap_pages() can swap pages correctly. Note all other
|
||||
* RELOC_KERNEL_* flags passed to relocate_kernel() are not
|
||||
* restored.
|
||||
*/
|
||||
movl $RELOC_KERNEL_PRESERVE_CONTEXT, %r11d
|
||||
call swap_pages
|
||||
movq $virtual_mapped, %rax
|
||||
movq kexec_va_control_page(%rip), %rax
|
||||
0: addq $virtual_mapped - 0b, %rax
|
||||
subq $__relocate_kernel_start - 0b, %rax
|
||||
pushq %rax
|
||||
ANNOTATE_UNRET_SAFE
|
||||
ret
|
||||
@ -235,13 +254,21 @@ SYM_CODE_END(identity_mapped)
|
||||
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
ANNOTATE_NOENDBR // RET target, above
|
||||
movq RSP(%r8), %rsp
|
||||
movq CR4(%r8), %rax
|
||||
movq saved_rsp(%rip), %rsp
|
||||
movq saved_cr4(%rip), %rax
|
||||
movq %rax, %cr4
|
||||
movq CR3(%r8), %rax
|
||||
movq CR0(%r8), %r8
|
||||
movq saved_cr3(%rip), %rax
|
||||
movq saved_cr0(%rip), %r8
|
||||
movq %rax, %cr3
|
||||
movq %r8, %cr0
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
/* Saved in save_processor_state. */
|
||||
movq $saved_context, %rax
|
||||
lgdt saved_context_gdt_desc(%rax)
|
||||
#endif
|
||||
|
||||
/* relocate_kernel() returns the re-entry point for next time */
|
||||
movq %rbp, %rax
|
||||
|
||||
popf
|
||||
@ -259,42 +286,50 @@ SYM_CODE_END(virtual_mapped)
|
||||
/* Do the copies */
|
||||
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
/*
|
||||
* %rdi indirection page
|
||||
* %r11 flags: RELOC_KERNEL_*
|
||||
*/
|
||||
movq %rdi, %rcx /* Put the indirection_page in %rcx */
|
||||
xorl %edi, %edi
|
||||
xorl %esi, %esi
|
||||
jmp 1f
|
||||
jmp .Lstart /* Should start with an indirection record */
|
||||
|
||||
0: /* top, read another word for the indirection page */
|
||||
.Lloop: /* top, read another word for the indirection page */
|
||||
|
||||
movq (%rbx), %rcx
|
||||
addq $8, %rbx
|
||||
1:
|
||||
.Lstart:
|
||||
testb $0x1, %cl /* is it a destination page? */
|
||||
jz 2f
|
||||
jz .Lnotdest
|
||||
movq %rcx, %rdi
|
||||
andq $0xfffffffffffff000, %rdi
|
||||
jmp 0b
|
||||
2:
|
||||
jmp .Lloop
|
||||
.Lnotdest:
|
||||
testb $0x2, %cl /* is it an indirection page? */
|
||||
jz 2f
|
||||
jz .Lnotind
|
||||
movq %rcx, %rbx
|
||||
andq $0xfffffffffffff000, %rbx
|
||||
jmp 0b
|
||||
2:
|
||||
jmp .Lloop
|
||||
.Lnotind:
|
||||
testb $0x4, %cl /* is it the done indicator? */
|
||||
jz 2f
|
||||
jmp 3f
|
||||
2:
|
||||
jz .Lnotdone
|
||||
jmp .Ldone
|
||||
.Lnotdone:
|
||||
testb $0x8, %cl /* is it the source indicator? */
|
||||
jz 0b /* Ignore it otherwise */
|
||||
jz .Lloop /* Ignore it otherwise */
|
||||
movq %rcx, %rsi /* For every source page do a copy */
|
||||
andq $0xfffffffffffff000, %rsi
|
||||
|
||||
movq %rdi, %rdx /* Save destination page to %rdx */
|
||||
movq %rsi, %rax /* Save source page to %rax */
|
||||
|
||||
/* Only actually swap for ::preserve_context */
|
||||
testb $RELOC_KERNEL_PRESERVE_CONTEXT, %r11b
|
||||
jz .Lnoswap
|
||||
|
||||
/* copy source page to swap page */
|
||||
movq %r10, %rdi
|
||||
movq kexec_pa_swap_page(%rip), %rdi
|
||||
movl $512, %ecx
|
||||
rep ; movsq
|
||||
|
||||
@ -306,17 +341,15 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
|
||||
|
||||
/* copy swap page to destination page */
|
||||
movq %rdx, %rdi
|
||||
movq %r10, %rsi
|
||||
movq kexec_pa_swap_page(%rip), %rsi
|
||||
.Lnoswap:
|
||||
movl $512, %ecx
|
||||
rep ; movsq
|
||||
|
||||
lea PAGE_SIZE(%rax), %rsi
|
||||
jmp 0b
|
||||
3:
|
||||
jmp .Lloop
|
||||
.Ldone:
|
||||
ANNOTATE_UNRET_SAFE
|
||||
ret
|
||||
int3
|
||||
SYM_CODE_END(swap_pages)
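For readers following the new .L-labelled flow above, the indirection-list walk that swap_pages performs can be sketched in C roughly as below. This is not part of the patch: the helper name is invented, the preserve_context three-way swap through the swap page is omitted, and the tag values are simply the bits the assembly tests (0x1 destination, 0x2 indirection, 0x4 done, 0x8 source).

/* Minimal sketch of the walk, assuming the list starts with an indirection record. */
static void walk_indirection_list(unsigned long entry)
{
	unsigned long *ind = NULL;
	void *dest = NULL;

	for (;;) {
		if (entry & 0x1) {			/* destination page */
			dest = (void *)(entry & PAGE_MASK);
		} else if (entry & 0x2) {		/* next indirection page */
			ind = (unsigned long *)(entry & PAGE_MASK);
		} else if (entry & 0x4) {		/* done indicator */
			break;
		} else if (entry & 0x8) {		/* source page: copy it */
			memcpy(dest, (void *)(entry & PAGE_MASK), PAGE_SIZE);
			dest += PAGE_SIZE;		/* rep movsq leaves the destination past the page */
		}
		entry = *ind++;				/* read the next word */
	}
}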
|
||||
|
||||
.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
|
||||
SYM_CODE_END(relocate_range);
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include <asm/orc_lookup.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/boot.h>
|
||||
#include <asm/kexec.h>
|
||||
|
||||
#undef i386 /* in case the preprocessor is a 32bit one */
|
||||
|
||||
@ -95,7 +96,19 @@ const_pcpu_hot = pcpu_hot;
|
||||
#define BSS_DECRYPTED
|
||||
|
||||
#endif
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_KEXEC_CORE)
|
||||
#define KEXEC_RELOCATE_KERNEL \
|
||||
. = ALIGN(0x100); \
|
||||
__relocate_kernel_start = .; \
|
||||
*(.text..relocate_kernel); \
|
||||
*(.data..relocate_kernel); \
|
||||
__relocate_kernel_end = .;
|
||||
|
||||
ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX_SIZE,
|
||||
"relocate_kernel code too large!")
|
||||
#else
|
||||
#define KEXEC_RELOCATE_KERNEL
|
||||
#endif
|
||||
PHDRS {
|
||||
text PT_LOAD FLAGS(5); /* R_E */
|
||||
data PT_LOAD FLAGS(6); /* RW_ */
|
||||
@ -181,6 +194,7 @@ SECTIONS
|
||||
|
||||
DATA_DATA
|
||||
CONSTRUCTORS
|
||||
KEXEC_RELOCATE_KERNEL
|
||||
|
||||
/* rarely changed data like cpu maps */
|
||||
READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
|
||||
|
||||
@ -442,6 +442,16 @@ void tdx_disable_virtualization_cpu(void)
|
||||
tdx_flush_vp(&arg);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
||||
/*
|
||||
* Flush cache now if kexec is possible: this is necessary to avoid
|
||||
* having dirty private memory cachelines when the new kernel boots,
|
||||
* but WBINVD is a relatively expensive operation and doing it during
|
||||
* kexec can exacerbate races in native_stop_other_cpus(). Do it
|
||||
* now, since this is a safe moment and there is going to be no more
|
||||
* TDX activity on this CPU from this point on.
|
||||
*/
|
||||
tdx_cpu_flush_cache_for_kexec();
|
||||
}
|
||||
|
||||
#define TDX_SEAMCALL_RETRIES 10000
|
||||
|
||||
@ -11120,6 +11120,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
||||
if (vcpu->arch.guest_fpu.xfd_err)
|
||||
wrmsrl(MSR_IA32_XFD_ERR, 0);
|
||||
|
||||
/*
|
||||
* Mark this CPU as needing a branch predictor flush before running
|
||||
* userspace. Must be done before enabling preemption to ensure it gets
|
||||
* set for the CPU that actually ran the guest, and not the CPU that it
|
||||
* may migrate to.
|
||||
*/
|
||||
if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER))
|
||||
this_cpu_write(x86_ibpb_exit_to_user, true);
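The check-and-clear side of this flag is not in this hunk; conceptually the exit-to-userspace path consumes it once per exit, along the lines of the sketch below (hook placement and the helper name are illustrative assumptions, not taken from the patch).

/* Illustrative consumer of the per-CPU flag set above (assumed shape). */
static __always_inline void vmscape_ibpb_before_exit_to_user(void)
{
	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
	    this_cpu_read(x86_ibpb_exit_to_user)) {
		indirect_branch_prediction_barrier();
		this_cpu_write(x86_ibpb_exit_to_user, false);
	}
}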
|
||||
|
||||
/*
|
||||
* Consume any pending interrupts, including the possible source of
|
||||
* VM-Exit on SVM and any ticks that occur between VM-Exit and now.
|
||||
|
||||
@ -1266,7 +1266,7 @@ static bool paddr_is_tdx_private(unsigned long phys)
|
||||
return false;
|
||||
|
||||
/* Get page type from the TDX module */
|
||||
sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args);
|
||||
sret = __seamcall_dirty_cache(__seamcall_ret, TDH_PHYMEM_PAGE_RDMD, &args);
|
||||
|
||||
/*
|
||||
* The SEAMCALL will not return success unless there is a
|
||||
@ -1522,7 +1522,7 @@ noinstr __flatten u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *ar
|
||||
{
|
||||
args->rcx = tdx_tdvpr_pa(td);
|
||||
|
||||
return __seamcall_saved_ret(TDH_VP_ENTER, args);
|
||||
return __seamcall_dirty_cache(__seamcall_saved_ret, TDH_VP_ENTER, args);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tdh_vp_enter);
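The __seamcall_dirty_cache() wrapper used in the two hunks above is defined elsewhere in the series. A plausible shape, shown only so the cache_state_incoherent handling in tdx_cpu_flush_cache_for_kexec() below is easier to follow (this is an assumption, not the wrapper's actual definition):

/* Assumed sketch -- not the real definition of the wrapper. */
#define __seamcall_dirty_cache(seamcall_func, fn, args)			\
({									\
	/* SEAMCALLs can leave TDX-private cachelines dirty */		\
	this_cpu_write(cache_state_incoherent, true);			\
	seamcall_func((fn), (args));					\
})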
|
||||
|
||||
@ -1870,3 +1870,22 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
|
||||
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
|
||||
|
||||
#ifdef CONFIG_KEXEC_CORE
|
||||
void tdx_cpu_flush_cache_for_kexec(void)
|
||||
{
|
||||
lockdep_assert_preemption_disabled();
|
||||
|
||||
if (!this_cpu_read(cache_state_incoherent))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Private memory cachelines need to be clean at the time of
|
||||
* kexec. Write them back now, as the caller promises that
|
||||
* there should be no more SEAMCALLs on this CPU.
|
||||
*/
|
||||
wbinvd();
|
||||
this_cpu_write(cache_state_incoherent, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tdx_cpu_flush_cache_for_kexec);
|
||||
#endif
|
||||
|
||||
@ -1162,8 +1162,6 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = {
|
||||
|
||||
.write_cr4 = xen_write_cr4,
|
||||
|
||||
.wbinvd = pv_native_wbinvd,
|
||||
|
||||
.read_msr = xen_read_msr,
|
||||
.write_msr = xen_write_msr,
|
||||
|
||||
|
||||
@ -500,9 +500,6 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y
|
||||
CONFIG_PPC_UV=y
|
||||
# CONFIG_LD_HEAD_STUB_CATCH is not set
|
||||
CONFIG_MPROFILE_KERNEL=y
|
||||
CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY=y
|
||||
CONFIG_PPC_FTRACE_OUT_OF_LINE=y
|
||||
CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE=32768
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_INTERRUPT_SANITIZE_REGISTERS=y
|
||||
CONFIG_PPC_QUEUED_SPINLOCKS=y
|
||||
@ -725,7 +722,6 @@ CONFIG_FUNCTION_ALIGNMENT_4B=y
|
||||
CONFIG_FUNCTION_ALIGNMENT=4
|
||||
CONFIG_CC_HAS_MIN_FUNCTION_ALIGNMENT=y
|
||||
CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT=y
|
||||
CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX=y
|
||||
# end of General architecture-dependent options
|
||||
|
||||
CONFIG_RT_MUTEXES=y
|
||||
@ -5022,7 +5018,6 @@ CONFIG_HID_KUNIT_TEST=m
|
||||
#
|
||||
# HID-BPF support
|
||||
#
|
||||
CONFIG_HID_BPF=y
|
||||
# end of HID-BPF support
|
||||
|
||||
CONFIG_I2C_HID=y
|
||||
@ -7125,8 +7120,6 @@ CONFIG_HAVE_FUNCTION_TRACER=y
|
||||
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
|
||||
CONFIG_HAVE_DYNAMIC_FTRACE=y
|
||||
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
|
||||
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
|
||||
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS=y
|
||||
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y
|
||||
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
|
||||
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
|
||||
@ -7147,8 +7140,6 @@ CONFIG_FUNCTION_TRACER=y
|
||||
CONFIG_FUNCTION_GRAPH_TRACER=y
|
||||
CONFIG_DYNAMIC_FTRACE=y
|
||||
CONFIG_DYNAMIC_FTRACE_WITH_REGS=y
|
||||
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
|
||||
CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS=y
|
||||
CONFIG_DYNAMIC_FTRACE_WITH_ARGS=y
|
||||
CONFIG_FPROBE=y
|
||||
CONFIG_FUNCTION_PROFILER=y
|
||||
@ -7173,7 +7164,7 @@ CONFIG_BPF_EVENTS=y
|
||||
CONFIG_DYNAMIC_EVENTS=y
|
||||
CONFIG_PROBE_EVENTS=y
|
||||
CONFIG_FTRACE_MCOUNT_RECORD=y
|
||||
CONFIG_FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY=y
|
||||
CONFIG_FTRACE_MCOUNT_USE_CC=y
|
||||
CONFIG_TRACING_MAP=y
|
||||
CONFIG_SYNTH_EVENTS=y
|
||||
# CONFIG_USER_EVENTS is not set
|
||||
@ -7199,8 +7190,6 @@ CONFIG_RV_REACTORS=y
|
||||
CONFIG_RV_REACT_PRINTK=y
|
||||
CONFIG_RV_REACT_PANIC=y
|
||||
# CONFIG_SAMPLES is not set
|
||||
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y
|
||||
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y
|
||||
CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
|
||||
CONFIG_STRICT_DEVMEM=y
|
||||
# CONFIG_IO_STRICT_DEVMEM is not set
|
||||
|
||||
@ -507,6 +507,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -586,6 +587,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -808,6 +810,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1172,6 +1175,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -506,6 +506,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -585,6 +586,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -807,6 +809,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1168,6 +1171,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -503,6 +503,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -582,6 +583,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -802,6 +804,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1164,6 +1167,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -504,6 +504,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -583,6 +584,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -803,6 +805,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1168,6 +1171,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -507,6 +507,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -586,6 +587,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -808,6 +810,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1172,6 +1175,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -506,6 +506,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -585,6 +586,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -807,6 +809,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1168,6 +1171,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -503,6 +503,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -582,6 +583,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -802,6 +804,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1164,6 +1167,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -504,6 +504,7 @@ CONFIG_X86_INTEL_TSX_MODE_OFF=y
|
||||
# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
|
||||
CONFIG_X86_SGX=y
|
||||
CONFIG_X86_USER_SHADOW_STACK=y
|
||||
CONFIG_INTEL_TDX_HOST=y
|
||||
CONFIG_EFI=y
|
||||
CONFIG_EFI_STUB=y
|
||||
CONFIG_EFI_HANDOVER_PROTOCOL=y
|
||||
@ -583,6 +584,7 @@ CONFIG_MITIGATION_SRBDS=y
|
||||
CONFIG_MITIGATION_SSB=y
|
||||
CONFIG_MITIGATION_ITS=y
|
||||
CONFIG_MITIGATION_TSA=y
|
||||
CONFIG_MITIGATION_VMSCAPE=y
|
||||
CONFIG_ARCH_HAS_ADD_PAGES=y
|
||||
|
||||
#
|
||||
@ -803,6 +805,7 @@ CONFIG_KVM_SW_PROTECTED_VM=y
|
||||
CONFIG_KVM_INTEL=m
|
||||
# CONFIG_KVM_INTEL_PROVE_VE is not set
|
||||
CONFIG_X86_SGX_KVM=y
|
||||
CONFIG_KVM_INTEL_TDX=y
|
||||
CONFIG_KVM_AMD=m
|
||||
CONFIG_KVM_AMD_SEV=y
|
||||
CONFIG_KVM_SMM=y
|
||||
@ -1168,6 +1171,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=y
|
||||
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
|
||||
CONFIG_HAVE_GUP_FAST=y
|
||||
CONFIG_ARCH_KEEP_MEMBLOCK=y
|
||||
CONFIG_NUMA_KEEP_MEMINFO=y
|
||||
CONFIG_MEMORY_ISOLATION=y
|
||||
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
|
||||
|
||||
@ -601,6 +601,7 @@ CPU_SHOW_VULN_FALLBACK(gds);
|
||||
CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling);
|
||||
CPU_SHOW_VULN_FALLBACK(indirect_target_selection);
|
||||
CPU_SHOW_VULN_FALLBACK(tsa);
|
||||
CPU_SHOW_VULN_FALLBACK(vmscape);
|
||||
|
||||
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
|
||||
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
|
||||
@ -618,6 +619,7 @@ static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
|
||||
static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL);
|
||||
static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL);
|
||||
static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL);
|
||||
static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL);
|
||||
|
||||
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
|
||||
&dev_attr_meltdown.attr,
|
||||
@ -636,6 +638,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
|
||||
&dev_attr_reg_file_data_sampling.attr,
|
||||
&dev_attr_indirect_target_selection.attr,
|
||||
&dev_attr_tsa.attr,
|
||||
&dev_attr_vmscape.attr,
|
||||
NULL
|
||||
};
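With the attribute wired into cpu_root_vulnerabilities_attrs above, the mitigation state becomes readable from userspace through the standard CPU vulnerabilities directory. A minimal illustrative reader (error handling kept to the bare minimum; the quoted output is only an example):

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/vmscape", "r");

	if (!f)
		return 1;	/* attribute absent on older kernels */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "Mitigation: IBPB before exit to userspace" */
	fclose(f);
	return 0;
}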
|
||||
|
||||
|
||||
@ -772,7 +772,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
|
||||
req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
|
||||
req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
|
||||
ibqp->qp_num, attr->dest_qp_num);
|
||||
req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class;
|
||||
req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2;
|
||||
req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
|
||||
}
|
||||
|
||||
|
||||
@ -39,8 +39,6 @@
|
||||
#include "ibmveth.h"
|
||||
|
||||
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
|
||||
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
|
||||
bool reuse);
|
||||
static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
|
||||
|
||||
static struct kobj_type ktype_veth_pool;
|
||||
@ -213,94 +211,169 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
|
||||
static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
|
||||
struct ibmveth_buff_pool *pool)
|
||||
{
|
||||
u32 i;
|
||||
u32 count = pool->size - atomic_read(&pool->available);
|
||||
u32 buffers_added = 0;
|
||||
struct sk_buff *skb;
|
||||
unsigned int free_index, index;
|
||||
u64 correlator;
|
||||
union ibmveth_buf_desc descs[IBMVETH_MAX_RX_PER_HCALL] = {0};
|
||||
u32 remaining = pool->size - atomic_read(&pool->available);
|
||||
u64 correlators[IBMVETH_MAX_RX_PER_HCALL] = {0};
|
||||
unsigned long lpar_rc;
|
||||
u32 buffers_added = 0;
|
||||
u32 i, filled, batch;
|
||||
struct vio_dev *vdev;
|
||||
dma_addr_t dma_addr;
|
||||
struct device *dev;
|
||||
u32 index;
|
||||
|
||||
vdev = adapter->vdev;
|
||||
dev = &vdev->dev;
|
||||
|
||||
mb();
|
||||
|
||||
for (i = 0; i < count; ++i) {
|
||||
union ibmveth_buf_desc desc;
|
||||
batch = adapter->rx_buffers_per_hcall;
|
||||
|
||||
free_index = pool->consumer_index;
|
||||
while (remaining > 0) {
|
||||
unsigned int free_index = pool->consumer_index;
|
||||
|
||||
/* Fill a batch of descriptors */
|
||||
for (filled = 0; filled < min(remaining, batch); filled++) {
|
||||
index = pool->free_map[free_index];
|
||||
skb = NULL;
|
||||
|
||||
BUG_ON(index == IBM_VETH_INVALID_MAP);
|
||||
|
||||
/* are we allocating a new buffer or recycling an old one */
|
||||
if (pool->skbuff[index])
|
||||
goto reuse;
|
||||
|
||||
skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
|
||||
|
||||
if (!skb) {
|
||||
netdev_dbg(adapter->netdev,
|
||||
"replenish: unable to allocate skb\n");
|
||||
adapter->replenish_no_mem++;
|
||||
if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
|
||||
adapter->replenish_add_buff_failure++;
|
||||
netdev_info(adapter->netdev,
|
||||
"Invalid map index %u, reset\n",
|
||||
index);
|
||||
schedule_work(&adapter->work);
|
||||
break;
|
||||
}
|
||||
|
||||
dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
|
||||
pool->buff_size, DMA_FROM_DEVICE);
|
||||
if (!pool->skbuff[index]) {
|
||||
struct sk_buff *skb = NULL;
|
||||
|
||||
if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
|
||||
goto failure;
|
||||
skb = netdev_alloc_skb(adapter->netdev,
|
||||
pool->buff_size);
|
||||
if (!skb) {
|
||||
adapter->replenish_no_mem++;
|
||||
adapter->replenish_add_buff_failure++;
|
||||
break;
|
||||
}
|
||||
|
||||
dma_addr = dma_map_single(dev, skb->data,
|
||||
pool->buff_size,
|
||||
DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(dev, dma_addr)) {
|
||||
dev_kfree_skb_any(skb);
|
||||
adapter->replenish_add_buff_failure++;
|
||||
break;
|
||||
}
|
||||
|
||||
pool->dma_addr[index] = dma_addr;
|
||||
pool->skbuff[index] = skb;
|
||||
} else {
|
||||
/* re-use case */
|
||||
dma_addr = pool->dma_addr[index];
|
||||
}
|
||||
|
||||
if (rx_flush) {
|
||||
unsigned int len = min(pool->buff_size,
|
||||
adapter->netdev->mtu +
|
||||
IBMVETH_BUFF_OH);
|
||||
ibmveth_flush_buffer(skb->data, len);
|
||||
unsigned int len;
|
||||
|
||||
len = adapter->netdev->mtu + IBMVETH_BUFF_OH;
|
||||
len = min(pool->buff_size, len);
|
||||
ibmveth_flush_buffer(pool->skbuff[index]->data,
|
||||
len);
|
||||
}
|
||||
reuse:
|
||||
dma_addr = pool->dma_addr[index];
|
||||
desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
|
||||
desc.fields.address = dma_addr;
|
||||
|
||||
correlator = ((u64)pool->index << 32) | index;
|
||||
*(u64 *)pool->skbuff[index]->data = correlator;
|
||||
descs[filled].fields.flags_len = IBMVETH_BUF_VALID |
|
||||
pool->buff_size;
|
||||
descs[filled].fields.address = dma_addr;
|
||||
|
||||
lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
|
||||
desc.desc);
|
||||
correlators[filled] = ((u64)pool->index << 32) | index;
|
||||
*(u64 *)pool->skbuff[index]->data = correlators[filled];
|
||||
|
||||
free_index++;
|
||||
if (free_index >= pool->size)
|
||||
free_index = 0;
|
||||
}
|
||||
|
||||
if (!filled)
|
||||
break;
|
||||
|
||||
/* single buffer case */
|
||||
if (filled == 1)
|
||||
lpar_rc = h_add_logical_lan_buffer(vdev->unit_address,
|
||||
descs[0].desc);
|
||||
else
|
||||
/* Multi-buffer hcall */
|
||||
lpar_rc = h_add_logical_lan_buffers(vdev->unit_address,
|
||||
descs[0].desc,
|
||||
descs[1].desc,
|
||||
descs[2].desc,
|
||||
descs[3].desc,
|
||||
descs[4].desc,
|
||||
descs[5].desc,
|
||||
descs[6].desc,
|
||||
descs[7].desc);
|
||||
if (lpar_rc != H_SUCCESS) {
|
||||
netdev_warn(adapter->netdev,
|
||||
"%sadd_logical_lan failed %lu\n",
|
||||
skb ? "" : "When recycling: ", lpar_rc);
|
||||
goto failure;
|
||||
dev_warn_ratelimited(dev,
|
||||
"RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n",
|
||||
filled, lpar_rc, batch);
|
||||
goto hcall_failure;
|
||||
}
|
||||
|
||||
/* Only update pool state after hcall succeeds */
|
||||
for (i = 0; i < filled; i++) {
|
||||
free_index = pool->consumer_index;
|
||||
pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
|
||||
|
||||
pool->consumer_index++;
|
||||
if (pool->consumer_index >= pool->size)
|
||||
pool->consumer_index = 0;
|
||||
|
||||
buffers_added++;
|
||||
adapter->replenish_add_buff_success++;
|
||||
}
|
||||
|
||||
mb();
|
||||
atomic_add(buffers_added, &(pool->available));
|
||||
return;
|
||||
buffers_added += filled;
|
||||
adapter->replenish_add_buff_success += filled;
|
||||
remaining -= filled;
|
||||
|
||||
failure:
|
||||
memset(&descs, 0, sizeof(descs));
|
||||
memset(&correlators, 0, sizeof(correlators));
|
||||
continue;
|
||||
|
||||
if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
|
||||
dma_unmap_single(&adapter->vdev->dev,
|
||||
pool->dma_addr[index], pool->buff_size,
|
||||
hcall_failure:
|
||||
for (i = 0; i < filled; i++) {
|
||||
index = correlators[i] & 0xffffffffUL;
|
||||
dma_addr = pool->dma_addr[index];
|
||||
|
||||
if (pool->skbuff[index]) {
|
||||
if (dma_addr &&
|
||||
!dma_mapping_error(dev, dma_addr))
|
||||
dma_unmap_single(dev, dma_addr,
|
||||
pool->buff_size,
|
||||
DMA_FROM_DEVICE);
|
||||
|
||||
dev_kfree_skb_any(pool->skbuff[index]);
|
||||
pool->skbuff[index] = NULL;
|
||||
adapter->replenish_add_buff_failure++;
|
||||
}
|
||||
}
|
||||
adapter->replenish_add_buff_failure += filled;
|
||||
|
||||
/*
|
||||
* If the multi rx buffers hcall is no longer supported by FW,
|
||||
* e.g. in the case of Live Partition Migration
|
||||
*/
|
||||
if (batch > 1 && lpar_rc == H_FUNCTION) {
|
||||
/*
|
||||
* Instead of retrying each buffer individually with the
* single-buffer hcall here, just set the max rx buffers per
* hcall to 1; the buffers will be replenished the next time
* ibmveth_replenish_buffer_pool() is called, via the
* single-buffer path.
|
||||
*/
|
||||
netdev_info(adapter->netdev,
|
||||
"RX Multi buffers not supported by FW, rc=%lu\n",
|
||||
lpar_rc);
|
||||
adapter->rx_buffers_per_hcall = 1;
|
||||
netdev_info(adapter->netdev,
|
||||
"Next rx replesh will fall back to single-buffer hcall\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
mb();
|
||||
atomic_add(buffers_added, &(pool->available));
|
||||
@ -370,8 +443,18 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
|
||||
}
|
||||
}
|
||||
|
||||
/* remove a buffer from a pool */
|
||||
static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
|
||||
/**
|
||||
* ibmveth_remove_buffer_from_pool - remove a buffer from a pool
|
||||
* @adapter: adapter instance
|
||||
* @correlator: identifies pool and index
|
||||
* @reuse: whether to reuse buffer
|
||||
*
|
||||
* Return:
|
||||
* * %0 - success
|
||||
* * %-EINVAL - correlator maps to a pool or index that is out of range
|
||||
* * %-EFAULT - pool and index map to null skb
|
||||
*/
|
||||
static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
|
||||
u64 correlator, bool reuse)
|
||||
{
|
||||
unsigned int pool = correlator >> 32;
|
||||
@ -379,11 +462,17 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
|
||||
unsigned int free_index;
|
||||
struct sk_buff *skb;
|
||||
|
||||
BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
|
||||
BUG_ON(index >= adapter->rx_buff_pool[pool].size);
|
||||
if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
|
||||
WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
|
||||
schedule_work(&adapter->work);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
skb = adapter->rx_buff_pool[pool].skbuff[index];
|
||||
BUG_ON(skb == NULL);
|
||||
if (WARN_ON(!skb)) {
|
||||
schedule_work(&adapter->work);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/* if we are going to reuse the buffer then keep the pointers around
|
||||
* but mark index as available. replenish will see the skb pointer and
|
||||
@ -411,6 +500,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
|
||||
mb();
|
||||
|
||||
atomic_dec(&(adapter->rx_buff_pool[pool].available));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the current buffer on the rx queue */
|
||||
@ -420,24 +511,44 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada
|
||||
unsigned int pool = correlator >> 32;
|
||||
unsigned int index = correlator & 0xffffffffUL;
|
||||
|
||||
BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
|
||||
BUG_ON(index >= adapter->rx_buff_pool[pool].size);
|
||||
if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) ||
|
||||
WARN_ON(index >= adapter->rx_buff_pool[pool].size)) {
|
||||
schedule_work(&adapter->work);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return adapter->rx_buff_pool[pool].skbuff[index];
|
||||
}
|
||||
|
||||
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
|
||||
/**
|
||||
* ibmveth_rxq_harvest_buffer - Harvest buffer from pool
|
||||
*
|
||||
* @adapter: pointer to adapter
|
||||
* @reuse: whether to reuse buffer
|
||||
*
|
||||
* Context: called from ibmveth_poll
|
||||
*
|
||||
* Return:
|
||||
* * %0 - success
|
||||
* * other - non-zero return from ibmveth_remove_buffer_from_pool
|
||||
*/
|
||||
static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
|
||||
bool reuse)
|
||||
{
|
||||
u64 cor;
|
||||
int rc;
|
||||
|
||||
cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
|
||||
ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
|
||||
rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
|
||||
if (unlikely(rc))
|
||||
return rc;
|
||||
|
||||
if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
|
||||
adapter->rx_queue.index = 0;
|
||||
adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
|
||||
@ -709,6 +820,35 @@ static int ibmveth_close(struct net_device *netdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ibmveth_reset - Handle scheduled reset work
|
||||
*
|
||||
* @w: pointer to work_struct embedded in adapter structure
|
||||
*
|
||||
* Context: This routine acquires rtnl_mutex and disables its NAPI through
|
||||
* ibmveth_close. It can't be called directly in a context that has
|
||||
* already acquired rtnl_mutex or disabled its NAPI, or directly from
|
||||
* a poll routine.
|
||||
*
|
||||
* Return: void
|
||||
*/
|
||||
static void ibmveth_reset(struct work_struct *w)
|
||||
{
|
||||
struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work);
|
||||
struct net_device *netdev = adapter->netdev;
|
||||
|
||||
netdev_dbg(netdev, "reset starting\n");
|
||||
|
||||
rtnl_lock();
|
||||
|
||||
dev_close(adapter->netdev);
|
||||
dev_open(adapter->netdev, NULL);
|
||||
|
||||
rtnl_unlock();
|
||||
|
||||
netdev_dbg(netdev, "reset complete\n");
|
||||
}
|
||||
|
||||
static int ibmveth_set_link_ksettings(struct net_device *dev,
|
||||
const struct ethtool_link_ksettings *cmd)
|
||||
{
|
||||
@ -1324,7 +1464,8 @@ restart_poll:
|
||||
wmb(); /* suggested by larson1 */
|
||||
adapter->rx_invalid_buffer++;
|
||||
netdev_dbg(netdev, "recycling invalid buffer\n");
|
||||
ibmveth_rxq_harvest_buffer(adapter, true);
|
||||
if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
|
||||
break;
|
||||
} else {
|
||||
struct sk_buff *skb, *new_skb;
|
||||
int length = ibmveth_rxq_frame_length(adapter);
|
||||
@ -1334,6 +1475,8 @@ restart_poll:
|
||||
__sum16 iph_check = 0;
|
||||
|
||||
skb = ibmveth_rxq_get_buffer(adapter);
|
||||
if (unlikely(!skb))
|
||||
break;
|
||||
|
||||
/* if the large packet bit is set in the rx queue
|
||||
* descriptor, the mss will be written by PHYP eight
|
||||
@ -1357,10 +1500,12 @@ restart_poll:
|
||||
if (rx_flush)
|
||||
ibmveth_flush_buffer(skb->data,
|
||||
length + offset);
|
||||
ibmveth_rxq_harvest_buffer(adapter, true);
|
||||
if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true)))
|
||||
break;
|
||||
skb = new_skb;
|
||||
} else {
|
||||
ibmveth_rxq_harvest_buffer(adapter, false);
|
||||
if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false)))
|
||||
break;
|
||||
skb_reserve(skb, offset);
|
||||
}
|
||||
|
||||
@ -1407,7 +1552,10 @@ restart_poll:
|
||||
* then check once more to make sure we are done.
|
||||
*/
|
||||
lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
|
||||
BUG_ON(lpar_rc != H_SUCCESS);
|
||||
if (WARN_ON(lpar_rc != H_SUCCESS)) {
|
||||
schedule_work(&adapter->work);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) {
|
||||
lpar_rc = h_vio_signal(adapter->vdev->unit_address,
|
||||
@ -1428,7 +1576,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
|
||||
if (napi_schedule_prep(&adapter->napi)) {
|
||||
lpar_rc = h_vio_signal(adapter->vdev->unit_address,
|
||||
VIO_IRQ_DISABLE);
|
||||
BUG_ON(lpar_rc != H_SUCCESS);
|
||||
WARN_ON(lpar_rc != H_SUCCESS);
|
||||
__napi_schedule(&adapter->napi);
|
||||
}
|
||||
return IRQ_HANDLED;
|
||||
@ -1670,6 +1818,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
|
||||
|
||||
adapter->vdev = dev;
|
||||
adapter->netdev = netdev;
|
||||
INIT_WORK(&adapter->work, ibmveth_reset);
|
||||
adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p);
|
||||
ibmveth_init_link_settings(netdev);
|
||||
|
||||
@ -1705,6 +1854,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
|
||||
netdev->features |= NETIF_F_FRAGLIST;
|
||||
}
|
||||
|
||||
if (ret == H_SUCCESS &&
|
||||
(ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT)) {
|
||||
adapter->rx_buffers_per_hcall = IBMVETH_MAX_RX_PER_HCALL;
|
||||
netdev_dbg(netdev,
|
||||
"RX Multi-buffer hcall supported by FW, batch set to %u\n",
|
||||
adapter->rx_buffers_per_hcall);
|
||||
} else {
|
||||
adapter->rx_buffers_per_hcall = 1;
|
||||
netdev_dbg(netdev,
|
||||
"RX Single-buffer hcall mode, batch set to %u\n",
|
||||
adapter->rx_buffers_per_hcall);
|
||||
}
|
||||
|
||||
netdev->min_mtu = IBMVETH_MIN_MTU;
|
||||
netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
|
||||
|
||||
@ -1762,6 +1924,8 @@ static void ibmveth_remove(struct vio_dev *dev)
|
||||
struct ibmveth_adapter *adapter = netdev_priv(netdev);
|
||||
int i;
|
||||
|
||||
cancel_work_sync(&adapter->work);
|
||||
|
||||
for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
|
||||
kobject_put(&adapter->rx_buff_pool[i].kobj);
|
||||
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#define IbmVethMcastRemoveFilter 0x2UL
|
||||
#define IbmVethMcastClearFilterTable 0x3UL
|
||||
|
||||
#define IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT 0x0000000000040000UL
|
||||
#define IBMVETH_ILLAN_LRG_SR_ENABLED 0x0000000000010000UL
|
||||
#define IBMVETH_ILLAN_LRG_SND_SUPPORT 0x0000000000008000UL
|
||||
#define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL
|
||||
@ -46,6 +47,24 @@
|
||||
#define h_add_logical_lan_buffer(ua, buf) \
|
||||
plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
|
||||
|
||||
static inline long h_add_logical_lan_buffers(unsigned long unit_address,
|
||||
unsigned long desc1,
|
||||
unsigned long desc2,
|
||||
unsigned long desc3,
|
||||
unsigned long desc4,
|
||||
unsigned long desc5,
|
||||
unsigned long desc6,
|
||||
unsigned long desc7,
|
||||
unsigned long desc8)
|
||||
{
|
||||
unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
|
||||
|
||||
return plpar_hcall9(H_ADD_LOGICAL_LAN_BUFFERS,
|
||||
retbuf, unit_address,
|
||||
desc1, desc2, desc3, desc4,
|
||||
desc5, desc6, desc7, desc8);
|
||||
}
|
||||
|
||||
/* FW allows us to send 6 descriptors but we only use one so mark
|
||||
* the other 5 as unused (0)
|
||||
*/
|
||||
@ -101,6 +120,7 @@ static inline long h_illan_attributes(unsigned long unit_address,
|
||||
#define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64)
|
||||
#define IBMVETH_MAX_QUEUES 16U
|
||||
#define IBMVETH_DEFAULT_QUEUES 8U
|
||||
#define IBMVETH_MAX_RX_PER_HCALL 8U
|
||||
|
||||
static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
|
||||
static int pool_count[] = { 256, 512, 256, 256, 256 };
|
||||
@ -137,6 +157,7 @@ struct ibmveth_adapter {
|
||||
struct vio_dev *vdev;
|
||||
struct net_device *netdev;
|
||||
struct napi_struct napi;
|
||||
struct work_struct work;
|
||||
unsigned int mcastFilterSize;
|
||||
void * buffer_list_addr;
|
||||
void * filter_list_addr;
|
||||
@ -150,6 +171,7 @@ struct ibmveth_adapter {
|
||||
int rx_csum;
|
||||
int large_send;
|
||||
bool is_active_trunk;
|
||||
unsigned int rx_buffers_per_hcall;
|
||||
|
||||
u64 fw_ipv6_csum_support;
|
||||
u64 fw_ipv4_csum_support;
|
||||
|
||||
@ -752,6 +752,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
|
||||
adapter->rx_pool[i].active = 0;
|
||||
}
|
||||
|
||||
static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter)
|
||||
{
|
||||
if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) {
|
||||
netdev_info(adapter->netdev,
|
||||
"set max ind descs from %u to safe limit %u\n",
|
||||
adapter->cur_max_ind_descs,
|
||||
IBMVNIC_SAFE_IND_DESC);
|
||||
adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC;
|
||||
}
|
||||
}
|
||||
|
||||
static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
|
||||
struct ibmvnic_rx_pool *pool)
|
||||
{
|
||||
@ -839,7 +850,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
|
||||
sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
|
||||
|
||||
/* if send_subcrq_indirect queue is full, flush to VIOS */
|
||||
if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
|
||||
if (ind_bufp->index == adapter->cur_max_ind_descs ||
|
||||
i == count - 1) {
|
||||
lpar_rc =
|
||||
send_subcrq_indirect(adapter, handle,
|
||||
@ -858,6 +869,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
|
||||
failure:
|
||||
if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
|
||||
dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
|
||||
|
||||
/* Detect platform limit H_PARAMETER */
|
||||
if (lpar_rc == H_PARAMETER)
|
||||
ibmvnic_set_safe_max_ind_descs(adapter);
|
||||
|
||||
/* For all error case, temporarily drop only this batch
|
||||
* Rely on TCP/IP retransmissions to retry and recover
|
||||
*/
|
||||
for (i = ind_bufp->index - 1; i >= 0; --i) {
|
||||
struct ibmvnic_rx_buff *rx_buff;
|
||||
|
||||
@ -2308,9 +2327,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
|
||||
tx_pool->num_buffers - 1 :
|
||||
tx_pool->consumer_index - 1;
|
||||
tx_buff = &tx_pool->tx_buff[index];
|
||||
adapter->netdev->stats.tx_packets--;
|
||||
adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
|
||||
adapter->tx_stats_buffers[queue_num].packets--;
|
||||
adapter->tx_stats_buffers[queue_num].batched_packets--;
|
||||
adapter->tx_stats_buffers[queue_num].bytes -=
|
||||
tx_buff->skb->len;
|
||||
dev_kfree_skb_any(tx_buff->skb);
|
||||
@ -2379,16 +2396,28 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
|
||||
rc = send_subcrq_direct(adapter, handle,
|
||||
(u64 *)ind_bufp->indir_arr);
|
||||
|
||||
if (rc)
|
||||
if (rc) {
|
||||
dev_err_ratelimited(&adapter->vdev->dev,
|
||||
"tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n",
|
||||
rc, entries, &dma_addr, handle);
|
||||
/* Detect platform limit H_PARAMETER */
|
||||
if (rc == H_PARAMETER)
|
||||
ibmvnic_set_safe_max_ind_descs(adapter);
|
||||
|
||||
/* For all error case, temporarily drop only this batch
|
||||
* Rely on TCP/IP retransmissions to retry and recover
|
||||
*/
|
||||
ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
|
||||
else
|
||||
} else {
|
||||
ind_bufp->index = 0;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
||||
{
|
||||
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
|
||||
u32 cur_max_ind_descs = adapter->cur_max_ind_descs;
|
||||
int queue_num = skb_get_queue_mapping(skb);
|
||||
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
|
||||
struct device *dev = &adapter->vdev->dev;
|
||||
@ -2402,7 +2431,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
||||
unsigned int tx_map_failed = 0;
|
||||
union sub_crq indir_arr[16];
|
||||
unsigned int tx_dropped = 0;
|
||||
unsigned int tx_packets = 0;
|
||||
unsigned int tx_dpackets = 0;
|
||||
unsigned int tx_bpackets = 0;
|
||||
unsigned int tx_bytes = 0;
|
||||
dma_addr_t data_dma_addr;
|
||||
struct netdev_queue *txq;
|
||||
@ -2577,6 +2607,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
||||
if (lpar_rc != H_SUCCESS)
|
||||
goto tx_err;
|
||||
|
||||
tx_dpackets++;
|
||||
goto early_exit;
|
||||
}
|
||||
|
||||
@ -2586,7 +2617,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
||||
tx_crq.v1.n_crq_elem = num_entries;
|
||||
tx_buff->num_entries = num_entries;
|
||||
/* flush buffer if current entry can not fit */
|
||||
if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
|
||||
if (num_entries + ind_bufp->index > cur_max_ind_descs) {
|
||||
lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
|
||||
if (lpar_rc != H_SUCCESS)
|
||||
goto tx_flush_err;
|
||||
@ -2599,11 +2630,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
||||
ind_bufp->index += num_entries;
|
||||
if (__netdev_tx_sent_queue(txq, skb->len,
|
||||
netdev_xmit_more() &&
|
||||
ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
|
||||
ind_bufp->index < cur_max_ind_descs)) {
|
||||
lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
|
||||
if (lpar_rc != H_SUCCESS)
|
||||
goto tx_err;
|
||||
}
|
||||
tx_bpackets++;
|
||||
|
||||
early_exit:
|
||||
if (atomic_add_return(num_entries, &tx_scrq->used)
|
||||
@ -2612,7 +2644,6 @@ early_exit:
|
||||
netif_stop_subqueue(netdev, queue_num);
|
||||
}
|
||||
|
||||
tx_packets++;
|
||||
tx_bytes += skblen;
|
||||
txq_trans_cond_update(txq);
|
||||
ret = NETDEV_TX_OK;
|
||||
@ -2640,12 +2671,10 @@ tx_err:
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
netdev->stats.tx_dropped += tx_dropped;
|
||||
netdev->stats.tx_bytes += tx_bytes;
|
||||
netdev->stats.tx_packets += tx_packets;
|
||||
adapter->tx_send_failed += tx_send_failed;
|
||||
adapter->tx_map_failed += tx_map_failed;
|
||||
adapter->tx_stats_buffers[queue_num].packets += tx_packets;
|
||||
adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets;
|
||||
adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets;
|
||||
adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
|
||||
adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
|
||||
|
||||
@ -3444,6 +3473,25 @@ err:
|
||||
return -ret;
|
||||
}
|
||||
|
||||
static void ibmvnic_get_stats64(struct net_device *netdev,
|
||||
struct rtnl_link_stats64 *stats)
|
||||
{
|
||||
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adapter->req_rx_queues; i++) {
|
||||
stats->rx_packets += adapter->rx_stats_buffers[i].packets;
|
||||
stats->rx_bytes += adapter->rx_stats_buffers[i].bytes;
|
||||
}
|
||||
|
||||
for (i = 0; i < adapter->req_tx_queues; i++) {
|
||||
stats->tx_packets += adapter->tx_stats_buffers[i].batched_packets;
|
||||
stats->tx_packets += adapter->tx_stats_buffers[i].direct_packets;
|
||||
stats->tx_bytes += adapter->tx_stats_buffers[i].bytes;
|
||||
stats->tx_dropped += adapter->tx_stats_buffers[i].dropped_packets;
|
||||
}
|
||||
}
|
||||
|
||||
static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
|
||||
{
|
||||
struct ibmvnic_adapter *adapter = netdev_priv(dev);
|
||||
@ -3559,8 +3607,6 @@ restart_poll:
|
||||
|
||||
length = skb->len;
|
||||
napi_gro_receive(napi, skb); /* send it up */
|
||||
netdev->stats.rx_packets++;
|
||||
netdev->stats.rx_bytes += length;
|
||||
adapter->rx_stats_buffers[scrq_num].packets++;
|
||||
adapter->rx_stats_buffers[scrq_num].bytes += length;
|
||||
frames_processed++;
|
||||
@ -3670,6 +3716,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
|
||||
.ndo_set_rx_mode = ibmvnic_set_multi,
|
||||
.ndo_set_mac_address = ibmvnic_set_mac,
|
||||
.ndo_validate_addr = eth_validate_addr,
|
||||
.ndo_get_stats64 = ibmvnic_get_stats64,
|
||||
.ndo_tx_timeout = ibmvnic_tx_timeout,
|
||||
.ndo_change_mtu = ibmvnic_change_mtu,
|
||||
.ndo_features_check = ibmvnic_features_check,
|
||||
@ -3810,7 +3857,10 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
|
||||
memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
|
||||
|
||||
for (i = 0; i < adapter->req_tx_queues; i++) {
|
||||
snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
|
||||
snprintf(data, ETH_GSTRING_LEN, "tx%d_batched_packets", i);
|
||||
data += ETH_GSTRING_LEN;
|
||||
|
||||
snprintf(data, ETH_GSTRING_LEN, "tx%d_direct_packets", i);
|
||||
data += ETH_GSTRING_LEN;
|
||||
|
||||
snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
|
||||
@ -3875,7 +3925,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
|
||||
(adapter, ibmvnic_stats[i].offset));
|
||||
|
||||
for (j = 0; j < adapter->req_tx_queues; j++) {
|
||||
data[i] = adapter->tx_stats_buffers[j].packets;
|
||||
data[i] = adapter->tx_stats_buffers[j].batched_packets;
|
||||
i++;
|
||||
data[i] = adapter->tx_stats_buffers[j].direct_packets;
|
||||
i++;
|
||||
data[i] = adapter->tx_stats_buffers[j].bytes;
|
||||
i++;
|
||||
@ -3992,7 +4044,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
|
||||
}
|
||||
|
||||
dma_free_coherent(dev,
|
||||
IBMVNIC_IND_ARR_SZ,
|
||||
IBMVNIC_IND_MAX_ARR_SZ,
|
||||
scrq->ind_buf.indir_arr,
|
||||
scrq->ind_buf.indir_dma);
|
||||
|
||||
@ -4049,7 +4101,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
|
||||
|
||||
scrq->ind_buf.indir_arr =
|
||||
dma_alloc_coherent(dev,
|
||||
IBMVNIC_IND_ARR_SZ,
|
||||
IBMVNIC_IND_MAX_ARR_SZ,
|
||||
&scrq->ind_buf.indir_dma,
|
||||
GFP_KERNEL);
|
||||
|
||||
@ -6355,6 +6407,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
|
||||
rc = reset_sub_crq_queues(adapter);
|
||||
}
|
||||
} else {
|
||||
if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
|
||||
/* After an LPM, reset the max number of indirect
|
||||
* subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT
|
||||
* hcall to the default max (e.g. POWER8 -> POWER10).
|
||||
*
|
||||
* If the new destination platform does not support
|
||||
* the higher limit (e.g. POWER10 -> POWER8 LPM),
|
||||
* H_PARAMETER will trigger automatic fallback to the
|
||||
* safe minimum limit.
|
||||
*/
|
||||
adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
|
||||
}
|
||||
|
||||
rc = init_sub_crqs(adapter);
|
||||
}
|
||||
|
||||
@ -6506,6 +6571,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
|
||||
|
||||
adapter->wait_for_reset = false;
|
||||
adapter->last_reset_time = jiffies;
|
||||
adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
|
||||
|
||||
rc = register_netdev(netdev);
|
||||
if (rc) {
|
||||
|
||||
@ -29,8 +29,9 @@
|
||||
#define IBMVNIC_BUFFS_PER_POOL 100
|
||||
#define IBMVNIC_MAX_QUEUES 16
|
||||
#define IBMVNIC_MAX_QUEUE_SZ 4096
|
||||
#define IBMVNIC_MAX_IND_DESCS 16
|
||||
#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
|
||||
#define IBMVNIC_MAX_IND_DESCS 128
|
||||
#define IBMVNIC_SAFE_IND_DESC 16
|
||||
#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
|
||||
|
||||
#define IBMVNIC_TSO_BUF_SZ 65536
|
||||
#define IBMVNIC_TSO_BUFS 64
|
||||
@ -211,20 +212,25 @@ struct ibmvnic_statistics {
|
||||
u8 reserved[72];
|
||||
} __packed __aligned(8);
|
||||
|
||||
#define NUM_TX_STATS 3
|
||||
struct ibmvnic_tx_queue_stats {
|
||||
u64 packets;
|
||||
u64 batched_packets;
|
||||
u64 direct_packets;
|
||||
u64 bytes;
|
||||
u64 dropped_packets;
|
||||
};
|
||||
|
||||
#define NUM_RX_STATS 3
|
||||
#define NUM_TX_STATS \
|
||||
(sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64))
|
||||
|
||||
struct ibmvnic_rx_queue_stats {
|
||||
u64 packets;
|
||||
u64 bytes;
|
||||
u64 interrupts;
|
||||
};
|
||||
|
||||
#define NUM_RX_STATS \
|
||||
(sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64))
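Deriving NUM_TX_STATS and NUM_RX_STATS from the structure size keeps the ethtool string and value tables in step when a counter such as batched_packets or direct_packets is added. A hypothetical compile-time guard for the assumption behind the division (every member is a u64) could look like this; it is not part of the patch:

/* Hypothetical guard -- not in the patch. */
static_assert(sizeof(struct ibmvnic_tx_queue_stats) % sizeof(u64) == 0,
	      "tx queue stats must consist solely of u64 counters");
static_assert(sizeof(struct ibmvnic_rx_queue_stats) % sizeof(u64) == 0,
	      "rx queue stats must consist solely of u64 counters");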
|
||||
|
||||
struct ibmvnic_acl_buffer {
|
||||
__be32 len;
|
||||
__be32 version;
|
||||
@ -925,6 +931,7 @@ struct ibmvnic_adapter {
|
||||
struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
|
||||
dma_addr_t ip_offload_ctrl_tok;
|
||||
u32 msg_enable;
|
||||
u32 cur_max_ind_descs;
|
||||
|
||||
/* Vital Product Data (VPD) */
|
||||
struct ibmvnic_vpd *vpd;
|
||||
|
||||
@ -2374,7 +2374,13 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size,
|
||||
* The function will apply the new Tx topology from the package buffer
|
||||
* if available.
|
||||
*
|
||||
* Return: zero when update was successful, negative values otherwise.
|
||||
* Return:
|
||||
* * 0 - Successfully applied topology configuration.
|
||||
* * -EBUSY - Failed to acquire global configuration lock.
|
||||
* * -EEXIST - Topology configuration has already been applied.
|
||||
* * -EIO - Unable to apply topology configuration.
|
||||
* * -ENODEV - Failed to re-initialize device after applying configuration.
|
||||
* * Other negative error codes indicate unexpected failures.
|
||||
*/
|
||||
int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len)
|
||||
{
|
||||
@ -2407,7 +2413,7 @@ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len)
|
||||
|
||||
if (status) {
|
||||
ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n");
|
||||
return status;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* Is default topology already applied ? */
|
||||
@ -2494,31 +2500,45 @@ update_topo:
|
||||
ICE_GLOBAL_CFG_LOCK_TIMEOUT);
|
||||
if (status) {
|
||||
ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n");
|
||||
return status;
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* Check if reset was triggered already. */
|
||||
reg = rd32(hw, GLGEN_RSTAT);
|
||||
if (reg & GLGEN_RSTAT_DEVSTATE_M) {
|
||||
/* Reset is in progress, re-init the HW again */
|
||||
ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n");
|
||||
ice_check_reset(hw);
|
||||
return 0;
|
||||
/* Reset is in progress, re-init the HW again */
|
||||
goto reinit_hw;
|
||||
}
|
||||
|
||||
/* Set new topology */
|
||||
status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true);
|
||||
if (status) {
|
||||
ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n");
|
||||
return status;
|
||||
ice_debug(hw, ICE_DBG_INIT, "Failed to set Tx topology, status %pe\n",
|
||||
ERR_PTR(status));
|
||||
/* only report -EIO here as the caller checks the error value
|
||||
* and reports an informational error message informing that
|
||||
* the driver failed to program Tx topology.
|
||||
*/
|
||||
status = -EIO;
|
||||
}
|
||||
|
||||
/* New topology is updated, delay 1 second before issuing the CORER */
|
||||
/* Even if Tx topology config failed, we need to CORE reset here to
|
||||
* clear the global configuration lock. Delay 1 second to allow
|
||||
* hardware to settle then issue a CORER
|
||||
*/
|
||||
msleep(1000);
|
||||
ice_reset(hw, ICE_RESET_CORER);
|
||||
/* CORER will clear the global lock, so no explicit call
|
||||
* required for release.
|
||||
*/
|
||||
ice_check_reset(hw);
|
||||
|
||||
return 0;
|
||||
reinit_hw:
|
||||
/* Since we triggered a CORER, re-initialize hardware */
|
||||
ice_deinit_hw(hw);
|
||||
if (ice_init_hw(hw)) {
|
||||
ice_debug(hw, ICE_DBG_INIT, "Failed to re-init hardware after setting Tx topology\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -4573,17 +4573,23 @@ ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware)
|
||||
dev_info(dev, "Tx scheduling layers switching feature disabled\n");
|
||||
else
|
||||
dev_info(dev, "Tx scheduling layers switching feature enabled\n");
|
||||
/* if there was a change in topology ice_cfg_tx_topo triggered
|
||||
* a CORER and we need to re-init hw
|
||||
return 0;
|
||||
} else if (err == -ENODEV) {
|
||||
/* If we failed to re-initialize the device, we can no longer
|
||||
* continue loading.
|
||||
*/
|
||||
ice_deinit_hw(hw);
|
||||
err = ice_init_hw(hw);
|
||||
|
||||
dev_warn(dev, "Failed to initialize hardware after applying Tx scheduling configuration.\n");
|
||||
return err;
|
||||
} else if (err == -EIO) {
|
||||
dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n");
|
||||
return 0;
|
||||
} else if (err == -EEXIST) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Do not treat this as a fatal error. */
|
||||
dev_info(dev, "Failed to apply Tx scheduling configuration, err %pe\n",
|
||||
ERR_PTR(err));
|
||||
return 0;
|
||||
}
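With ice_cfg_tx_topo() now returning distinct errnos, ice_init_tx_topology() dispatches on the code: -EIO and -EEXIST become informational, -ENODEV aborts driver load, and anything else is logged and ignored. Below is a hedged userspace sketch of that dispatch shape; apply_topology() and its scenarios are stand-ins, not the ice driver's API.

#include <errno.h>
#include <stdio.h>

/* Stand-in for the configuration call; returns 0 or a negative errno. */
static int apply_topology(int scenario)
{
	switch (scenario) {
	case 0: return 0;
	case 1: return -EEXIST;	/* already applied */
	case 2: return -EIO;	/* package lacks the feature */
	case 3: return -ENODEV;	/* device did not come back */
	default: return -EBUSY;
	}
}

/* Mirrors the caller's shape: only -ENODEV is fatal to initialization. */
static int init_tx_topology(int scenario)
{
	int err = apply_topology(scenario);

	if (!err) {
		puts("Tx scheduling layers switching feature configured");
		return 0;
	}
	if (err == -ENODEV) {
		puts("failed to re-init device, aborting load");
		return err;
	}
	if (err == -EIO || err == -EEXIST)
		return 0;	/* informational only, keep going */

	printf("non-fatal topology error %d, continuing\n", err);
	return 0;
}

int main(void)
{
	for (int s = 0; s <= 4; s++)
		printf("scenario %d -> %d\n", s, init_tx_topology(s));
	return 0;
}
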
@ -865,10 +865,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
|
||||
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
|
||||
rx_buf->page_offset, size);
|
||||
sinfo->xdp_frags_size += size;
|
||||
/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
|
||||
* can pop off frags but driver has to handle it on its own
|
||||
*/
|
||||
rx_ring->nr_frags = sinfo->nr_frags;
|
||||
|
||||
if (page_is_pfmemalloc(rx_buf->page))
|
||||
xdp_buff_set_frag_pfmemalloc(xdp);
|
||||
@ -939,20 +935,20 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
|
||||
/**
|
||||
* ice_get_pgcnts - grab page_count() for gathered fragments
|
||||
* @rx_ring: Rx descriptor ring to store the page counts on
|
||||
* @ntc: the next to clean element (not included in this frame!)
|
||||
*
|
||||
* This function is intended to be called right before running XDP
|
||||
* program so that the page recycling mechanism will be able to take
|
||||
* a correct decision regarding underlying pages; this is done in such
|
||||
* way as XDP program can change the refcount of page
|
||||
*/
|
||||
static void ice_get_pgcnts(struct ice_rx_ring *rx_ring)
|
||||
static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc)
|
||||
{
|
||||
u32 nr_frags = rx_ring->nr_frags + 1;
|
||||
u32 idx = rx_ring->first_desc;
|
||||
struct ice_rx_buf *rx_buf;
|
||||
u32 cnt = rx_ring->count;
|
||||
|
||||
for (int i = 0; i < nr_frags; i++) {
|
||||
while (idx != ntc) {
|
||||
rx_buf = &rx_ring->rx_buf[idx];
|
||||
rx_buf->pgcnt = page_count(rx_buf->page);
|
||||
|
||||
@ -1125,62 +1121,51 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
|
||||
}
|
||||
|
||||
/**
|
||||
* ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags
|
||||
* ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame
|
||||
* @rx_ring: Rx ring with all the auxiliary data
|
||||
* @xdp: XDP buffer carrying linear + frags part
|
||||
* @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
|
||||
* @ntc: a current next_to_clean value to be stored at rx_ring
|
||||
* @ntc: the next to clean element (not included in this frame!)
|
||||
* @verdict: return code from XDP program execution
|
||||
*
|
||||
* Walk through gathered fragments and satisfy internal page
|
||||
* recycle mechanism; we take here an action related to verdict
|
||||
* returned by XDP program;
|
||||
* Called after XDP program is completed, or on error with verdict set to
|
||||
* ICE_XDP_CONSUMED.
|
||||
*
|
||||
* Walk through buffers from first_desc to the end of the frame, releasing
|
||||
* buffers and satisfying internal page recycle mechanism. The action depends
|
||||
* on verdict from XDP program.
|
||||
*/
|
||||
static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
|
||||
u32 *xdp_xmit, u32 ntc, u32 verdict)
|
||||
u32 ntc, u32 verdict)
|
||||
{
|
||||
u32 nr_frags = rx_ring->nr_frags + 1;
|
||||
u32 idx = rx_ring->first_desc;
|
||||
u32 cnt = rx_ring->count;
|
||||
u32 post_xdp_frags = 1;
|
||||
struct ice_rx_buf *buf;
|
||||
int i;
|
||||
u32 xdp_frags = 0;
|
||||
int i = 0;
|
||||
|
||||
if (unlikely(xdp_buff_has_frags(xdp)))
|
||||
post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags;
|
||||
xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
|
||||
|
||||
for (i = 0; i < post_xdp_frags; i++) {
|
||||
while (idx != ntc) {
|
||||
buf = &rx_ring->rx_buf[idx];
|
||||
|
||||
if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) {
|
||||
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
|
||||
*xdp_xmit |= verdict;
|
||||
} else if (verdict & ICE_XDP_CONSUMED) {
|
||||
buf->pagecnt_bias++;
|
||||
} else if (verdict == ICE_XDP_PASS) {
|
||||
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
|
||||
}
|
||||
|
||||
ice_put_rx_buf(rx_ring, buf);
|
||||
|
||||
if (++idx == cnt)
|
||||
idx = 0;
|
||||
}
|
||||
/* handle buffers that represented frags released by XDP prog;
|
||||
* for these we keep pagecnt_bias as-is; refcount from struct page
|
||||
* has been decremented within XDP prog and we do not have to increase
|
||||
* the biased refcnt
|
||||
|
||||
/* An XDP program could release fragments from the end of the
|
||||
* buffer. For these, we need to keep the pagecnt_bias as-is.
|
||||
* To do this, only adjust pagecnt_bias for fragments up to
|
||||
* the total remaining after the XDP program has run.
|
||||
*/
|
||||
for (; i < nr_frags; i++) {
|
||||
buf = &rx_ring->rx_buf[idx];
|
||||
if (verdict != ICE_XDP_CONSUMED)
|
||||
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
|
||||
else if (i++ <= xdp_frags)
|
||||
buf->pagecnt_bias++;
|
||||
|
||||
ice_put_rx_buf(rx_ring, buf);
|
||||
if (++idx == cnt)
|
||||
idx = 0;
|
||||
}
|
||||
|
||||
xdp->data = NULL;
|
||||
rx_ring->first_desc = ntc;
|
||||
rx_ring->nr_frags = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1260,6 +1245,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
|
||||
/* retrieve a buffer from the ring */
|
||||
rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
|
||||
|
||||
/* Increment ntc before calls to ice_put_rx_mbuf() */
|
||||
if (++ntc == cnt)
|
||||
ntc = 0;
|
||||
|
||||
if (!xdp->data) {
|
||||
void *hard_start;
|
||||
|
||||
@ -1268,24 +1257,23 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
|
||||
xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
|
||||
xdp_buff_clear_frags_flag(xdp);
|
||||
} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
|
||||
ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED);
|
||||
ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED);
|
||||
break;
|
||||
}
|
||||
if (++ntc == cnt)
|
||||
ntc = 0;
|
||||
|
||||
/* skip if it is NOP desc */
|
||||
if (ice_is_non_eop(rx_ring, rx_desc))
|
||||
continue;
|
||||
|
||||
ice_get_pgcnts(rx_ring);
|
||||
ice_get_pgcnts(rx_ring, ntc);
|
||||
xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
|
||||
if (xdp_verdict == ICE_XDP_PASS)
|
||||
goto construct_skb;
|
||||
total_rx_bytes += xdp_get_buff_len(xdp);
|
||||
total_rx_pkts++;
|
||||
|
||||
ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
|
||||
ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
|
||||
xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR);
|
||||
|
||||
continue;
|
||||
construct_skb:
|
||||
@ -1298,7 +1286,7 @@ construct_skb:
|
||||
rx_ring->ring_stats->rx_stats.alloc_page_failed++;
|
||||
xdp_verdict = ICE_XDP_CONSUMED;
|
||||
}
|
||||
ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
|
||||
ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
|
||||
|
||||
if (!skb)
|
||||
break;
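Both ice_get_pgcnts() and ice_put_rx_mbuf() now walk the Rx ring from first_desc up to, but not including, ntc instead of carrying a separate fragment count. A small standalone C sketch of that wrap-around walk follows; the ring and buffer types are invented for the example.

#include <stdio.h>

#define RING_SIZE 8

struct rx_buf {
	int page_count;
};

struct rx_ring {
	struct rx_buf bufs[RING_SIZE];
	unsigned int count;		/* number of descriptors in the ring */
	unsigned int first_desc;	/* first buffer of the current frame */
};

/* Visit every buffer of the frame: [first_desc, ntc), wrapping at count. */
static void walk_frame(struct rx_ring *ring, unsigned int ntc)
{
	unsigned int idx = ring->first_desc;

	while (idx != ntc) {
		struct rx_buf *buf = &ring->bufs[idx];

		buf->page_count++;		/* stand-in per-buffer action */
		printf("visited buffer %u\n", idx);

		if (++idx == ring->count)	/* wrap around the ring */
			idx = 0;
	}
}

int main(void)
{
	struct rx_ring ring = { .count = RING_SIZE, .first_desc = 6 };

	walk_frame(&ring, 2);	/* frame spans indices 6, 7, 0, 1 */
	return 0;
}
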
@ -358,7 +358,6 @@ struct ice_rx_ring {
|
||||
struct ice_tx_ring *xdp_ring;
|
||||
struct ice_rx_ring *next; /* pointer to next ring in q_vector */
|
||||
struct xsk_buff_pool *xsk_pool;
|
||||
u32 nr_frags;
|
||||
u16 max_frame;
|
||||
u16 rx_buf_len;
|
||||
dma_addr_t dma; /* physical address of ring */
|
||||
|
||||
@ -3094,7 +3094,7 @@ static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
combo_ver = le32_to_cpu(civd.combo_ver);
|
||||
combo_ver = get_unaligned_le32(&civd.combo_ver);
|
||||
|
||||
orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver);
|
||||
orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver);
|
||||
|
||||
@ -1136,7 +1136,7 @@ struct ixgbe_orom_civd_info {
|
||||
__le32 combo_ver; /* Combo Image Version number */
|
||||
u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */
|
||||
__le16 combo_name[32]; /* Unicode string representing the Combo Image version */
|
||||
};
|
||||
} __packed;
|
||||
|
||||
/* Function specific capabilities */
|
||||
struct ixgbe_hw_func_caps {
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include <linux/version.h>
|
||||
|
||||
#include <net/mana/mana.h>
|
||||
#include <net/mana/hw_channel.h>
|
||||
|
||||
struct dentry *mana_debugfs_root;
|
||||
|
||||
@ -66,6 +67,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
|
||||
mana_gd_init_vf_regs(pdev);
|
||||
}
|
||||
|
||||
/* Suppress logging when we set timeout to zero */
|
||||
bool mana_need_log(struct gdma_context *gc, int err)
|
||||
{
|
||||
struct hw_channel_context *hwc;
|
||||
|
||||
if (err != -ETIMEDOUT)
|
||||
return true;
|
||||
|
||||
if (!gc)
|
||||
return true;
|
||||
|
||||
hwc = gc->hwc.driver_data;
|
||||
if (hwc && hwc->hwc_timeout == 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int mana_gd_query_max_resources(struct pci_dev *pdev)
|
||||
{
|
||||
struct gdma_context *gc = pci_get_drvdata(pdev);
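mana_need_log() suppresses -ETIMEDOUT messages once the HWC timeout has been deliberately set to zero during a reset cycle, so an expected flood of timeouts stays quiet. A tiny userspace model of the same guard follows; the context struct is invented for the example.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct hwc_ctx {
	unsigned int hwc_timeout;	/* 0 means "reset in progress, expect timeouts" */
};

/* Log everything except expected timeouts while the channel is quiesced. */
static bool need_log(const struct hwc_ctx *hwc, int err)
{
	if (err != -ETIMEDOUT)
		return true;
	return !(hwc && hwc->hwc_timeout == 0);
}

int main(void)
{
	struct hwc_ctx quiesced = { .hwc_timeout = 0 };
	struct hwc_ctx normal = { .hwc_timeout = 100 };

	if (need_log(&normal, -ETIMEDOUT))
		puts("normal path: timeout is logged");
	if (!need_log(&quiesced, -ETIMEDOUT))
		puts("reset path: timeout is suppressed");
	if (need_log(&quiesced, -EIO))
		puts("other errors are always logged");
	return 0;
}
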
@ -269,6 +288,7 @@ static int mana_gd_disable_queue(struct gdma_queue *queue)
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err || resp.hdr.status) {
|
||||
if (mana_need_log(gc, err))
|
||||
dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
|
||||
resp.hdr.status);
|
||||
return err ? err : -EPROTO;
|
||||
@ -355,11 +375,113 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
|
||||
}
|
||||
EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
|
||||
|
||||
#define MANA_SERVICE_PERIOD 10
|
||||
|
||||
static void mana_serv_fpga(struct pci_dev *pdev)
|
||||
{
|
||||
struct pci_bus *bus, *parent;
|
||||
|
||||
pci_lock_rescan_remove();
|
||||
|
||||
bus = pdev->bus;
|
||||
if (!bus) {
|
||||
dev_err(&pdev->dev, "MANA service: no bus\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
parent = bus->parent;
|
||||
if (!parent) {
|
||||
dev_err(&pdev->dev, "MANA service: no parent bus\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
pci_stop_and_remove_bus_device(bus->self);
|
||||
|
||||
msleep(MANA_SERVICE_PERIOD * 1000);
|
||||
|
||||
pci_rescan_bus(parent);
|
||||
|
||||
out:
|
||||
pci_unlock_rescan_remove();
|
||||
}
|
||||
|
||||
static void mana_serv_reset(struct pci_dev *pdev)
|
||||
{
|
||||
struct gdma_context *gc = pci_get_drvdata(pdev);
|
||||
struct hw_channel_context *hwc;
|
||||
|
||||
if (!gc) {
|
||||
dev_err(&pdev->dev, "MANA service: no GC\n");
|
||||
return;
|
||||
}
|
||||
|
||||
hwc = gc->hwc.driver_data;
|
||||
if (!hwc) {
|
||||
dev_err(&pdev->dev, "MANA service: no HWC\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* HWC is not responding in this case, so don't wait */
|
||||
hwc->hwc_timeout = 0;
|
||||
|
||||
dev_info(&pdev->dev, "MANA reset cycle start\n");
|
||||
|
||||
mana_gd_suspend(pdev, PMSG_SUSPEND);
|
||||
|
||||
msleep(MANA_SERVICE_PERIOD * 1000);
|
||||
|
||||
mana_gd_resume(pdev);
|
||||
|
||||
dev_info(&pdev->dev, "MANA reset cycle completed\n");
|
||||
|
||||
out:
|
||||
gc->in_service = false;
|
||||
}
|
||||
|
||||
struct mana_serv_work {
|
||||
struct work_struct serv_work;
|
||||
struct pci_dev *pdev;
|
||||
enum gdma_eqe_type type;
|
||||
};
|
||||
|
||||
static void mana_serv_func(struct work_struct *w)
|
||||
{
|
||||
struct mana_serv_work *mns_wk;
|
||||
struct pci_dev *pdev;
|
||||
|
||||
mns_wk = container_of(w, struct mana_serv_work, serv_work);
|
||||
pdev = mns_wk->pdev;
|
||||
|
||||
if (!pdev)
|
||||
goto out;
|
||||
|
||||
switch (mns_wk->type) {
|
||||
case GDMA_EQE_HWC_FPGA_RECONFIG:
|
||||
mana_serv_fpga(pdev);
|
||||
break;
|
||||
|
||||
case GDMA_EQE_HWC_RESET_REQUEST:
|
||||
mana_serv_reset(pdev);
|
||||
break;
|
||||
|
||||
default:
|
||||
dev_err(&pdev->dev, "MANA service: unknown type %d\n",
|
||||
mns_wk->type);
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
pci_dev_put(pdev);
|
||||
kfree(mns_wk);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static void mana_gd_process_eqe(struct gdma_queue *eq)
|
||||
{
|
||||
u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
|
||||
struct gdma_context *gc = eq->gdma_dev->gdma_context;
|
||||
struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
|
||||
struct mana_serv_work *mns_wk;
|
||||
union gdma_eqe_info eqe_info;
|
||||
enum gdma_eqe_type type;
|
||||
struct gdma_event event;
|
||||
@ -404,6 +526,35 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
|
||||
eq->eq.callback(eq->eq.context, eq, &event);
|
||||
break;
|
||||
|
||||
case GDMA_EQE_HWC_FPGA_RECONFIG:
|
||||
case GDMA_EQE_HWC_RESET_REQUEST:
|
||||
dev_info(gc->dev, "Recv MANA service type:%d\n", type);
|
||||
|
||||
if (gc->in_service) {
|
||||
dev_info(gc->dev, "Already in service\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (!try_module_get(THIS_MODULE)) {
|
||||
dev_info(gc->dev, "Module is unloading\n");
|
||||
break;
|
||||
}
|
||||
|
||||
mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
|
||||
if (!mns_wk) {
|
||||
module_put(THIS_MODULE);
|
||||
break;
|
||||
}
|
||||
|
||||
dev_info(gc->dev, "Start MANA service type:%d\n", type);
|
||||
gc->in_service = true;
|
||||
mns_wk->pdev = to_pci_dev(gc->dev);
|
||||
mns_wk->type = type;
|
||||
pci_dev_get(mns_wk->pdev);
|
||||
INIT_WORK(&mns_wk->serv_work, mana_serv_func);
|
||||
schedule_work(&mns_wk->serv_work);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
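The EQE handler defers the heavy FPGA reconfiguration or reset servicing to a workqueue, guarded by an in_service flag and a module reference so the work cannot be queued twice or outlive the module. Below is a minimal pthread-based userspace model of the dedupe-and-defer shape; the names are invented, and kernel-only details such as try_module_get() have no direct equivalent here.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct service_ctx {
	pthread_mutex_t lock;
	bool in_service;		/* mirrors the "already in service" check */
};

struct service_work {
	struct service_ctx *ctx;
	int type;
};

static void *service_fn(void *arg)
{
	struct service_work *w = arg;

	printf("servicing event type %d\n", w->type);
	sleep(1);			/* stands in for the slow reset cycle */

	pthread_mutex_lock(&w->ctx->lock);
	w->ctx->in_service = false;	/* allow the next request */
	pthread_mutex_unlock(&w->ctx->lock);
	free(w);
	return NULL;
}

/* Called from the event path: queue at most one service run at a time. */
static void queue_service(struct service_ctx *ctx, int type)
{
	struct service_work *w;
	pthread_t tid;

	pthread_mutex_lock(&ctx->lock);
	if (ctx->in_service) {
		pthread_mutex_unlock(&ctx->lock);
		puts("already in service, dropping duplicate request");
		return;
	}
	ctx->in_service = true;
	pthread_mutex_unlock(&ctx->lock);

	w = malloc(sizeof(*w));
	if (!w)
		return;
	w->ctx = ctx;
	w->type = type;
	pthread_create(&tid, NULL, service_fn, w);
	pthread_detach(tid);
}

int main(void)
{
	struct service_ctx ctx = { PTHREAD_MUTEX_INITIALIZER, false };

	queue_service(&ctx, 1);
	queue_service(&ctx, 1);	/* deduplicated */
	sleep(2);		/* let the worker finish */
	return 0;
}
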
@ -545,6 +696,7 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err) {
|
||||
if (mana_need_log(gc, err))
|
||||
dev_err(dev, "test_eq failed: %d\n", err);
|
||||
goto out;
|
||||
}
|
||||
@ -580,7 +732,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
|
||||
|
||||
if (flush_evenets) {
|
||||
err = mana_gd_test_eq(gc, queue);
|
||||
if (err)
|
||||
if (err && mana_need_log(gc, err))
|
||||
dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
|
||||
}
|
||||
|
||||
@ -726,6 +878,7 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err || resp.hdr.status) {
|
||||
if (mana_need_log(gc, err))
|
||||
dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
|
||||
err, resp.hdr.status);
|
||||
return -EPROTO;
|
||||
@ -1027,6 +1180,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
|
||||
|
||||
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
|
||||
if (err || resp.hdr.status) {
|
||||
if (mana_need_log(gc, err))
|
||||
dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
|
||||
err, resp.hdr.status);
|
||||
if (!err)
|
||||
@ -1644,7 +1798,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
|
||||
}
|
||||
|
||||
/* The 'state' parameter is not used. */
|
||||
static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
|
||||
int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
|
||||
{
|
||||
struct gdma_context *gc = pci_get_drvdata(pdev);
|
||||
|
||||
@ -1660,7 +1814,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
|
||||
* fail -- if this happens, it's safer to just report an error than try to undo
|
||||
* what has been done.
|
||||
*/
|
||||
static int mana_gd_resume(struct pci_dev *pdev)
|
||||
int mana_gd_resume(struct pci_dev *pdev)
|
||||
{
|
||||
struct gdma_context *gc = pci_get_drvdata(pdev);
|
||||
int err;
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
/* Copyright (c) 2021, Microsoft Corporation. */
|
||||
|
||||
#include <net/mana/gdma.h>
|
||||
#include <net/mana/mana.h>
|
||||
#include <net/mana/hw_channel.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
@ -879,7 +880,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
|
||||
|
||||
if (!wait_for_completion_timeout(&ctx->comp_event,
|
||||
(msecs_to_jiffies(hwc->hwc_timeout)))) {
|
||||
if (hwc->hwc_timeout != 0)
|
||||
dev_err(hwc->dev, "HWC: Request timed out!\n");
|
||||
|
||||
err = -ETIMEDOUT;
|
||||
goto out;
|
||||
}
|
||||
@ -890,6 +893,11 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
|
||||
}
|
||||
|
||||
if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) {
|
||||
if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT)
|
||||
dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
|
||||
ctx->status_code);
|
||||
err = -EPROTO;
|
||||
|
||||
@ -46,6 +46,15 @@ static const struct file_operations mana_dbg_q_fops = {
|
||||
.read = mana_dbg_q_read,
|
||||
};
|
||||
|
||||
static bool mana_en_need_log(struct mana_port_context *apc, int err)
|
||||
{
|
||||
if (apc && apc->ac && apc->ac->gdma_dev &&
|
||||
apc->ac->gdma_dev->gdma_context)
|
||||
return mana_need_log(apc->ac->gdma_dev->gdma_context, err);
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Microsoft Azure Network Adapter (MANA) functions */
|
||||
|
||||
static int mana_open(struct net_device *ndev)
|
||||
@ -250,10 +259,10 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
|
||||
struct netdev_queue *net_txq;
|
||||
struct mana_stats_tx *tx_stats;
|
||||
struct gdma_queue *gdma_sq;
|
||||
int err, len, num_gso_seg;
|
||||
unsigned int csum_type;
|
||||
struct mana_txq *txq;
|
||||
struct mana_cq *cq;
|
||||
int err, len;
|
||||
|
||||
if (unlikely(!apc->port_is_up))
|
||||
goto tx_drop;
|
||||
@ -406,6 +415,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
|
||||
skb_queue_tail(&txq->pending_skbs, skb);
|
||||
|
||||
len = skb->len;
|
||||
num_gso_seg = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
|
||||
net_txq = netdev_get_tx_queue(ndev, txq_idx);
|
||||
|
||||
err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
|
||||
@ -430,10 +440,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
|
||||
/* skb may be freed after mana_gd_post_work_request. Do not use it. */
|
||||
skb = NULL;
|
||||
|
||||
/* Populated the packet and bytes counters based on post GSO packet
|
||||
* calculations
|
||||
*/
|
||||
tx_stats = &txq->stats;
|
||||
u64_stats_update_begin(&tx_stats->syncp);
|
||||
tx_stats->packets++;
|
||||
tx_stats->bytes += len;
|
||||
tx_stats->packets += num_gso_seg;
|
||||
tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs);
|
||||
u64_stats_update_end(&tx_stats->syncp);
|
||||
|
||||
tx_busy:
|
||||
@ -773,6 +786,11 @@ static int mana_send_request(struct mana_context *ac, void *in_buf,
|
||||
err = mana_gd_send_request(gc, in_len, in_buf, out_len,
|
||||
out_buf);
|
||||
if (err || resp->status) {
|
||||
if (err == -EOPNOTSUPP)
|
||||
return err;
|
||||
|
||||
if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
|
||||
mana_need_log(gc, err))
|
||||
dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
|
||||
err, resp->status);
|
||||
return err ? err : -EPROTO;
|
||||
@ -849,8 +867,10 @@ static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
|
||||
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
|
||||
sizeof(resp));
|
||||
if (err) {
|
||||
if (mana_en_need_log(apc, err))
|
||||
netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
|
||||
err);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -905,8 +925,10 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc)
|
||||
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
|
||||
sizeof(resp));
|
||||
if (err) {
|
||||
if (mana_en_need_log(apc, err))
|
||||
netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
|
||||
err);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1136,7 +1158,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
|
||||
err = mana_send_request(apc->ac, req, req_buf_size, &resp,
|
||||
sizeof(resp));
|
||||
if (err) {
|
||||
if (mana_en_need_log(apc, err))
|
||||
netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1231,7 +1255,9 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
|
||||
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
|
||||
sizeof(resp));
|
||||
if (err) {
|
||||
if (mana_en_need_log(apc, err))
|
||||
netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2610,6 +2636,88 @@ void mana_query_gf_stats(struct mana_port_context *apc)
|
||||
apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma;
|
||||
}
|
||||
|
||||
void mana_query_phy_stats(struct mana_port_context *apc)
|
||||
{
|
||||
struct mana_query_phy_stat_resp resp = {};
|
||||
struct mana_query_phy_stat_req req = {};
|
||||
struct net_device *ndev = apc->ndev;
|
||||
int err;
|
||||
|
||||
mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT,
|
||||
sizeof(req), sizeof(resp));
|
||||
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
|
||||
sizeof(resp));
|
||||
if (err)
|
||||
return;
|
||||
|
||||
err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT,
|
||||
sizeof(resp));
|
||||
if (err || resp.hdr.status) {
|
||||
netdev_err(ndev,
|
||||
"Failed to query PHY stats: %d, resp:0x%x\n",
|
||||
err, resp.hdr.status);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Aggregate drop counters */
|
||||
apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy;
|
||||
apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy;
|
||||
|
||||
/* Per TC traffic Counters */
|
||||
apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy;
|
||||
apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy;
|
||||
apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy;
|
||||
apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy;
|
||||
apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy;
|
||||
apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy;
|
||||
apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy;
|
||||
apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy;
|
||||
apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy;
|
||||
apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy;
|
||||
apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy;
|
||||
apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy;
|
||||
apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy;
|
||||
apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy;
|
||||
apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy;
|
||||
apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy;
|
||||
|
||||
/* Per TC byte Counters */
|
||||
apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy;
|
||||
apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy;
|
||||
apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy;
|
||||
apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy;
|
||||
apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy;
|
||||
apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy;
|
||||
apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy;
|
||||
apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy;
|
||||
apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy;
|
||||
apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy;
|
||||
apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy;
|
||||
apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy;
|
||||
apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy;
|
||||
apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy;
|
||||
apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy;
|
||||
apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy;
|
||||
|
||||
/* Per TC pause Counters */
|
||||
apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy;
|
||||
apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy;
|
||||
apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy;
|
||||
apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy;
|
||||
apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy;
|
||||
apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy;
|
||||
apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy;
|
||||
apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy;
|
||||
apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy;
|
||||
apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy;
|
||||
apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy;
|
||||
apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy;
|
||||
apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy;
|
||||
apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy;
|
||||
apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy;
|
||||
apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy;
|
||||
}
|
||||
|
||||
static int mana_init_port(struct net_device *ndev)
|
||||
{
|
||||
struct mana_port_context *apc = netdev_priv(ndev);
|
||||
@ -2804,11 +2912,10 @@ static int mana_dealloc_queues(struct net_device *ndev)
|
||||
|
||||
apc->rss_state = TRI_STATE_FALSE;
|
||||
err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
|
||||
if (err) {
|
||||
if (err && mana_en_need_log(apc, err))
|
||||
netdev_err(ndev, "Failed to disable vPort: %d\n", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Even in err case, still need to cleanup the vPort */
|
||||
mana_destroy_vport(apc);
|
||||
|
||||
return 0;
|
||||
|
||||
@ -7,10 +7,12 @@
|
||||
|
||||
#include <net/mana/mana.h>
|
||||
|
||||
static const struct {
|
||||
struct mana_stats_desc {
|
||||
char name[ETH_GSTRING_LEN];
|
||||
u16 offset;
|
||||
} mana_eth_stats[] = {
|
||||
};
|
||||
|
||||
static const struct mana_stats_desc mana_eth_stats[] = {
|
||||
{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
|
||||
{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
|
||||
{"hc_rx_discards_no_wqe", offsetof(struct mana_ethtool_stats,
|
||||
@ -75,6 +77,59 @@ static const struct {
|
||||
rx_cqe_unknown_type)},
|
||||
};
|
||||
|
||||
static const struct mana_stats_desc mana_phy_stats[] = {
|
||||
{ "hc_rx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_drop_phy) },
|
||||
{ "hc_tx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_drop_phy) },
|
||||
{ "hc_tc0_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc0_phy) },
|
||||
{ "hc_tc0_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc0_phy) },
|
||||
{ "hc_tc0_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc0_phy) },
|
||||
{ "hc_tc0_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc0_phy) },
|
||||
{ "hc_tc1_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc1_phy) },
|
||||
{ "hc_tc1_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc1_phy) },
|
||||
{ "hc_tc1_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc1_phy) },
|
||||
{ "hc_tc1_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc1_phy) },
|
||||
{ "hc_tc2_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc2_phy) },
|
||||
{ "hc_tc2_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc2_phy) },
|
||||
{ "hc_tc2_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc2_phy) },
|
||||
{ "hc_tc2_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc2_phy) },
|
||||
{ "hc_tc3_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc3_phy) },
|
||||
{ "hc_tc3_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc3_phy) },
|
||||
{ "hc_tc3_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc3_phy) },
|
||||
{ "hc_tc3_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc3_phy) },
|
||||
{ "hc_tc4_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc4_phy) },
|
||||
{ "hc_tc4_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc4_phy) },
|
||||
{ "hc_tc4_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc4_phy) },
|
||||
{ "hc_tc4_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc4_phy) },
|
||||
{ "hc_tc5_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc5_phy) },
|
||||
{ "hc_tc5_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc5_phy) },
|
||||
{ "hc_tc5_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc5_phy) },
|
||||
{ "hc_tc5_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc5_phy) },
|
||||
{ "hc_tc6_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc6_phy) },
|
||||
{ "hc_tc6_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc6_phy) },
|
||||
{ "hc_tc6_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc6_phy) },
|
||||
{ "hc_tc6_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc6_phy) },
|
||||
{ "hc_tc7_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc7_phy) },
|
||||
{ "hc_tc7_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc7_phy) },
|
||||
{ "hc_tc7_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc7_phy) },
|
||||
{ "hc_tc7_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc7_phy) },
|
||||
{ "hc_tc0_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc0_phy) },
|
||||
{ "hc_tc0_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc0_phy) },
|
||||
{ "hc_tc1_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc1_phy) },
|
||||
{ "hc_tc1_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc1_phy) },
|
||||
{ "hc_tc2_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc2_phy) },
|
||||
{ "hc_tc2_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc2_phy) },
|
||||
{ "hc_tc3_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc3_phy) },
|
||||
{ "hc_tc3_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc3_phy) },
|
||||
{ "hc_tc4_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc4_phy) },
|
||||
{ "hc_tc4_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc4_phy) },
|
||||
{ "hc_tc5_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc5_phy) },
|
||||
{ "hc_tc5_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc5_phy) },
|
||||
{ "hc_tc6_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc6_phy) },
|
||||
{ "hc_tc6_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc6_phy) },
|
||||
{ "hc_tc7_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc7_phy) },
|
||||
{ "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) },
|
||||
};
|
||||
|
||||
static int mana_get_sset_count(struct net_device *ndev, int stringset)
|
||||
{
|
||||
struct mana_port_context *apc = netdev_priv(ndev);
|
||||
@ -83,8 +138,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
|
||||
if (stringset != ETH_SS_STATS)
|
||||
return -EINVAL;
|
||||
|
||||
return ARRAY_SIZE(mana_eth_stats) + num_queues *
|
||||
(MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT);
|
||||
return ARRAY_SIZE(mana_eth_stats) + ARRAY_SIZE(mana_phy_stats) +
|
||||
num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT);
|
||||
}
|
||||
|
||||
static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
|
||||
@ -99,6 +154,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
|
||||
for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++)
|
||||
ethtool_puts(&data, mana_eth_stats[i].name);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(mana_phy_stats); i++)
|
||||
ethtool_puts(&data, mana_phy_stats[i].name);
|
||||
|
||||
for (i = 0; i < num_queues; i++) {
|
||||
ethtool_sprintf(&data, "rx_%d_packets", i);
|
||||
ethtool_sprintf(&data, "rx_%d_bytes", i);
|
||||
@ -128,6 +186,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
|
||||
struct mana_port_context *apc = netdev_priv(ndev);
|
||||
unsigned int num_queues = apc->num_queues;
|
||||
void *eth_stats = &apc->eth_stats;
|
||||
void *phy_stats = &apc->phy_stats;
|
||||
struct mana_stats_rx *rx_stats;
|
||||
struct mana_stats_tx *tx_stats;
|
||||
unsigned int start;
|
||||
@ -151,9 +210,18 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
|
||||
/* we call mana function to update stats from GDMA */
|
||||
mana_query_gf_stats(apc);
|
||||
|
||||
/* We call this mana function to get the phy stats from GDMA and includes
|
||||
* aggregate tx/rx drop counters, Per-TC(Traffic Channel) tx/rx and pause
|
||||
* counters.
|
||||
*/
|
||||
mana_query_phy_stats(apc);
|
||||
|
||||
for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++)
|
||||
data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset);
|
||||
|
||||
for (q = 0; q < ARRAY_SIZE(mana_phy_stats); q++)
|
||||
data[i++] = *(u64 *)(phy_stats + mana_phy_stats[q].offset);
|
||||
|
||||
for (q = 0; q < num_queues; q++) {
|
||||
rx_stats = &apc->rxqs[q]->stats;
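The ethtool code folds the new PHY counters into the same name-plus-offset descriptor table used for the other port stats, so the dump stays a single offsetof() walk. A standalone C example of that table-driven pattern follows; the struct and counter names are made up for the example.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct port_stats {
	uint64_t rx_drops;
	uint64_t tx_drops;
	uint64_t rx_pause;
};

struct stats_desc {
	const char *name;
	size_t offset;
};

static const struct stats_desc port_stats_desc[] = {
	{ "rx_drops", offsetof(struct port_stats, rx_drops) },
	{ "tx_drops", offsetof(struct port_stats, tx_drops) },
	{ "rx_pause", offsetof(struct port_stats, rx_pause) },
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	struct port_stats stats = { .rx_drops = 3, .tx_drops = 1, .rx_pause = 7 };
	const char *base = (const char *)&stats;

	/* Same shape as the ethtool dump loop: the name table drives the read. */
	for (size_t i = 0; i < ARRAY_SIZE(port_stats_desc); i++) {
		const uint64_t *val =
			(const uint64_t *)(base + port_stats_desc[i].offset);

		printf("%-10s %llu\n", port_stats_desc[i].name,
		       (unsigned long long)*val);
	}
	return 0;
}
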
@ -62,8 +62,11 @@ static int __init of_numa_parse_memory_nodes(void)
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
|
||||
for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) {
|
||||
r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
|
||||
if (!r)
|
||||
node_set(nid, numa_nodes_parsed);
|
||||
}
|
||||
|
||||
if (!i || r) {
|
||||
of_node_put(np);
|
||||
|
||||
@ -167,7 +167,7 @@ static int tpmi_get_logical_id(unsigned int cpu, struct tpmi_cpu_info *info)
|
||||
|
||||
info->punit_thread_id = FIELD_GET(LP_ID_MASK, data);
|
||||
info->punit_core_id = FIELD_GET(MODULE_ID_MASK, data);
|
||||
info->pkg_id = topology_physical_package_id(cpu);
|
||||
info->pkg_id = topology_logical_package_id(cpu);
|
||||
info->linux_cpu = cpu;
|
||||
|
||||
return 0;
|
||||
|
||||
@ -131,6 +131,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd)
|
||||
struct ism_req_hdr *req = cmd;
|
||||
struct ism_resp_hdr *resp = cmd;
|
||||
|
||||
spin_lock(&ism->cmd_lock);
|
||||
__ism_write_cmd(ism, req + 1, sizeof(*req), req->len - sizeof(*req));
|
||||
__ism_write_cmd(ism, req, 0, sizeof(*req));
|
||||
|
||||
@ -144,6 +145,7 @@ static int ism_cmd(struct ism_dev *ism, void *cmd)
|
||||
}
|
||||
__ism_read_cmd(ism, resp + 1, sizeof(*resp), resp->len - sizeof(*resp));
|
||||
out:
|
||||
spin_unlock(&ism->cmd_lock);
|
||||
return resp->ret;
|
||||
}
|
||||
|
||||
@ -607,6 +609,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&ism->lock);
|
||||
spin_lock_init(&ism->cmd_lock);
|
||||
dev_set_drvdata(&pdev->dev, ism);
|
||||
ism->pdev = pdev;
|
||||
ism->dev.parent = &pdev->dev;
|
||||
|
||||
@ -1243,7 +1243,7 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
|
||||
struct lpfc_nvmet_tgtport *tgtp;
|
||||
struct lpfc_async_xchg_ctx *ctxp =
|
||||
container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
|
||||
struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
|
||||
struct rqb_dmabuf *nvmebuf;
|
||||
struct lpfc_hba *phba = ctxp->phba;
|
||||
unsigned long iflag;
|
||||
|
||||
@ -1251,13 +1251,18 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
|
||||
lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n",
|
||||
ctxp->oxid, ctxp->size, raw_smp_processor_id());
|
||||
|
||||
spin_lock_irqsave(&ctxp->ctxlock, iflag);
|
||||
nvmebuf = ctxp->rqb_buffer;
|
||||
if (!nvmebuf) {
|
||||
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
|
||||
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
|
||||
"6425 Defer rcv: no buffer oxid x%x: "
|
||||
"flg %x ste %x\n",
|
||||
ctxp->oxid, ctxp->flag, ctxp->state);
|
||||
return;
|
||||
}
|
||||
ctxp->rqb_buffer = NULL;
|
||||
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
|
||||
|
||||
tgtp = phba->targetport->private;
|
||||
if (tgtp)
|
||||
@ -1265,9 +1270,6 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
|
||||
|
||||
/* Free the nvmebuf since a new buffer already replaced it */
|
||||
nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
|
||||
spin_lock_irqsave(&ctxp->ctxlock, iflag);
|
||||
ctxp->rqb_buffer = NULL;
|
||||
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -127,6 +127,10 @@ static int efivarfs_d_compare(const struct dentry *dentry,
|
||||
{
|
||||
int guid = len - EFI_VARIABLE_GUID_LEN;
|
||||
|
||||
/* Parallel lookups may produce a temporary invalid filename */
|
||||
if (guid <= 0)
|
||||
return 1;
|
||||
|
||||
if (name->len != len)
|
||||
return 1;
|
||||
|
||||
|
||||
@ -218,6 +218,7 @@ struct eventpoll {
|
||||
/* used to optimize loop detection check */
|
||||
u64 gen;
|
||||
struct hlist_head refs;
|
||||
u8 loop_check_depth;
|
||||
|
||||
/*
|
||||
* usage count, used together with epitem->dying to
|
||||
@ -2088,23 +2089,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
|
||||
}
|
||||
|
||||
/**
|
||||
* ep_loop_check_proc - verify that adding an epoll file inside another
|
||||
* epoll structure does not violate the constraints, in
|
||||
* terms of closed loops, or too deep chains (which can
|
||||
* result in excessive stack usage).
|
||||
* ep_loop_check_proc - verify that adding an epoll file @ep inside another
|
||||
* epoll file does not create closed loops, and
|
||||
* determine the depth of the subtree starting at @ep
|
||||
*
|
||||
* @ep: the &struct eventpoll to be currently checked.
|
||||
* @depth: Current depth of the path being checked.
|
||||
*
|
||||
* Return: %zero if adding the epoll @file inside current epoll
|
||||
* structure @ep does not violate the constraints, or %-1 otherwise.
|
||||
* Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
|
||||
*/
|
||||
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
|
||||
{
|
||||
int error = 0;
|
||||
int result = 0;
|
||||
struct rb_node *rbp;
|
||||
struct epitem *epi;
|
||||
|
||||
if (ep->gen == loop_check_gen)
|
||||
return ep->loop_check_depth;
|
||||
|
||||
mutex_lock_nested(&ep->mtx, depth + 1);
|
||||
ep->gen = loop_check_gen;
|
||||
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
|
||||
@ -2112,13 +2114,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
|
||||
if (unlikely(is_file_epoll(epi->ffd.file))) {
|
||||
struct eventpoll *ep_tovisit;
|
||||
ep_tovisit = epi->ffd.file->private_data;
|
||||
if (ep_tovisit->gen == loop_check_gen)
|
||||
continue;
|
||||
if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
|
||||
error = -1;
|
||||
result = INT_MAX;
|
||||
else
|
||||
error = ep_loop_check_proc(ep_tovisit, depth + 1);
|
||||
if (error != 0)
|
||||
result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
|
||||
if (result > EP_MAX_NESTS)
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
@ -2132,9 +2132,27 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
|
||||
list_file(epi->ffd.file);
|
||||
}
|
||||
}
|
||||
ep->loop_check_depth = result;
|
||||
mutex_unlock(&ep->mtx);
|
||||
|
||||
return error;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards
|
||||
*/
|
||||
static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth)
|
||||
{
|
||||
int result = 0;
|
||||
struct epitem *epi;
|
||||
|
||||
if (ep->gen == loop_check_gen)
|
||||
return ep->loop_check_depth;
|
||||
hlist_for_each_entry_rcu(epi, &ep->refs, fllink)
|
||||
result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1);
|
||||
ep->gen = loop_check_gen;
|
||||
ep->loop_check_depth = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2150,8 +2168,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
  */
 static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to)
 {
+	int depth, upwards_depth;
+
 	inserting_into = ep;
-	return ep_loop_check_proc(to, 0);
+	/*
+	 * Check how deep down we can get from @to, and whether it is possible
+	 * to loop up to @ep.
+	 */
+	depth = ep_loop_check_proc(to, 0);
+	if (depth > EP_MAX_NESTS)
+		return -1;
+	/* Check how far up we can go from @ep. */
+	rcu_read_lock();
+	upwards_depth = ep_get_upwards_depth_proc(ep, 0);
+	rcu_read_unlock();
+
+	return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0;
 }
 
 static void clear_tfile_check_list(void)

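The rework replaces the pass/fail recursion with a depth computation memoized per generation: each eventpoll caches the depth it reported under the current loop_check_gen, so shared subtrees are only walked once. A self-contained C sketch of generation-stamped memoization on a small DAG follows; the node type and numbers are illustrative, and the loop/EP_MAX_NESTS handling of the real code is omitted.

#include <stdio.h>

#define MAX_CHILDREN 4

struct node {
	struct node *child[MAX_CHILDREN];
	int nr_children;
	unsigned int gen;	/* generation of the cached value */
	int cached_depth;
};

static unsigned int loop_check_gen;

/* Depth of the subtree under @n; revisits under the same generation reuse
 * the cached answer instead of re-walking shared children. */
static int subtree_depth(struct node *n)
{
	int depth = 0;

	if (n->gen == loop_check_gen)
		return n->cached_depth;
	n->gen = loop_check_gen;

	for (int i = 0; i < n->nr_children; i++) {
		int d = subtree_depth(n->child[i]) + 1;

		if (d > depth)
			depth = d;
	}
	n->cached_depth = depth;
	return depth;
}

int main(void)
{
	struct node leaf = { .nr_children = 0 };
	struct node mid = { .child = { &leaf }, .nr_children = 1 };
	struct node root = { .child = { &mid, &leaf }, .nr_children = 2 };

	loop_check_gen++;	/* new check, invalidate older cached depths */
	printf("depth = %d\n", subtree_depth(&root));	/* prints 2 */
	return 0;
}
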
@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
|
||||
!list_empty(&of->list));
|
||||
}
|
||||
|
||||
/* Get active reference to kernfs node for an open file */
|
||||
static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
|
||||
{
|
||||
/* Skip if file was already released */
|
||||
if (unlikely(of->released))
|
||||
return NULL;
|
||||
|
||||
if (!kernfs_get_active(of->kn))
|
||||
return NULL;
|
||||
|
||||
return of;
|
||||
}
|
||||
|
||||
static void kernfs_put_active_of(struct kernfs_open_file *of)
|
||||
{
|
||||
return kernfs_put_active(of->kn);
|
||||
}
|
||||
|
||||
/**
|
||||
* kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
|
||||
*
|
||||
@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
|
||||
|
||||
if (ops->seq_stop)
|
||||
ops->seq_stop(sf, v);
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
}
|
||||
|
||||
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
|
||||
@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
|
||||
* the ops aren't called concurrently for the same open file.
|
||||
*/
|
||||
mutex_lock(&of->mutex);
|
||||
if (!kernfs_get_active(of->kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
ops = kernfs_ops(of->kn);
|
||||
@ -238,7 +256,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
* the ops aren't called concurrently for the same open file.
|
||||
*/
|
||||
mutex_lock(&of->mutex);
|
||||
if (!kernfs_get_active(of->kn)) {
|
||||
if (!kernfs_get_active_of(of)) {
|
||||
len = -ENODEV;
|
||||
mutex_unlock(&of->mutex);
|
||||
goto out_free;
|
||||
@ -252,7 +270,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
else
|
||||
len = -EINVAL;
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
mutex_unlock(&of->mutex);
|
||||
|
||||
if (len < 0)
|
||||
@ -323,7 +341,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
* the ops aren't called concurrently for the same open file.
|
||||
*/
|
||||
mutex_lock(&of->mutex);
|
||||
if (!kernfs_get_active(of->kn)) {
|
||||
if (!kernfs_get_active_of(of)) {
|
||||
mutex_unlock(&of->mutex);
|
||||
len = -ENODEV;
|
||||
goto out_free;
|
||||
@ -335,7 +353,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
else
|
||||
len = -EINVAL;
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
mutex_unlock(&of->mutex);
|
||||
|
||||
if (len > 0)
|
||||
@ -357,13 +375,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
|
||||
if (!of->vm_ops)
|
||||
return;
|
||||
|
||||
if (!kernfs_get_active(of->kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
return;
|
||||
|
||||
if (of->vm_ops->open)
|
||||
of->vm_ops->open(vma);
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
}
|
||||
|
||||
static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
|
||||
@ -375,14 +393,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
|
||||
if (!of->vm_ops)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
if (!kernfs_get_active(of->kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
if (of->vm_ops->fault)
|
||||
ret = of->vm_ops->fault(vmf);
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -395,7 +413,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
|
||||
if (!of->vm_ops)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
if (!kernfs_get_active(of->kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
ret = 0;
|
||||
@ -404,7 +422,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
|
||||
else
|
||||
file_update_time(file);
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -418,14 +436,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
|
||||
if (!of->vm_ops)
|
||||
return -EINVAL;
|
||||
|
||||
if (!kernfs_get_active(of->kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
return -EINVAL;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (of->vm_ops->access)
|
||||
ret = of->vm_ops->access(vma, addr, buf, len, write);
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -455,7 +473,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
mutex_lock(&of->mutex);
|
||||
|
||||
rc = -ENODEV;
|
||||
if (!kernfs_get_active(of->kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
goto out_unlock;
|
||||
|
||||
ops = kernfs_ops(of->kn);
|
||||
@ -490,7 +508,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
}
|
||||
vma->vm_ops = &kernfs_vm_ops;
|
||||
out_put:
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
out_unlock:
|
||||
mutex_unlock(&of->mutex);
|
||||
|
||||
@ -851,7 +869,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
|
||||
struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
|
||||
__poll_t ret;
|
||||
|
||||
if (!kernfs_get_active(kn))
|
||||
if (!kernfs_get_active_of(of))
|
||||
return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
|
||||
|
||||
if (kn->attr.ops->poll)
|
||||
@ -859,7 +877,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
|
||||
else
|
||||
ret = kernfs_generic_poll(of, wait);
|
||||
|
||||
kernfs_put_active(kn);
|
||||
kernfs_put_active_of(of);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -874,7 +892,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
|
||||
* the ops aren't called concurrently for the same open file.
|
||||
*/
|
||||
mutex_lock(&of->mutex);
|
||||
if (!kernfs_get_active(of->kn)) {
|
||||
if (!kernfs_get_active_of(of)) {
|
||||
mutex_unlock(&of->mutex);
|
||||
return -ENODEV;
|
||||
}
|
||||
@ -885,7 +903,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
|
||||
else
|
||||
ret = generic_file_llseek(file, offset, whence);
|
||||
|
||||
kernfs_put_active(of->kn);
|
||||
kernfs_put_active_of(of);
|
||||
mutex_unlock(&of->mutex);
|
||||
return ret;
|
||||
}
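Every kernfs file operation above now goes through kernfs_get_active_of(), which refuses to take an active reference once the open file has been released; the remaining hunks are mechanical substitutions of the new helper. A userspace sketch of the same guard-wrapper idea around a plain reference count follows; the types and functions are invented for the example.

#include <stdbool.h>
#include <stdio.h>

struct node {
	int active_refs;
	bool dying;
};

struct open_file {
	struct node *kn;
	bool released;		/* set once the file has been released */
};

static bool node_get_active(struct node *kn)
{
	if (kn->dying)
		return false;
	kn->active_refs++;
	return true;
}

static void node_put_active(struct node *kn)
{
	kn->active_refs--;
}

/* Wrapper used by every file op: bail out early for released files so the
 * underlying node is never pinned on their behalf. */
static struct open_file *get_active_of(struct open_file *of)
{
	if (of->released)
		return NULL;
	if (!node_get_active(of->kn))
		return NULL;
	return of;
}

static void put_active_of(struct open_file *of)
{
	node_put_active(of->kn);
}

int main(void)
{
	struct node kn = { 0, false };
	struct open_file of = { &kn, false };

	if (get_active_of(&of)) {
		puts("op runs against a live node");
		put_active_of(&of);
	}

	of.released = true;
	if (!get_active_of(&of))
		puts("released file: op bails out instead of touching the node");

	printf("active_refs = %d\n", kn.active_refs);	/* back to 0 */
	return 0;
}
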
@ -2673,6 +2673,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
|
||||
return attach_recursive_mnt(mnt, p, mp, 0);
|
||||
}
|
||||
|
||||
static int may_change_propagation(const struct mount *m)
|
||||
{
|
||||
struct mnt_namespace *ns = m->mnt_ns;
|
||||
|
||||
// it must be mounted in some namespace
|
||||
if (IS_ERR_OR_NULL(ns)) // is_mounted()
|
||||
return -EINVAL;
|
||||
// and the caller must be admin in userns of that namespace
|
||||
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanity check the flags to change_mnt_propagation.
|
||||
*/
|
||||
@ -2709,6 +2722,10 @@ static int do_change_type(struct path *path, int ms_flags)
|
||||
return -EINVAL;
|
||||
|
||||
namespace_lock();
|
||||
err = may_change_propagation(mnt);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
if (type == MS_SHARED) {
|
||||
err = invent_group_ids(mnt, recurse);
|
||||
if (err)
|
||||
@ -3102,18 +3119,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
|
||||
|
||||
namespace_lock();
|
||||
|
||||
err = -EINVAL;
|
||||
/* To and From must be mounted */
|
||||
if (!is_mounted(&from->mnt))
|
||||
err = may_change_propagation(from);
|
||||
if (err)
|
||||
goto out;
|
||||
if (!is_mounted(&to->mnt))
|
||||
goto out;
|
||||
|
||||
err = -EPERM;
|
||||
/* We should be allowed to modify mount namespaces of both mounts */
|
||||
if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
|
||||
goto out;
|
||||
if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
|
||||
err = may_change_propagation(to);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -EINVAL;
|
||||
|
||||
@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
|
||||
{
|
||||
struct nfs_fattr *fattr = NULL;
|
||||
struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
|
||||
size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
|
||||
size_t fh_size = offsetof(struct nfs_fh, data);
|
||||
const struct nfs_rpc_ops *rpc_ops;
|
||||
struct dentry *dentry;
|
||||
struct inode *inode;
|
||||
int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
|
||||
int len = EMBED_FH_OFF;
|
||||
u32 *p = fid->raw;
|
||||
int ret;
|
||||
|
||||
/* Initial check of bounds */
|
||||
if (fh_len < len + XDR_QUADLEN(fh_size) ||
|
||||
fh_len > XDR_QUADLEN(NFS_MAXFHSIZE))
|
||||
return NULL;
|
||||
/* Calculate embedded filehandle size */
|
||||
fh_size += server_fh->size;
|
||||
len += XDR_QUADLEN(fh_size);
|
||||
/* NULL translates to ESTALE */
|
||||
if (fh_len < len || fh_type != len)
|
||||
return NULL;
|
||||
|
||||
@ -7831,10 +7831,10 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
|
||||
return err;
|
||||
do {
|
||||
err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
|
||||
if (err != -NFS4ERR_DELAY)
|
||||
if (err != -NFS4ERR_DELAY && err != -NFS4ERR_GRACE)
|
||||
break;
|
||||
ssleep(1);
|
||||
} while (err == -NFS4ERR_DELAY);
|
||||
} while (err == -NFS4ERR_DELAY || err == -NFSERR_GRACE);
|
||||
return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
|
||||
}
|
||||
|
||||
|
||||
@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req)
|
||||
nfs_page_clear_headlock(req);
|
||||
}
|
||||
|
||||
/*
|
||||
* nfs_page_group_sync_on_bit_locked
|
||||
/**
|
||||
* nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
|
||||
* @req: request in page group
|
||||
* @bit: PG_* bit that is used to sync page group
|
||||
*
|
||||
* must be called with page group lock held
|
||||
*/
|
||||
static bool
|
||||
nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
|
||||
bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
|
||||
{
|
||||
struct nfs_page *head = req->wb_head;
|
||||
struct nfs_page *tmp;
|
||||
|
||||
@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
|
||||
static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!test_bit(PG_REMOVE, &req->wb_flags))
|
||||
return 0;
|
||||
ret = nfs_page_group_lock(req);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
|
||||
nfs_page_set_inode_ref(req, inode);
|
||||
nfs_page_group_unlock(req);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -585,19 +575,18 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
ret = nfs_page_group_lock(head);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
||||
/* Ensure that nobody removed the request before we locked it */
|
||||
if (head != folio->private) {
|
||||
nfs_page_group_unlock(head);
|
||||
nfs_unlock_and_release_request(head);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
ret = nfs_cancel_remove_inode(head, inode);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
||||
ret = nfs_page_group_lock(head);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
nfs_cancel_remove_inode(head, inode);
|
||||
|
||||
/* lock each request in the page group */
|
||||
for (subreq = head->wb_this_page;
|
||||
@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
|
||||
|
||||
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
|
||||
nfs_page_group_lock(req);
|
||||
if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
|
||||
struct folio *folio = nfs_page_to_folio(req->wb_head);
|
||||
struct address_space *mapping = folio->mapping;
|
||||
|
||||
@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
|
||||
}
|
||||
spin_unlock(&mapping->i_private_lock);
|
||||
}
|
||||
nfs_page_group_unlock(req);
|
||||
|
||||
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
|
||||
atomic_long_dec(&nfsi->nrequests);
|
||||
|
||||
@ -57,6 +57,21 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
|
||||
switch (nfserr) {
|
||||
case nfs_ok:
|
||||
return 0;
|
||||
case nfserr_jukebox:
|
||||
/* this error can indicate a presence of a conflicting
|
||||
* delegation to an NLM lock request. Options are:
|
||||
* (1) For now, drop this request and make the client
|
||||
* retry. When delegation is returned, client's lock retry
|
||||
* will complete.
|
||||
* (2) NLM4_DENIED as per "spec" signals to the client
|
||||
* that the lock is unavailable now but client can retry.
|
||||
* Linux client implementation does not. It treats
|
||||
* NLM4_DENIED same as NLM4_FAILED and errors the request.
|
||||
* (3) For the future, treat this as blocked lock and try
|
||||
* to callback when the delegation is returned but might
|
||||
* not have a proper lock request to block on.
|
||||
*/
|
||||
fallthrough;
|
||||
case nfserr_dropit:
|
||||
return nlm_drop_reply;
|
||||
case nfserr_stale:
|
||||
|
||||
@ -87,7 +87,7 @@
#define SMB_INTERFACE_POLL_INTERVAL 600

/* maximum number of PDUs in one compound */
#define MAX_COMPOUND 7
#define MAX_COMPOUND 10

/*
* Default number of credits to keep available for SMB3.
@ -1881,9 +1881,12 @@ static inline bool is_replayable_error(int error)


/* cifs_get_writable_file() flags */
#define FIND_WR_ANY 0
#define FIND_WR_FSUID_ONLY 1
#define FIND_WR_WITH_DELETE 2
enum cifs_writable_file_flags {
FIND_WR_ANY = 0U,
FIND_WR_FSUID_ONLY = (1U << 0),
FIND_WR_WITH_DELETE = (1U << 1),
FIND_WR_NO_PENDING_DELETE = (1U << 2),
};

#define MID_FREE 0
#define MID_REQUEST_ALLOCATED 1
@ -2339,6 +2342,8 @@ struct smb2_compound_vars {
|
||||
struct kvec qi_iov;
|
||||
struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
|
||||
struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
|
||||
struct kvec unlink_iov[SMB2_SET_INFO_IOV_SIZE];
|
||||
struct kvec rename_iov[SMB2_SET_INFO_IOV_SIZE];
|
||||
struct kvec close_iov;
|
||||
struct smb2_file_rename_info_hdr rename_info;
|
||||
struct smb2_file_link_info_hdr link_info;
|
||||
|
||||
@ -312,8 +312,8 @@ extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
|
||||
|
||||
extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
|
||||
|
||||
extern void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon,
|
||||
const char *path);
|
||||
void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon,
|
||||
struct dentry *dentry);
|
||||
|
||||
extern void cifs_mark_open_handles_for_deleted_file(struct inode *inode,
|
||||
const char *path);
|
||||
|
||||
@ -998,7 +998,10 @@ int cifs_open(struct inode *inode, struct file *file)
|
||||
|
||||
/* Get the cached handle as SMB2 close is deferred */
|
||||
if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) {
|
||||
rc = cifs_get_writable_path(tcon, full_path, FIND_WR_FSUID_ONLY, &cfile);
|
||||
rc = cifs_get_writable_path(tcon, full_path,
|
||||
FIND_WR_FSUID_ONLY |
|
||||
FIND_WR_NO_PENDING_DELETE,
|
||||
&cfile);
|
||||
} else {
|
||||
rc = cifs_get_readable_path(tcon, full_path, &cfile);
|
||||
}
|
||||
@ -2530,6 +2533,9 @@ refind_writable:
|
||||
continue;
|
||||
if (with_delete && !(open_file->fid.access & DELETE))
|
||||
continue;
|
||||
if ((flags & FIND_WR_NO_PENDING_DELETE) &&
|
||||
open_file->status_file_deleted)
|
||||
continue;
|
||||
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
|
||||
if (!open_file->invalidHandle) {
|
||||
/* found a good writable file */
|
||||
@ -2647,6 +2653,16 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
|
||||
spin_unlock(&tcon->open_file_lock);
|
||||
free_dentry_path(page);
|
||||
*ret_file = find_readable_file(cinode, 0);
|
||||
if (*ret_file) {
|
||||
spin_lock(&cinode->open_file_lock);
|
||||
if ((*ret_file)->status_file_deleted) {
|
||||
spin_unlock(&cinode->open_file_lock);
|
||||
cifsFileInfo_put(*ret_file);
|
||||
*ret_file = NULL;
|
||||
} else {
|
||||
spin_unlock(&cinode->open_file_lock);
|
||||
}
|
||||
}
|
||||
return *ret_file ? 0 : -ENOENT;
|
||||
}
|
||||
|
||||
|
||||
@ -1931,7 +1931,7 @@ cifs_drop_nlink(struct inode *inode)
|
||||
* but will return the EACCES to the caller. Note that the VFS does not call
|
||||
* unlink on negative dentries currently.
|
||||
*/
|
||||
int cifs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
static int __cifs_unlink(struct inode *dir, struct dentry *dentry, bool sillyrename)
|
||||
{
|
||||
int rc = 0;
|
||||
unsigned int xid;
|
||||
@ -1943,15 +1943,24 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
|
||||
struct tcon_link *tlink;
|
||||
struct cifs_tcon *tcon;
|
||||
__u32 dosattr = 0, origattr = 0;
|
||||
struct TCP_Server_Info *server;
|
||||
struct iattr *attrs = NULL;
|
||||
__u32 dosattr = 0, origattr = 0;
|
||||
bool rehash = false;
|
||||
|
||||
cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry);
|
||||
|
||||
if (unlikely(cifs_forced_shutdown(cifs_sb)))
|
||||
return -EIO;
|
||||
|
||||
/* Unhash dentry in advance to prevent any concurrent opens */
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (!d_unhashed(dentry)) {
|
||||
__d_drop(dentry);
|
||||
rehash = true;
|
||||
}
|
||||
spin_unlock(&dentry->d_lock);
|
||||
|
||||
tlink = cifs_sb_tlink(cifs_sb);
|
||||
if (IS_ERR(tlink))
|
||||
return PTR_ERR(tlink);
|
||||
@ -1975,7 +1984,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
}
|
||||
|
||||
netfs_wait_for_outstanding_io(inode);
|
||||
cifs_close_deferred_file_under_dentry(tcon, full_path);
|
||||
cifs_close_deferred_file_under_dentry(tcon, dentry);
|
||||
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
|
||||
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
|
||||
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
|
||||
@ -1994,6 +2003,23 @@ retry_std_delete:
|
||||
goto psx_del_no_retry;
|
||||
}
|
||||
|
||||
/* For SMB2+, if the file is open, we always perform a silly rename.
|
||||
*
|
||||
* We check for d_count() right after calling
|
||||
* cifs_close_deferred_file_under_dentry() to make sure that the
|
||||
* dentry's refcount gets dropped in case the file had any deferred
|
||||
* close.
|
||||
*/
|
||||
if (!sillyrename && server->vals->protocol_id > SMB10_PROT_ID) {
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (d_count(dentry) > 1)
|
||||
sillyrename = true;
|
||||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
|
||||
if (sillyrename)
|
||||
rc = -EBUSY;
|
||||
else
|
||||
rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry);
|
||||
|
||||
psx_del_no_retry:
|
||||
@ -2003,7 +2029,8 @@ psx_del_no_retry:
|
||||
cifs_drop_nlink(inode);
|
||||
}
|
||||
} else if (rc == -ENOENT) {
|
||||
d_drop(dentry);
|
||||
if (simple_positive(dentry))
|
||||
d_delete(dentry);
|
||||
} else if (rc == -EBUSY) {
|
||||
if (server->ops->rename_pending_delete) {
|
||||
rc = server->ops->rename_pending_delete(full_path,
|
||||
@ -2056,9 +2083,16 @@ unlink_out:
|
||||
kfree(attrs);
|
||||
free_xid(xid);
|
||||
cifs_put_tlink(tlink);
|
||||
if (rehash)
|
||||
d_rehash(dentry);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int cifs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return __cifs_unlink(dir, dentry, false);
|
||||
}
|
||||
|
||||
static int
|
||||
cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
|
||||
const char *full_path, struct cifs_sb_info *cifs_sb,
|
||||
@ -2346,14 +2380,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
|
||||
rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb);
|
||||
cifs_put_tlink(tlink);
|
||||
|
||||
cifsInode = CIFS_I(d_inode(direntry));
|
||||
|
||||
if (!rc) {
|
||||
set_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags);
|
||||
spin_lock(&d_inode(direntry)->i_lock);
|
||||
i_size_write(d_inode(direntry), 0);
|
||||
clear_nlink(d_inode(direntry));
|
||||
spin_unlock(&d_inode(direntry)->i_lock);
|
||||
}
|
||||
|
||||
cifsInode = CIFS_I(d_inode(direntry));
|
||||
/* force revalidate to go get info when needed */
|
||||
cifsInode->time = 0;
|
||||
|
||||
@ -2458,10 +2494,12 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
|
||||
struct dentry *target_dentry, unsigned int flags)
|
||||
{
|
||||
const char *from_name, *to_name;
|
||||
struct TCP_Server_Info *server;
|
||||
void *page1, *page2;
|
||||
struct cifs_sb_info *cifs_sb;
|
||||
struct tcon_link *tlink;
|
||||
struct cifs_tcon *tcon;
|
||||
bool rehash = false;
|
||||
unsigned int xid;
|
||||
int rc, tmprc;
|
||||
int retry_count = 0;
|
||||
@ -2477,10 +2515,22 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
|
||||
if (unlikely(cifs_forced_shutdown(cifs_sb)))
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* Prevent any concurrent opens on the target by unhashing the dentry.
|
||||
* VFS already unhashes the target when renaming directories.
|
||||
*/
|
||||
if (d_is_positive(target_dentry) && !d_is_dir(target_dentry)) {
|
||||
if (!d_unhashed(target_dentry)) {
|
||||
d_drop(target_dentry);
|
||||
rehash = true;
|
||||
}
|
||||
}
|
||||
|
||||
tlink = cifs_sb_tlink(cifs_sb);
|
||||
if (IS_ERR(tlink))
|
||||
return PTR_ERR(tlink);
|
||||
tcon = tlink_tcon(tlink);
|
||||
server = tcon->ses->server;
|
||||
|
||||
page1 = alloc_dentry_path();
|
||||
page2 = alloc_dentry_path();
|
||||
@ -2498,10 +2548,10 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
|
||||
goto cifs_rename_exit;
|
||||
}
|
||||
|
||||
cifs_close_deferred_file_under_dentry(tcon, from_name);
|
||||
cifs_close_deferred_file_under_dentry(tcon, source_dentry);
|
||||
if (d_inode(target_dentry) != NULL) {
|
||||
netfs_wait_for_outstanding_io(d_inode(target_dentry));
|
||||
cifs_close_deferred_file_under_dentry(tcon, to_name);
|
||||
cifs_close_deferred_file_under_dentry(tcon, target_dentry);
|
||||
}
|
||||
|
||||
rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
|
||||
@ -2518,6 +2568,8 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
|
||||
}
|
||||
}
|
||||
|
||||
if (!rc)
|
||||
rehash = false;
|
||||
/*
|
||||
* No-replace is the natural behavior for CIFS, so skip unlink hacks.
|
||||
*/
|
||||
@ -2565,23 +2617,61 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
|
||||
|
||||
unlink_target:
|
||||
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
|
||||
|
||||
/* Try unlinking the target dentry if it's not negative */
|
||||
if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
|
||||
if (d_is_dir(target_dentry))
|
||||
if (d_really_is_positive(target_dentry)) {
|
||||
if (!rc) {
|
||||
struct inode *inode = d_inode(target_dentry);
|
||||
/*
|
||||
* Samba and ksmbd servers allow renaming a target
|
||||
* directory that is open, so make sure to update
|
||||
* ->i_nlink and then mark it as delete pending.
|
||||
*/
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
drop_cached_dir_by_name(xid, tcon, to_name, cifs_sb);
|
||||
spin_lock(&inode->i_lock);
|
||||
i_size_write(inode, 0);
|
||||
clear_nlink(inode);
|
||||
spin_unlock(&inode->i_lock);
|
||||
set_bit(CIFS_INO_DELETE_PENDING, &CIFS_I(inode)->flags);
|
||||
CIFS_I(inode)->time = 0; /* force reval */
|
||||
inode_set_ctime_current(inode);
|
||||
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
|
||||
}
|
||||
} else if (rc == -EACCES || rc == -EEXIST) {
|
||||
/*
|
||||
* Rename failed, possibly due to a busy target.
|
||||
* Retry it by unlinking the target first.
|
||||
*/
|
||||
if (d_is_dir(target_dentry)) {
|
||||
tmprc = cifs_rmdir(target_dir, target_dentry);
|
||||
else
|
||||
tmprc = cifs_unlink(target_dir, target_dentry);
|
||||
if (tmprc)
|
||||
} else {
|
||||
tmprc = __cifs_unlink(target_dir, target_dentry,
|
||||
server->vals->protocol_id > SMB10_PROT_ID);
|
||||
}
|
||||
if (tmprc) {
|
||||
/*
|
||||
* Some servers will return STATUS_ACCESS_DENIED
|
||||
* or STATUS_DIRECTORY_NOT_EMPTY when failing to
|
||||
* rename a non-empty directory. Make sure to
|
||||
* propagate the appropriate error back to
|
||||
* userspace.
|
||||
*/
|
||||
if (tmprc == -EEXIST || tmprc == -ENOTEMPTY)
|
||||
rc = tmprc;
|
||||
goto cifs_rename_exit;
|
||||
}
|
||||
rc = cifs_do_rename(xid, source_dentry, from_name,
|
||||
target_dentry, to_name);
|
||||
if (!rc)
|
||||
rehash = false;
|
||||
}
|
||||
}
|
||||
|
||||
/* force revalidate to go get info when needed */
|
||||
CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
|
||||
|
||||
cifs_rename_exit:
|
||||
if (rehash)
|
||||
d_rehash(target_dentry);
|
||||
kfree(info_buf_source);
|
||||
free_dentry_path(page2);
|
||||
free_dentry_path(page1);
|
||||
@ -2599,6 +2689,8 @@ cifs_dentry_needs_reval(struct dentry *dentry)
|
||||
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
|
||||
struct cached_fid *cfid = NULL;
|
||||
|
||||
if (test_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags))
|
||||
return false;
|
||||
if (cifs_i->time == 0)
|
||||
return true;
|
||||
|
||||
|
||||
@ -832,22 +832,19 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
|
||||
kfree(tmp_list);
|
||||
}
|
||||
}
|
||||
void
|
||||
cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
|
||||
|
||||
void cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
struct cifsFileInfo *cfile;
|
||||
struct file_list *tmp_list, *tmp_next_list;
|
||||
void *page;
|
||||
const char *full_path;
|
||||
struct cifsFileInfo *cfile;
|
||||
LIST_HEAD(file_head);
|
||||
|
||||
page = alloc_dentry_path();
|
||||
spin_lock(&tcon->open_file_lock);
|
||||
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
|
||||
full_path = build_path_from_dentry(cfile->dentry, page);
|
||||
if (strstr(full_path, path)) {
|
||||
if (delayed_work_pending(&cfile->deferred)) {
|
||||
if (cancel_delayed_work(&cfile->deferred)) {
|
||||
if ((cfile->dentry == dentry) &&
|
||||
delayed_work_pending(&cfile->deferred) &&
|
||||
cancel_delayed_work(&cfile->deferred)) {
|
||||
spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
|
||||
cifs_del_deferred_close(cfile);
|
||||
spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
|
||||
@ -859,8 +856,6 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
|
||||
list_add_tail(&tmp_list->list, &file_head);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&tcon->open_file_lock);
|
||||
|
||||
list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) {
|
||||
@ -868,7 +863,6 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path)
|
||||
list_del(&tmp_list->list);
|
||||
kfree(tmp_list);
|
||||
}
|
||||
free_dentry_path(page);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@ -30,10 +30,9 @@ enum smb2_compound_ops {
|
||||
SMB2_OP_QUERY_DIR,
|
||||
SMB2_OP_MKDIR,
|
||||
SMB2_OP_RENAME,
|
||||
SMB2_OP_DELETE,
|
||||
SMB2_OP_HARDLINK,
|
||||
SMB2_OP_SET_EOF,
|
||||
SMB2_OP_RMDIR,
|
||||
SMB2_OP_UNLINK,
|
||||
SMB2_OP_POSIX_QUERY_INFO,
|
||||
SMB2_OP_SET_REPARSE,
|
||||
SMB2_OP_GET_REPARSE,
|
||||
|
||||
@ -207,8 +207,10 @@ replay_again:
|
||||
server = cifs_pick_channel(ses);
|
||||
|
||||
vars = kzalloc(sizeof(*vars), GFP_ATOMIC);
|
||||
if (vars == NULL)
|
||||
return -ENOMEM;
|
||||
if (vars == NULL) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
rqst = &vars->rqst[0];
|
||||
rsp_iov = &vars->rsp_iov[0];
|
||||
|
||||
@ -344,9 +346,6 @@ replay_again:
|
||||
trace_smb3_posix_query_info_compound_enter(xid, tcon->tid,
|
||||
ses->Suid, full_path);
|
||||
break;
|
||||
case SMB2_OP_DELETE:
|
||||
trace_smb3_delete_enter(xid, tcon->tid, ses->Suid, full_path);
|
||||
break;
|
||||
case SMB2_OP_MKDIR:
|
||||
/*
|
||||
* Directories are created through parameters in the
|
||||
@ -354,23 +353,40 @@ replay_again:
|
||||
*/
|
||||
trace_smb3_mkdir_enter(xid, tcon->tid, ses->Suid, full_path);
|
||||
break;
|
||||
case SMB2_OP_RMDIR:
|
||||
rqst[num_rqst].rq_iov = &vars->si_iov[0];
|
||||
case SMB2_OP_UNLINK:
|
||||
rqst[num_rqst].rq_iov = vars->unlink_iov;
|
||||
rqst[num_rqst].rq_nvec = 1;
|
||||
|
||||
size[0] = 1; /* sizeof __u8 See MS-FSCC section 2.4.11 */
|
||||
data[0] = &delete_pending[0];
|
||||
|
||||
if (cfile) {
|
||||
rc = SMB2_set_info_init(tcon, server,
|
||||
&rqst[num_rqst], COMPOUND_FID,
|
||||
COMPOUND_FID, current->tgid,
|
||||
&rqst[num_rqst],
|
||||
cfile->fid.persistent_fid,
|
||||
cfile->fid.volatile_fid,
|
||||
current->tgid,
|
||||
FILE_DISPOSITION_INFORMATION,
|
||||
SMB2_O_INFO_FILE, 0, data, size);
|
||||
if (rc)
|
||||
goto finished;
|
||||
SMB2_O_INFO_FILE, 0,
|
||||
data, size);
|
||||
} else {
|
||||
rc = SMB2_set_info_init(tcon, server,
|
||||
&rqst[num_rqst],
|
||||
COMPOUND_FID,
|
||||
COMPOUND_FID,
|
||||
current->tgid,
|
||||
FILE_DISPOSITION_INFORMATION,
|
||||
SMB2_O_INFO_FILE, 0,
|
||||
data, size);
|
||||
}
|
||||
if (!rc && (!cfile || num_rqst > 1)) {
|
||||
smb2_set_next_command(tcon, &rqst[num_rqst]);
|
||||
smb2_set_related(&rqst[num_rqst++]);
|
||||
trace_smb3_rmdir_enter(xid, tcon->tid, ses->Suid, full_path);
|
||||
smb2_set_related(&rqst[num_rqst]);
|
||||
} else if (rc) {
|
||||
goto finished;
|
||||
}
|
||||
num_rqst++;
|
||||
trace_smb3_unlink_enter(xid, tcon->tid, ses->Suid, full_path);
|
||||
break;
|
||||
case SMB2_OP_SET_EOF:
|
||||
rqst[num_rqst].rq_iov = &vars->si_iov[0];
|
||||
@ -440,7 +456,7 @@ replay_again:
|
||||
ses->Suid, full_path);
|
||||
break;
|
||||
case SMB2_OP_RENAME:
|
||||
rqst[num_rqst].rq_iov = &vars->si_iov[0];
|
||||
rqst[num_rqst].rq_iov = vars->rename_iov;
|
||||
rqst[num_rqst].rq_nvec = 2;
|
||||
|
||||
len = in_iov[i].iov_len;
|
||||
@ -671,7 +687,7 @@ finished:
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmds; i++) {
|
||||
char *buf = rsp_iov[i + i].iov_base;
|
||||
char *buf = rsp_iov[i + 1].iov_base;
|
||||
|
||||
if (buf && resp_buftype[i + 1] != CIFS_NO_BUFFER)
|
||||
rc = server->ops->map_error(buf, false);
|
||||
@ -730,19 +746,6 @@ finished:
|
||||
trace_smb3_posix_query_info_compound_done(xid, tcon->tid,
|
||||
ses->Suid);
|
||||
break;
|
||||
case SMB2_OP_DELETE:
|
||||
if (rc)
|
||||
trace_smb3_delete_err(xid, tcon->tid, ses->Suid, rc);
|
||||
else {
|
||||
/*
|
||||
* If dentry (hence, inode) is NULL, lease break is going to
|
||||
* take care of degrading leases on handles for deleted files.
|
||||
*/
|
||||
if (inode)
|
||||
cifs_mark_open_handles_for_deleted_file(inode, full_path);
|
||||
trace_smb3_delete_done(xid, tcon->tid, ses->Suid);
|
||||
}
|
||||
break;
|
||||
case SMB2_OP_MKDIR:
|
||||
if (rc)
|
||||
trace_smb3_mkdir_err(xid, tcon->tid, ses->Suid, rc);
|
||||
@ -763,11 +766,11 @@ finished:
|
||||
trace_smb3_rename_done(xid, tcon->tid, ses->Suid);
|
||||
SMB2_set_info_free(&rqst[num_rqst++]);
|
||||
break;
|
||||
case SMB2_OP_RMDIR:
|
||||
if (rc)
|
||||
trace_smb3_rmdir_err(xid, tcon->tid, ses->Suid, rc);
|
||||
case SMB2_OP_UNLINK:
|
||||
if (!rc)
|
||||
trace_smb3_unlink_done(xid, tcon->tid, ses->Suid);
|
||||
else
|
||||
trace_smb3_rmdir_done(xid, tcon->tid, ses->Suid);
|
||||
trace_smb3_unlink_err(xid, tcon->tid, ses->Suid, rc);
|
||||
SMB2_set_info_free(&rqst[num_rqst++]);
|
||||
break;
|
||||
case SMB2_OP_SET_EOF:
|
||||
@ -864,6 +867,7 @@ finished:
|
||||
smb2_should_replay(tcon, &retries, &cur_sleep))
|
||||
goto replay_again;
|
||||
|
||||
out:
|
||||
if (cfile)
|
||||
cifsFileInfo_put(cfile);
|
||||
|
||||
@ -1163,7 +1167,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
|
||||
FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE);
|
||||
return smb2_compound_op(xid, tcon, cifs_sb,
|
||||
name, &oparms, NULL,
|
||||
&(int){SMB2_OP_RMDIR}, 1,
|
||||
&(int){SMB2_OP_UNLINK}, 1,
|
||||
NULL, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
@ -1171,21 +1175,107 @@ int
|
||||
smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
|
||||
struct cifs_sb_info *cifs_sb, struct dentry *dentry)
|
||||
{
|
||||
struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
|
||||
__le16 *utf16_path __free(kfree) = NULL;
|
||||
int retries = 0, cur_sleep = 1;
|
||||
struct TCP_Server_Info *server;
|
||||
struct cifs_open_parms oparms;
|
||||
struct smb2_create_req *creq;
|
||||
struct inode *inode = NULL;
|
||||
struct smb_rqst rqst[2];
|
||||
struct kvec rsp_iov[2];
|
||||
struct kvec close_iov;
|
||||
int resp_buftype[2];
|
||||
struct cifs_fid fid;
|
||||
int flags = 0;
|
||||
__u8 oplock;
|
||||
int rc;
|
||||
|
||||
oparms = CIFS_OPARMS(cifs_sb, tcon, name,
|
||||
DELETE, FILE_OPEN,
|
||||
CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
|
||||
ACL_NO_MODE);
|
||||
int rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
|
||||
NULL, &(int){SMB2_OP_DELETE}, 1,
|
||||
NULL, NULL, NULL, dentry);
|
||||
if (rc == -EINVAL) {
|
||||
cifs_dbg(FYI, "invalid lease key, resending request without lease");
|
||||
rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
|
||||
NULL, &(int){SMB2_OP_DELETE}, 1,
|
||||
NULL, NULL, NULL, NULL);
|
||||
utf16_path = cifs_convert_path_to_utf16(name, cifs_sb);
|
||||
if (!utf16_path)
|
||||
return -ENOMEM;
|
||||
|
||||
if (smb3_encryption_required(tcon))
|
||||
flags |= CIFS_TRANSFORM_REQ;
|
||||
again:
|
||||
oplock = SMB2_OPLOCK_LEVEL_NONE;
|
||||
server = cifs_pick_channel(tcon->ses);
|
||||
|
||||
memset(rqst, 0, sizeof(rqst));
|
||||
memset(resp_buftype, 0, sizeof(resp_buftype));
|
||||
memset(rsp_iov, 0, sizeof(rsp_iov));
|
||||
|
||||
rqst[0].rq_iov = open_iov;
|
||||
rqst[0].rq_nvec = ARRAY_SIZE(open_iov);
|
||||
|
||||
oparms = CIFS_OPARMS(cifs_sb, tcon, name, DELETE | FILE_READ_ATTRIBUTES,
|
||||
FILE_OPEN, CREATE_DELETE_ON_CLOSE |
|
||||
OPEN_REPARSE_POINT, ACL_NO_MODE);
|
||||
oparms.fid = &fid;
|
||||
|
||||
if (dentry) {
|
||||
inode = d_inode(dentry);
|
||||
if (CIFS_I(inode)->lease_granted && server->ops->get_lease_key) {
|
||||
oplock = SMB2_OPLOCK_LEVEL_LEASE;
|
||||
server->ops->get_lease_key(inode, &fid);
|
||||
}
|
||||
}
|
||||
|
||||
rc = SMB2_open_init(tcon, server,
|
||||
&rqst[0], &oplock, &oparms, utf16_path);
|
||||
if (rc)
|
||||
goto err_free;
|
||||
smb2_set_next_command(tcon, &rqst[0]);
|
||||
creq = rqst[0].rq_iov[0].iov_base;
|
||||
creq->ShareAccess = FILE_SHARE_DELETE_LE;
|
||||
|
||||
rqst[1].rq_iov = &close_iov;
|
||||
rqst[1].rq_nvec = 1;
|
||||
|
||||
rc = SMB2_close_init(tcon, server, &rqst[1],
|
||||
COMPOUND_FID, COMPOUND_FID, false);
|
||||
smb2_set_related(&rqst[1]);
|
||||
if (rc)
|
||||
goto err_free;
|
||||
|
||||
if (retries) {
|
||||
for (int i = 0; i < ARRAY_SIZE(rqst); i++)
|
||||
smb2_set_replay(server, &rqst[i]);
|
||||
}
|
||||
|
||||
rc = compound_send_recv(xid, tcon->ses, server, flags,
|
||||
ARRAY_SIZE(rqst), rqst,
|
||||
resp_buftype, rsp_iov);
|
||||
SMB2_open_free(&rqst[0]);
|
||||
SMB2_close_free(&rqst[1]);
|
||||
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
|
||||
free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
|
||||
|
||||
if (is_replayable_error(rc) &&
|
||||
smb2_should_replay(tcon, &retries, &cur_sleep))
|
||||
goto again;
|
||||
|
||||
/* Retry compound request without lease */
|
||||
if (rc == -EINVAL && dentry) {
|
||||
dentry = NULL;
|
||||
retries = 0;
|
||||
cur_sleep = 1;
|
||||
goto again;
|
||||
}
|
||||
/*
|
||||
* If dentry (hence, inode) is NULL, lease break is going to
|
||||
* take care of degrading leases on handles for deleted files.
|
||||
*/
|
||||
if (!rc && inode)
|
||||
cifs_mark_open_handles_for_deleted_file(inode, name);
|
||||
|
||||
return rc;
|
||||
|
||||
err_free:
|
||||
SMB2_open_free(&rqst[0]);
|
||||
SMB2_close_free(&rqst[1]);
|
||||
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
|
||||
free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -1438,3 +1528,113 @@ out:
|
||||
cifs_free_open_info(&data);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline __le16 *utf16_smb2_path(struct cifs_sb_info *cifs_sb,
|
||||
const char *name, size_t namelen)
|
||||
{
|
||||
int len;
|
||||
|
||||
if (*name == '\\' ||
|
||||
(cifs_sb_master_tlink(cifs_sb) &&
|
||||
cifs_sb_master_tcon(cifs_sb)->posix_extensions && *name == '/'))
|
||||
name++;
|
||||
return cifs_strndup_to_utf16(name, namelen, &len,
|
||||
cifs_sb->local_nls,
|
||||
cifs_remap(cifs_sb));
|
||||
}
|
||||
|
||||
int smb2_rename_pending_delete(const char *full_path,
|
||||
struct dentry *dentry,
|
||||
const unsigned int xid)
|
||||
{
|
||||
struct cifs_sb_info *cifs_sb = CIFS_SB(d_inode(dentry)->i_sb);
|
||||
struct cifsInodeInfo *cinode = CIFS_I(d_inode(dentry));
|
||||
__le16 *utf16_path __free(kfree) = NULL;
|
||||
__u32 co = file_create_options(dentry);
|
||||
int cmds[] = {
|
||||
SMB2_OP_SET_INFO,
|
||||
SMB2_OP_RENAME,
|
||||
SMB2_OP_UNLINK,
|
||||
};
|
||||
const int num_cmds = ARRAY_SIZE(cmds);
|
||||
char *to_name __free(kfree) = NULL;
|
||||
__u32 attrs = cinode->cifsAttrs;
|
||||
struct cifs_open_parms oparms;
|
||||
static atomic_t sillycounter;
|
||||
struct cifsFileInfo *cfile;
|
||||
struct tcon_link *tlink;
|
||||
struct cifs_tcon *tcon;
|
||||
struct kvec iov[2];
|
||||
const char *ppath;
|
||||
void *page;
|
||||
size_t len;
|
||||
int rc;
|
||||
|
||||
tlink = cifs_sb_tlink(cifs_sb);
|
||||
if (IS_ERR(tlink))
|
||||
return PTR_ERR(tlink);
|
||||
tcon = tlink_tcon(tlink);
|
||||
|
||||
page = alloc_dentry_path();
|
||||
|
||||
ppath = build_path_from_dentry(dentry->d_parent, page);
|
||||
if (IS_ERR(ppath)) {
|
||||
rc = PTR_ERR(ppath);
|
||||
goto out;
|
||||
}
|
||||
|
||||
len = strlen(ppath) + strlen("/.__smb1234") + 1;
|
||||
to_name = kmalloc(len, GFP_KERNEL);
|
||||
if (!to_name) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
scnprintf(to_name, len, "%s%c.__smb%04X", ppath, CIFS_DIR_SEP(cifs_sb),
|
||||
atomic_inc_return(&sillycounter) & 0xffff);
|
||||
|
||||
utf16_path = utf16_smb2_path(cifs_sb, to_name, len);
|
||||
if (!utf16_path) {
|
||||
rc = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
drop_cached_dir_by_name(xid, tcon, full_path, cifs_sb);
|
||||
oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
|
||||
DELETE | FILE_WRITE_ATTRIBUTES,
|
||||
FILE_OPEN, co, ACL_NO_MODE);
|
||||
|
||||
attrs &= ~ATTR_READONLY;
|
||||
if (!attrs)
|
||||
attrs = ATTR_NORMAL;
|
||||
if (d_inode(dentry)->i_nlink <= 1)
|
||||
attrs |= ATTR_HIDDEN;
|
||||
iov[0].iov_base = &(FILE_BASIC_INFO) {
|
||||
.Attributes = cpu_to_le32(attrs),
|
||||
};
|
||||
iov[0].iov_len = sizeof(FILE_BASIC_INFO);
|
||||
iov[1].iov_base = utf16_path;
|
||||
iov[1].iov_len = sizeof(*utf16_path) * UniStrlen((wchar_t *)utf16_path);
|
||||
|
||||
cifs_get_writable_path(tcon, full_path, FIND_WR_WITH_DELETE, &cfile);
|
||||
rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov,
|
||||
cmds, num_cmds, cfile, NULL, NULL, dentry);
|
||||
if (rc == -EINVAL) {
|
||||
cifs_dbg(FYI, "invalid lease key, resending request without lease\n");
|
||||
cifs_get_writable_path(tcon, full_path,
|
||||
FIND_WR_WITH_DELETE, &cfile);
|
||||
rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov,
|
||||
cmds, num_cmds, cfile, NULL, NULL, NULL);
|
||||
}
|
||||
if (!rc) {
|
||||
set_bit(CIFS_INO_DELETE_PENDING, &cinode->flags);
|
||||
} else {
|
||||
cifs_tcon_dbg(FYI, "%s: failed to rename '%s' to '%s': %d\n",
|
||||
__func__, full_path, to_name, rc);
|
||||
rc = -EIO;
|
||||
}
|
||||
out:
|
||||
cifs_put_tlink(tlink);
|
||||
free_dentry_path(page);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -2631,13 +2631,35 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst)
|
||||
}
|
||||
|
||||
/* SMB headers in a compound are 8 byte aligned. */
|
||||
if (!IS_ALIGNED(len, 8)) {
|
||||
if (IS_ALIGNED(len, 8))
|
||||
goto out;
|
||||
|
||||
num_padding = 8 - (len & 7);
|
||||
if (smb3_encryption_required(tcon)) {
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Flatten request into a single buffer with required padding as
|
||||
* the encryption layer can't handle the padding iovs.
|
||||
*/
|
||||
for (i = 1; i < rqst->rq_nvec; i++) {
|
||||
memcpy(rqst->rq_iov[0].iov_base +
|
||||
rqst->rq_iov[0].iov_len,
|
||||
rqst->rq_iov[i].iov_base,
|
||||
rqst->rq_iov[i].iov_len);
|
||||
rqst->rq_iov[0].iov_len += rqst->rq_iov[i].iov_len;
|
||||
}
|
||||
memset(rqst->rq_iov[0].iov_base + rqst->rq_iov[0].iov_len,
|
||||
0, num_padding);
|
||||
rqst->rq_iov[0].iov_len += num_padding;
|
||||
rqst->rq_nvec = 1;
|
||||
} else {
|
||||
rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding;
|
||||
rqst->rq_iov[rqst->rq_nvec].iov_len = num_padding;
|
||||
rqst->rq_nvec++;
|
||||
len += num_padding;
|
||||
}
|
||||
len += num_padding;
|
||||
out:
|
||||
shdr->NextCommand = cpu_to_le32(len);
|
||||
}
|
||||
|
||||
@ -5367,6 +5389,7 @@ struct smb_version_operations smb20_operations = {
|
||||
.llseek = smb3_llseek,
|
||||
.is_status_io_timeout = smb2_is_status_io_timeout,
|
||||
.is_network_name_deleted = smb2_is_network_name_deleted,
|
||||
.rename_pending_delete = smb2_rename_pending_delete,
|
||||
};
|
||||
#endif /* CIFS_ALLOW_INSECURE_LEGACY */
|
||||
|
||||
@ -5472,6 +5495,7 @@ struct smb_version_operations smb21_operations = {
|
||||
.llseek = smb3_llseek,
|
||||
.is_status_io_timeout = smb2_is_status_io_timeout,
|
||||
.is_network_name_deleted = smb2_is_network_name_deleted,
|
||||
.rename_pending_delete = smb2_rename_pending_delete,
|
||||
};
|
||||
|
||||
struct smb_version_operations smb30_operations = {
|
||||
@ -5588,6 +5612,7 @@ struct smb_version_operations smb30_operations = {
|
||||
.llseek = smb3_llseek,
|
||||
.is_status_io_timeout = smb2_is_status_io_timeout,
|
||||
.is_network_name_deleted = smb2_is_network_name_deleted,
|
||||
.rename_pending_delete = smb2_rename_pending_delete,
|
||||
};
|
||||
|
||||
struct smb_version_operations smb311_operations = {
|
||||
@ -5704,6 +5729,7 @@ struct smb_version_operations smb311_operations = {
|
||||
.llseek = smb3_llseek,
|
||||
.is_status_io_timeout = smb2_is_status_io_timeout,
|
||||
.is_network_name_deleted = smb2_is_network_name_deleted,
|
||||
.rename_pending_delete = smb2_rename_pending_delete,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
|
||||
|
||||
@ -317,5 +317,8 @@ int posix_info_sid_size(const void *beg, const void *end);
|
||||
int smb2_make_nfs_node(unsigned int xid, struct inode *inode,
|
||||
struct dentry *dentry, struct cifs_tcon *tcon,
|
||||
const char *full_path, umode_t mode, dev_t dev);
|
||||
int smb2_rename_pending_delete(const char *full_path,
|
||||
struct dentry *dentry,
|
||||
const unsigned int xid);
|
||||
|
||||
#endif /* _SMB2PROTO_H */
|
||||
|
||||
@ -669,13 +669,12 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_info_compound_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(posix_query_info_compound_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(hardlink_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rename_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rmdir_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(unlink_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_eof_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_info_compound_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_reparse_compound_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_wsl_ea_compound_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
|
||||
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter);
|
||||
@ -710,13 +709,12 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_info_compound_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(posix_query_info_compound_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(hardlink_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rename_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rmdir_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(unlink_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_eof_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_info_compound_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_reparse_compound_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(get_reparse_compound_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
|
||||
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done);
|
||||
@ -756,14 +754,13 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_info_compound_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(posix_query_info_compound_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(hardlink_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rename_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rmdir_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(unlink_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_eof_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_info_compound_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_reparse_compound_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(get_reparse_compound_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
|
||||
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err);
|
||||
|
||||
|
||||
@ -435,6 +435,13 @@ xfs_attr_rmtval_get(
|
||||
0, &bp, &xfs_attr3_rmt_buf_ops);
|
||||
if (xfs_metadata_is_sick(error))
|
||||
xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK);
|
||||
/*
|
||||
* ENODATA from disk implies a disk medium failure;
|
||||
* ENODATA for xattrs means attribute not found, so
|
||||
* disambiguate that here.
|
||||
*/
|
||||
if (error == -ENODATA)
|
||||
error = -EIO;
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
|
||||
@ -2833,6 +2833,12 @@ xfs_da_read_buf(
|
||||
&bp, ops);
|
||||
if (xfs_metadata_is_sick(error))
|
||||
xfs_dirattr_mark_sick(dp, whichfork);
|
||||
/*
|
||||
* ENODATA from disk implies a disk medium failure; ENODATA for
|
||||
* xattrs means attribute not found, so disambiguate that here.
|
||||
*/
|
||||
if (error == -ENODATA && whichfork == XFS_ATTR_FORK)
|
||||
error = -EIO;
|
||||
if (error)
|
||||
goto out_free;
|
||||
|
||||
|
||||
@ -80,6 +80,7 @@ extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev,
|
||||
extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
|
||||
struct device_attribute *attr, char *buf);
|
||||
extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
|
||||
extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf);
|
||||
|
||||
extern __printf(4, 5)
|
||||
struct device *cpu_device_create(struct device *parent, void *drvdata,
|
||||
|
||||
@ -28,6 +28,7 @@ struct ism_dmb {
|
||||
|
||||
struct ism_dev {
|
||||
spinlock_t lock; /* protects the ism device */
|
||||
spinlock_t cmd_lock; /* serializes cmds */
|
||||
struct list_head list;
|
||||
struct pci_dev *pdev;
|
||||
|
||||
|
||||
@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head,
|
||||
extern int nfs_page_group_lock(struct nfs_page *);
|
||||
extern void nfs_page_group_unlock(struct nfs_page *);
|
||||
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
|
||||
extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
|
||||
extern int nfs_page_set_headlock(struct nfs_page *req);
|
||||
extern void nfs_page_clear_headlock(struct nfs_page *req);
|
||||
extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
#include "shm_channel.h"
|
||||
|
||||
#define GDMA_STATUS_MORE_ENTRIES 0x00000105
|
||||
#define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff
|
||||
|
||||
/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
|
||||
* them are naturally aligned and hence don't need __packed.
|
||||
@ -58,9 +59,10 @@ enum gdma_eqe_type {
|
||||
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
|
||||
GDMA_EQE_HWC_INIT_DATA = 130,
|
||||
GDMA_EQE_HWC_INIT_DONE = 131,
|
||||
GDMA_EQE_HWC_SOC_RECONFIG = 132,
|
||||
GDMA_EQE_HWC_FPGA_RECONFIG = 132,
|
||||
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
|
||||
GDMA_EQE_HWC_SOC_SERVICE = 134,
|
||||
GDMA_EQE_HWC_RESET_REQUEST = 135,
|
||||
GDMA_EQE_RNIC_QP_FATAL = 176,
|
||||
};
|
||||
|
||||
@ -403,6 +405,8 @@ struct gdma_context {
|
||||
u32 test_event_eq_id;
|
||||
|
||||
bool is_pf;
|
||||
bool in_service;
|
||||
|
||||
phys_addr_t bar0_pa;
|
||||
void __iomem *bar0_va;
|
||||
void __iomem *shm_base;
|
||||
@ -578,12 +582,20 @@ enum {
|
||||
/* Driver can handle holes (zeros) in the device list */
|
||||
#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
|
||||
|
||||
/* Driver can self reset on EQE notification */
|
||||
#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
|
||||
|
||||
/* Driver can self reset on FPGA Reconfig EQE notification */
|
||||
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
|
||||
|
||||
#define GDMA_DRV_CAP_FLAGS1 \
|
||||
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
|
||||
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
|
||||
GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
|
||||
GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
|
||||
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP)
|
||||
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
|
||||
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
|
||||
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
|
||||
|
||||
#define GDMA_DRV_CAP_FLAGS2 0
|
||||
|
||||
@ -910,4 +922,9 @@ void mana_unregister_debugfs(void);
|
||||
|
||||
int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
|
||||
|
||||
int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state);
|
||||
int mana_gd_resume(struct pci_dev *pdev);
|
||||
|
||||
bool mana_need_log(struct gdma_context *gc, int err);
|
||||
|
||||
#endif /* _GDMA_H */
|
||||
|
||||
@ -404,6 +404,65 @@ struct mana_ethtool_stats {
|
||||
u64 rx_cqe_unknown_type;
|
||||
};
|
||||
|
||||
struct mana_ethtool_phy_stats {
|
||||
/* Drop Counters */
|
||||
u64 rx_pkt_drop_phy;
|
||||
u64 tx_pkt_drop_phy;
|
||||
|
||||
/* Per TC traffic Counters */
|
||||
u64 rx_pkt_tc0_phy;
|
||||
u64 tx_pkt_tc0_phy;
|
||||
u64 rx_pkt_tc1_phy;
|
||||
u64 tx_pkt_tc1_phy;
|
||||
u64 rx_pkt_tc2_phy;
|
||||
u64 tx_pkt_tc2_phy;
|
||||
u64 rx_pkt_tc3_phy;
|
||||
u64 tx_pkt_tc3_phy;
|
||||
u64 rx_pkt_tc4_phy;
|
||||
u64 tx_pkt_tc4_phy;
|
||||
u64 rx_pkt_tc5_phy;
|
||||
u64 tx_pkt_tc5_phy;
|
||||
u64 rx_pkt_tc6_phy;
|
||||
u64 tx_pkt_tc6_phy;
|
||||
u64 rx_pkt_tc7_phy;
|
||||
u64 tx_pkt_tc7_phy;
|
||||
|
||||
u64 rx_byte_tc0_phy;
|
||||
u64 tx_byte_tc0_phy;
|
||||
u64 rx_byte_tc1_phy;
|
||||
u64 tx_byte_tc1_phy;
|
||||
u64 rx_byte_tc2_phy;
|
||||
u64 tx_byte_tc2_phy;
|
||||
u64 rx_byte_tc3_phy;
|
||||
u64 tx_byte_tc3_phy;
|
||||
u64 rx_byte_tc4_phy;
|
||||
u64 tx_byte_tc4_phy;
|
||||
u64 rx_byte_tc5_phy;
|
||||
u64 tx_byte_tc5_phy;
|
||||
u64 rx_byte_tc6_phy;
|
||||
u64 tx_byte_tc6_phy;
|
||||
u64 rx_byte_tc7_phy;
|
||||
u64 tx_byte_tc7_phy;
|
||||
|
||||
/* Per TC pause Counters */
|
||||
u64 rx_pause_tc0_phy;
|
||||
u64 tx_pause_tc0_phy;
|
||||
u64 rx_pause_tc1_phy;
|
||||
u64 tx_pause_tc1_phy;
|
||||
u64 rx_pause_tc2_phy;
|
||||
u64 tx_pause_tc2_phy;
|
||||
u64 rx_pause_tc3_phy;
|
||||
u64 tx_pause_tc3_phy;
|
||||
u64 rx_pause_tc4_phy;
|
||||
u64 tx_pause_tc4_phy;
|
||||
u64 rx_pause_tc5_phy;
|
||||
u64 tx_pause_tc5_phy;
|
||||
u64 rx_pause_tc6_phy;
|
||||
u64 tx_pause_tc6_phy;
|
||||
u64 rx_pause_tc7_phy;
|
||||
u64 tx_pause_tc7_phy;
|
||||
};
|
||||
|
||||
struct mana_context {
|
||||
struct gdma_dev *gdma_dev;
|
||||
|
||||
@ -474,6 +533,8 @@ struct mana_port_context {
|
||||
|
||||
struct mana_ethtool_stats eth_stats;
|
||||
|
||||
struct mana_ethtool_phy_stats phy_stats;
|
||||
|
||||
/* Debugfs */
|
||||
struct dentry *mana_port_debugfs;
|
||||
};
|
||||
@ -501,6 +562,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
|
||||
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
|
||||
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
|
||||
void mana_query_gf_stats(struct mana_port_context *apc);
|
||||
void mana_query_phy_stats(struct mana_port_context *apc);
|
||||
int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
|
||||
void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
|
||||
|
||||
@ -527,6 +589,7 @@ enum mana_command_code {
|
||||
MANA_FENCE_RQ = 0x20006,
|
||||
MANA_CONFIG_VPORT_RX = 0x20007,
|
||||
MANA_QUERY_VPORT_CONFIG = 0x20008,
|
||||
MANA_QUERY_PHY_STAT = 0x2000c,
|
||||
|
||||
/* Privileged commands for the PF mode */
|
||||
MANA_REGISTER_FILTER = 0x28000,
|
||||
@ -689,6 +752,74 @@ struct mana_query_gf_stat_resp {
|
||||
u64 tx_err_gdma;
|
||||
}; /* HW DATA */
|
||||
|
||||
/* Query phy stats */
|
||||
struct mana_query_phy_stat_req {
|
||||
struct gdma_req_hdr hdr;
|
||||
u64 req_stats;
|
||||
}; /* HW DATA */
|
||||
|
||||
struct mana_query_phy_stat_resp {
|
||||
struct gdma_resp_hdr hdr;
|
||||
u64 reported_stats;
|
||||
|
||||
/* Aggregate Drop Counters */
|
||||
u64 rx_pkt_drop_phy;
|
||||
u64 tx_pkt_drop_phy;
|
||||
|
||||
/* Per TC(Traffic class) traffic Counters */
|
||||
u64 rx_pkt_tc0_phy;
|
||||
u64 tx_pkt_tc0_phy;
|
||||
u64 rx_pkt_tc1_phy;
|
||||
u64 tx_pkt_tc1_phy;
|
||||
u64 rx_pkt_tc2_phy;
|
||||
u64 tx_pkt_tc2_phy;
|
||||
u64 rx_pkt_tc3_phy;
|
||||
u64 tx_pkt_tc3_phy;
|
||||
u64 rx_pkt_tc4_phy;
|
||||
u64 tx_pkt_tc4_phy;
|
||||
u64 rx_pkt_tc5_phy;
|
||||
u64 tx_pkt_tc5_phy;
|
||||
u64 rx_pkt_tc6_phy;
|
||||
u64 tx_pkt_tc6_phy;
|
||||
u64 rx_pkt_tc7_phy;
|
||||
u64 tx_pkt_tc7_phy;
|
||||
|
||||
u64 rx_byte_tc0_phy;
|
||||
u64 tx_byte_tc0_phy;
|
||||
u64 rx_byte_tc1_phy;
|
||||
u64 tx_byte_tc1_phy;
|
||||
u64 rx_byte_tc2_phy;
|
||||
u64 tx_byte_tc2_phy;
|
||||
u64 rx_byte_tc3_phy;
|
||||
u64 tx_byte_tc3_phy;
|
||||
u64 rx_byte_tc4_phy;
|
||||
u64 tx_byte_tc4_phy;
|
||||
u64 rx_byte_tc5_phy;
|
||||
u64 tx_byte_tc5_phy;
|
||||
u64 rx_byte_tc6_phy;
|
||||
u64 tx_byte_tc6_phy;
|
||||
u64 rx_byte_tc7_phy;
|
||||
u64 tx_byte_tc7_phy;
|
||||
|
||||
/* Per TC(Traffic Class) pause Counters */
|
||||
u64 rx_pause_tc0_phy;
|
||||
u64 tx_pause_tc0_phy;
|
||||
u64 rx_pause_tc1_phy;
|
||||
u64 tx_pause_tc1_phy;
|
||||
u64 rx_pause_tc2_phy;
|
||||
u64 tx_pause_tc2_phy;
|
||||
u64 rx_pause_tc3_phy;
|
||||
u64 tx_pause_tc3_phy;
|
||||
u64 rx_pause_tc4_phy;
|
||||
u64 tx_pause_tc4_phy;
|
||||
u64 rx_pause_tc5_phy;
|
||||
u64 tx_pause_tc5_phy;
|
||||
u64 rx_pause_tc6_phy;
|
||||
u64 tx_pause_tc6_phy;
|
||||
u64 rx_pause_tc7_phy;
|
||||
u64 tx_pause_tc7_phy;
|
||||
}; /* HW DATA */
|
||||
|
||||
/* Configure vPort Rx Steering */
|
||||
struct mana_cfg_rx_steer_req_v2 {
|
||||
struct gdma_req_hdr hdr;
|
||||
|
||||
@ -4109,6 +4109,7 @@ static void cgroup_file_release(struct kernfs_open_file *of)
|
||||
cft->release(of);
|
||||
put_cgroup_ns(ctx->ns);
|
||||
kfree(ctx);
|
||||
of->priv = NULL;
|
||||
}
|
||||
|
||||
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
|
||||
|
||||
@ -1001,6 +1001,12 @@ int kernel_kexec(void)
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (kexec_image->preserve_context) {
|
||||
/*
|
||||
* This flow is analogous to hibernation flows that occur
|
||||
* before creating an image and before jumping from the
|
||||
* restore kernel to the image one, so it uses the same
|
||||
* device callbacks as those two flows.
|
||||
*/
|
||||
pm_prepare_console();
|
||||
error = freeze_processes();
|
||||
if (error) {
|
||||
@ -1011,12 +1017,10 @@ int kernel_kexec(void)
|
||||
error = dpm_suspend_start(PMSG_FREEZE);
|
||||
if (error)
|
||||
goto Resume_console;
|
||||
/* At this point, dpm_suspend_start() has been called,
|
||||
* but *not* dpm_suspend_end(). We *must* call
|
||||
* dpm_suspend_end() now. Otherwise, drivers for
|
||||
* some devices (e.g. interrupt controllers) become
|
||||
* desynchronized with the actual state of the
|
||||
* hardware at resume time, and evil weirdness ensues.
|
||||
/*
|
||||
* dpm_suspend_end() must be called after dpm_suspend_start()
|
||||
* to complete the transition, like in the hibernation flows
|
||||
* mentioned above.
|
||||
*/
|
||||
error = dpm_suspend_end(PMSG_FREEZE);
|
||||
if (error)
|
||||
@ -1052,6 +1056,13 @@ int kernel_kexec(void)
|
||||
|
||||
#ifdef CONFIG_KEXEC_JUMP
|
||||
if (kexec_image->preserve_context) {
|
||||
/*
|
||||
* This flow is analogous to hibernation flows that occur after
|
||||
* creating an image and after the image kernel has got control
|
||||
* back, and in case the devices have been reset or otherwise
|
||||
* manipulated in the meantime, it uses the device callbacks
|
||||
* used by the latter.
|
||||
*/
|
||||
syscore_resume();
|
||||
Enable_irqs:
|
||||
local_irq_enable();
|
||||
|
||||
@ -3322,9 +3322,13 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
|
||||
* and the EOF part of the last cluster.
|
||||
*/
|
||||
inc_cluster_info_page(si, cluster_info, 0);
|
||||
for (i = 0; i < swap_header->info.nr_badpages; i++)
|
||||
inc_cluster_info_page(si, cluster_info,
|
||||
swap_header->info.badpages[i]);
|
||||
for (i = 0; i < swap_header->info.nr_badpages; i++) {
|
||||
unsigned int page_nr = swap_header->info.badpages[i];
|
||||
|
||||
if (page_nr >= maxpages)
|
||||
continue;
|
||||
inc_cluster_info_page(si, cluster_info, page_nr);
|
||||
}
|
||||
for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
|
||||
inc_cluster_info_page(si, cluster_info, i);
|
||||
|
||||
|
||||
@ -3272,6 +3272,7 @@ int tcp_disconnect(struct sock *sk, int flags)
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int old_state = sk->sk_state;
|
||||
struct request_sock *req;
|
||||
u32 seq;
|
||||
|
||||
if (old_state != TCP_CLOSE)
|
||||
@ -3386,6 +3387,10 @@ int tcp_disconnect(struct sock *sk, int flags)
|
||||
|
||||
|
||||
/* Clean up fastopen related fields */
|
||||
req = rcu_dereference_protected(tp->fastopen_rsk,
|
||||
lockdep_sock_is_held(sk));
|
||||
if (req)
|
||||
reqsk_fastopen_remove(sk, req, false);
|
||||
tcp_free_fastopen_req(tp);
|
||||
inet_clear_bit(DEFER_CONNECT, sk);
|
||||
tp->fastopen_client_fail = 0;
|
||||
|
||||
@ -7338,7 +7338,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
||||
&foc, TCP_SYNACK_FASTOPEN, skb);
|
||||
/* Add the child socket directly into the accept queue */
|
||||
if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
|
||||
reqsk_fastopen_remove(fastopen_sk, req, false);
|
||||
bh_unlock_sock(fastopen_sk);
|
||||
sock_put(fastopen_sk);
|
||||
goto drop_and_free;
|
||||
|
||||
@ -35,6 +35,7 @@
|
||||
#include <net/xfrm.h>
|
||||
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/utils.h>
|
||||
#include <net/seg6.h>
|
||||
#include <net/genetlink.h>
|
||||
#include <net/seg6_hmac.h>
|
||||
@ -271,7 +272,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
|
||||
if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output))
|
||||
return false;
|
||||
|
||||
if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0)
|
||||
if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
||||
@ -276,8 +276,6 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
|
||||
|
||||
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
|
||||
{
|
||||
if (unlikely(current->flags & PF_EXITING))
|
||||
return -EINTR;
|
||||
schedule();
|
||||
if (signal_pending_state(mode, current))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
@ -141,6 +141,7 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx);
|
||||
|
||||
int wait_on_pending_writer(struct sock *sk, long *timeo);
|
||||
void tls_err_abort(struct sock *sk, int err);
|
||||
void tls_strp_abort_strp(struct tls_strparser *strp, int err);
|
||||
|
||||
int init_prot_info(struct tls_prot_info *prot,
|
||||
const struct tls_crypto_info *crypto_info,
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
static struct workqueue_struct *tls_strp_wq;
|
||||
|
||||
static void tls_strp_abort_strp(struct tls_strparser *strp, int err)
|
||||
void tls_strp_abort_strp(struct tls_strparser *strp, int err)
|
||||
{
|
||||
if (strp->stopped)
|
||||
return;
|
||||
@ -211,11 +211,17 @@ static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
|
||||
struct sk_buff *in_skb, unsigned int offset,
|
||||
size_t in_len)
|
||||
{
|
||||
unsigned int nfrag = skb->len / PAGE_SIZE;
|
||||
size_t len, chunk;
|
||||
skb_frag_t *frag;
|
||||
int sz;
|
||||
|
||||
frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
|
||||
if (unlikely(nfrag >= skb_shinfo(skb)->nr_frags)) {
|
||||
DEBUG_NET_WARN_ON_ONCE(1);
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
frag = &skb_shinfo(skb)->frags[nfrag];
|
||||
|
||||
len = in_len;
|
||||
/* First make sure we got the header */
|
||||
@ -514,10 +520,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
|
||||
tls_strp_load_anchor_with_queue(strp, inq);
|
||||
if (!strp->stm.full_len) {
|
||||
sz = tls_rx_msg_size(strp, strp->anchor);
|
||||
if (sz < 0) {
|
||||
tls_strp_abort_strp(strp, sz);
|
||||
if (sz < 0)
|
||||
return sz;
|
||||
}
|
||||
|
||||
strp->stm.full_len = sz;
|
||||
|
||||
|
||||