Import of kernel-5.14.0-687.19.1.el9_8

This commit is contained in:
almalinux-bot-kernel 2026-06-30 05:29:12 +00:00
parent 36c7b56810
commit 8626b73c68
312 changed files with 14341 additions and 4818 deletions

View File

@ -1341,3 +1341,40 @@ Device Counters
- The number of times the device owned queue had not enough buffers
allocated.
- Error
* - `pci_bw_inbound_high`
- The number of times the device crossed the high inbound PCIe bandwidth
threshold. To be compared to pci_bw_inbound_low to check if the device
is in a congested state.
If pci_bw_inbound_high == pci_bw_inbound_low then the device is not congested.
If pci_bw_inbound_high > pci_bw_inbound_low then the device is congested.
- Informative
* - `pci_bw_inbound_low`
- The number of times the device crossed the low inbound PCIe bandwidth
threshold. To be compared to pci_bw_inbound_high to check if the device
is in a congested state.
If pci_bw_inbound_high == pci_bw_inbound_low then the device is not congested.
If pci_bw_inbound_high > pci_bw_inbound_low then the device is congested.
- Informative
* - `pci_bw_outbound_high`
- The number of times the device crossed the high outbound PCIe bandwidth
threshold. To be compared to pci_bw_outbound_low to check if the device
is in a congested state.
If pci_bw_outbound_high == pci_bw_outbound_low then the device is not congested.
If pci_bw_outbound_high > pci_bw_outbound_low then the device is congested.
- Informative
* - `pci_bw_outbound_low`
- The number of times the device crossed the low outbound PCIe bandwidth
threshold. To be compared to pci_bw_outbound_high to check if the device
is in a congested state.
If pci_bw_outbound_high == pci_bw_outbound_low then the device is not congested.
If pci_bw_outbound_high > pci_bw_outbound_low then the device is congested.
- Informative
* - `pci_bw_stale_event`
- The number of times the device fired a PCIe congestion event but on query
there was no change in state.
- Informative

View File

@ -15,23 +15,62 @@ Parameters
* - Name
- Mode
- Validation
- Notes
* - ``enable_roce``
- driverinit
- Type: Boolean
If the device supports RoCE disablement, RoCE enablement state controls
- Boolean
- If the device supports RoCE disablement, RoCE enablement state controls
device support for RoCE capability. Otherwise, the control occurs in the
driver stack. When RoCE is disabled at the driver level, only raw
ethernet QPs are supported.
* - ``io_eq_size``
- driverinit
- The range is between 64 and 4096.
-
* - ``event_eq_size``
- driverinit
- The range is between 64 and 4096.
-
* - ``max_macs``
- driverinit
- The range is between 1 and 2^31. Only power of 2 values are supported.
-
* - ``enable_sriov``
- permanent
- Boolean
- Applies to each physical function (PF) independently, if the device
supports it. Otherwise, it applies symmetrically to all PFs.
* - ``total_vfs``
- permanent
- The range is between 1 and a device-specific max.
- Applies to each physical function (PF) independently, if the device
supports it. Otherwise, it applies symmetrically to all PFs.
Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect
.. code-block:: bash
# setup parameters
devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent
devlink dev param set pci/0000:01:00.0 name total_vfs value 8 cmode permanent
# Fw reset
devlink dev reload pci/0000:01:00.0 action fw_activate
# for PCI related config such as sriov PCI reset/rescan is required:
echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
echo 1 >/sys/bus/pci/rescan
grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*
* - ``num_doorbells``
- driverinit
- This controls the number of channel doorbells used by the netdev. In all
cases, an additional doorbell is allocated and used for non-channel
communication (e.g. for PTP, HWS, etc.). Supported values are:
- 0: No channel-specific doorbells, use the global one for everything.
- [1, max_num_channels]: Spread netdev channels equally across these
doorbells.
The ``mlx5`` driver also implements the following driver-specific
parameters.
@ -116,6 +155,68 @@ parameters.
- u32
- driverinit
- Control the size (in packets) of the hairpin queues.
* - ``pcie_cong_inbound_high``
- u16
- driverinit
- High threshold configuration for PCIe congestion events. The firmware
will send an event once device side inbound PCIe traffic went
above the configured high threshold for a long enough period (at least
200ms).
See pci_bw_inbound_high ethtool stat.
Units are 0.01 %. Accepted values are in range [0, 10000].
pcie_cong_inbound_low < pcie_cong_inbound_high.
Default value: 9000 (Corresponds to 90%).
* - ``pcie_cong_inbound_low``
- u16
- driverinit
- Low threshold configuration for PCIe congestion events. The firmware
will send an event once device side inbound PCIe traffic went
below the configured low threshold, only after having been previously in
a congested state.
See pci_bw_inbound_low ethtool stat.
Units are 0.01 %. Accepted values are in range [0, 10000].
pcie_cong_inbound_low < pcie_cong_inbound_high.
Default value: 7500 (Corresponds to 75%).
* - ``pcie_cong_outbound_high``
- u16
- driverinit
- High threshold configuration for PCIe congestion events. The firmware
will send an event once device side outbound PCIe traffic went
above the configured high threshold for a long enough period (at least
200ms).
See pci_bw_outbound_high ethtool stat.
Units are 0.01 %. Accepted values are in range [0, 10000].
pcie_cong_outbound_low < pcie_cong_outbound_high.
Default value: 9000 (Corresponds to 90%).
* - ``pcie_cong_outbound_low``
- u16
- driverinit
- Low threshold configuration for PCIe congestion events. The firmware
will send an event once device side outbound PCIe traffic went
below the configured low threshold, only after having been previously in
a congested state.
See pci_bw_outbound_low ethtool stat.
Units are 0.01 %. Accepted values are in range [0, 10000].
pcie_cong_outbound_low < pcie_cong_outbound_high.
Default value: 7500 (Corresponds to 75%).
* - ``cqe_compress_type``
- string
- permanent
- Configure which mechanism/algorithm should be used by the NIC that will
affect the rate (aggressiveness) of compressed CQEs depending on PCIe bus
conditions and other internal NIC factors. This mode affects all queues
that enable compression.
* ``balanced`` : Merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance
* ``aggressive`` : Merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads
The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
@ -284,6 +385,12 @@ Description of the vnic counters:
amount of Interconnect Host Memory (ICM) consumed by the vnic in
granularity of 4KB. ICM is host memory allocated by SW upon HCA request
and is used for storing data structures that control HCA operation.
- bar_uar_access
number of WRITE or READ access operations to the UAR on the PCIe BAR.
- odp_local_triggered_page_fault
number of locally-triggered page-faults due to ODP.
- odp_remote_triggered_page_fault
number of remotely-triggered page-faults due to ODP.
User commands examples:

View File

@ -7475,6 +7475,15 @@ F: fs/mnt_idmapping.c
F: include/linux/mnt_idmapping.*
F: tools/testing/selftests/mount_setattr/
FILESYSTEMS [STACKABLE]
M: Miklos Szeredi <miklos@szeredi.hu>
M: Amir Goldstein <amir73il@gmail.com>
L: linux-fsdevel@vger.kernel.org
L: linux-unionfs@vger.kernel.org
S: Maintained
F: fs/backing-file.c
F: include/linux/backing-file.h
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
M: Riku Voipio <riku.voipio@iki.fi>
L: linux-hwmon@vger.kernel.org

View File

@ -31,7 +31,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"nop \n"
".pushsection __jump_table, \"aw\" \n"
@ -47,7 +47,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key,
bool branch)
{
asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
"1: \n"
"b %l[l_yes] \n"
".pushsection __jump_table, \"aw\" \n"

View File

@ -90,10 +90,12 @@ static void show_faulting_vma(unsigned long address)
*/
if (vma) {
char buf[ARC_PATH_MAX];
char *nm = "?";
char *nm = "anon";
if (vma->vm_file) {
nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
/* XXX: can we use %pD below and get rid of buf? */
nm = d_path(file_user_path(vma->vm_file), buf,
ARC_PATH_MAX-1);
if (IS_ERR(nm))
nm = "?";
}

View File

@ -11,7 +11,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
WASM(nop) "\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@ -25,7 +25,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
WASM(b) " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"

View File

@ -229,7 +229,7 @@ alternative_has_cap_likely(const unsigned long cpucap)
if (!cpucap_is_possible(cpucap))
return false;
asm_volatile_goto(
asm goto(
ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
:
: [cpucap] "i" (cpucap)
@ -247,7 +247,7 @@ alternative_has_cap_unlikely(const unsigned long cpucap)
if (!cpucap_is_possible(cpucap))
return false;
asm_volatile_goto(
asm goto(
ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
:
: [cpucap] "i" (cpucap)

View File

@ -18,7 +18,7 @@
static __always_inline bool arch_static_branch(struct static_key * const key,
const bool branch)
{
asm_volatile_goto(
asm goto(
"1: nop \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"
@ -35,7 +35,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key,
const bool branch)
{
asm_volatile_goto(
asm goto(
"1: b %l[l_yes] \n\t"
" .pushsection __jump_table, \"aw\" \n\t"
" .align 3 \n\t"

View File

@ -627,8 +627,10 @@ static void vgic_its_invalidate_cache(struct vgic_its *its)
unsigned long idx;
xa_for_each(&its->translation_cache, idx, irq) {
xa_erase(&its->translation_cache, idx);
vgic_put_irq(kvm, irq);
/* Only the context that erases the entry drops its cache ref. */
irq = xa_erase(&its->translation_cache, idx);
if (irq)
vgic_put_irq(kvm, irq);
}
}

View File

@ -36,7 +36,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\t" B_INSN " 2f\n\t"
asm goto("1:\t" B_INSN " 2f\n\t"
"2:\t.insn\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
@ -50,7 +50,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t"
asm goto("1:\t" J_INSN " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"

View File

@ -11,7 +11,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b - ., %l[l_yes] - .\n\t"
@ -26,7 +26,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"b,n %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b - ., %l[l_yes] - .\n\t"

View File

@ -328,7 +328,7 @@ static inline unsigned long get_kuap(void)
return mfspr(SPRN_AMR);
}
static inline void set_kuap(unsigned long value)
static __always_inline void set_kuap(unsigned long value)
{
if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
return;
@ -398,7 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
#endif /* !CONFIG_PPC_KUAP */
static inline void prevent_user_access(unsigned long dir)
static __always_inline void prevent_user_access(unsigned long dir)
{
set_kuap(AMR_KUAP_BLOCKED);
if (static_branch_unlikely(&uaccess_flush_key))

View File

@ -17,7 +17,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"
@ -32,7 +32,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"b %l[l_yes] # arch_static_branch_jump\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".long 1b - ., %l[l_yes] - .\n\t"

View File

@ -73,7 +73,7 @@ __pu_failed: \
* are no aliasing issues.
*/
#define __put_user_asm_goto(x, addr, label, op) \
asm_volatile_goto( \
asm goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@ -86,7 +86,7 @@ __pu_failed: \
__put_user_asm_goto(x, ptr, label, "std")
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
asm_volatile_goto( \
asm goto( \
"1: stw%X1 %0, %1\n" \
"2: stw%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
@ -130,7 +130,7 @@ do { \
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define __get_user_asm_goto(x, addr, label, op) \
asm_volatile_goto( \
asm_goto_output( \
"1: "op"%U1%X1 %0, %1 # get_user\n" \
EX_TABLE(1b, %l2) \
: "=r" (x) \
@ -143,7 +143,7 @@ do { \
__get_user_asm_goto(x, addr, label, "ld")
#else /* __powerpc64__ */
#define __get_user_asm2_goto(x, addr, label) \
asm_volatile_goto( \
asm_goto_output( \
"1: lwz%X1 %0, %1\n" \
"2: lwz%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \

View File

@ -242,7 +242,7 @@ notrace void arch_local_irq_restore(unsigned long mask)
* This allows interrupts to be unmasked without hard disabling, and
* also without new hard interrupts coming in ahead of pending ones.
*/
asm_volatile_goto(
asm goto(
"1: \n"
" lbz 9,%0(13) \n"
" cmpwi 9,0 \n"

View File

@ -25,7 +25,7 @@
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("0: brcl 0,%l[label]\n"
asm goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
@ -39,7 +39,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("0: brcl 15,%l[label]\n"
asm goto("0: brcl 15,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"

View File

@ -10,7 +10,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"nop\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
@ -26,7 +26,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"b %l[l_yes]\n\t"
"nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"

View File

@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
asm_volatile_goto("1: jmp 6f\n"
asm goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"

View File

@ -24,7 +24,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
asm goto("1:"
"jmp %l[l_yes] # objtool NOPs this \n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (2 | branch) : : l_yes);
@ -38,7 +38,7 @@ l_yes:
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
asm_volatile_goto("1:"
asm goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@ -52,7 +52,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
asm_volatile_goto("1:"
asm goto("1:"
"jmp %l[l_yes]\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);

View File

@ -18,7 +18,7 @@
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c = false; \
asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
asm goto (fullop "; j" #cc " %l[cc_label]" \
: : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
if (0) { \

View File

@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p)
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
asm goto("1: wrussq %[val], (%[addr])\n"
_ASM_EXTABLE(1b, %l[fail])
:: [addr] "r" (addr), [val] "r" (val)
:: fail);

View File

@ -133,7 +133,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
asm_volatile_goto("\n" \
asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@ -295,7 +295,7 @@ do { \
} while (0)
#define __get_user_asm(x, addr, itype, ltype, label) \
asm_volatile_goto("\n" \
asm_goto_output("\n" \
"1: mov"itype" %[umem],%[output]\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: [output] ltype(x) \
@ -375,7 +375,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
asm_volatile_goto("\n" \
asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@ -394,7 +394,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
asm_volatile_goto("\n" \
asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_goto(x, addr, itype, ltype, label) \
asm_volatile_goto("\n" \
asm goto("\n" \
"1: mov"itype" %0,%1\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \

View File

@ -8,7 +8,7 @@
#define svm_asm(insn, clobber...) \
do { \
asm_volatile_goto("1: " __stringify(insn) "\n\t" \
asm goto("1: " __stringify(insn) "\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
::: clobber : fault); \
return; \
@ -18,7 +18,7 @@ fault: \
#define svm_asm1(insn, op1, clobber...) \
do { \
asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
asm goto("1: " __stringify(insn) " %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1 : clobber : fault); \
return; \
@ -28,7 +28,7 @@ fault: \
#define svm_asm2(insn, op1, op2, clobber...) \
do { \
asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
asm goto("1: " __stringify(insn) " %1, %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1, op2 : clobber : fault); \
return; \

View File

@ -745,7 +745,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
*/
static int kvm_cpu_vmxoff(void)
{
asm_volatile_goto("1: vmxoff\n\t"
asm goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
@ -2807,7 +2807,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
cr4_set_bits(X86_CR4_VMXE);
asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
asm goto("1: vmxon %[vmxon_pointer]\n\t"
_ASM_EXTABLE(1b, %l[fault])
: : [vmxon_pointer] "m"(vmxon_pointer)
: : fault);

View File

@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
asm_volatile_goto("1: vmread %[field], %[output]\n\t"
asm_goto_output("1: vmread %[field], %[output]\n\t"
"jna %l[do_fail]\n\t"
_ASM_EXTABLE(1b, %l[do_exception])
@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
#define vmx_asm1(insn, op1, error_args...) \
do { \
asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
asm goto("1: " __stringify(insn) " %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
@ -205,7 +205,7 @@ fault: \
#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
asm goto("1: " __stringify(insn) " %1, %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \

View File

@ -13,7 +13,7 @@
static __always_inline bool arch_static_branch(struct static_key *key,
bool branch)
{
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
"_nop\n\t"
".pushsection __jump_table, \"aw\"\n\t"
".word 1b, %l[l_yes], %c0\n\t"
@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key,
* make it reachable and wrap both into a no-transform block
* to avoid any assembler interference with this.
*/
asm_volatile_goto("1:\n\t"
asm goto("1:\n\t"
".begin no-transform\n\t"
"_j %l[l_yes]\n\t"
"2:\n\t"

View File

@ -7360,6 +7360,7 @@ CONFIG_HTE_TEGRA194_TEST=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7337,6 +7337,7 @@ CONFIG_HTE_TEGRA194=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7365,6 +7365,7 @@ CONFIG_HTE_TEGRA194_TEST=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7364,6 +7364,7 @@ CONFIG_HTE_TEGRA194_TEST=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7342,6 +7342,7 @@ CONFIG_HTE_TEGRA194=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7367,6 +7367,7 @@ CONFIG_HTE_TEGRA194_TEST=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7345,6 +7345,7 @@ CONFIG_HTE_TEGRA194=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -7342,6 +7342,7 @@ CONFIG_HTE_TEGRA194=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -5757,6 +5757,7 @@ CONFIG_NVMEM_SYSFS=y
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -5774,6 +5774,7 @@ CONFIG_NVMEM_SYSFS=y
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -5771,6 +5771,7 @@ CONFIG_NVMEM_SYSFS=y
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -5754,6 +5754,7 @@ CONFIG_NVMEM_SYSFS=y
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -3079,6 +3079,7 @@ CONFIG_NVMEM_SYSFS=y
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -3102,6 +3102,7 @@ CONFIG_NVMEM_SYSFS=y
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -8170,6 +8170,7 @@ CONFIG_INTEL_QEP=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -8221,6 +8221,7 @@ CONFIG_INTEL_QEP=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -8194,6 +8194,7 @@ CONFIG_INTEL_QEP=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -8143,6 +8143,7 @@ CONFIG_INTEL_QEP=m
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
CONFIG_FS_STACK=y
CONFIG_BUFFER_HEAD=y
CONFIG_LEGACY_DIRECT_IO=y
# CONFIG_EXT2_FS is not set

View File

@ -2049,7 +2049,7 @@ static int binder_translate_binder(struct flat_binder_object *fp,
ret = -EINVAL;
goto done;
}
if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
ret = -EPERM;
goto done;
}
@ -2095,7 +2095,7 @@ static int binder_translate_handle(struct flat_binder_object *fp,
proc->pid, thread->pid, fp->handle);
return -EINVAL;
}
if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
ret = -EPERM;
goto done;
}
@ -2183,7 +2183,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset,
ret = -EBADF;
goto err_fget;
}
ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file);
ret = security_binder_transfer_file(proc->cred, target_proc->cred, file);
if (ret < 0) {
ret = -EPERM;
goto err_security;
@ -2588,8 +2588,8 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_invalid_target_handle;
}
if (security_binder_transaction(proc->tsk,
target_proc->tsk) < 0) {
if (security_binder_transaction(proc->cred,
target_proc->cred) < 0) {
return_error = BR_FAILED_REPLY;
return_error_param = -EPERM;
return_error_line = __LINE__;
@ -4554,7 +4554,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp,
ret = -EBUSY;
goto out;
}
ret = security_binder_set_context_mgr(proc->tsk);
ret = security_binder_set_context_mgr(proc->cred);
if (ret < 0)
goto out;
if (uid_valid(context->binder_context_mgr_uid)) {

View File

@ -12220,6 +12220,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
if (dc_resource_is_dsc_encoding_supported(dc)) {
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
dm_new_crtc_state->mode_changed_independent_from_dsc = new_crtc_state->mode_changed;
}
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
if (drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = add_affected_mst_dsc_crtcs(state, crtc);

View File

@ -962,6 +962,7 @@ struct dm_crtc_state {
bool freesync_vrr_info_changed;
bool mode_changed_independent_from_dsc;
bool dsc_force_changed;
bool vrr_supported;
struct mod_freesync_config freesync_config;

View File

@ -1744,9 +1744,11 @@ int pre_validate_dsc(struct drm_atomic_state *state,
int ind = find_crtc_index_in_state_by_stream(state, stream);
if (ind >= 0) {
struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(state->crtcs[ind].new_state);
DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n",
__func__, __LINE__, stream);
state->crtcs[ind].new_state->mode_changed = 0;
dm_new_crtc_state->base.mode_changed = dm_new_crtc_state->mode_changed_independent_from_dsc;
}
}
}

View File

@ -95,7 +95,6 @@ static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
struct work_struct work;
struct iwcm_id_private *cm_id;
struct list_head list;
struct iw_cm_event event;
struct list_head free_list;
};
@ -179,7 +178,6 @@ static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
return -ENOMEM;
}
work->cm_id = cm_id_priv;
INIT_LIST_HEAD(&work->list);
put_work(work);
}
return 0;
@ -214,7 +212,6 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
if (refcount_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
return true;
}
@ -261,7 +258,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
refcount_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
init_completion(&cm_id_priv->destroy_comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
INIT_LIST_HEAD(&cm_id_priv->work_free_list);
return &cm_id_priv->id;
@ -1008,13 +1004,13 @@ static int process_event(struct iwcm_id_private *cm_id_priv,
}
/*
* Process events on the work_list for the cm_id. If the callback
* function requests that the cm_id be deleted, a flag is set in the
* cm_id flags to indicate that when the last reference is
* removed, the cm_id is to be destroyed. This is necessary to
* distinguish between an object that will be destroyed by the app
* thread asleep on the destroy_comp list vs. an object destroyed
* here synchronously when the last reference is removed.
* Process events for the cm_id. If the callback function requests
* that the cm_id be deleted, a flag is set in the cm_id flags to
* indicate that when the last reference is removed, the cm_id is
* to be destroyed. This is necessary to distinguish between an
* object that will be destroyed by the app thread asleep on the
* destroy_comp list vs. an object destroyed here synchronously
* when the last reference is removed.
*/
static void cm_work_handler(struct work_struct *_work)
{
@ -1025,35 +1021,26 @@ static void cm_work_handler(struct work_struct *_work)
int ret = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
while (!list_empty(&cm_id_priv->work_list)) {
work = list_first_entry(&cm_id_priv->work_list,
struct iwcm_work, list);
list_del_init(&work->list);
levent = work->event;
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
if (ret) {
destroy_cm_id(&cm_id_priv->id);
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
}
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
}
levent = work->event;
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
if (ret) {
destroy_cm_id(&cm_id_priv->id);
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
}
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
return;
}
/*
* This function is called on interrupt context. Schedule events on
* the iwcm_wq thread to allow callback functions to downcall into
* the CM and/or block. Events are queued to a per-CM_ID
* work_list. If this is the first event on the work_list, the work
* element is also queued on the iwcm_wq thread.
* the CM and/or block.
*
* Each event holds a reference on the cm_id. Until the last posted
* event has been delivered and processed, the cm_id cannot be
@ -1095,7 +1082,6 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}
refcount_inc(&cm_id_priv->refcount);
list_add_tail(&work->list, &cm_id_priv->work_list);
queue_work(iwcm_wq, &work->work);
out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);

View File

@ -50,7 +50,6 @@ struct iwcm_id_private {
struct ib_qp *qp;
struct completion destroy_comp;
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
refcount_t refcount;
struct list_head work_free_list;

View File

@ -8,6 +8,7 @@ mlx5_ib-y := ah.o \
cq.o \
data_direct.o \
dm.o \
dmah.o \
doorbell.o \
fs.o \
gsi.o \

View File

@ -16,6 +16,18 @@ struct mlx5_ib_counter {
u32 type;
};
/*
 * mlx5 wrapper around a core struct rdma_counter.
 *
 * The core counter is embedded by value so that to_mcounter() can get back
 * to this wrapper with container_of().
 */
struct mlx5_rdma_counter {
/* Embedded core counter; container_of() anchor used by to_mcounter(). */
struct rdma_counter rdma_counter;
/*
 * Flow-counter pointers, MLX5_IB_OPCOUNTER_MAX slots. Handed to
 * mlx5r_fs_bind_op_fc() and mlx5r_fs_destroy_fcs().
 * NOTE(review): presumably one slot per optional-counter type - confirm.
 */
struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
/*
 * XArray passed to mlx5r_fs_bind_op_fc(); the dealloc path WARNs if it
 * is not empty. NOTE(review): name suggests it is keyed by QP number -
 * confirm against mlx5r_fs_bind_op_fc().
 */
struct xarray qpn_opfc_xa;
};
/* Map a core rdma_counter back to the mlx5_rdma_counter that embeds it. */
static struct mlx5_rdma_counter *to_mcounter(struct rdma_counter *counter)
{
	struct mlx5_rdma_counter *mcounter;

	mcounter = container_of(counter, struct mlx5_rdma_counter,
				rdma_counter);
	return mcounter;
}
#define INIT_Q_COUNTER(_name) \
{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
@ -398,7 +410,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@ -418,7 +430,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
@ -602,7 +614,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
return 0;
WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
mlx5r_fs_destroy_fcs(dev, counter);
mlx5r_fs_destroy_fcs(dev, mcounter->fc);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
@ -612,6 +624,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
struct ib_qp *qp, u32 port)
{
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_ib_dev *dev = to_mdev(qp->device);
bool new = false;
int err;
@ -635,7 +648,11 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
if (err)
goto fail_set_counter;
err = mlx5r_fs_bind_op_fc(qp, counter, port);
if (!counter->mode.bind_opcnt)
return 0;
err = mlx5r_fs_bind_op_fc(qp, mcounter->fc, &mcounter->qpn_opfc_xa,
port);
if (err)
goto fail_bind_op_fc;
@ -655,9 +672,12 @@ fail_set_counter:
static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
struct rdma_counter *counter = qp->counter;
struct mlx5_rdma_counter *mcounter;
int err;
mlx5r_fs_unbind_op_fc(qp, counter);
mcounter = to_mcounter(counter);
mlx5r_fs_unbind_op_fc(qp, &mcounter->qpn_opfc_xa);
err = mlx5_ib_qp_set_counter(qp, NULL);
if (err)
@ -666,7 +686,9 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
return 0;
fail_set_counter:
mlx5r_fs_bind_op_fc(qp, counter, port);
if (counter->mode.bind_opcnt)
mlx5r_fs_bind_op_fc(qp, mcounter->fc,
&mcounter->qpn_opfc_xa, port);
return err;
}

View File

@ -8,19 +8,6 @@
#include "mlx5_ib.h"
struct mlx5_rdma_counter {
struct rdma_counter rdma_counter;
struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
struct xarray qpn_opfc_xa;
};
static inline struct mlx5_rdma_counter *
to_mcounter(struct rdma_counter *counter)
{
return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
}
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_clear_description(struct ib_counters *counters);

View File

@ -645,7 +645,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
struct mlx5_ib_cq *cq = to_mcq(ibcq);
void __iomem *uar_page = mdev->priv.uar->map;
void __iomem *uar_page = mdev->priv.bfreg.up->map;
unsigned long irq_flags;
int ret = 0;
@ -920,7 +920,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->buf.frag_buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
*index = dev->mdev->priv.bfreg.up->index;
return 0;
@ -1017,15 +1017,18 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
if (udata) {
cq->mcq.comp = mlx5_add_cq_to_tasklet;
cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
} else {
cq->mcq.comp = mlx5_ib_cq_comp;
}
err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
if (err)
goto err_cqb;
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
if (udata)
cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
else
cq->mcq.comp = mlx5_ib_cq_comp;
cq->mcq.event = mlx5_ib_cq_event;
INIT_LIST_HEAD(&cq->wc_list);
@ -1052,20 +1055,31 @@ err_cqb:
return err;
}
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
int mlx5_ib_pre_destroy_cq(struct ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
return mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
}
/*
 * Second stage of CQ teardown: release the kernel-side CQ resources via
 * destroy_cq_kernel(). Meant to run after mlx5_ib_pre_destroy_cq() succeeded.
 */
void mlx5_ib_post_destroy_cq(struct ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);

	destroy_cq_kernel(dev, mcq);
}
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
int ret;
ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
ret = mlx5_ib_pre_destroy_cq(cq);
if (ret)
return ret;
if (udata)
destroy_cq_user(mcq, udata);
destroy_cq_user(to_mcq(cq), udata);
else
destroy_cq_kernel(dev, mcq);
mlx5_ib_post_destroy_cq(cq);
return 0;
}

View File

@ -35,7 +35,7 @@ static int mlx5_data_direct_vpd_get_vuid(struct mlx5_data_direct_dev *dev)
vpd_data = pci_vpd_alloc(pdev, &vpd_size);
if (IS_ERR(vpd_data)) {
pci_err(pdev, "Unable to read VPD, err=%ld\n", PTR_ERR(vpd_data));
pci_err(pdev, "Unable to read VPD, err=%pe\n", vpd_data);
return PTR_ERR(vpd_data);
}

View File

@ -159,7 +159,7 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
if (is_user &&
(MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
capable(CAP_NET_RAW))
rdma_dev_has_raw_cap(&dev->ib_dev))
cap |= MLX5_UCTX_CAP_RAW_TX;
if (is_user &&
(MLX5_CAP_GEN(dev->mdev, uctx_cap) &
@ -233,6 +233,7 @@ static u16 get_legacy_obj_type(u16 opcode)
{
switch (opcode) {
case MLX5_CMD_OP_CREATE_RQ:
case MLX5_CMD_OP_CREATE_RMP:
return MLX5_EVENT_QUEUE_TYPE_RQ;
case MLX5_CMD_OP_CREATE_QP:
return MLX5_EVENT_QUEUE_TYPE_QP;
@ -1224,6 +1225,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_GET(create_flow_table_in, in, other_vport));
MLX5_SET(destroy_flow_table_in, din, vport_number,
MLX5_GET(create_flow_table_in, in, vport_number));
MLX5_SET(destroy_flow_table_in, din, other_eswitch,
MLX5_GET(create_flow_table_in, in, other_eswitch));
MLX5_SET(destroy_flow_table_in, din, eswitch_owner_vhca_id,
MLX5_GET(create_flow_table_in, in,
eswitch_owner_vhca_id));
MLX5_SET(destroy_flow_table_in, din, table_type,
MLX5_GET(create_flow_table_in, in, table_type));
MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
@ -1236,6 +1242,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_GET(create_flow_group_in, in, other_vport));
MLX5_SET(destroy_flow_group_in, din, vport_number,
MLX5_GET(create_flow_group_in, in, vport_number));
MLX5_SET(destroy_flow_group_in, din, other_eswitch,
MLX5_GET(create_flow_group_in, in, other_eswitch));
MLX5_SET(destroy_flow_group_in, din, eswitch_owner_vhca_id,
MLX5_GET(create_flow_group_in, in,
eswitch_owner_vhca_id));
MLX5_SET(destroy_flow_group_in, din, table_type,
MLX5_GET(create_flow_group_in, in, table_type));
MLX5_SET(destroy_flow_group_in, din, table_id,
@ -1250,6 +1261,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_GET(set_fte_in, in, other_vport));
MLX5_SET(delete_fte_in, din, vport_number,
MLX5_GET(set_fte_in, in, vport_number));
MLX5_SET(delete_fte_in, din, other_eswitch,
MLX5_GET(set_fte_in, in, other_eswitch));
MLX5_SET(delete_fte_in, din, eswitch_owner_vhca_id,
MLX5_GET(set_fte_in, in, eswitch_owner_vhca_id));
MLX5_SET(delete_fte_in, din, table_type,
MLX5_GET(set_fte_in, in, table_type));
MLX5_SET(delete_fte_in, din, table_id,
@ -1393,6 +1408,10 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
}
MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
/* TPH is not allowed to bypass the regular kernel's verbs flow */
MLX5_SET(mkc, mkc, pcie_tph_en, 0);
MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX);
return 0;
}
@ -1958,6 +1977,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@ -1966,7 +1986,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;

View File

@ -282,7 +282,7 @@ static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
int err;
u64 address;
if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic))
if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic))
return ERR_PTR(-EOPNOTSUPP);
dm = kzalloc(sizeof(*dm), GFP_KERNEL);

View File

@ -0,0 +1,54 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include <rdma/uverbs_std_types.h>
#include <linux/pci-tph.h>
#include "dmah.h"
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
/*
 * Validate a DMA handle and, when steering-tag data was supplied, allocate a
 * steering-tag index for it.
 *
 * Returns 0 on success, -EINVAL when the processing hint is missing or only
 * part of the steering-tag data (CPU id / memory type) is present, or the
 * error returned by mlx5_st_alloc_index().
 */
static int mlx5_ib_alloc_dmah(struct ib_dmah *ibdmah,
			      struct uverbs_attr_bundle *attrs)
{
	const u16 st_mask = BIT(IB_DMAH_CPU_ID_EXISTS) |
			    BIT(IB_DMAH_MEM_TYPE_EXISTS);
	struct mlx5_ib_dmah *mdmah = to_mdmah(ibdmah);
	u16 st_present = ibdmah->valid_fields & st_mask;

	/* PH is a must for TPH following PCIe spec 6.2-1.0 */
	if (!(ibdmah->valid_fields & BIT(IB_DMAH_PH_EXISTS)))
		return -EINVAL;

	/* ST is optional: with no ST data there is nothing to allocate */
	if (!st_present)
		return 0;

	/* ... however, partial ST data is not allowed */
	if (st_present != st_mask)
		return -EINVAL;

	return mlx5_st_alloc_index(to_mdev(ibdmah->device)->mdev,
				   ibdmah->mem_type, ibdmah->cpu_id,
				   &mdmah->st_index);
}
static int mlx5_ib_dealloc_dmah(struct ib_dmah *ibdmah,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_dmah *dmah = to_mdmah(ibdmah);
struct mlx5_core_dev *mdev = to_mdev(ibdmah->device)->mdev;
if (ibdmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
return mlx5_st_dealloc_index(mdev, dmah->st_index);
return 0;
}
/* DMA handle (dmah) callbacks that mlx5_ib provides through ib_device_ops. */
const struct ib_device_ops mlx5_ib_dev_dmah_ops = {
.alloc_dmah = mlx5_ib_alloc_dmah,
.dealloc_dmah = mlx5_ib_dealloc_dmah,
};

View File

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#ifndef _MLX5_IB_DMAH_H
#define _MLX5_IB_DMAH_H
#include "mlx5_ib.h"
extern const struct ib_device_ops mlx5_ib_dev_dmah_ops;
/*
 * mlx5 wrapper around the core ib_dmah object; to_mdmah() recovers it from
 * the embedded member.
 */
struct mlx5_ib_dmah {
/* Core DMA handle; the anchor used by container_of() */
struct ib_dmah ibdmah;
/* Steering-tag index filled in by mlx5_st_alloc_index() */
u16 st_index;
};
/* Map a core ib_dmah back to the mlx5_ib_dmah that embeds it. */
static inline struct mlx5_ib_dmah *to_mdmah(struct ib_dmah *ibdmah)
{
	struct mlx5_ib_dmah *mdmah;

	mdmah = container_of(ibdmah, struct mlx5_ib_dmah, ibdmah);
	return mdmah;
}
#endif /* _MLX5_IB_DMAH_H */

View File

@ -691,22 +691,13 @@ static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
}
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns,
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
u32 flags, u16 vport)
struct mlx5_flow_table_attr *ft_attr)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
ft_attr.vport = vport;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
ft = mlx5_create_auto_grouped_flow_table(ns, ft_attr);
if (IS_ERR(ft))
return ERR_CAST(ft);
@ -720,6 +711,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
enum flow_table_type ft_type)
{
bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns = NULL;
enum mlx5_flow_namespace_type fn_type;
struct mlx5_ib_flow_prio *prio;
@ -797,11 +789,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = min_t(int, num_entries, max_table_size);
ft = prio->flow_table;
if (!ft)
return _get_prio(dev, ns, prio, priority, max_table_size,
num_groups, flags, 0);
if (ft)
return prio;
return prio;
ft_attr.prio = priority;
ft_attr.max_fte = max_table_size;
ft_attr.flags = flags;
ft_attr.autogroup.max_num_groups = num_groups;
return _get_prio(ns, prio, &ft_attr);
}
enum {
@ -950,6 +945,7 @@ static int get_per_qp_prio(struct mlx5_ib_dev *dev,
enum mlx5_ib_optional_counter_type type)
{
enum mlx5_ib_optional_counter_type per_qp_type;
struct mlx5_flow_table_attr ft_attr = {};
enum mlx5_flow_namespace_type fn_type;
struct mlx5_flow_namespace *ns;
struct mlx5_ib_flow_prio *prio;
@ -1003,7 +999,10 @@ static int get_per_qp_prio(struct mlx5_ib_dev *dev,
if (prio->flow_table)
return 0;
prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
ft_attr.prio = priority;
ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE;
ft_attr.autogroup.max_num_groups = 1;
prio = _get_prio(ns, prio, &ft_attr);
if (IS_ERR(prio))
return PTR_ERR(prio);
@ -1012,14 +1011,14 @@ static int get_per_qp_prio(struct mlx5_ib_dev *dev,
return 0;
}
static struct mlx5_per_qp_opfc *
get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
static struct mlx5_per_qp_opfc *get_per_qp_opfc(struct xarray *qpn_opfc_xa,
u32 qp_num, bool *new)
{
struct mlx5_per_qp_opfc *per_qp_opfc;
*new = false;
per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num);
per_qp_opfc = xa_load(qpn_opfc_xa, qp_num);
if (per_qp_opfc)
return per_qp_opfc;
per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
@ -1032,7 +1031,8 @@ get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
}
static int add_op_fc_rules(struct mlx5_ib_dev *dev,
struct mlx5_rdma_counter *mcounter,
struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
struct xarray *qpn_opfc_xa,
struct mlx5_per_qp_opfc *per_qp_opfc,
struct mlx5_ib_flow_prio *prio,
enum mlx5_ib_optional_counter_type type,
@ -1055,7 +1055,7 @@ static int add_op_fc_rules(struct mlx5_ib_dev *dev,
return 0;
}
opfc->fc = mcounter->fc[type];
opfc->fc = fc_arr[type];
spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
if (!spec) {
@ -1148,8 +1148,7 @@ static int add_op_fc_rules(struct mlx5_ib_dev *dev,
}
prio->refcount += spec_num;
err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc,
GFP_KERNEL));
err = xa_err(xa_store(qpn_opfc_xa, qp_num, per_qp_opfc, GFP_KERNEL));
if (err)
goto del_rules;
@ -1168,8 +1167,9 @@ null_fc:
return err;
}
static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
u32 type, struct mlx5_fc **fc)
static bool
is_fc_shared_and_in_use(struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX], u32 type,
struct mlx5_fc **fc)
{
u32 shared_fc_type;
@ -1190,7 +1190,7 @@ static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
return false;
}
*fc = mcounter->fc[shared_fc_type];
*fc = fc_arr[shared_fc_type];
if (!(*fc))
return false;
@ -1198,24 +1198,23 @@ static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
}
void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
struct rdma_counter *counter)
struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX])
{
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_fc *in_use_fc;
int i;
for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
if (!mcounter->fc[i])
if (!fc_arr[i])
continue;
if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) {
mcounter->fc[i] = NULL;
if (is_fc_shared_and_in_use(fc_arr, i, &in_use_fc)) {
fc_arr[i] = NULL;
continue;
}
mlx5_fc_destroy(dev->mdev, mcounter->fc[i]);
mcounter->fc[i] = NULL;
mlx5_fc_destroy(dev->mdev, fc_arr[i]);
fc_arr[i] = NULL;
}
}
@ -1223,6 +1222,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
{
struct mlx5_flow_table_attr ft_attr = {};
enum mlx5_flow_namespace_type fn_type;
int priority, i, err, spec_num;
struct mlx5_flow_act flow_act = {};
@ -1304,8 +1304,10 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
if (err)
goto free;
prio = _get_prio(dev, ns, prio, priority,
dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
ft_attr.prio = priority;
ft_attr.max_fte = dev->num_ports * MAX_OPFC_RULES;
ft_attr.autogroup.max_num_groups = 1;
prio = _get_prio(ns, prio, &ft_attr);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
goto put_prio;
@ -1359,16 +1361,15 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
put_per_qp_prio(dev, type);
}
void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct xarray *qpn_opfc_xa)
{
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_ib_dev *dev = to_mdev(counter->device);
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_per_qp_opfc *per_qp_opfc;
struct mlx5_ib_op_fc *in_use_opfc;
struct mlx5_ib_flow_prio *prio;
int i, j;
per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num);
per_qp_opfc = xa_load(qpn_opfc_xa, qp->qp_num);
if (!per_qp_opfc)
return;
@ -1394,13 +1395,13 @@ void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
}
kfree(per_qp_opfc);
xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num);
xa_erase(qpn_opfc_xa, qp->qp_num);
}
int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
u32 port)
int mlx5r_fs_bind_op_fc(struct ib_qp *qp,
struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
struct xarray *qpn_opfc_xa, u32 port)
{
struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_per_qp_opfc *per_qp_opfc;
struct mlx5_ib_flow_prio *prio;
@ -1410,9 +1411,6 @@ int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
int i, err, per_qp_type;
bool new;
if (!counter->mode.bind_opcnt)
return 0;
cnts = &dev->port[port - 1].cnts;
for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
@ -1424,23 +1422,22 @@ int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
prio = get_opfc_prio(dev, per_qp_type);
WARN_ON(!prio->flow_table);
if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc))
mcounter->fc[per_qp_type] = in_use_fc;
if (is_fc_shared_and_in_use(fc_arr, per_qp_type, &in_use_fc))
fc_arr[per_qp_type] = in_use_fc;
if (!mcounter->fc[per_qp_type]) {
mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev,
false);
if (IS_ERR(mcounter->fc[per_qp_type]))
return PTR_ERR(mcounter->fc[per_qp_type]);
if (!fc_arr[per_qp_type]) {
fc_arr[per_qp_type] = mlx5_fc_create(dev->mdev, false);
if (IS_ERR(fc_arr[per_qp_type]))
return PTR_ERR(fc_arr[per_qp_type]);
}
per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new);
per_qp_opfc = get_per_qp_opfc(qpn_opfc_xa, qp->qp_num, &new);
if (!per_qp_opfc) {
err = -ENOMEM;
goto free_fc;
}
err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio,
per_qp_type, qp->qp_num, port);
err = add_op_fc_rules(dev, fc_arr, qpn_opfc_xa, per_qp_opfc,
prio, per_qp_type, qp->qp_num, port);
if (err)
goto del_rules;
}
@ -1448,12 +1445,12 @@ int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
return 0;
del_rules:
mlx5r_fs_unbind_op_fc(qp, counter);
mlx5r_fs_unbind_op_fc(qp, qpn_opfc_xa);
if (new)
kfree(per_qp_opfc);
free_fc:
if (xa_empty(&mcounter->qpn_opfc_xa))
mlx5r_fs_destroy_fcs(dev, counter);
if (xa_empty(qpn_opfc_xa))
mlx5r_fs_destroy_fcs(dev, fc_arr);
return err;
}
@ -1645,11 +1642,6 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}
enum {
LEFTOVERS_MC,
LEFTOVERS_UC,
};
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
@ -1659,43 +1651,32 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de
struct mlx5_ib_flow_handler *handler = NULL;
static struct {
struct ib_flow_attr flow_attr;
struct ib_flow_spec_eth eth_flow;
} leftovers_specs[] = {
[LEFTOVERS_MC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {0x1} }
}
},
[LEFTOVERS_UC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {} }
}
}
};
struct ib_flow_attr flow_attr;
} leftovers_wc = { .flow_attr = { .num_of_specs = 1,
.size = sizeof(leftovers_wc) },
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = { .dst_mac = { 0x1 } },
.val = { .dst_mac = { 0x1 } } } };
handler = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_MC].flow_attr,
dst);
static struct {
struct ib_flow_spec_eth eth_flow;
struct ib_flow_attr flow_attr;
} leftovers_uc = { .flow_attr = { .num_of_specs = 1,
.size = sizeof(leftovers_uc) },
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = { .dst_mac = { 0x1 } },
.val = { .dst_mac = {} } } };
handler = create_flow_rule(dev, ft_prio, &leftovers_wc.flow_attr, dst);
if (!IS_ERR(handler) &&
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
handler_ucast = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_UC].flow_attr,
dst);
&leftovers_uc.flow_attr, dst);
if (IS_ERR(handler_ucast)) {
mlx5_del_flow_rules(handler->rule);
ft_prio->refcount--;
@ -1893,7 +1874,7 @@ static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
u32 *flags, u16 *vport_idx,
u16 *vport,
struct mlx5_core_dev **ft_mdev,
u32 ib_port)
u32 ib_port, u16 *esw_owner_vhca_id)
{
struct mlx5_core_dev *esw_mdev;
@ -1907,8 +1888,13 @@ static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
return -EINVAL;
esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
if (esw_mdev != dev->mdev)
return -EOPNOTSUPP;
if (esw_mdev != dev->mdev) {
if (!MLX5_CAP_ADV_RDMA(dev->mdev,
rdma_transport_manager_other_eswitch))
return -EOPNOTSUPP;
*flags |= MLX5_FLOW_TABLE_OTHER_ESWITCH;
*esw_owner_vhca_id = MLX5_CAP_GEN(esw_mdev, vhca_id);
}
*flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
*ft_mdev = esw_mdev;
@ -1924,8 +1910,10 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
bool mcast, u32 ib_port)
{
struct mlx5_core_dev *ft_mdev = dev->mdev;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
u16 esw_owner_vhca_id = 0;
int max_table_size = 0;
u16 vport_idx = 0;
bool esw_encap;
@ -1982,11 +1970,13 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
break;
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
if (ib_port == 0 ||
user_priority >= MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
return ERR_PTR(-EINVAL);
ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
&vport_idx, &vport,
&ft_mdev, ib_port);
&ft_mdev, ib_port,
&esw_owner_vhca_id);
if (ret)
return ERR_PTR(ret);
@ -2032,10 +2022,10 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
prio = &dev->flow_db->rdma_tx[priority];
break;
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
prio = &dev->flow_db->rdma_transport_rx[priority][ib_port - 1];
break;
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
prio = &dev->flow_db->rdma_transport_tx[priority][ib_port - 1];
break;
default: return ERR_PTR(-EINVAL);
}
@ -2046,8 +2036,13 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
if (prio->flow_table)
return prio;
return _get_prio(dev, ns, prio, priority, max_table_size,
MLX5_FS_MAX_TYPES, flags, vport);
ft_attr.prio = priority;
ft_attr.max_fte = max_table_size;
ft_attr.flags = flags;
ft_attr.vport = vport;
ft_attr.esw_owner_vhca_id = esw_owner_vhca_id;
ft_attr.autogroup.max_num_groups = MLX5_FS_MAX_TYPES;
return _get_prio(ns, prio, &ft_attr);
}
static struct mlx5_ib_flow_handler *
@ -2474,7 +2469,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct mlx5_ib_dev *dev;
u32 flags;
if (!capable(CAP_NET_RAW))
if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
fs_matcher = uverbs_attr_get_obj(attrs,
@ -3005,7 +3000,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
u32 ft_id;
int err;
if (!capable(CAP_NET_RAW))
if (!rdma_dev_has_raw_cap(&dev->ib_dev))
return -EPERM;
err = uverbs_get_const(&ib_uapi_ft_type, attrs,
@ -3482,31 +3477,40 @@ static const struct ib_device_ops flow_ops = {
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
int i, j;
dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
if (!dev->flow_db)
return -ENOMEM;
dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
sizeof(struct mlx5_ib_flow_prio),
GFP_KERNEL);
if (!dev->flow_db->rdma_transport_rx)
goto free_flow_db;
for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) {
dev->flow_db->rdma_transport_rx[i] =
kcalloc(dev->num_ports,
sizeof(struct mlx5_ib_flow_prio), GFP_KERNEL);
if (!dev->flow_db->rdma_transport_rx[i])
goto free_rdma_transport_rx;
}
dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
sizeof(struct mlx5_ib_flow_prio),
GFP_KERNEL);
if (!dev->flow_db->rdma_transport_tx)
goto free_rdma_transport_rx;
for (j = 0; j < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; j++) {
dev->flow_db->rdma_transport_tx[j] =
kcalloc(dev->num_ports,
sizeof(struct mlx5_ib_flow_prio), GFP_KERNEL);
if (!dev->flow_db->rdma_transport_tx[j])
goto free_rdma_transport_tx;
}
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
return 0;
free_rdma_transport_tx:
while (j--)
kfree(dev->flow_db->rdma_transport_tx[j]);
free_rdma_transport_rx:
kfree(dev->flow_db->rdma_transport_rx);
free_flow_db:
while (i--)
kfree(dev->flow_db->rdma_transport_rx[i]);
kfree(dev->flow_db);
return -ENOMEM;
}

View File

@ -13,6 +13,8 @@ void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
int i;
/* When a steering anchor is created, a special flow table is also
* created for the user to reference. Since the user can reference it,
* the kernel cannot trust that when the user destroys the steering
@ -25,8 +27,10 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
* is a safe assumption that all references are gone.
*/
mlx5_ib_fs_cleanup_anchor(dev);
kfree(dev->flow_db->rdma_transport_tx);
kfree(dev->flow_db->rdma_transport_rx);
for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++)
kfree(dev->flow_db->rdma_transport_tx[i]);
for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++)
kfree(dev->flow_db->rdma_transport_rx[i]);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */

View File

@ -131,8 +131,9 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
IB_POLL_SOFTIRQ);
if (IS_ERR(gsi->cq)) {
mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
PTR_ERR(gsi->cq));
mlx5_ib_warn(dev,
"unable to create send CQ for GSI QP. error %pe\n",
gsi->cq);
ret = PTR_ERR(gsi->cq);
goto err_free_wrs;
}
@ -147,8 +148,9 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
if (IS_ERR(gsi->rx_qp)) {
mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
PTR_ERR(gsi->rx_qp));
mlx5_ib_warn(dev,
"unable to create hardware GSI QP. error %pe\n",
gsi->rx_qp);
ret = PTR_ERR(gsi->rx_qp);
goto err_destroy_cq;
}
@ -294,8 +296,9 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
qp = create_gsi_ud_qp(gsi);
if (IS_ERR(qp)) {
mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
PTR_ERR(qp));
mlx5_ib_warn(dev,
"unable to create hardware UD QP for GSI: %pe\n",
qp);
return;
}

View File

@ -44,6 +44,63 @@ static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
}
}
/*
 * Point the RDMA-transport TX and RX flow-table roots of @cur_owner at
 * @new_owner via mlx5_fs_set_root_dev().
 *
 * Nothing is done (and 0 is returned) when @cur_owner lacks RDMA-transport
 * flow-table support in either direction, or when @new_owner lacks either of
 * the rdma_transport_manager capabilities. If the RX switch fails after the
 * TX switch succeeded, TX is pointed back at @cur_owner before the error is
 * returned.
 */
static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
				       struct mlx5_core_dev *new_owner)
{
	int err;

	if (!(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) &&
	      MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support)))
		return 0;

	if (!(MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) &&
	      MLX5_CAP_ADV_RDMA(new_owner,
				rdma_transport_manager_other_eswitch)))
		return 0;

	err = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_TX);
	if (err)
		return err;

	err = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_RX);
	if (!err)
		return 0;

	/* RX failed: undo the TX switch so both directions stay consistent */
	mlx5_fs_set_root_dev(cur_owner, cur_owner, FS_FT_RDMA_TRANSPORT_TX);
	return err;
}
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev;
int i, ret;
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
WARN_ON_ONCE(ret);
}
}
/*
 * Make @dev the RDMA-transport owner for every one of its LAG peers.
 *
 * On the first failure all peers are released again through
 * mlx5_ib_release_transport() and the error is returned; otherwise
 * returns 0.
 */
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer;
	int idx;
	int err;

	mlx5_lag_for_each_peer_mdev(dev, peer, idx) {
		err = mlx5_ib_set_owner_transport(peer, dev);
		if (err)
			goto rollback;
	}

	return 0;

rollback:
	mlx5_ib_release_transport(dev);
	return err;
}
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
@ -88,9 +145,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
else
return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
return -ENOMEM;
if (mlx5_lag_is_shared_fdb(dev)) {
ret = mlx5_ib_take_transport(lag_master);
if (ret)
return ret;
}
ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
mlx5_core_net(lag_master));
if (!ibdev) {
ret = -ENOMEM;
goto release_transport;
}
ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
GFP_KERNEL);
@ -126,6 +192,10 @@ fail_add:
kfree(ibdev->port);
fail_port:
ib_dealloc_device(&ibdev->ib_dev);
release_transport:
if (mlx5_lag_is_shared_fdb(lag_master))
mlx5_ib_release_transport(lag_master);
return ret;
}
@ -181,6 +251,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
esw = peer_mdev->priv.eswitch;
mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}
mlx5_ib_release_transport(mdev);
}
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}

View File

@ -13,6 +13,7 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
@ -50,6 +51,7 @@
#include <rdma/ib_ucaps.h>
#include "macsec.h"
#include "data_direct.h"
#include "dmah.h"
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
@ -485,6 +487,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_NDR;
break;
case MLX5E_PROT_MASK(MLX5E_200GAUI_1_200GBASE_CR1_KR1):
*active_width = IB_WIDTH_1X;
*active_speed = IB_SPEED_XDR;
break;
case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
*active_width = IB_WIDTH_8X;
*active_speed = IB_SPEED_HDR;
@ -493,10 +499,18 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_NDR;
break;
case MLX5E_PROT_MASK(MLX5E_400GAUI_2_400GBASE_CR2_KR2):
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_XDR;
break;
case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
*active_width = IB_WIDTH_8X;
*active_speed = IB_SPEED_NDR;
break;
case MLX5E_PROT_MASK(MLX5E_800GAUI_4_800GBASE_CR4_KR4):
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_XDR;
break;
default:
return -EINVAL;
}
@ -828,7 +842,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
err = mlx5_query_nic_vport_node_guid(dev->mdev, 0, false, &tmp);
break;
default:
@ -870,6 +884,51 @@ static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev,
resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
}
/*
 * Return the largest send-queue WQE overhead, in bytes, over all QP types.
 *
 * Only XRC_INI and UD-with-LSO are computed explicitly. The remaining QP
 * types (REG_UMR, UC, RC, UD/SMI/GSI, XRC_TGT) have smaller overhead than
 * these two, so they are implicitly covered by the result.
 */
static u32 mlx5_ib_calc_max_sq_overhead(void)
{
	u32 atomic_segs, umr_segs, xrc_ini, ud_lso;

	/* XRC_INI: xrc + ctrl segments plus the larger of atomic vs. UMR */
	atomic_segs = sizeof(struct mlx5_wqe_atomic_seg) +
		      sizeof(struct mlx5_wqe_raddr_seg);
	umr_segs = sizeof(struct mlx5_wqe_umr_ctrl_seg) +
		   sizeof(struct mlx5_mkey_seg) +
		   MLX5_IB_SQ_UMR_INLINE_THRESHOLD / MLX5_IB_UMR_OCTOWORD;
	xrc_ini = sizeof(struct mlx5_wqe_xrc_seg) +
		  sizeof(struct mlx5_wqe_ctrl_seg) +
		  max(atomic_segs, umr_segs);

	/* UD with LSO: ctrl + eth pad + eth + datagram segments */
	ud_lso = sizeof(struct mlx5_wqe_ctrl_seg) +
		 sizeof(struct mlx5_wqe_eth_pad) +
		 sizeof(struct mlx5_wqe_eth_seg) +
		 sizeof(struct mlx5_wqe_datagram_seg);

	return max(xrc_ini, ud_lso);
}
/*
 * Number of work requests that fit in a send queue of
 * (1 << log_max_qp_sz) basic blocks when every WQE has the worst-case
 * overhead plus a single data segment.
 *
 * The per-WQE size is rounded up to a power of two and then aligned to
 * MLX5_SEND_WQE_BB before dividing the queue size in bytes by it.
 */
static u32 mlx5_ib_calc_max_qp_wr(struct mlx5_ib_dev *dev)
{
	u32 bb_count = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	u32 wqe_bytes;

	/* max QP overhead + 1 SGE, no inline, no special features */
	wqe_bytes = sizeof(struct mlx5_wqe_data_seg) +
		    mlx5_ib_calc_max_sq_overhead();

	wqe_bytes = roundup_pow_of_two(wqe_bytes);
	wqe_bytes = ALIGN(wqe_bytes, MLX5_SEND_WQE_BB);

	return (bb_count * MLX5_SEND_WQE_BB) / wqe_bytes;
}
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
@ -1028,7 +1087,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_mr_size = ~0ull;
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->max_qp_wr = mlx5_ib_calc_max_qp_wr(dev);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
@ -1779,10 +1838,45 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
/*
 * Turn local loopback on for both ports of a multiport pair and record
 * that in lb_state->force_enable.
 *
 * The master is updated first, then the slave. If the slave update
 * fails, the master is switched back off before the error is returned,
 * and force_enable is left untouched.
 *
 * Returns 0 on success or the error from mlx5_nic_vport_update_local_lb().
 */
static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
				struct mlx5_core_dev *slave,
				struct mlx5_ib_lb_state *lb_state)
{
	int ret;

	ret = mlx5_nic_vport_update_local_lb(master, true);
	if (ret)
		return ret;

	ret = mlx5_nic_vport_update_local_lb(slave, true);
	if (ret) {
		/* Roll back the master; its own result is not checked */
		mlx5_nic_vport_update_local_lb(master, false);
		return ret;
	}

	lb_state->force_enable = true;
	return 0;
}
/*
 * Counterpart of mlx5_ib_enable_lb_mp(): switch local loopback off on
 * both ports and clear lb_state->force_enable.
 *
 * The ports are handled slave first, then master, i.e. the reverse of
 * the order used when enabling. The return values of
 * mlx5_nic_vport_update_local_lb() are not checked here.
 */
static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
struct mlx5_core_dev *slave,
struct mlx5_ib_lb_state *lb_state)
{
mlx5_nic_vport_update_local_lb(slave, false);
mlx5_nic_vport_update_local_lb(master, false);
/*
 * With force_enable cleared, mlx5_ib_enable_lb()/mlx5_ib_disable_lb()
 * stop returning early and resume their own accounting.
 */
lb_state->force_enable = false;
}
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
if (dev->lb.force_enable)
return 0;
mutex_lock(&dev->lb.mutex);
if (td)
dev->lb.user_td++;
@ -1804,6 +1898,9 @@ int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
if (dev->lb.force_enable)
return;
mutex_lock(&dev->lb.mutex);
if (td)
dev->lb.user_td--;
@ -2954,14 +3051,16 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret);
mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%pe\n",
pd);
goto unlock;
}
cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret);
mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%pe\n",
cq);
ib_dealloc_pd(pd);
goto unlock;
}
@ -3005,7 +3104,9 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
s0 = ib_create_srq(devr->p0, &attr);
if (IS_ERR(s0)) {
ret = PTR_ERR(s0);
mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret);
mlx5_ib_err(dev,
"Couldn't create SRQ 0 for res init, err=%pe\n",
s0);
goto unlock;
}
@ -3017,8 +3118,11 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
s1 = ib_create_srq(devr->p0, &attr);
if (IS_ERR(s1)) {
ret = PTR_ERR(s1);
mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret);
mlx5_ib_err(dev,
"Couldn't create SRQ 1 for res init, err=%pe\n",
s1);
ib_destroy_srq(s0);
goto unlock;
}
devr->s0 = s0;
@ -3078,6 +3182,7 @@ mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_core_dev *mdev = dev->mdev;
bool ro_supp = false;
void *mkc;
u32 mkey;
u32 pdn;
@ -3106,14 +3211,37 @@ mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
MLX5_SET(mkc, mkc, length64, 1);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
kvfree(in);
if (err)
goto err;
goto err_mkey;
dev->ddr.mkey = mkey;
dev->ddr.pdn = pdn;
/* create another mkey with RO support */
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) {
MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
ro_supp = true;
}
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) {
MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
ro_supp = true;
}
if (ro_supp) {
err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
/* RO is defined as best effort */
if (!err) {
dev->ddr.mkey_ro = mkey;
dev->ddr.mkey_ro_valid = true;
}
}
kvfree(in);
return 0;
err_mkey:
kvfree(in);
err:
mlx5_core_dealloc_pd(mdev, pdn);
return err;
@ -3122,6 +3250,10 @@ err:
static void
mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev)
{
if (dev->ddr.mkey_ro_valid)
mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey_ro);
mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey);
mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn);
}
@ -3483,6 +3615,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@ -3578,6 +3712,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
if (err)
goto unbind;
return true;
unbind:
@ -4145,7 +4283,9 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.modify_port = mlx5_ib_modify_port,
.modify_qp = mlx5_ib_modify_qp,
.modify_srq = mlx5_ib_modify_srq,
.pre_destroy_cq = mlx5_ib_pre_destroy_cq,
.poll_cq = mlx5_ib_poll_cq,
.post_destroy_cq = mlx5_ib_post_destroy_cq,
.post_recv = mlx5_ib_post_recv_nodrain,
.post_send = mlx5_ib_post_send_nodrain,
.post_srq_recv = mlx5_ib_post_srq_recv,
@ -4167,6 +4307,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_dmah, mlx5_ib_dmah, ibdmah),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mlx5_ib_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
@ -4294,6 +4435,9 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
if (mdev->st)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dmah_ops);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@ -4422,17 +4566,6 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
mlx5_core_native_port_num(dev->mdev) - 1);
}
static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
{
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
}
static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@ -4661,9 +4794,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_UAR,
mlx5_ib_stage_uar_init,
mlx5_ib_stage_uar_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
@ -4721,9 +4851,6 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_UAR,
mlx5_ib_stage_uar_init,
mlx5_ib_stage_uar_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
@ -4795,7 +4922,8 @@ static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
!MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud))
return ERR_PTR(-EOPNOTSUPP);
mplane = ib_alloc_device(mlx5_ib_dev, ib_dev);
mplane = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
mlx5_core_net(mparent->mdev));
if (!mplane)
return ERR_PTR(-ENOMEM);
@ -4909,7 +5037,8 @@ static int mlx5r_probe(struct auxiliary_device *adev,
num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
MLX5_CAP_GEN(mdev, num_vhca_ports));
dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
dev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
mlx5_core_net(mdev));
if (!dev)
return -ENOMEM;

View File

@ -100,19 +100,6 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
__mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
page_offset_quantized)
static inline unsigned long
mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf)
{
/*
* mkeys used for dmabuf are fixed at PAGE_SIZE because we must be able
* to hold any sgl after a move operation. Ideally the mkc page size
* could be changed at runtime to be optimal, but right now the driver
* cannot do that.
*/
return ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
umem_dmabuf->umem.iova);
}
enum {
MLX5_IB_MMAP_OFFSET_START = 9,
MLX5_IB_MMAP_OFFSET_END = 255,
@ -316,8 +303,8 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
struct mlx5_ib_flow_prio *rdma_transport_rx;
struct mlx5_ib_flow_prio *rdma_transport_tx;
struct mlx5_ib_flow_prio *rdma_transport_rx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
struct mlx5_ib_flow_prio *rdma_transport_tx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
@ -348,6 +335,7 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
#define MLX5_IB_UPD_XLT_DOWNGRADE BIT(7)
#define MLX5_IB_UPD_XLT_KEEP_PGSZ BIT(8)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
@ -646,8 +634,13 @@ enum mlx5_mkey_type {
MLX5_MKEY_IMPLICIT_CHILD,
};
/* Used for non-existent ph value */
#define MLX5_IB_NO_PH 0xff
struct mlx5r_cache_rb_key {
u8 ats:1;
u8 ph;
u16 st_index;
unsigned int access_mode;
unsigned int access_flags;
unsigned int ndescs;
@ -735,6 +728,8 @@ struct mlx5_ib_mr {
struct mlx5_ib_mr *dd_crossed_mr;
struct list_head dd_node;
u8 revoked :1;
/* Indicates previous dmabuf page fault occurred */
u8 dmabuf_faulted:1;
struct mlx5_ib_mkey null_mmkey;
};
};
@ -855,6 +850,8 @@ struct mlx5_ib_port_resources {
struct mlx5_data_direct_resources {
u32 pdn;
u32 mkey;
u32 mkey_ro;
u8 mkey_ro_valid :1;
};
struct mlx5_ib_resources {
@ -895,13 +892,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type);
int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
u32 port);
int mlx5r_fs_bind_op_fc(struct ib_qp *qp,
struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
struct xarray *qpn_opfc_xa, u32 port);
void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter);
void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct xarray *qpn_opfc_xa);
void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
struct rdma_counter *counter);
struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX]);
struct mlx5_ib_multiport_info;
@ -1002,7 +1000,6 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_WHITELIST_UID,
@ -1110,6 +1107,7 @@ struct mlx5_ib_lb_state {
u32 user_td;
int qps;
bool enabled;
bool force_enable;
};
struct mlx5_ib_pf_eq {
@ -1369,6 +1367,8 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_pre_destroy_cq(struct ib_cq *cq);
void mlx5_ib_post_destroy_cq(struct ib_cq *cq);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
@ -1747,20 +1747,75 @@ static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
return (port - 1) / dev->num_ports + 1;
}
/*
 * Largest log2 mkey entity (page) size usable for @access_mode.
 *
 * MTT and KSM modes each read their own capability field; any other
 * mode has none. The result falls back to 31 when no capability is
 * reported, or when the reported value exceeds 31 but the device lacks
 * umr_log_entity_size_5.
 */
static inline unsigned int get_max_log_entity_size_cap(struct mlx5_ib_dev *dev,
						       int access_mode)
{
	int log_cap;

	switch (access_mode) {
	case MLX5_MKC_ACCESS_MODE_MTT:
		log_cap = MLX5_CAP_GEN_2(dev->mdev,
					 max_mkey_log_entity_size_mtt);
		break;
	case MLX5_MKC_ACCESS_MODE_KSM:
		log_cap = MLX5_CAP_GEN_2(dev->mdev,
					 max_mkey_log_entity_size_fixed_buffer);
		break;
	default:
		log_cap = 0;
		break;
	}

	/* Nothing reported for this mode: use the fallback */
	if (!log_cap)
		return 31;

	/* Values above 31 are only honoured with umr_log_entity_size_5 */
	if (log_cap > 31 && !MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5))
		return 31;

	return log_cap;
}
static inline unsigned int get_min_log_entity_size_cap(struct mlx5_ib_dev *dev,
int access_mode)
{
int min_log_size = 0;
if (access_mode == MLX5_MKC_ACCESS_MODE_KSM &&
MLX5_CAP_GEN_2(dev->mdev,
min_mkey_log_entity_size_fixed_buffer_valid))
min_log_size = MLX5_CAP_GEN_2(
dev->mdev, min_mkey_log_entity_size_fixed_buffer);
else
min_log_size =
MLX5_CAP_GEN_2(dev->mdev, log_min_mkey_entity_size);
min_log_size = max(min_log_size, MLX5_ADAPTER_PAGE_SHIFT);
return min_log_size;
}
/*
* For mkc users, instead of a page_offset the command has a start_iova which
* specifies both the page_offset and the on-the-wire IOVA
*/
static __always_inline unsigned long
mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
u64 iova)
u64 iova, int access_mode)
{
int page_size_bits =
MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : 5;
unsigned long bitmap =
__mlx5_log_page_size_to_bitmap(page_size_bits, 0);
unsigned int max_log_entity_size_cap, min_log_entity_size_cap;
unsigned long bitmap;
max_log_entity_size_cap = get_max_log_entity_size_cap(dev, access_mode);
min_log_entity_size_cap = get_min_log_entity_size_cap(dev, access_mode);
bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap);
/* In KSM mode HW requires IOVA and mkey's page size to be aligned */
if (access_mode == MLX5_MKC_ACCESS_MODE_KSM && iova)
bitmap &= GENMASK_ULL(__ffs64(iova), 0);
return ib_umem_find_best_pgsz(umem, bitmap, iova);
}
static inline unsigned long
mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf,
int access_mode)
{
return mlx5_umem_mkc_find_best_pgsz(to_mdev(umem_dmabuf->umem.ibdev),
&umem_dmabuf->umem,
umem_dmabuf->umem.iova,
access_mode);
}
#endif /* MLX5_IB_H */

View File

@ -44,6 +44,7 @@
#include "mlx5_ib.h"
#include "umr.h"
#include "data_direct.h"
#include "dmah.h"
enum {
MAX_PENDING_REG_MR = 8,
@ -57,7 +58,7 @@ create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
unsigned long page_size, bool populate,
int access_mode);
int access_mode, u16 st_index, u8 ph);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
@ -256,6 +257,14 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
get_mkc_octo_size(ent->rb_key.access_mode,
ent->rb_key.ndescs));
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
if (ent->rb_key.ph != MLX5_IB_NO_PH) {
MLX5_SET(mkc, mkc, pcie_tph_en, 1);
MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph);
if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
ent->rb_key.st_index);
}
}
/* Asynchronously schedule new MRs to be populated in the cache. */
@ -525,7 +534,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
ent->fill_to_high_water = false;
if (ent->pending)
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(1000));
secs_to_jiffies(1));
else
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
}
@ -576,7 +585,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
"add keys command failed, err %d\n",
err);
queue_delayed_work(cache->wq, &ent->dwork,
msecs_to_jiffies(1000));
secs_to_jiffies(1));
}
}
} else if (ent->mkeys_queue.ci > 2 * ent->limit) {
@ -641,6 +650,14 @@ static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
if (res)
return res;
res = key1.st_index - key2.st_index;
if (res)
return res;
res = key1.ph - key2.ph;
if (res)
return res;
/*
* keep ndescs the last in the compare table since the find function
* searches for an exact match on all properties and only closest
@ -712,6 +729,8 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
smallest->rb_key.access_mode == rb_key.access_mode &&
smallest->rb_key.access_flags == rb_key.access_flags &&
smallest->rb_key.ats == rb_key.ats &&
smallest->rb_key.st_index == rb_key.st_index &&
smallest->rb_key.ph == rb_key.ph &&
smallest->rb_key.ndescs <= ndescs_limit) ?
smallest :
NULL;
@ -786,7 +805,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
struct mlx5r_cache_rb_key rb_key = {
.ndescs = ndescs,
.access_mode = access_mode,
.access_flags = get_unchangeable_access_flags(dev, access_flags)
.access_flags = get_unchangeable_access_flags(dev, access_flags),
.ph = MLX5_IB_NO_PH,
};
struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
@ -943,6 +963,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
struct rb_root *root = &dev->cache.rb_root;
struct mlx5r_cache_rb_key rb_key = {
.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
.ph = MLX5_IB_NO_PH,
};
struct mlx5_cache_ent *ent;
struct rb_node *node;
@ -1119,7 +1140,8 @@ static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
int access_flags, int access_mode)
int access_flags, int access_mode,
u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5r_cache_rb_key rb_key = {};
@ -1130,7 +1152,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
else
page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova,
access_mode);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
@ -1138,6 +1161,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
rb_key.st_index = st_index;
rb_key.ph = ph;
ent = mkey_cache_ent_from_rb_key(dev, rb_key);
/*
* If the MR can't come from the cache then synchronously create an uncached
@ -1145,7 +1170,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
*/
if (!ent) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode);
mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode,
st_index, ph);
mutex_unlock(&dev->slow_path_mutex);
if (IS_ERR(mr))
return mr;
@ -1230,7 +1256,7 @@ err_1:
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
unsigned long page_size, bool populate,
int access_mode)
int access_mode, u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@ -1240,7 +1266,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u32 *in;
int err;
bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
(access_mode == MLX5_MKC_ACCESS_MODE_MTT);
(access_mode == MLX5_MKC_ACCESS_MODE_MTT) &&
(ph == MLX5_IB_NO_PH);
bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
if (!page_size)
@ -1304,6 +1331,13 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
get_octo_len(iova, umem->length, mr->page_shift));
}
if (ph != MLX5_IB_NO_PH) {
MLX5_SET(mkc, mkc, pcie_tph_en, 1);
MLX5_SET(mkc, mkc, pcie_tph_ph, ph);
if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index);
}
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
@ -1423,24 +1457,37 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
}
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags)
u64 iova, int access_flags,
struct ib_dmah *dmah)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
u8 ph = MLX5_IB_NO_PH;
int err;
if (dmah) {
struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
ph = dmah->ph;
if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
st_index = mdmah->st_index;
}
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT);
MLX5_MKC_ACCESS_MODE_MTT,
st_index, ph);
} else {
unsigned long page_size =
mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size,
true, MLX5_MKC_ACCESS_MODE_MTT);
true, MLX5_MKC_ACCESS_MODE_MTT,
st_index, ph);
mutex_unlock(&dev->slow_path_mutex);
}
if (IS_ERR(mr)) {
@ -1504,7 +1551,9 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_CAST(odp);
mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT);
MLX5_MKC_ACCESS_MODE_MTT,
MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX,
MLX5_IB_NO_PH);
if (IS_ERR(mr)) {
ib_umem_release(&odp->umem);
return ERR_CAST(mr);
@ -1535,7 +1584,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_umem *umem;
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || dmah)
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
((access_flags & IB_ACCESS_ON_DEMAND) && dmah))
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
@ -1551,7 +1601,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
if (IS_ERR(umem))
return ERR_CAST(umem);
return create_real_mr(pd, umem, iova, access_flags);
return create_real_mr(pd, umem, iova, access_flags, dmah);
}
static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
@ -1576,12 +1626,15 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
static struct ib_mr *
reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
u64 offset, u64 length, u64 virt_addr,
int fd, int access_flags, int access_mode)
int fd, int access_flags, int access_mode,
struct ib_dmah *dmah)
{
bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
struct ib_umem_dmabuf *umem_dmabuf;
u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
u8 ph = MLX5_IB_NO_PH;
int err;
err = mlx5r_umr_resource_init(dev);
@ -1599,13 +1652,21 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
fd, access_flags);
if (IS_ERR(umem_dmabuf)) {
mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
PTR_ERR(umem_dmabuf));
mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf);
return ERR_CAST(umem_dmabuf);
}
if (dmah) {
struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
ph = dmah->ph;
if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
st_index = mdmah->st_index;
}
mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
access_flags, access_mode);
access_flags, access_mode,
st_index, ph);
if (IS_ERR(mr)) {
ib_umem_release(&umem_dmabuf->umem);
return ERR_CAST(mr);
@ -1655,14 +1716,15 @@ reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
goto end;
}
/* The device's 'data direct mkey' was created without RO flags to
* simplify things and allow for a single mkey per device.
* Since RO is not a must, mask it out accordingly.
/* If no device's 'data direct mkey' with RO flags exists
* mask it out accordingly.
*/
access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
if (!dev->ddr.mkey_ro_valid)
access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
offset, length, virt_addr, fd,
access_flags, MLX5_MKC_ACCESS_MODE_KSM);
access_flags, MLX5_MKC_ACCESS_MODE_KSM,
NULL);
if (IS_ERR(crossed_mr)) {
ret = PTR_ERR(crossed_mr);
goto end;
@ -1697,7 +1759,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || dmah)
!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
return ERR_PTR(-EOPNOTSUPP);
if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
@ -1722,7 +1784,8 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
return reg_user_mr_dmabuf(pd, pd->device->dma_device,
offset, length, virt_addr,
fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT);
fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT,
dmah);
}
/*
@ -1756,7 +1819,8 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
*page_size = mlx5_umem_mkc_find_best_pgsz(dev, new_umem, iova);
*page_size = mlx5_umem_mkc_find_best_pgsz(
dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode);
if (WARN_ON(!*page_size))
return false;
return (mr->mmkey.cache_ent->rb_key.ndescs) >=
@ -1819,7 +1883,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct)
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct ||
mr->mmkey.rb_key.ph != MLX5_IB_NO_PH)
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(
@ -1863,7 +1928,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
return create_real_mr(new_pd, umem, mr->ibmr.iova,
new_access_flags);
new_access_flags, NULL);
}
/*
@ -1894,7 +1959,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
return NULL;
}
return create_real_mr(new_pd, new_umem, iova, new_access_flags);
return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL);
}
/*
@ -2080,7 +2145,7 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
ent->in_use--;
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(30 * 1000));
secs_to_jiffies(30));
ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);

View File

@ -97,33 +97,28 @@ struct mlx5_pagefault {
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
static u64 mlx5_imr_ksm_entries;
static u64 mlx5_imr_mtt_entries;
static u64 mlx5_imr_mtt_size;
static u8 mlx5_imr_mtt_shift;
static u8 mlx5_imr_ksm_page_shift;
static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
static void populate_ksm(struct mlx5_ksm *pksm, size_t idx, size_t nentries,
struct mlx5_ib_mr *imr, int flags)
{
struct mlx5_core_dev *dev = mr_to_mdev(imr)->mdev;
struct mlx5_klm *end = pklm + nentries;
int step = MLX5_CAP_ODP(dev, mem_page_fault) ? MLX5_IMR_MTT_SIZE : 0;
struct mlx5_ksm *end = pksm + nentries;
u64 step = MLX5_CAP_ODP(dev, mem_page_fault) ? mlx5_imr_mtt_size : 0;
__be32 key = MLX5_CAP_ODP(dev, mem_page_fault) ?
cpu_to_be32(imr->null_mmkey.key) :
mr_to_mdev(imr)->mkeys.null_mkey;
u64 va =
MLX5_CAP_ODP(dev, mem_page_fault) ? idx * MLX5_IMR_MTT_SIZE : 0;
MLX5_CAP_ODP(dev, mem_page_fault) ? idx * mlx5_imr_mtt_size : 0;
if (flags & MLX5_IB_UPD_XLT_ZAP) {
for (; pklm != end; pklm++, idx++, va += step) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
pklm->key = key;
pklm->va = cpu_to_be64(va);
for (; pksm != end; pksm++, idx++, va += step) {
pksm->key = key;
pksm->va = cpu_to_be64(va);
}
return;
}
@ -147,16 +142,15 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
for (; pklm != end; pklm++, idx++, va += step) {
for (; pksm != end; pksm++, idx++, va += step) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
if (mtt) {
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
pksm->key = cpu_to_be32(mtt->ibmr.lkey);
pksm->va = cpu_to_be64(idx * mlx5_imr_mtt_size);
} else {
pklm->key = key;
pklm->va = cpu_to_be64(va);
pksm->key = key;
pksm->va = cpu_to_be64(va);
}
}
}
@ -201,7 +195,7 @@ int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags)
{
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
populate_klm(xlt, idx, nentries, mr, flags);
populate_ksm(xlt, idx, nentries, mr, flags);
return 0;
} else {
return populate_mtt(xlt, idx, nentries, mr, flags);
@ -226,7 +220,7 @@ static void free_implicit_child_mr_work(struct work_struct *work)
mutex_lock(&odp_imr->umem_mutex);
mlx5r_umr_update_xlt(mr->parent,
ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0,
ib_umem_start(odp) >> mlx5_imr_mtt_shift, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
@ -237,7 +231,7 @@ static void free_implicit_child_mr_work(struct work_struct *work)
static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
unsigned long idx = ib_umem_start(odp) >> mlx5_imr_mtt_shift;
struct mlx5_ib_mr *imr = mr->parent;
/*
@ -425,7 +419,10 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
!MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
!MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled) &&
mlx5_imr_ksm_entries != 0 &&
!(mlx5_imr_ksm_page_shift >
get_max_log_entity_size_cap(dev, MLX5_MKC_ACCESS_MODE_KSM)))
caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
}
@ -476,14 +473,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
int err;
odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
idx * MLX5_IMR_MTT_SIZE,
MLX5_IMR_MTT_SIZE, &mlx5_mn_ops);
idx * mlx5_imr_mtt_size,
mlx5_imr_mtt_size, &mlx5_mn_ops);
if (IS_ERR(odp))
return ERR_CAST(odp);
mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
MLX5_MKC_ACCESS_MODE_MTT,
MLX5_IMR_MTT_ENTRIES);
mlx5_imr_mtt_entries);
if (IS_ERR(mr)) {
ib_umem_odp_release(odp);
return mr;
@ -495,7 +492,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE;
mr->ibmr.iova = idx * mlx5_imr_mtt_size;
mr->parent = imr;
odp->private = mr;
@ -506,7 +503,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
refcount_set(&mr->mmkey.usecount, 2);
err = mlx5r_umr_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
mlx5_imr_mtt_entries,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
@ -611,7 +608,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
if (!mlx5r_umr_can_load_pas(dev, mlx5_imr_mtt_entries * PAGE_SIZE))
return ERR_PTR(-EOPNOTSUPP);
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
@ -647,7 +644,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
err = mlx5r_umr_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
mlx5_imr_ksm_page_shift,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
@ -750,20 +747,20 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
struct ib_umem_odp *odp_imr, u64 user_va,
size_t bcnt, u32 *bytes_mapped, u32 flags)
{
unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
unsigned long end_idx = (user_va + bcnt - 1) >> mlx5_imr_mtt_shift;
unsigned long upd_start_idx = end_idx + 1;
unsigned long upd_len = 0;
unsigned long npages = 0;
int err;
int ret;
if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
if (unlikely(user_va >= mlx5_imr_ksm_entries * mlx5_imr_mtt_size ||
mlx5_imr_ksm_entries * mlx5_imr_mtt_size - user_va < bcnt))
return -EFAULT;
/* Fault each child mr that intersects with our interval. */
while (bcnt) {
unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
unsigned long idx = user_va >> mlx5_imr_mtt_shift;
struct ib_umem_odp *umem_odp;
struct mlx5_ib_mr *mtt;
u64 len;
@ -836,9 +833,13 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
u32 *bytes_mapped, u32 flags)
{
struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
int access_mode = mr->data_direct ? MLX5_MKC_ACCESS_MODE_KSM :
MLX5_MKC_ACCESS_MODE_MTT;
unsigned int old_page_shift = mr->page_shift;
unsigned int page_shift;
unsigned long page_size;
u32 xlt_flags = 0;
int err;
unsigned long page_size;
if (flags & MLX5_PF_FLAGS_ENABLE)
xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
@ -850,20 +851,33 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
return err;
}
page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf);
page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf, access_mode);
if (!page_size) {
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
if (mr->data_direct)
err = mlx5r_umr_update_data_direct_ksm_pas(mr, xlt_flags);
else
err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
page_shift = order_base_2(page_size);
if (page_shift != mr->page_shift && mr->dmabuf_faulted) {
err = mlx5r_umr_dmabuf_update_pgsz(mr, xlt_flags,
page_shift);
} else {
mr->page_shift = page_shift;
if (mr->data_direct)
err = mlx5r_umr_update_data_direct_ksm_pas(
mr, xlt_flags);
else
err = mlx5r_umr_update_mr_pas(mr,
xlt_flags);
}
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
if (err)
if (err) {
mr->page_shift = old_page_shift;
return err;
}
mr->dmabuf_faulted = 1;
if (bytes_mapped)
*bytes_mapped += bcnt;
@ -1866,6 +1880,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
struct mlx5r_cache_rb_key rb_key = {
.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
.ndescs = mlx5_imr_ksm_entries,
.ph = MLX5_IB_NO_PH,
};
struct mlx5_cache_ent *ent;
@ -1906,9 +1921,25 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
int mlx5_ib_odp_init(void)
{
mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
MLX5_IMR_MTT_BITS);
u32 log_va_pages = ilog2(TASK_SIZE) - PAGE_SHIFT;
u8 mlx5_imr_mtt_bits;
/* 48 is default ARM64 VA space and covers X86 4-level paging which is 47 */
if (log_va_pages <= 48 - PAGE_SHIFT)
mlx5_imr_mtt_shift = 30;
/* 56 is x86-64, 5-level paging */
else if (log_va_pages <= 56 - PAGE_SHIFT)
mlx5_imr_mtt_shift = 34;
else
return 0;
mlx5_imr_mtt_size = BIT_ULL(mlx5_imr_mtt_shift);
mlx5_imr_mtt_bits = mlx5_imr_mtt_shift - PAGE_SHIFT;
mlx5_imr_mtt_entries = BIT_ULL(mlx5_imr_mtt_bits);
mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
mlx5_imr_mtt_bits);
mlx5_imr_ksm_page_shift = mlx5_imr_mtt_shift;
return 0;
}

View File

@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
spin_lock_irqsave(&table->lock, flags);
common = radix_tree_lookup(&table->tree, rsn);
if (common)
if (common && !common->invalid)
refcount_inc(&common->refcount);
else
common = NULL;
spin_unlock_irqrestore(&table->lock, flags);
@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev,
return 0;
}
/*
 * Mark a tracked resource as invalid (or valid again).
 *
 * The flag is written while holding the QP table lock; mlx5_get_rsc() tests
 * it under that same lock and will not take a reference on a resource that
 * is currently marked invalid.
 *
 * @dev:     IB device owning the QP table
 * @qp:      resource whose common.invalid flag is updated
 * @invalid: new value of the flag
 */
static void modify_resource_common_state(struct mlx5_ib_dev *dev,
					 struct mlx5_core_qp *qp,
					 bool invalid)
{
	spinlock_t *table_lock = &dev->qp_table.lock;
	unsigned long irq_flags;

	spin_lock_irqsave(table_lock, irq_flags);
	qp->common.invalid = invalid;
	spin_unlock_irqrestore(table_lock, irq_flags);
}
static void destroy_resource_common(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *qp)
{
@ -609,8 +623,20 @@ err_destroy_rq:
int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *rq)
{
int ret;
/* The rq destruction can be called again in case it fails, hence we
* mark the common resource as invalid and only once FW destruction
* is completed successfully we actually destroy the resources.
*/
modify_resource_common_state(dev, rq, true);
ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
if (ret) {
modify_resource_common_state(dev, rq, false);
return ret;
}
destroy_resource_common(dev, rq);
return destroy_rq_tracked(dev, rq->qpn, rq->uid);
return 0;
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)

View File

@ -83,33 +83,14 @@ static int fill_vport_icm_addr(struct mlx5_core_dev *mdev, u16 vport,
static int fill_vport_vhca_id(struct mlx5_core_dev *mdev, u16 vport,
struct mlx5_ib_uapi_query_port *info)
{
size_t out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
void *out;
int err;
int err = mlx5_vport_get_vhca_id(mdev, vport, &info->vport_vhca_id);
out = kzalloc(out_sz, GFP_KERNEL);
if (!out)
return -ENOMEM;
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, other_function, true);
MLX5_SET(query_hca_cap_in, in, function_id, vport);
MLX5_SET(query_hca_cap_in, in, op_mod,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE |
HCA_CAP_OPMOD_GET_CUR);
err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_sz);
if (err)
goto out;
info->vport_vhca_id = MLX5_GET(query_hca_cap_out, out,
capability.cmd_hca_cap.vhca_id);
return err;
info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID;
out:
kfree(out);
return err;
return 0;
}
static int fill_multiport_info(struct mlx5_ib_dev *dev, u32 port_num,

View File

@ -32,13 +32,15 @@ static __be64 get_umr_disable_mr_mask(void)
return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5))
result |= MLX5_MKEY_MASK_PAGE_SIZE_5;
return cpu_to_be64(result);
}
@ -654,9 +656,12 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
if (update_translation) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev);
if (!mr->ibmr.length)
MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
if (flags & MLX5_IB_UPD_XLT_KEEP_PGSZ)
wqe->ctrl_seg.mkey_mask &=
cpu_to_be64(~MLX5_MKEY_MASK_PAGE_SIZE);
}
wqe->ctrl_seg.xlt_octowords =
@ -664,46 +669,78 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
/*
 * Fill the three segments (control, mkey, data) of an XLT-update UMR WQE.
 *
 * @mr:         MR whose mkey the WQE targets
 * @wqe:        WQE to fill
 * @sg:         scatter entry describing the XLT buffer; passed to the control
 *              and data segment helpers
 * @flags:      XLT update flags, forwarded to the control segment helper
 * @page_shift: page shift forwarded to the mkey segment helper
 * @dd:         when true, the mkey segment's pd field is set to the data
 *              direct PD number (dev->ddr.pdn)
 */
static void
_mlx5r_umr_init_wqe(struct mlx5_ib_mr *mr, struct mlx5r_umr_wqe *wqe,
struct ib_sge *sg, unsigned int flags,
unsigned int page_shift, bool dd)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe->ctrl_seg, flags, sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe->mkey_seg, mr, page_shift);
/*
 * Done after the mkey segment helper ran, so this pd value is the one that
 * ends up in the WQE.
 * NOTE(review): presumably the helper fills in the MR's own PD - confirm.
 */
if (dd) /* Use the data direct internal kernel PD */
MLX5_SET(mkc, &wqe->mkey_seg, pd, dev->ddr.pdn);
mlx5r_umr_set_update_xlt_data_seg(&wqe->data_seg, sg);
}
static int
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd,
size_t start_block, size_t nblocks)
{
size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
size_t processed_blocks = 0;
struct ib_block_iter biter;
size_t cur_block_idx = 0;
struct mlx5_ksm *cur_ksm;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
size_t total_blocks;
size_t final_size;
void *curr_entry;
struct ib_sge sg;
void *entry;
u64 offset = 0;
u64 offset;
int err = 0;
entry = mlx5r_umr_create_xlt(dev, &sg,
ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
ent_size, flags);
total_blocks = ib_umem_num_dma_blocks(mr->umem, 1UL << mr->page_shift);
if (start_block > total_blocks)
return -EINVAL;
/* nblocks 0 means update all blocks starting from start_block */
if (nblocks)
total_blocks = nblocks;
entry = mlx5r_umr_create_xlt(dev, &sg, total_blocks, ent_size, flags);
if (!entry)
return -ENOMEM;
orig_sg_length = sg.length;
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
if (dd) {
/* Use the data direct internal kernel PD */
MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
cur_ksm = entry;
} else {
cur_mtt = entry;
}
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
_mlx5r_umr_init_wqe(mr, &wqe, &sg, flags, mr->page_shift, dd);
/* Set initial translation offset to start_block */
offset = (u64)start_block * ent_size;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
if (dd)
cur_ksm = entry;
else
cur_mtt = entry;
curr_entry = entry;
rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
if (cur_block_idx < start_block) {
cur_block_idx++;
continue;
}
if (nblocks && processed_blocks >= nblocks)
break;
if (curr_entry == entry + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
@ -724,7 +761,16 @@ _mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
if (dd) {
cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
if (mr->access_flags & IB_ACCESS_RELAXED_ORDERING &&
dev->ddr.mkey_ro_valid)
cur_ksm->key = cpu_to_be32(dev->ddr.mkey_ro);
else
cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
if (mr->umem->is_dmabuf &&
(flags & MLX5_IB_UPD_XLT_ZAP)) {
cur_ksm->va = 0;
cur_ksm->key = 0;
}
cur_ksm++;
curr_entry = cur_ksm;
} else {
@ -736,6 +782,8 @@ _mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
cur_mtt++;
curr_entry = cur_mtt;
}
processed_blocks++;
}
final_size = curr_entry - entry;
@ -752,13 +800,32 @@ err:
return err;
}
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
int mlx5r_umr_update_data_direct_ksm_pas_range(struct mlx5_ib_mr *mr,
unsigned int flags,
size_t start_block,
size_t nblocks)
{
/* No invalidation flow is expected */
if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
if (WARN_ON(!mr->umem->is_dmabuf) || ((flags & MLX5_IB_UPD_XLT_ZAP) &&
!(flags & MLX5_IB_UPD_XLT_KEEP_PGSZ)))
return -EINVAL;
return _mlx5r_umr_update_mr_pas(mr, flags, true);
return _mlx5r_umr_update_mr_pas(mr, flags, true, start_block, nblocks);
}
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr,
unsigned int flags)
{
return mlx5r_umr_update_data_direct_ksm_pas_range(mr, flags, 0, 0);
}
/*
 * Update a range of MTT translation entries of a non-ODP MR.
 *
 * @mr:          MR to update; must not be backed by an ODP umem
 * @flags:       XLT update flags
 * @start_block: index of the first block to update
 * @nblocks:     number of blocks to update; 0 means all blocks starting from
 *               @start_block
 *
 * Returns -EINVAL (after a WARN) for an ODP MR, otherwise the result of the
 * common update routine.
 */
int mlx5r_umr_update_mr_pas_range(struct mlx5_ib_mr *mr, unsigned int flags,
				  size_t start_block, size_t nblocks)
{
	/* MTT entries are used here, not the data direct KSM layout. */
	const bool data_direct = false;

	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, data_direct, start_block,
					nblocks);
}
/*
@ -768,10 +835,7 @@ int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int fla
*/
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
return _mlx5r_umr_update_mr_pas(mr, flags, false);
return mlx5r_umr_update_mr_pas_range(mr, flags, 0, 0);
}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
@ -864,3 +928,202 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
return err;
}
/*
 * Re-program the page size (log_page_size) of an existing memory key using
 * an inline UMR WQE. This is useful when the MR's physical layout stays the
 * same but the optimal page shift has changed (e.g. dmabuf after pages are
 * pinned and the HW can switch from 4K to huge-page alignment).
 *
 * The mkey mask comes from get_umr_update_translation_mask(), which covers
 * length, start address and page size (plus MLX5_MKEY_MASK_PAGE_SIZE_5 when
 * the device reports umr_log_entity_size_5). The start address and length
 * are therefore re-written from mr->ibmr together with the page size.
 *
 * @mr:         MR whose mkey is updated
 * @page_shift: new log2 page size to program
 * @dd:         true for a data direct MR; selects dev->ddr.pdn as the PD
 *              placed in the mkey segment instead of the MR's own PD
 *
 * On success mr->page_shift is set to @page_shift; on failure it is left
 * unchanged.
 *
 * Return: 0 on success, otherwise the error from mlx5r_umr_post_send_wait().
 */
int mlx5r_umr_update_mr_page_shift(struct mlx5_ib_mr *mr,
unsigned int page_shift,
bool dd)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
int err;
/*
 * Build the UMR wqe with the translation-update mask: length, start
 * address and page size are the fields selected for update.
 */
wqe.ctrl_seg.mkey_mask = get_umr_update_translation_mask(dev);
/* MR must be free while page size is modified */
wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE | MLX5_UMR_INLINE;
/*
 * Fill the mkey segment: the new page size, plus start address and length
 * taken unchanged from the MR.
 * NOTE(review): pd, qpn and mkey_7_0 are filled in as well although the
 * mask above does not select them - confirm how the device treats them.
 */
MLX5_SET(mkc, &wqe.mkey_seg, log_page_size, page_shift);
if (dd)
MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
else
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
MLX5_SET64(mkc, &wqe.mkey_seg, start_addr, mr->ibmr.iova);
MLX5_SET64(mkc, &wqe.mkey_seg, len, mr->ibmr.length);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
/* Track the new page shift in software only once the UMR succeeded. */
if (!err)
mr->page_shift = page_shift;
return err;
}
/*
 * Dispatch a ranged translation update to the MTT or the data direct KSM
 * implementation, depending on @dd.
 *
 * @start_block and @nblocks are forwarded unchanged; the return value is the
 * one produced by the selected implementation.
 */
static inline int
_mlx5r_dmabuf_umr_update_pas(struct mlx5_ib_mr *mr, unsigned int flags,
			     size_t start_block, size_t nblocks, bool dd)
{
	if (!dd)
		return mlx5r_umr_update_mr_pas_range(mr, flags, start_block,
						     nblocks);

	return mlx5r_umr_update_data_direct_ksm_pas_range(mr, flags,
							  start_block, nblocks);
}
/*
 * _mlx5r_umr_zap_mkey - make an mkey non-present ahead of a page size change
 * @mr:         MR to zap
 * @flags:      caller's XLT update flags; KEEP_PGSZ, ZAP and ATOMIC are
 *              OR-ed in locally
 * @page_shift: the new page shift the caller intends to switch to
 * @nblocks:    output; number of leading entries that were zapped, or 0 when
 *              all entries of the mkey were zapped instead
 * @dd:         true for a data direct (KSM) MR, false for an MTT MR
 *
 * Zaps (zeroes out) the first N translation entries, where N is derived from
 * the largest page shift usable for this MR: the larger of
 * order_base_2(MR length) and @page_shift, capped by the device's maximum
 * log entity size for the access mode. When N is non-zero the mkey's page
 * size is then switched to that largest page shift, so that the zapped
 * entries cover the whole MR. It is useful to update the page size of a
 * dmabuf MR on a page fault.
 *
 * mr->page_shift is restored to its value on entry if any step fails. On
 * success with a non-zero *@nblocks it is left at the large page shift.
 *
 * Return: 0 on success (the zapped count is reported through @nblocks),
 * negative error code on failure.
 */
static int _mlx5r_umr_zap_mkey(struct mlx5_ib_mr *mr,
unsigned int flags,
unsigned int page_shift,
size_t *nblocks,
bool dd)
{
unsigned int old_page_shift = mr->page_shift;
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
unsigned int max_page_shift;
size_t page_shift_nblocks;
unsigned int max_log_size;
int access_mode;
int err;
access_mode = dd ? MLX5_MKC_ACCESS_MODE_KSM : MLX5_MKC_ACCESS_MODE_MTT;
flags |= MLX5_IB_UPD_XLT_KEEP_PGSZ | MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC;
max_log_size = get_max_log_entity_size_cap(dev, access_mode);
max_page_shift = order_base_2(mr->ibmr.length);
max_page_shift = min(max(max_page_shift, page_shift), max_log_size);
/* Count blocks in units of max_page_shift, we will zap exactly this
 * many to make the whole MR non-present.
 * The count is rounded up to the KSM/MTT number-of-entries alignment
 * since it may be used as offset into the XLT later on.
 */
*nblocks = ib_umem_num_dma_blocks(mr->umem, 1UL << max_page_shift);
if (dd)
*nblocks = ALIGN(*nblocks, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
else
*nblocks = ALIGN(*nblocks, MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT);
page_shift_nblocks = ib_umem_num_dma_blocks(mr->umem,
1UL << page_shift);
/* If the (aligned) number of blocks at max possible page shift is greater
 * than or equal to the number of blocks at the new page size, we should
 * just go over the whole mkey entries. *nblocks == 0 encodes that case.
 */
if (*nblocks >= page_shift_nblocks)
*nblocks = 0;
/* Make the first nblocks entries non-present without changing
 * page size yet. In the partial case mr->page_shift is switched first,
 * as the update routine reads it; in the "whole mkey" case it keeps its
 * current value.
 */
if (*nblocks)
mr->page_shift = max_page_shift;
err = _mlx5r_dmabuf_umr_update_pas(mr, flags, 0, *nblocks, dd);
if (err) {
mr->page_shift = old_page_shift;
return err;
}
/* Change page size to the max page size now that the MR is completely
 * non-present. Skipped when all entries were zapped above.
 */
if (*nblocks) {
err = mlx5r_umr_update_mr_page_shift(mr, max_page_shift, dd);
if (err) {
mr->page_shift = old_page_shift;
return err;
}
}
return 0;
}
/**
 * mlx5r_umr_dmabuf_update_pgsz - Safely update DMABUF MR page size and its
 * entries accordingly
 * @mr: The memory region to update
 * @xlt_flags: Translation table update flags
 * @page_shift: The new (optimized) page shift to use
 *
 * This function updates the page size and mkey translation entries for a DMABUF
 * MR in a safe, multi-step process to avoid exposing partially updated
 * mappings.
 * The update is performed in 5 steps:
 * 1. Make the first X entries non-present, while X is calculated to be
 *    minimal according to a large page shift that can be used to cover the
 *    MR length.
 * 2. Update the page size to the large supported page size
 * 3. Load the remaining N-X entries according to the (optimized) page_shift
 * 4. Update the page size according to the (optimized) page_shift
 * 5. Load the first X entries with the correct translations
 *
 * Steps 1 and 2 are carried out by _mlx5r_umr_zap_mkey(). When it reports
 * X == 0 it has zapped every entry without changing the page size; step 3 is
 * then skipped and step 5 loads all entries.
 *
 * This ensures that at no point is the MR accessible with a partially updated
 * translation table, maintaining correctness and preventing access to stale or
 * inconsistent mappings.
 *
 * On failure mr->page_shift is restored to the value it had on entry.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int mlx5r_umr_dmabuf_update_pgsz(struct mlx5_ib_mr *mr, u32 xlt_flags,
unsigned int page_shift)
{
unsigned int old_page_shift = mr->page_shift;
size_t zapped_blocks;
size_t total_blocks;
int err;
/* Steps 1-2: zap and, for a partial zap, switch to the large page size. */
err = _mlx5r_umr_zap_mkey(mr, xlt_flags, page_shift, &zapped_blocks,
mr->data_direct);
if (err)
return err;
/* _mlx5r_umr_zap_mkey already enables the mkey */
xlt_flags &= ~MLX5_IB_UPD_XLT_ENABLE;
/* From here on block counts are in units of the new page size. */
mr->page_shift = page_shift;
total_blocks = ib_umem_num_dma_blocks(mr->umem, 1UL << mr->page_shift);
/* Step 3: only needed when a strict prefix of the entries was zapped. */
if (zapped_blocks && zapped_blocks < total_blocks) {
/* Update PAS according to the new page size but don't update
 * the page size in the mkey yet.
 */
err = _mlx5r_dmabuf_umr_update_pas(
mr,
xlt_flags | MLX5_IB_UPD_XLT_KEEP_PGSZ,
zapped_blocks,
total_blocks - zapped_blocks,
mr->data_direct);
if (err)
goto err;
}
/* Step 4: program the new page size into the mkey. */
err = mlx5r_umr_update_mr_page_shift(mr, mr->page_shift,
mr->data_direct);
if (err)
goto err;
/* Step 5: load the first zapped_blocks entries; 0 means all entries. */
err = _mlx5r_dmabuf_umr_update_pas(mr, xlt_flags, 0, zapped_blocks,
mr->data_direct);
if (err)
goto err;
return 0;
err:
mr->page_shift = old_page_shift;
return err;
}

View File

@ -94,9 +94,20 @@ struct mlx5r_umr_wqe {
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_data_direct_ksm_pas_range(struct mlx5_ib_mr *mr,
unsigned int flags,
size_t start_block,
size_t nblocks);
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_mr_pas_range(struct mlx5_ib_mr *mr, unsigned int flags,
size_t start_block, size_t nblocks);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
int mlx5r_umr_update_mr_page_shift(struct mlx5_ib_mr *mr,
unsigned int page_shift,
bool dd);
int mlx5r_umr_dmabuf_update_pgsz(struct mlx5_ib_mr *mr, u32 xlt_flags,
unsigned int page_shift);
#endif /* _MLX5_IB_UMR_H */

View File

@ -350,7 +350,7 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
uresp.qp_tab_size = vdev->dsr->caps.max_qp;
ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (ret) {
pvrdma_uar_free(vdev, &context->uar);
/* pvrdma_dealloc_ucontext() also frees the UAR */
pvrdma_dealloc_ucontext(&context->ibucontext);
return -EFAULT;
}

View File

@ -158,11 +158,10 @@ struct iavf_vlan {
enum iavf_vlan_state_t {
IAVF_VLAN_INVALID,
IAVF_VLAN_ADD, /* filter needs to be added */
IAVF_VLAN_IS_NEW, /* filter is new, wait for PF answer */
IAVF_VLAN_ACTIVE, /* filter is accepted by PF */
IAVF_VLAN_DISABLE, /* filter needs to be deleted by PF, then marked INACTIVE */
IAVF_VLAN_INACTIVE, /* filter is inactive, we are in IFF_DOWN */
IAVF_VLAN_REMOVE, /* filter needs to be removed from list */
IAVF_VLAN_ADDING, /* ADD sent to PF, waiting for response */
IAVF_VLAN_ACTIVE, /* PF confirmed, filter is in HW */
IAVF_VLAN_REMOVE, /* filter queued for DEL from PF */
IAVF_VLAN_REMOVING, /* DEL sent to PF, waiting for response */
};
struct iavf_vlan_filter {

View File

@ -781,10 +781,13 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter,
adapter->num_vlan_filters++;
iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER);
} else if (f->state == IAVF_VLAN_REMOVE) {
/* IAVF_VLAN_REMOVE means that VLAN wasn't yet removed.
* We can safely only change the state here.
*/
/* DEL not yet sent to PF, cancel it */
f->state = IAVF_VLAN_ACTIVE;
} else if (f->state == IAVF_VLAN_REMOVING) {
/* DEL already sent to PF, re-add after completion */
f->state = IAVF_VLAN_ADD;
iavf_schedule_aq_request(adapter,
IAVF_FLAG_AQ_ADD_VLAN_FILTER);
}
clearout:
@ -812,37 +815,19 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan)
list_del(&f->list);
kfree(f);
adapter->num_vlan_filters--;
} else {
} else if (f->state != IAVF_VLAN_REMOVING) {
f->state = IAVF_VLAN_REMOVE;
iavf_schedule_aq_request(adapter,
IAVF_FLAG_AQ_DEL_VLAN_FILTER);
}
/* If REMOVING, DEL is already sent to PF; completion
* handler will free the filter when PF confirms.
*/
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
/**
* iavf_restore_filters
* @adapter: board private structure
*
* Restore existing non MAC filters when VF netdev comes back up
**/
static void iavf_restore_filters(struct iavf_adapter *adapter)
{
struct iavf_vlan_filter *f;
/* re-add all VLAN filters */
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_INACTIVE)
f->state = IAVF_VLAN_ADD;
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
}
/**
* iavf_get_num_vlans_added - get number of VLANs added
@ -1261,13 +1246,12 @@ static void iavf_up_complete(struct iavf_adapter *adapter)
}
/**
* iavf_clear_mac_vlan_filters - Remove mac and vlan filters not sent to PF
* yet and mark other to be removed.
* iavf_clear_mac_filters - Remove MAC filters not sent to PF yet and mark
* others to be removed.
* @adapter: board private structure
**/
static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter)
static void iavf_clear_mac_filters(struct iavf_adapter *adapter)
{
struct iavf_vlan_filter *vlf, *vlftmp;
struct iavf_mac_filter *f, *ftmp;
spin_lock_bh(&adapter->mac_vlan_list_lock);
@ -1286,11 +1270,6 @@ static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter)
}
}
/* disable all VLAN filters */
list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
list)
vlf->state = IAVF_VLAN_DISABLE;
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
@ -1386,7 +1365,7 @@ void iavf_down(struct iavf_adapter *adapter)
iavf_napi_disable_all(adapter);
iavf_irq_disable(adapter);
iavf_clear_mac_vlan_filters(adapter);
iavf_clear_mac_filters(adapter);
iavf_clear_cloud_filters(adapter);
iavf_clear_fdir_filters(adapter);
iavf_clear_adv_rss_conf(adapter);
@ -1403,8 +1382,6 @@ void iavf_down(struct iavf_adapter *adapter)
*/
if (!list_empty(&adapter->mac_filter_list))
adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
if (!list_empty(&adapter->vlan_filter_list))
adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
if (!list_empty(&adapter->cloud_filter_list))
adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
if (!list_empty(&adapter->fdir_list_head))
@ -4559,8 +4536,6 @@ static int iavf_open(struct net_device *netdev)
spin_unlock_bh(&adapter->mac_vlan_list_lock);
/* Restore filters that were removed with IFF_DOWN */
iavf_restore_filters(adapter);
iavf_restore_fdir_filters(adapter);
iavf_configure(adapter);

View File

@ -746,7 +746,7 @@ static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_IS_NEW) {
if (f->state == IAVF_VLAN_ADDING) {
list_del(&f->list);
kfree(f);
adapter->num_vlan_filters--;
@ -812,7 +812,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
if (f->state == IAVF_VLAN_ADD) {
vvfl->vlan_id[i] = f->vlan.vid;
i++;
f->state = IAVF_VLAN_IS_NEW;
f->state = IAVF_VLAN_ADDING;
if (i == count)
break;
}
@ -874,7 +874,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
vlan->tpid = f->vlan.tpid;
i++;
f->state = IAVF_VLAN_IS_NEW;
f->state = IAVF_VLAN_ADDING;
}
}
@ -911,22 +911,12 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
/* since VLAN capabilities are not allowed, we dont want to send
* a VLAN delete request because it will most likely fail and
* create unnecessary errors/noise, so just free the VLAN
* filters marked for removal to enable bailing out before
* sending a virtchnl message
*/
if (f->state == IAVF_VLAN_REMOVE &&
!VLAN_FILTERING_ALLOWED(adapter)) {
list_del(&f->list);
kfree(f);
adapter->num_vlan_filters--;
} else if (f->state == IAVF_VLAN_DISABLE &&
!VLAN_FILTERING_ALLOWED(adapter)) {
f->state = IAVF_VLAN_INACTIVE;
} else if (f->state == IAVF_VLAN_REMOVE ||
f->state == IAVF_VLAN_DISABLE) {
} else if (f->state == IAVF_VLAN_REMOVE) {
count++;
}
}
@ -958,18 +948,10 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
vvfl->vsi_id = adapter->vsi_res->vsi_id;
vvfl->num_elements = count;
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_DISABLE) {
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_REMOVE) {
vvfl->vlan_id[i] = f->vlan.vid;
f->state = IAVF_VLAN_INACTIVE;
i++;
if (i == count)
break;
} else if (f->state == IAVF_VLAN_REMOVE) {
vvfl->vlan_id[i] = f->vlan.vid;
list_del(&f->list);
kfree(f);
adapter->num_vlan_filters--;
f->state = IAVF_VLAN_REMOVING;
i++;
if (i == count)
break;
@ -1006,9 +988,8 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
vvfl_v2->vport_id = adapter->vsi_res->vsi_id;
vvfl_v2->num_elements = count;
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_DISABLE ||
f->state == IAVF_VLAN_REMOVE) {
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_REMOVE) {
struct virtchnl_vlan_supported_caps *filtering_support =
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
@ -1022,13 +1003,7 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
vlan->tci = f->vlan.vid;
vlan->tpid = f->vlan.tpid;
if (f->state == IAVF_VLAN_DISABLE) {
f->state = IAVF_VLAN_INACTIVE;
} else {
list_del(&f->list);
kfree(f);
adapter->num_vlan_filters--;
}
f->state = IAVF_VLAN_REMOVING;
i++;
if (i == count)
break;
@ -2391,10 +2366,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
wake_up(&adapter->vc_waitqueue);
break;
case VIRTCHNL_OP_DEL_VLAN:
dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
iavf_stat_str(&adapter->hw, v_retval));
break;
case VIRTCHNL_OP_DEL_ETH_ADDR:
dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
iavf_stat_str(&adapter->hw, v_retval));
@ -2906,17 +2877,42 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
spin_unlock_bh(&adapter->adv_rss_lock);
}
break;
case VIRTCHNL_OP_ADD_VLAN:
case VIRTCHNL_OP_ADD_VLAN_V2: {
struct iavf_vlan_filter *f;
if (v_retval)
break;
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry(f, &adapter->vlan_filter_list, list) {
if (f->state == IAVF_VLAN_IS_NEW)
if (f->state == IAVF_VLAN_ADDING)
f->state = IAVF_VLAN_ACTIVE;
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
break;
case VIRTCHNL_OP_DEL_VLAN:
case VIRTCHNL_OP_DEL_VLAN_V2: {
struct iavf_vlan_filter *f, *ftmp;
spin_lock_bh(&adapter->mac_vlan_list_lock);
list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list,
list) {
if (f->state == IAVF_VLAN_REMOVING) {
if (v_retval) {
/* PF rejected DEL, keep filter */
f->state = IAVF_VLAN_ACTIVE;
} else {
list_del(&f->list);
kfree(f);
adapter->num_vlan_filters--;
}
}
}
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
/* PF enabled vlan strip on this VF.
* Update netdev->features if needed to be in sync with ethtool.

View File

@ -8,7 +8,6 @@ config MLX5_CORE
depends on PCI
select AUXILIARY_BUS
select NET_DEVLINK
depends on VXLAN || !VXLAN
depends on MLXFW || !MLXFW
depends on PTP_1588_CLOCK_OPTIONAL
depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE

View File

@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o
fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o lib/nv_param.o
#
# Netdev basic
@ -29,7 +29,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \
lib/crypto.o lib/sd.o
lib/crypto.o lib/sd.o en/pcie_cong_event.o
#
# Netdev extra
@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
ecpf.o rdma.o esw/legacy.o \
ecpf.o rdma.o esw/legacy.o esw/adj_vport.o \
esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
@ -85,7 +85,9 @@ mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge
mlx5_core-$(CONFIG_HWMON) += hwmon.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
ifneq ($(CONFIG_VXLAN),)
mlx5_core-y += lib/vxlan.o
endif
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
@ -154,7 +156,8 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \
steering/hws/vport.o \
steering/hws/bwc_complex.o \
steering/hws/fs_hws_pools.o \
steering/hws/fs_hws.o
steering/hws/fs_hws.o \
steering/hws/action_ste_pool.o
#
# SF device
@ -166,5 +169,10 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_
#
mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
#
# TPH support
#
mlx5_core-$(CONFIG_PCIE_TPH) += lib/st.o
obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o
mlx5_dpll-y := dpll.o

View File

@ -181,6 +181,7 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent)
/*
 * Release command slot @idx: drop the entry pointer stored for the slot and
 * set its bit in the slot bitmask again.
 *
 * The caller must hold cmd->alloc_lock; this is checked with lockdep.
 */
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
lockdep_assert_held(&cmd->alloc_lock);
cmd->ent_arr[idx] = NULL;
/* A set bit marks the slot as available. */
set_bit(idx, &cmd->vars.bitmask);
}
@ -294,6 +295,10 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
return;
}
cond_resched();
if (mlx5_cmd_is_down(dev)) {
ent->ret = -ENXIO;
return;
}
} while (time_before(jiffies, poll_end));
ent->ret = -ETIMEDOUT;
@ -927,8 +932,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
static void cb_timeout_handler(struct work_struct *work)
{
struct delayed_work *dwork = container_of(work, struct delayed_work,
work);
struct delayed_work *dwork = to_delayed_work(work);
struct mlx5_cmd_work_ent *ent = container_of(dwork,
struct mlx5_cmd_work_ent,
cb_timeout_work);
@ -1071,7 +1075,7 @@ static void cmd_work_handler(struct work_struct *work)
poll_timeout(ent);
/* make sure we read the descriptor after ownership is SW */
rmb();
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, !!ent->ret);
}
}
@ -1197,6 +1201,44 @@ out_err:
return err;
}
/* Report whether every command slot is stalled.
 *
 * A slot is considered stalled when it is in use (its bit in the allocation
 * bitmask is clear) and its entry has MLX5_CMD_ENT_STATE_TIMEDOUT set, i.e.
 * a recent command timed out on it and FW has not completed it since.
 *
 * Returns true only if no slot is free and every in-use slot is stalled;
 * false as soon as one free or non-timed-out slot is seen.
 *
 * Callers use this as a protection mechanism: when FW answers on no slot at
 * all there is no point in blocking the kernel waiting for one.
 * The scan is done with cmd->alloc_lock held (irqsave).
 */
static bool mlx5_cmd_all_stalled(struct mlx5_core_dev *dev)
{
	struct mlx5_cmd *cmd = &dev->cmd;
	unsigned long irq_flags;
	bool stalled;
	int slot;

	spin_lock_irqsave(&cmd->alloc_lock, irq_flags);

	/* any set bit is a free slot, so not everything can be stalled */
	stalled = !bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds);
	if (stalled) {
		for_each_clear_bit(slot, &cmd->vars.bitmask,
				   cmd->vars.max_reg_cmds) {
			struct mlx5_cmd_work_ent *ent = cmd->ent_arr[slot];

			if (test_bit(MLX5_CMD_ENT_STATE_TIMEDOUT, &ent->state))
				continue;

			/* one in-flight command that has not timed out */
			stalled = false;
			break;
		}
	}

	spin_unlock_irqrestore(&cmd->alloc_lock, irq_flags);

	return stalled;
}
/* Notes:
* 1. Callback functions may not sleep
* 2. page queue commands do not support asynchronous completion
@ -1227,6 +1269,15 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;
if (!page_queue && mlx5_cmd_all_stalled(dev)) {
mlx5_core_err_rl(dev,
"All CMD slots are stalled, aborting command\n");
/* there's no reason to wait and block the whole kernel if FW
* isn't currently responding to all slots, fail immediately
*/
return -EAGAIN;
}
ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
callback, context, page_queue);
if (IS_ERR(ent))
@ -1697,6 +1748,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
if (test_bit(i, &vector)) {
ent = cmd->ent_arr[i];
if (forced && ent->ret == -ETIMEDOUT)
set_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
&ent->state);
else if (!forced) /* real FW completion */
clear_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
&ent->state);
/* if we already completed the command, ignore it */
if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
&ent->state)) {
@ -1948,8 +2006,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
pages_queue, token, force_polling);
if (callback)
return err;
if (callback && !err)
return 0;
if (err > 0) /* Failed in FW, command didn't execute */
err = deliv_status_to_err(err);

View File

@ -66,8 +66,8 @@ void mlx5_cq_tasklet_cb(struct tasklet_struct *t)
tasklet_schedule(&ctx->task);
}
static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
struct mlx5_eqe *eqe)
void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
struct mlx5_eqe *eqe)
{
unsigned long flags;
struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
@ -95,7 +95,15 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
if (schedule_tasklet)
tasklet_schedule(&tasklet_ctx->task);
}
EXPORT_SYMBOL(mlx5_add_cq_to_tasklet);
/* Placeholder CQ completion callback: it only logs an error that names the
 * CQ number. The eqe argument is not used.
 */
static void mlx5_core_cq_dummy_cb(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
	struct mlx5_core_dev *mdev = cq->eq->core.dev;

	mlx5_core_err(mdev, "CQ default completion callback, CQ #%u\n",
		      cq->cqn);
}
#define MLX5_CQ_INIT_CMD_SN cpu_to_be32(2 << 28)
/* Callers must verify outbox status in case of err */
int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen, u32 *out, int outlen)
@ -121,10 +129,19 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->arm_sn = 0;
cq->eq = eq;
cq->uid = MLX5_GET(create_cq_in, in, uid);
/* Kernel CQs must set the arm_db address prior to calling
* this function, allowing for the proper value to be
* initialized. User CQs are responsible for their own
* initialization since they do not use the arm_db field.
*/
if (cq->arm_db)
*cq->arm_db = MLX5_CQ_INIT_CMD_SN;
refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
if (!cq->comp)
cq->comp = mlx5_add_cq_to_tasklet;
cq->comp = mlx5_core_cq_dummy_cb;
/* assuming CQ will be deleted before the EQ */
cq->tasklet_ctx.priv = &eq->tasklet_ctx;
INIT_LIST_HEAD(&cq->tasklet_ctx.list);
@ -145,7 +162,6 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
cq->cqn);
cq->uar = dev->priv.uar;
cq->irqn = eq->core.irqn;
return 0;

View File

@ -613,3 +613,19 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
cq->dbg = NULL;
}
}
static int vhca_id_show(struct seq_file *file, void *priv)
{
struct mlx5_core_dev *dev = file->private;
seq_printf(file, "0x%x\n", MLX5_CAP_GEN(dev, vhca_id));
return 0;
}
DEFINE_SHOW_ATTRIBUTE(vhca_id);
/* Create the owner-read-only (0400) "vhca_id" file under the device's
 * debugfs root, backed by vhca_id_fops with the device as private data.
 */
void mlx5_vhca_debugfs_init(struct mlx5_core_dev *dev)
{
	struct dentry *root = dev->priv.dbg.dbg_root;

	debugfs_create_file("vhca_id", 0400, root, dev, &vhca_id_fops);
}

View File

@ -564,10 +564,28 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev)
{
u64 fsystem_guid, psystem_guid;
u8 fsystem_guid[MLX5_SW_IMAGE_GUID_MAX_BYTES];
u8 psystem_guid[MLX5_SW_IMAGE_GUID_MAX_BYTES];
u8 flen;
u8 plen;
fsystem_guid = mlx5_query_nic_system_image_guid(dev);
psystem_guid = mlx5_query_nic_system_image_guid(peer_dev);
mlx5_query_nic_sw_system_image_guid(dev, fsystem_guid, &flen);
mlx5_query_nic_sw_system_image_guid(peer_dev, psystem_guid, &plen);
return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid);
return plen && flen && flen == plen &&
!memcmp(fsystem_guid, psystem_guid, flen);
}
void mlx5_core_reps_aux_devs_remove(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
if (priv->adev[MLX5_INTERFACE_PROTOCOL_ETH])
device_lock_assert(&priv->adev[MLX5_INTERFACE_PROTOCOL_ETH]->adev.dev);
else
mlx5_core_err(dev, "ETH driver already removed\n");
if (priv->adev[MLX5_INTERFACE_PROTOCOL_IB_REP])
del_adev(&priv->adev[MLX5_INTERFACE_PROTOCOL_IB_REP]->adev);
if (priv->adev[MLX5_INTERFACE_PROTOCOL_ETH_REP])
del_adev(&priv->adev[MLX5_INTERFACE_PROTOCOL_ETH_REP]->adev);
}

View File

@ -10,6 +10,7 @@
#include "esw/qos.h"
#include "sf/dev/dev.h"
#include "sf/sf.h"
#include "lib/nv_param.h"
static int mlx5_devlink_flash_update(struct devlink *devlink,
struct devlink_flash_update_params *params,
@ -35,6 +36,55 @@ static u16 mlx5_fw_ver_subminor(u32 version)
return version & 0xffff;
}
static int mlx5_devlink_serial_numbers_put(struct mlx5_core_dev *dev,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
{
struct pci_dev *pdev = dev->pdev;
unsigned int vpd_size, kw_len;
char *str, *end;
u8 *vpd_data;
int err = 0;
int start;
vpd_data = pci_vpd_alloc(pdev, &vpd_size);
if (IS_ERR(vpd_data))
return 0;
start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
if (start >= 0) {
str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
if (!str) {
err = -ENOMEM;
goto end;
}
end = strchrnul(str, ' ');
*end = '\0';
err = devlink_info_board_serial_number_put(req, str);
kfree(str);
if (err)
goto end;
}
start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len);
if (start >= 0) {
str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
if (!str) {
err = -ENOMEM;
goto end;
}
err = devlink_info_serial_number_put(req, str);
kfree(str);
if (err)
goto end;
}
end:
kfree(vpd_data);
return err;
}
#define DEVLINK_FW_STRING_LEN 32
static int
@ -49,6 +99,10 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
if (!mlx5_core_is_pf(dev))
return 0;
err = mlx5_devlink_serial_numbers_put(dev, req, extack);
if (err)
return err;
err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
if (err)
return err;
@ -107,7 +161,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli
if (err)
return err;
mlx5_unload_one_devl_locked(dev, true);
mlx5_sync_reset_unload_flow(dev, true);
err = mlx5_health_wait_pci_up(dev);
if (err)
NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");
@ -143,6 +197,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
struct pci_dev *pdev = dev->pdev;
int ret = 0;
if (mlx5_fw_reset_in_progress(dev)) {
NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset");
return -EBUSY;
}
if (mlx5_dev_is_lightweight(dev)) {
if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
return -EOPNOTSUPP;
@ -150,11 +209,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
return 0;
}
if (mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
return -EOPNOTSUPP;
}
if (mlx5_core_is_mp_slave(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported for multi port slave");
return -EOPNOTSUPP;
@ -323,6 +377,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
.rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
.rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
.rate_leaf_tc_bw_set = mlx5_esw_devlink_rate_leaf_tc_bw_set,
.rate_node_tc_bw_set = mlx5_esw_devlink_rate_node_tc_bw_set,
.rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
.rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
.rate_node_new = mlx5_esw_devlink_rate_node_new,
@ -479,6 +535,25 @@ mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id,
return 0;
}
/* devlink validate callback for the generic num_doorbells parameter.
 *
 * Accepts any value up to mlx5e_get_max_num_channels() for this device;
 * a larger value is rejected with -EINVAL and an extack message.
 */
static int mlx5_devlink_num_doorbells_validate(struct devlink *devlink, u32 id,
					       union devlink_param_value val,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *mdev = devlink_priv(devlink);
	u32 limit = mlx5e_get_max_num_channels(mdev);
	u32 requested = val.vu32;

	if (requested <= limit)
		return 0;

	NL_SET_ERR_MSG_FMT_MOD(extack,
			       "Requested num_doorbells (%u) exceeds max number of channels (%u)",
			       requested, limit);
	return -EINVAL;
}
static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
@ -558,6 +633,9 @@ static const struct devlink_param mlx5_devlink_eth_params[] = {
"hairpin_queue_size", DEVLINK_PARAM_TYPE_U32,
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
mlx5_devlink_hairpin_queue_size_validate),
DEVLINK_PARAM_GENERIC(NUM_DOORBELLS,
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
mlx5_devlink_num_doorbells_validate),
};
static int mlx5_devlink_eth_params_register(struct devlink *devlink)
@ -581,6 +659,10 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink)
mlx5_devlink_hairpin_params_init_values(devlink);
value.vu32 = MLX5_DEFAULT_NUM_DOORBELLS;
devl_param_driverinit_value_set(devlink,
DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS,
value);
return 0;
}
@ -595,6 +677,105 @@ static void mlx5_devlink_eth_params_unregister(struct devlink *devlink)
ARRAY_SIZE(mlx5_devlink_eth_params));
}
/* Upper bound and driverinit defaults for the PCIe congestion threshold
 * devlink parameters: the validate callback rejects values above MAX, and
 * DEF_LOW / DEF_HIGH seed the low / high thresholds of both directions.
 * NOTE(review): the unit of these values is not stated here - confirm.
 */
#define MLX5_PCIE_CONG_THRESH_MAX 10000
#define MLX5_PCIE_CONG_THRESH_DEF_LOW 7500
#define MLX5_PCIE_CONG_THRESH_DEF_HIGH 9000
/* devlink validate callback shared by the four PCIe congestion threshold
 * parameters.
 *
 * The value is range-checked first: anything above
 * MLX5_PCIE_CONG_THRESH_MAX fails with -EINVAL and an extack message.
 * After that, an id that is not one of the four threshold parameters
 * yields -EOPNOTSUPP. Returns 0 otherwise.
 */
static int
mlx5_devlink_pcie_cong_thresh_validate(struct devlink *devl, u32 id,
				       union devlink_param_value val,
				       struct netlink_ext_ack *extack)
{
	u16 thresh = val.vu16;

	if (thresh > MLX5_PCIE_CONG_THRESH_MAX) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Value %u > max supported (%u)",
				       thresh, MLX5_PCIE_CONG_THRESH_MAX);
		return -EINVAL;
	}

	if (id == MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW ||
	    id == MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH ||
	    id == MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW ||
	    id == MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH)
		return 0;

	return -EOPNOTSUPP;
}
static void mlx5_devlink_pcie_cong_init_values(struct devlink *devlink)
{
union devlink_param_value value;
u32 id;
value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_LOW;
id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW;
devl_param_driverinit_value_set(devlink, id, value);
value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_HIGH;
id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH;
devl_param_driverinit_value_set(devlink, id, value);
value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_LOW;
id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW;
devl_param_driverinit_value_set(devlink, id, value);
value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_HIGH;
id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH;
devl_param_driverinit_value_set(devlink, id, value);
}
/* Driver-specific devlink parameters for the PCIe congestion thresholds
 * (inbound/outbound, low/high). All four are u16, are exposed in driverinit
 * configuration mode only, pass NULL for the two callback slots before the
 * validator, and share mlx5_devlink_pcie_cong_thresh_validate.
 */
static const struct devlink_param mlx5_devlink_pcie_cong_params[] = {
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW,
"pcie_cong_inbound_low", DEVLINK_PARAM_TYPE_U16,
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
mlx5_devlink_pcie_cong_thresh_validate),
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
"pcie_cong_inbound_high", DEVLINK_PARAM_TYPE_U16,
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
mlx5_devlink_pcie_cong_thresh_validate),
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
"pcie_cong_outbound_low", DEVLINK_PARAM_TYPE_U16,
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
mlx5_devlink_pcie_cong_thresh_validate),
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
"pcie_cong_outbound_high", DEVLINK_PARAM_TYPE_U16,
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
mlx5_devlink_pcie_cong_thresh_validate),
};
/* Register the PCIe congestion threshold parameters and seed their default
 * driverinit values.
 *
 * Does nothing and returns 0 when the device does not support PCIe
 * congestion events. Returns the devl_params_register() error on failure.
 */
static int mlx5_devlink_pcie_cong_params_register(struct devlink *devlink)
{
	struct mlx5_core_dev *mdev = devlink_priv(devlink);
	int ret;

	if (!mlx5_pcie_cong_event_supported(mdev))
		return 0;

	ret = devl_params_register(devlink, mlx5_devlink_pcie_cong_params,
				   ARRAY_SIZE(mlx5_devlink_pcie_cong_params));
	if (!ret)
		mlx5_devlink_pcie_cong_init_values(devlink);

	return ret;
}
/* Unregister the PCIe congestion threshold parameters. Mirrors the register
 * path: nothing is done when the device does not support PCIe congestion
 * events.
 */
static void mlx5_devlink_pcie_cong_params_unregister(struct devlink *devlink)
{
	struct mlx5_core_dev *mdev = devlink_priv(devlink);

	if (mlx5_pcie_cong_event_supported(mdev))
		devl_params_unregister(devlink, mlx5_devlink_pcie_cong_params,
				       ARRAY_SIZE(mlx5_devlink_pcie_cong_params));
}
static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack)
@ -840,8 +1021,20 @@ int mlx5_devlink_params_register(struct devlink *devlink)
if (err)
goto max_uc_list_err;
err = mlx5_devlink_pcie_cong_params_register(devlink);
if (err)
goto pcie_cong_err;
err = mlx5_nv_param_register_dl_params(devlink);
if (err)
goto nv_param_err;
return 0;
nv_param_err:
mlx5_devlink_pcie_cong_params_unregister(devlink);
pcie_cong_err:
mlx5_devlink_max_uc_list_params_unregister(devlink);
max_uc_list_err:
mlx5_devlink_auxdev_params_unregister(devlink);
auxdev_reg_err:
@ -852,6 +1045,8 @@ auxdev_reg_err:
void mlx5_devlink_params_unregister(struct devlink *devlink)
{
mlx5_nv_param_unregister_dl_params(devlink);
mlx5_devlink_pcie_cong_params_unregister(devlink);
mlx5_devlink_max_uc_list_params_unregister(devlink);
mlx5_devlink_auxdev_params_unregister(devlink);
devl_params_unregister(devlink, mlx5_devlink_params,

View File

@ -22,6 +22,11 @@ enum mlx5_devlink_param_id {
MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW,
MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE
};
struct mlx5_trap_ctx {

View File

@ -33,6 +33,7 @@
#include "lib/eq.h"
#include "fw_tracer.h"
#include "fw_tracer_tracepoint.h"
#include <linux/ctype.h>
static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
{
@ -54,7 +55,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
return -ENOTSUPP;
return -EOPNOTSUPP;
}
tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
@ -358,6 +359,47 @@ static const char *VAL_PARM = "%llx";
static const char *REPLACE_64_VAL_PARM = "%x%x";
static const char *PARAM_CHAR = "%";
/* Decide whether the text following a '%' is an accepted conversion.
 *
 * @str points at the character right after the '%'. Any run of digits,
 * '#', '.' and 'l' (width, precision, the '#' flag and the long modifier)
 * is skipped; the character that follows must be one of the accepted
 * types: x, X, d, i, u, c, or '%' (escaped percent).
 *
 * Returns true for an accepted specifier, false for anything else,
 * including an empty string.
 */
static bool mlx5_is_valid_spec(const char *str)
{
	const char *p;

	for (p = str; isdigit(*p) || *p == '#' || *p == '.' || *p == 'l'; p++)
		;

	switch (*p) {
	case 'x':
	case 'X':
	case 'd':
	case 'i':
	case 'u':
	case 'c':
	case '%':
		return true;
	default:
		return false;
	}
}
/* Check every '%' conversion in a format string.
 *
 * Walks all occurrences of PARAM_CHAR in @str and passes the text after
 * each one to mlx5_is_valid_spec(). An escaped percent ("%%") is consumed
 * as a pair so its second '%' is not examined on its own.
 *
 * Returns false for a NULL string or on the first rejected specifier,
 * true otherwise (including when the string has no '%' at all).
 */
static bool mlx5_tracer_validate_params(const char *str)
{
	const char *pct;

	if (!str)
		return false;

	pct = strstr(str, PARAM_CHAR);
	while (pct) {
		const char *next = pct + 1;

		if (!mlx5_is_valid_spec(next))
			return false;

		/* "%%": resume the search past both characters */
		if (*next == '%')
			next++;

		pct = strstr(next, PARAM_CHAR);
	}

	return true;
}
static int mlx5_tracer_message_hash(u32 message_id)
{
return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
@ -419,6 +461,10 @@ static int mlx5_tracer_get_num_of_params(char *str)
char *substr, *pstr = str;
int num_of_params = 0;
/* Validate that all parameters are valid before processing */
if (!mlx5_tracer_validate_params(str))
return -EINVAL;
/* replace %llx with %x%x */
substr = strstr(pstr, VAL_PARM);
while (substr) {
@ -427,11 +473,15 @@ static int mlx5_tracer_get_num_of_params(char *str)
substr = strstr(pstr, VAL_PARM);
}
/* count all the % characters */
/* count all the % characters, but skip %% (escaped percent) */
substr = strstr(str, PARAM_CHAR);
while (substr) {
num_of_params += 1;
str = substr + 1;
if (*(substr + 1) != '%') {
num_of_params += 1;
str = substr + 1;
} else {
str = substr + 2;
}
substr = strstr(str, PARAM_CHAR);
}
@ -570,14 +620,17 @@ void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
{
char tmp[512];
snprintf(tmp, sizeof(tmp), str_frmt->string,
str_frmt->params[0],
str_frmt->params[1],
str_frmt->params[2],
str_frmt->params[3],
str_frmt->params[4],
str_frmt->params[5],
str_frmt->params[6]);
if (str_frmt->invalid_string)
snprintf(tmp, sizeof(tmp), "BAD_FORMAT: %s", str_frmt->string);
else
snprintf(tmp, sizeof(tmp), str_frmt->string,
str_frmt->params[0],
str_frmt->params[1],
str_frmt->params[2],
str_frmt->params[3],
str_frmt->params[4],
str_frmt->params[5],
str_frmt->params[6]);
trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
str_frmt->event_id, tmp);
@ -609,6 +662,13 @@ static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer,
return 0;
}
/* Flag a trace string as having an invalid format and queue it on the
 * tracer's ready_strings_list. The flag is set before the entry is added
 * to the list.
 */
static void mlx5_tracer_handle_bad_format_string(struct mlx5_fw_tracer *tracer,
struct tracer_string_format *cur_string)
{
cur_string->invalid_string = true;
list_add_tail(&cur_string->list, &tracer->ready_strings_list);
}
static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
struct tracer_event *tracer_event)
{
@ -619,12 +679,18 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
if (!cur_string)
return mlx5_tracer_handle_raw_string(tracer, tracer_event);
cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
cur_string->last_param_num = 0;
cur_string->event_id = tracer_event->event_id;
cur_string->tmsn = tracer_event->string_event.tmsn;
cur_string->timestamp = tracer_event->string_event.timestamp;
cur_string->lost = tracer_event->lost_event;
cur_string->last_param_num = 0;
cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
if (cur_string->num_of_params < 0) {
pr_debug("%s Invalid format string parameters\n",
__func__);
mlx5_tracer_handle_bad_format_string(tracer, cur_string);
return 0;
}
if (cur_string->num_of_params == 0) /* trace with no params */
list_add_tail(&cur_string->list, &tracer->ready_strings_list);
} else {
@ -634,6 +700,11 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
__func__, tracer_event->string_event.tmsn);
return mlx5_tracer_handle_raw_string(tracer, tracer_event);
}
if (cur_string->num_of_params < 0) {
pr_debug("%s string parameter of invalid string, dumping\n",
__func__);
return 0;
}
cur_string->last_param_num += 1;
if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
pr_debug("%s Number of params exceeds the max (%d)\n",

View File

@ -125,6 +125,7 @@ struct tracer_string_format {
struct list_head list;
u32 timestamp;
bool lost;
bool invalid_string;
};
enum mlx5_fw_tracer_ownership_state {

View File

@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
#include <linux/mlx5/vport.h>
#include "reporter_vnic.h"
#include "en_stats.h"
#include "devlink.h"
@ -105,6 +107,15 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
}
if (MLX5_CAP_GEN(dev, nic_cap_reg))
mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport);
if (MLX5_CAP_GEN(dev, vnic_env_cnt_bar_uar_access))
devlink_fmsg_u32_pair_put(fmsg, "bar_uar_access",
VNIC_ENV_GET(&vnic, bar_uar_access));
if (MLX5_CAP_GEN(dev, vnic_env_cnt_odp_page_fault)) {
devlink_fmsg_u32_pair_put(fmsg, "odp_local_triggered_page_fault",
VNIC_ENV_GET(&vnic, odp_local_triggered_page_fault));
devlink_fmsg_u32_pair_put(fmsg, "odp_remote_triggered_page_fault",
VNIC_ENV_GET(&vnic, odp_remote_triggered_page_fault));
}
devlink_fmsg_obj_nest_end(fmsg);
devlink_fmsg_pair_nest_end(fmsg);
@ -136,8 +147,8 @@ void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev)
dev);
if (IS_ERR(health->vnic_reporter))
mlx5_core_warn(dev,
"Failed to create vnic reporter, err = %ld\n",
PTR_ERR(health->vnic_reporter));
"Failed to create vnic reporter, err = %pe\n",
health->vnic_reporter);
}
void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev)

View File

@ -84,9 +84,10 @@ struct page_pool;
#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
#define MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE (PAGE_SHIFT - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024)
#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096)
#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT (6)
#define MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT (12)
#define MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE (16)
#define MLX5E_SHAMPO_WQ_RESRV_SIZE BIT(MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE)
#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
(6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
@ -177,7 +178,8 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
}
/* Use this function to get max num channels (rxqs/txqs) only to create netdev */
static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
static inline unsigned int
mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
MLX5E_MIN_NUM_CHANNELS :
@ -278,10 +280,6 @@ enum packet_merge {
struct mlx5e_packet_merge_param {
enum packet_merge type;
u32 timeout;
struct {
u8 match_criteria_type;
u8 alignment_granularity;
} shampo;
};
struct mlx5e_params {
@ -347,6 +345,7 @@ struct mlx5e_cq {
/* data path - accessed per napi poll */
u16 event_ctr;
struct napi_struct *napi;
struct mlx5_uars_page *uar;
struct mlx5_core_cq mcq;
struct mlx5e_ch_stats *ch_stats;
@ -378,7 +377,7 @@ struct mlx5e_sq_dma {
enum mlx5e_dma_map_type type;
};
/* Keep this enum consistent with with the corresponding strings array
/* Keep this enum consistent with the corresponding strings array
* declared in en/reporter_tx.c
*/
enum {
@ -387,7 +386,6 @@ enum {
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_DIM,
MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
MLX5E_SQ_STATE_PENDING_XSK_TX,
MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
MLX5E_NUM_SQ_STATES, /* Must be kept last */
@ -634,15 +632,16 @@ struct mlx5e_dma_info {
};
struct mlx5e_shampo_hd {
u32 mkey;
struct mlx5e_frag_page *pages;
u32 hd_per_wq;
u32 hd_per_page;
u16 hd_per_wqe;
u16 pages_per_wq;
u8 log_hd_per_page;
u8 log_hd_entry_size;
unsigned long *bitmap;
u16 pi;
u16 ci;
__be32 key;
__be32 mkey_be;
};
struct mlx5e_hw_gro_data {
@ -700,7 +699,7 @@ struct mlx5e_rq {
struct mlx5e_rq_stats *stats;
struct mlx5e_cq cq;
struct mlx5e_cq_decomp cqd;
struct hwtstamp_config *tstamp;
struct kernel_hwtstamp_config *hwtstamp_config;
struct mlx5_clock *clock;
struct mlx5e_icosq *icosq;
struct mlx5e_priv *priv;
@ -721,13 +720,18 @@ struct mlx5e_rq {
struct bpf_prog __rcu *xdp_prog;
struct mlx5e_xdpsq *xdpsq;
DECLARE_BITMAP(flags, 8);
/* page pools */
struct page_pool *page_pool;
struct page_pool *hd_page_pool;
struct mlx5e_xdp_buff mxbuf;
/* AF_XDP zero-copy */
struct xsk_buff_pool *xsk_pool;
struct work_struct recover_work;
struct work_struct rx_timeout_work;
/* control */
struct mlx5_wq_ctrl wq_ctrl;
@ -783,12 +787,12 @@ struct mlx5e_channel {
/* control */
struct mlx5e_priv *priv;
struct mlx5_core_dev *mdev;
struct hwtstamp_config *tstamp;
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int vec_ix;
int sd_ix;
int cpu;
struct mlx5_sq_bfreg *bfreg;
/* Sync between icosq recovery and XSK enable/disable. */
struct mutex icosq_recovery_lock;
@ -916,12 +920,14 @@ struct mlx5e_priv {
u8 max_opened_tc;
bool tx_ptp_opened;
bool rx_ptp_opened;
struct hwtstamp_config tstamp;
struct kernel_hwtstamp_config hwtstamp_config;
u16 q_counter[MLX5_SD_MAX_GROUP_SZ];
u16 drop_rq_q_counter;
struct notifier_block events_nb;
struct notifier_block blocking_events_nb;
struct mlx5e_pcie_cong_event *cong_event;
struct udp_tunnel_nic_info nic_info;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
@ -952,7 +958,7 @@ struct mlx5e_priv {
};
struct mlx5e_dev {
struct mlx5e_priv *priv;
struct net_device *netdev;
struct devlink_port dl_port;
};
@ -1019,8 +1025,11 @@ void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
u64 *buf);
void mlx5e_set_rx_mode_work(struct work_struct *work);
int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
int mlx5e_hwtstamp_set(struct mlx5e_priv *priv,
struct kernel_hwtstamp_config *config,
struct netlink_ext_ack *extack);
int mlx5e_hwtstamp_get(struct mlx5e_priv *priv,
struct kernel_hwtstamp_config *config);
int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter);
int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
@ -1059,6 +1068,7 @@ struct mlx5e_create_cq_param {
struct mlx5e_ch_stats *ch_stats;
int node;
int ix;
struct mlx5_uars_page *uar;
};
struct mlx5e_cq_param;
@ -1145,7 +1155,9 @@ extern const struct ethtool_ops mlx5e_ethtool_ops;
int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey);
int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb,
bool enable_mc_lb);
int mlx5e_refresh_tirs(struct mlx5_core_dev *mdev, bool enable_uc_lb,
bool enable_mc_lb);
void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc);
@ -1226,14 +1238,17 @@ struct net_device *
mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile);
int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
const struct mlx5e_profile *new_profile, void *new_ppriv);
void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct net_device *netdev);
int mlx5e_netdev_change_profile(struct net_device *netdev,
struct mlx5_core_dev *mdev,
const struct mlx5e_profile *new_profile,
void *new_ppriv);
void mlx5e_netdev_attach_nic_profile(struct net_device *netdev,
struct mlx5_core_dev *mdev);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
void mlx5e_set_xdp_feature(struct net_device *netdev);
void mlx5e_set_xdp_feature(struct mlx5e_priv *priv);
netdev_features_t mlx5e_features_check(struct sk_buff *skb,
struct net_device *netdev,
netdev_features_t features);

View File

@ -26,7 +26,6 @@ struct mlx5e_dcbx {
u8 cap;
/* Buffer configuration */
bool manual_buffer;
u32 cable_len;
u32 xoff;
u16 port_buff_cell_sz;

View File

@ -40,11 +40,8 @@ void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev)
static void
mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
{
u64 parent_id;
parent_id = mlx5_query_nic_system_image_guid(dev);
ppid->id_len = sizeof(parent_id);
memcpy(ppid->id, &parent_id, sizeof(parent_id));
BUILD_BUG_ON(MLX5_SW_IMAGE_GUID_MAX_BYTES > MAX_PHYS_ITEM_ID_LEN);
mlx5_query_nic_sw_system_image_guid(dev, ppid->id, &ppid->id_len);
}
int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev,

View File

@ -18,7 +18,8 @@ enum {
enum {
MLX5E_TC_PRIO = 0,
MLX5E_NIC_PRIO
MLX5E_PROMISC_PRIO,
MLX5E_NIC_PRIO,
};
struct mlx5e_flow_table {
@ -56,7 +57,7 @@ struct mlx5e_l2_table {
bool promisc_enabled;
};
#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_INDIR_TIRS)
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
@ -68,9 +69,13 @@ struct mlx5e_l2_table {
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
/* NIC prio FTS */
/* NIC promisc FT level */
enum {
MLX5E_PROMISC_FT_LEVEL,
};
/* NIC prio FTS */
enum {
MLX5E_VLAN_FT_LEVEL,
MLX5E_L2_FT_LEVEL,
MLX5E_TTC_FT_LEVEL,
@ -87,6 +92,7 @@ enum {
MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
MLX5E_ACCEL_FS_POL_FT_LEVEL,
MLX5E_ACCEL_FS_POL_MISS_FT_LEVEL,
MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL,
#endif
};
@ -126,7 +132,8 @@ struct mlx5e_ptp_fs;
void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
struct mlx5e_rx_res *rx_res,
struct ttc_params *ttc_params, bool tunnel);
struct ttc_params *ttc_params, bool tunnel,
bool ipsec_rss);
void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,

View File

@ -138,8 +138,8 @@ void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
if (IS_ERR_OR_NULL(agent)) {
if (IS_ERR(agent))
netdev_warn(priv->netdev,
"Failed to create hv vhca stats agent, err = %ld\n",
PTR_ERR(agent));
"Failed to create hv vhca stats agent, err = %pe\n",
agent);
kvfree(priv->stats_agent.buf);
return;

View File

@ -6,6 +6,7 @@
#include <linux/xarray.h>
#include <linux/hashtable.h>
#include <linux/refcount.h>
#include <linux/mlx5/driver.h>
#include "mapping.h"
@ -24,7 +25,8 @@ struct mapping_ctx {
struct delayed_work dwork;
struct list_head pending_list;
spinlock_t pending_list_lock; /* Guards pending list */
u64 id;
u8 id[MLX5_SW_IMAGE_GUID_MAX_BYTES];
u8 id_len;
u8 type;
struct list_head list;
refcount_t refcount;
@ -220,13 +222,15 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
}
struct mapping_ctx *
mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
mapping_create_for_id(u8 *id, u8 id_len, u8 type, size_t data_size, u32 max_id,
bool delayed_removal)
{
struct mapping_ctx *ctx;
mutex_lock(&shared_ctx_lock);
list_for_each_entry(ctx, &shared_ctx_list, list) {
if (ctx->id == id && ctx->type == type) {
if (ctx->type == type && ctx->id_len == id_len &&
!memcmp(id, ctx->id, id_len)) {
if (refcount_inc_not_zero(&ctx->refcount))
goto unlock;
break;
@ -237,7 +241,8 @@ mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delaye
if (IS_ERR(ctx))
goto unlock;
ctx->id = id;
memcpy(ctx->id, id, id_len);
ctx->id_len = id_len;
ctx->type = type;
list_add(&ctx->list, &shared_ctx_list);

View File

@ -27,6 +27,7 @@ void mapping_destroy(struct mapping_ctx *ctx);
/* adds mapping with an id or get an existing mapping with the same id
*/
struct mapping_ctx *
mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
mapping_create_for_id(u8 *id, u8 id_len, u8 type, size_t data_size, u32 max_id,
bool delayed_removal);
#endif /* __MLX5_MAPPING_H__ */

View File

@ -99,7 +99,7 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
return sizeof(struct mlx5_ksm) * 4;
}
WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
return 0;
return 1;
}
u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
@ -414,25 +414,10 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
return params->log_rq_mtu_frames - log_pkts_per_wqe;
}
u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
static u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5e_params *params)
{
return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE));
}
u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE);
}
u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE;
return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu));
return order_base_2(DIV_ROUND_UP(MLX5E_SHAMPO_WQ_RESRV_SIZE,
params->sw_mtu));
}
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
@ -626,6 +611,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
.ch_stats = c->stats,
.node = cpu_to_node(c->cpu),
.ix = c->vec_ix,
.uar = c->bfreg->up,
};
}
@ -825,7 +811,7 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
{
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc, uar_page, mdev->priv.bfreg.up->index);
if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
@ -834,13 +820,12 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE;
u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params));
int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
int wqe_size = BIT(log_stride_sz) * num_strides;
int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE;
/* +1 is for the case that the pkt_per_rsrv dont consume the reservation
* so we get a filler cqe for the rest of the reservation.
@ -901,6 +886,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
{
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
u32 lro_timeout;
int ndsegs = 1;
int err;
@ -926,22 +912,27 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
MLX5_SET(wq, wq, log_wqe_stride_size,
log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
MLX5_SET(wq, wq, shampo_enable, true);
MLX5_SET(wq, wq, log_reservation_size,
mlx5e_shampo_get_log_rsrv_size(mdev, params));
MLX5_SET(wq, wq,
log_max_num_of_packets_per_reservation,
mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
MLX5_SET(wq, wq, log_headers_entry_size,
mlx5e_shampo_get_log_hd_entry_size(mdev, params));
MLX5_SET(rqc, rqc, reservation_timeout,
mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT));
MLX5_SET(rqc, rqc, shampo_match_criteria_type,
params->packet_merge.shampo.match_criteria_type);
MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
params->packet_merge.shampo.alignment_granularity);
}
if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO)
break;
MLX5_SET(wq, wq, shampo_enable, true);
MLX5_SET(wq, wq, log_reservation_size,
MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE -
MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT);
MLX5_SET(wq, wq,
log_max_num_of_packets_per_reservation,
mlx5e_shampo_get_log_pkt_per_rsrv(params));
MLX5_SET(wq, wq, log_headers_entry_size,
MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE -
MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT);
lro_timeout =
mlx5e_choose_lro_timeout(mdev,
MLX5E_DEFAULT_SHAMPO_TIMEOUT);
MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout);
MLX5_SET(rqc, rqc, shampo_match_criteria_type,
MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED);
MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE);
break;
}
default: /* MLX5_WQ_TYPE_CYCLIC */
@ -1044,18 +1035,17 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_param *rq_param)
{
int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) *
MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE;
u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL));
int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL);
int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params));
int wqe_size = BIT(log_stride_sz) * num_strides;
int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE;
u32 hd_per_wqe;
/* Assumption: hd_per_wqe % 8 == 0. */
hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv;
mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n",
__func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv);
hd_per_wqe = (wqe_size / rsrv_size) * pkt_per_rsrv;
mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_rsrv = %d\n",
__func__, hd_per_wqe, rsrv_size, wqe_size, pkt_per_rsrv);
return hd_per_wqe;
}
@ -1240,7 +1230,6 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
@ -1267,7 +1256,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq);
mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);

View File

@ -51,6 +51,7 @@ struct mlx5e_create_sq_param {
u32 tisn;
u8 tis_lst_sz;
u8 min_inline_mode;
u32 uar_page;
};
/* Striding RQ dynamic parameters */
@ -95,12 +96,6 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_param *rq_param);
@ -138,7 +133,6 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
struct mlx5e_cq_param *param);
void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param);
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,

View File

@ -0,0 +1,376 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
#include "../devlink.h"
#include "en.h"
#include "pcie_cong_event.h"
/* Value of the inbound_cong_state / outbound_cong_state object fields that
 * the query helper in this file treats as "congested".
 */
#define MLX5E_CONG_HIGH_STATE 0x7
/* Bit flags of the software congestion state bitmap built by the query
 * helper and cached in struct mlx5e_pcie_cong_event::state.
 */
enum {
/* Set when the queried inbound_cong_state equals MLX5E_CONG_HIGH_STATE. */
MLX5E_INBOUND_CONG = BIT(0),
/* Set when the queried outbound_cong_state equals MLX5E_CONG_HIGH_STATE. */
MLX5E_OUTBOUND_CONG = BIT(1),
};
/* Threshold configuration programmed into the PCIe congestion event object.
 * The values are read from devlink driverinit parameters (vu16) and must
 * satisfy low < high for each direction before the object is created.
 * NOTE(review): units/scale of the thresholds are not visible here - confirm
 * against the devlink parameter documentation.
 */
struct mlx5e_pcie_cong_thresh {
u16 inbound_high;
u16 inbound_low;
u16 outbound_high;
u16 outbound_low;
};
/* Software counters reported through the pcie_cong ethtool stats group.
 * They are only modified by mlx5e_pcie_cong_event_work().
 */
struct mlx5e_pcie_cong_stats {
/* Inbound state bit changed from clear to set. */
u32 pci_bw_inbound_high;
/* Inbound state bit changed from set to clear. */
u32 pci_bw_inbound_low;
/* Outbound state bit changed from clear to set. */
u32 pci_bw_outbound_high;
/* Outbound state bit changed from set to clear. */
u32 pci_bw_outbound_low;
/* An event was handled but the queried state equals the cached one. */
u32 pci_bw_stale_event;
};
/* PCIe congestion event context. Allocated by mlx5e_pcie_cong_event_init(),
 * published in priv->cong_event and released by
 * mlx5e_pcie_cong_event_cleanup().
 */
struct mlx5e_pcie_cong_event {
/* Object id returned by the create command; used to query and destroy. */
u64 obj_id;
/* Owning priv; provides the core device (mdev) and the workqueue (wq). */
struct mlx5e_priv *priv;
/* Event notifier (nb) and the work item it queues on each notification. */
struct work_struct work;
struct mlx5_nb nb;
/* Last queried state: bitmap of MLX5E_INBOUND_CONG / MLX5E_OUTBOUND_CONG. */
u8 state;
/* Counters exposed by the pcie_cong ethtool stats group. */
struct mlx5e_pcie_cong_stats stats;
};
/* Descriptor table for the pcie_cong stats group, one entry per field of
 * struct mlx5e_pcie_cong_stats. The fill_strs and fill_stats callbacks walk
 * this table in index order, so the entry order here is the order in which
 * names and values are emitted.
 */
static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
pci_bw_inbound_high) },
{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
pci_bw_inbound_low) },
{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
pci_bw_outbound_high) },
{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
pci_bw_outbound_low) },
{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
pci_bw_stale_event) },
};
/* Number of counters in the group, derived from the descriptor table. */
#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc)
/* Report how many counters the pcie_cong group exposes: none while no
 * congestion event context is attached to priv, the full table otherwise.
 */
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong)
{
	if (!priv->cong_event)
		return 0;

	return NUM_PCIE_CONG_COUNTERS;
}
/* Intentionally empty: the counters of this group are incremented directly
 * by mlx5e_pcie_cong_event_work(), so there is nothing to refresh here.
 */
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong)
{
if (!priv->cong_event)
return;
for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++)
ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format);
}
/* Emit the counter values of the pcie_cong group, in descriptor-table order
 * (matching the names emitted by the fill_strs callback). Emits nothing
 * while no congestion event context is attached to priv.
 */
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong)
{
	unsigned int idx = 0;

	if (!priv->cong_event)
		return;

	while (idx < NUM_PCIE_CONG_COUNTERS) {
		/* Each counter is a u32 located via its descriptor offset. */
		u32 val = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats,
					       mlx5e_pcie_cong_stats_desc,
					       idx);

		mlx5e_ethtool_put_stat(data, val);
		idx++;
	}
}
/* Instantiate the pcie_cong stats group from the callbacks defined above.
 * NOTE(review): the second argument (0) is presumably the group's update
 * mask - confirm against the MLX5E_DEFINE_STATS_GRP definition.
 */
MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);
/* Create a PCIe congestion event general object with both inbound and
 * outbound events enabled and the thresholds taken from @config.
 *
 * On success the id of the new object is stored in @obj_id and 0 is
 * returned; otherwise the error from mlx5_cmd_exec() is returned and
 * @obj_id is left untouched.
 */
static int
mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
			     const struct mlx5e_pcie_cong_thresh *config,
			     u64 *obj_id)
{
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	void *cmd_hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
	void *obj_data = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);
	int ret;

	/* General object header: create an object of the PCIe cong type. */
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);

	/* Inbound direction: enable the event and set its thresholds. */
	MLX5_SET(pcie_cong_event_obj, obj_data, inbound_event_en, 1);
	MLX5_SET(pcie_cong_event_obj, obj_data,
		 inbound_cong_low_threshold, config->inbound_low);
	MLX5_SET(pcie_cong_event_obj, obj_data,
		 inbound_cong_high_threshold, config->inbound_high);

	/* Outbound direction: enable the event and set its thresholds. */
	MLX5_SET(pcie_cong_event_obj, obj_data, outbound_event_en, 1);
	MLX5_SET(pcie_cong_event_obj, obj_data,
		 outbound_cong_low_threshold, config->outbound_low);
	MLX5_SET(pcie_cong_event_obj, obj_data,
		 outbound_cong_high_threshold, config->outbound_high);

	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (ret)
		return ret;

	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
		      *obj_id,
		      config->inbound_high, config->inbound_low,
		      config->outbound_high, config->outbound_low);

	return 0;
}
/* Destroy the PCIe congestion event general object identified by @obj_id.
 * Returns the result of mlx5_cmd_exec().
 */
static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
					    u64 obj_id)
{
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	void *cmd_hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);

	/* Only the general object header is needed for a destroy command. */
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, obj_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
/* Query the PCIe congestion event object identified by @obj_id.
 *
 * When @state is not NULL, MLX5E_INBOUND_CONG and/or MLX5E_OUTBOUND_CONG are
 * OR-ed into *@state for each direction whose queried state equals
 * MLX5E_CONG_HIGH_STATE. Bits are only ever added, never cleared, so the
 * caller must zero *@state beforehand to get the current state.
 *
 * Returns 0 on success or the error from mlx5_cmd_exec().
 */
static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
					  u64 obj_id,
					  u32 *state)
{
	u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	void *cmd_hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
	u8 inbound, outbound;
	void *obj_data;
	int ret;

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, obj_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_QUERY_GENERAL_OBJECT);

	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (ret)
		return ret;

	/* Nothing to report when the caller did not ask for the state. */
	if (!state)
		return 0;

	obj_data = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);

	inbound = MLX5_GET(pcie_cong_event_obj, obj_data, inbound_cong_state);
	if (inbound == MLX5E_CONG_HIGH_STATE)
		*state |= MLX5E_INBOUND_CONG;

	outbound = MLX5_GET(pcie_cong_event_obj, obj_data,
			    outbound_cong_state);
	if (outbound == MLX5E_CONG_HIGH_STATE)
		*state |= MLX5E_OUTBOUND_CONG;

	return 0;
}
static void mlx5e_pcie_cong_event_work(struct work_struct *work)
{
struct mlx5e_pcie_cong_event *cong_event;
struct mlx5_core_dev *dev;
struct mlx5e_priv *priv;
u32 new_cong_state = 0;
u32 changes;
int err;
cong_event = container_of(work, struct mlx5e_pcie_cong_event, work);
priv = cong_event->priv;
dev = priv->mdev;
err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
&new_cong_state);
if (err) {
mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
err, cong_event->obj_id);
return;
}
changes = cong_event->state ^ new_cong_state;
if (!changes) {
cong_event->stats.pci_bw_stale_event++;
return;
}
cong_event->state = new_cong_state;
if (changes & MLX5E_INBOUND_CONG) {
if (new_cong_state & MLX5E_INBOUND_CONG)
cong_event->stats.pci_bw_inbound_high++;
else
cong_event->stats.pci_bw_inbound_low++;
}
if (changes & MLX5E_OUTBOUND_CONG) {
if (new_cong_state & MLX5E_OUTBOUND_CONG)
cong_event->stats.pci_bw_outbound_high++;
else
cong_event->stats.pci_bw_outbound_low++;
}
}
static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
unsigned long event, void *eqe)
{
struct mlx5e_pcie_cong_event *cong_event;
cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb);
queue_work(cong_event->priv->wq, &cong_event->work);
return NOTIFY_OK;
}
/* Read the four PCIe congestion thresholds from the devlink driverinit
 * parameters into @config.
 *
 * @config is only written once all four parameters were read successfully.
 * Returns 0 on success or the first error reported by
 * devl_param_driverinit_value_get().
 */
static int
mlx5e_pcie_cong_get_thresh_config(struct mlx5_core_dev *dev,
				  struct mlx5e_pcie_cong_thresh *config)
{
	enum {
		CONG_THRESH_IN_LOW,
		CONG_THRESH_IN_HIGH,
		CONG_THRESH_OUT_LOW,
		CONG_THRESH_OUT_HIGH,
		CONG_THRESH_NUM,
	};
	u32 param_ids[CONG_THRESH_NUM] = {
		[CONG_THRESH_IN_LOW] = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW,
		[CONG_THRESH_IN_HIGH] = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH,
		[CONG_THRESH_OUT_LOW] = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW,
		[CONG_THRESH_OUT_HIGH] = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH,
	};
	struct devlink *devlink = priv_to_devlink(dev);
	union devlink_param_value vals[CONG_THRESH_NUM];
	unsigned int idx;
	int ret;

	for (idx = 0; idx < CONG_THRESH_NUM; idx++) {
		ret = devl_param_driverinit_value_get(devlink, param_ids[idx],
						      &vals[idx]);
		if (ret)
			return ret;
	}

	config->inbound_low = vals[CONG_THRESH_IN_LOW].vu16;
	config->inbound_high = vals[CONG_THRESH_IN_HIGH].vu16;
	config->outbound_low = vals[CONG_THRESH_OUT_LOW].vu16;
	config->outbound_high = vals[CONG_THRESH_OUT_HIGH].vu16;

	return 0;
}
static int
mlx5e_thresh_config_validate(struct mlx5_core_dev *mdev,
const struct mlx5e_pcie_cong_thresh *config)
{
int err = 0;
if (config->inbound_low >= config->inbound_high) {
err = -EINVAL;
mlx5_core_err(mdev, "PCIe inbound congestion threshold configuration invalid: low (%u) >= high (%u).\n",
config->inbound_low, config->inbound_high);
}
if (config->outbound_low >= config->outbound_high) {
err = -EINVAL;
mlx5_core_err(mdev, "PCIe outbound congestion threshold configuration invalid: low (%u) >= high (%u).\n",
config->outbound_low, config->outbound_high);
}
return err;
}
/* Set up PCIe congestion event tracking for @priv.
 *
 * Returns 0 without doing anything when the device does not support the
 * feature. Otherwise reads and validates the thresholds, allocates the
 * context, creates the firmware object, registers the event notifier and
 * finally publishes the context in priv->cong_event.
 *
 * Returns 0 on success or a negative errno; on failure everything created
 * here is undone and priv->cong_event is not set.
 */
int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
{
struct mlx5e_pcie_cong_thresh thresh_config = {};
struct mlx5e_pcie_cong_event *cong_event;
struct mlx5_core_dev *mdev = priv->mdev;
int err;
/* Unsupported device: not an error, the feature simply stays off. */
if (!mlx5_pcie_cong_event_supported(mdev))
return 0;
err = mlx5e_pcie_cong_get_thresh_config(mdev, &thresh_config);
if (WARN_ON(err))
return err;
err = mlx5e_thresh_config_validate(mdev, &thresh_config);
if (err) {
mlx5_core_err(mdev, "PCIe congestion event feature disabled\n");
return err;
}
/* Zeroed allocation: cached state and all counters start at 0. */
cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL,
mdev->priv.numa_node);
if (!cong_event)
return -ENOMEM;
/* Work item and notifier are initialized before the notifier is
 * registered below, so the handler never sees a half-set-up context.
 */
INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work);
MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler,
OBJECT_CHANGE);
cong_event->priv = priv;
err = mlx5_cmd_pcie_cong_event_set(mdev, &thresh_config,
&cong_event->obj_id);
if (err) {
mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
goto err_free;
}
err = mlx5_eq_notifier_register(mdev, &cong_event->nb);
if (err) {
mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n");
goto err_obj_destroy;
}
/* Publish last: the stats callbacks key off priv->cong_event. */
priv->cong_event = cong_event;
return 0;
/* Unwind in reverse order of setup. */
err_obj_destroy:
mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
err_free:
kvfree(cong_event);
return err;
}
/* Tear down PCIe congestion event tracking for @priv.
 *
 * No-op when no context is attached. Otherwise the context is first
 * detached from priv, then the notifier is unregistered and any pending or
 * running work is waited for before the firmware object is destroyed and
 * the context freed. A failure to destroy the object is only logged.
 */
void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_pcie_cong_event *ev = priv->cong_event;
	struct mlx5_core_dev *mdev;
	int ret;

	if (!ev)
		return;

	mdev = priv->mdev;

	/* Hide the context from the stats callbacks before tearing down. */
	priv->cong_event = NULL;

	/* Stop new work from being queued, then flush what is in flight. */
	mlx5_eq_notifier_unregister(mdev, &ev->nb);
	cancel_work_sync(&ev->work);

	ret = mlx5_cmd_pcie_cong_event_destroy(mdev, ev->obj_id);
	if (ret)
		mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n",
			       ev->obj_id);

	kvfree(ev);
}

View File

@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. */
#ifndef __MLX5_PCIE_CONG_EVENT_H__
#define __MLX5_PCIE_CONG_EVENT_H__
/* Set up PCIe congestion event tracking for @priv. Returns 0 on success or
 * when the device does not support the feature, a negative errno otherwise.
 */
int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv);
/* Tear down what mlx5e_pcie_cong_event_init() set up. Does nothing when
 * priv->cong_event is NULL.
 */
void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv);
#endif /* __MLX5_PCIE_CONG_EVENT_H__ */

View File

@ -272,8 +272,8 @@ static int port_update_shared_buffer(struct mlx5_core_dev *mdev,
/* Total shared buffer size is split in a ratio of 3:1 between
* lossy and lossless pools respectively.
*/
lossy_epool_size = (shared_buffer_size / 4) * 3;
lossless_ipool_size = shared_buffer_size / 4;
lossy_epool_size = shared_buffer_size - lossless_ipool_size;
mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0,
lossy_epool_size);
@ -288,14 +288,12 @@ static int port_set_buffer(struct mlx5e_priv *priv,
u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz;
struct mlx5_core_dev *mdev = priv->mdev;
int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
u32 new_headroom_size = 0;
u32 current_headroom_size;
u32 current_headroom_cells = 0;
u32 new_headroom_cells = 0;
void *in;
int err;
int i;
current_headroom_size = port_buffer->headroom_size;
in = kzalloc(sz, GFP_KERNEL);
if (!in)
return -ENOMEM;
@ -306,12 +304,14 @@ static int port_set_buffer(struct mlx5e_priv *priv,
for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
current_headroom_cells += MLX5_GET(bufferx_reg, buffer, size);
u64 size = port_buffer->buffer[i].size;
u64 xoff = port_buffer->buffer[i].xoff;
u64 xon = port_buffer->buffer[i].xon;
new_headroom_size += size;
do_div(size, port_buff_cell_sz);
new_headroom_cells += size;
do_div(xoff, port_buff_cell_sz);
do_div(xon, port_buff_cell_sz);
MLX5_SET(bufferx_reg, buffer, size, size);
@ -320,10 +320,8 @@ static int port_set_buffer(struct mlx5e_priv *priv,
MLX5_SET(bufferx_reg, buffer, xon_threshold, xon);
}
new_headroom_size /= port_buff_cell_sz;
current_headroom_size /= port_buff_cell_sz;
err = port_update_shared_buffer(priv->mdev, current_headroom_size,
new_headroom_size);
err = port_update_shared_buffer(priv->mdev, current_headroom_cells,
new_headroom_cells);
if (err)
goto out;
@ -331,6 +329,9 @@ static int port_set_buffer(struct mlx5e_priv *priv,
if (err)
goto out;
/* RO bits should be set to 0 on write */
MLX5_SET(pbmc_reg, in, port_buffer_size, 0);
err = mlx5e_port_set_pbmc(mdev, in);
out:
kfree(in);
@ -574,7 +575,6 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
}
priv->dcbx.xoff = xoff;
/* Apply the settings */
if (update_buffer) {
@ -583,6 +583,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
return err;
}
priv->dcbx.xoff = xoff;
if (update_prio2buffer)
err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);

View File

@ -81,7 +81,7 @@ static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
}
static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
struct mlx5e_ptp_cq_stats *cq_stats)
struct mlx5e_ptpsq *ptpsq)
{
struct skb_shared_hwtstamps hwts = {};
ktime_t diff;
@ -91,8 +91,17 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
/* Maximal allowed diff is 1 / 128 second */
if (diff > (NSEC_PER_SEC >> 7)) {
cq_stats->abort++;
cq_stats->abort_abs_diff_ns += diff;
struct mlx5e_txqsq *sq = &ptpsq->txqsq;
ptpsq->cq_stats->abort++;
ptpsq->cq_stats->abort_abs_diff_ns += diff;
if (diff > (NSEC_PER_SEC >> 1) &&
!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
netdev_warn(sq->channel->netdev,
"PTP TX timestamp difference between CQE and port exceeds threshold: %lld ns, recovering SQ %u\n",
(s64)diff, sq->sqn);
queue_work(sq->priv->wq, &ptpsq->report_unhealthy_work);
}
return;
}
@ -102,7 +111,7 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
ktime_t hwtstamp,
struct mlx5e_ptp_cq_stats *cq_stats)
struct mlx5e_ptpsq *ptpsq)
{
switch (hwtstamp_type) {
case (MLX5E_SKB_CB_CQE_HWTSTAMP):
@ -120,7 +129,7 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
!mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
return;
mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
mlx5e_skb_cb_hwtstamp_tx(skb, ptpsq);
memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
}
@ -208,7 +217,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
hwtstamp, ptpsq->cq_stats);
hwtstamp, ptpsq);
ptpsq->cq_stats->cqe++;
mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
@ -333,14 +342,12 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
sq->mdev = mdev;
sq->ch_ix = MLX5E_PTP_CHANNEL_IX;
sq->txq_ix = txq_ix;
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->uar_map = c->bfreg->map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
sq->stats = &c->priv->ptp_stats.sq[tc];
sq->ptpsq = ptpsq;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
sq->stop_room = param->stop_room;
sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
@ -473,6 +480,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
csp.wq_ctrl = &txqsq->wq_ctrl;
csp.min_inline_mode = txqsq->min_inline_mode;
csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
csp.uar_page = c->bfreg->index;
err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
if (err)
@ -564,6 +572,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
ccp.ch_stats = c->stats;
ccp.napi = &c->napi;
ccp.ix = MLX5E_PTP_CHANNEL_IX;
ccp.uar = c->bfreg->up;
cq_param = &cparams->txq_sq_param.cqp;
@ -613,6 +622,7 @@ static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c,
ccp.ch_stats = c->stats;
ccp.napi = &c->napi;
ccp.ix = MLX5E_PTP_CHANNEL_IX;
ccp.uar = c->bfreg->up;
cq_param = &cparams->rq_param.cqp;
@ -697,7 +707,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
rq->netdev = priv->netdev;
rq->priv = priv;
rq->clock = mdev->clock;
rq->tstamp = &priv->tstamp;
rq->hwtstamp_config = &priv->hwtstamp_config;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
rq->stats = &c->priv->ptp_stats.rq;
@ -880,13 +890,13 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
c->priv = priv;
c->mdev = priv->mdev;
c->tstamp = &priv->tstamp;
c->pdev = mlx5_core_dma_dev(priv->mdev);
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
c->num_tc = mlx5e_get_dcb_num_tc(params);
c->stats = &priv->ptp_stats.ch;
c->lag_port = lag_port;
c->bfreg = &mdev->priv.bfreg;
err = mlx5e_ptp_set_state(c, params);
if (err)

Some files were not shown because too many files have changed in this diff Show More