From 0e0250be6de437bd62f02fe5462cc302b9606042 Mon Sep 17 00:00:00 2001 From: almalinux-bot-kernel Date: Fri, 7 Nov 2025 04:06:40 +0000 Subject: [PATCH] Import of kernel-5.14.0-570.60.1.el9_6 --- ...70.58.1.el9 => COPYING-5.14.0-570.60.1.el9 | 0 Makefile.rhelver | 2 +- arch/arm64/kvm/sys_regs.c | 109 ++++-- arch/powerpc/include/asm/hvcall.h | 1 + crypto/xts.c | 8 +- drivers/net/ethernet/ibm/ibmveth.c | 342 +++++++++++++----- drivers/net/ethernet/ibm/ibmveth.h | 22 ++ drivers/nvme/host/tcp.c | 31 +- fs/inode.c | 7 +- fs/nfs/delegation.c | 25 +- fs/nfs/nfs4_fs.h | 3 +- fs/nfs/nfs4proc.c | 12 +- include/linux/nfs4.h | 1 + net/ipv6/seg6_hmac.c | 3 +- net/sunrpc/svc.c | 24 -- redhat/kernel.changelog-9.6 | 16 + 16 files changed, 436 insertions(+), 170 deletions(-) rename COPYING-5.14.0-570.58.1.el9 => COPYING-5.14.0-570.60.1.el9 (100%) diff --git a/COPYING-5.14.0-570.58.1.el9 b/COPYING-5.14.0-570.60.1.el9 similarity index 100% rename from COPYING-5.14.0-570.58.1.el9 rename to COPYING-5.14.0-570.60.1.el9 diff --git a/Makefile.rhelver b/Makefile.rhelver index ff850d02b8..68a9da2acd 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 6 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 570.58.1 +RHEL_RELEASE = 570.60.1 # # ZSTREAM diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d97ad62207..43bbd0fcc0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1527,6 +1527,9 @@ static u8 pmuver_to_perfmon(u8 pmuver) } } +static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val); +static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); + /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -1540,6 +1543,12 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val = read_sanitised_ftr_reg(id); switch (id) { + case SYS_ID_AA64DFR0_EL1: + val = sanitise_id_aa64dfr0_el1(vcpu, val); + break; + case SYS_ID_AA64PFR0_EL1: + val = sanitise_id_aa64pfr0_el1(vcpu, val); + break; case SYS_ID_AA64PFR1_EL1: if (!kvm_has_mte(vcpu->kvm)) val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); @@ -1553,6 +1562,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1684,11 +1694,8 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } -static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { - u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - if (!vcpu_has_sve(vcpu)) val &= ~ID_AA64PFR0_EL1_SVE_MASK; @@ -1716,6 +1723,13 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, val &= ~ID_AA64PFR0_EL1_AMU_MASK; + /* + * MPAM is disabled by default as KVM also needs a set of PARTID to + * program the MPAMVPMx_EL2 PARTID remapping registers with. But some + * older kernels let the guest see the ID bit. 
+ */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + return val; } @@ -1729,11 +1743,8 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (val); \ }) -static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { - u64 val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); - val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); /* @@ -1826,6 +1837,42 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, return set_id_reg(vcpu, rd, val); } +static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, u64 user_val) +{ + u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + u64 mpam_mask = ID_AA64PFR0_EL1_MPAM_MASK; + + /* + * Commit 011e5f5bf529f ("arm64/cpufeature: Add remaining feature bits + * in ID_AA64PFR0 register") exposed the MPAM field of AA64PFR0_EL1 to + * guests, but didn't add trap handling. KVM doesn't support MPAM and + * always returns an UNDEF for these registers. The guest must see 0 + * for this field. + * + * But KVM must also accept values from user-space that were provided + * by KVM. On CPUs that support MPAM, permit user-space to write + * the sanitizied value to ID_AA64PFR0_EL1.MPAM, but ignore this field. + */ + if ((hw_val & mpam_mask) == (user_val & mpam_mask)) + user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + + return set_id_reg(vcpu, rd, user_val); +} + +static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, u64 user_val) +{ + u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + u64 mpam_mask = ID_AA64PFR1_EL1_MPAM_frac_MASK; + + /* See set_id_aa64pfr0_el1 for comment about MPAM */ + if ((hw_val & mpam_mask) == (user_val & mpam_mask)) + user_val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + + return set_id_reg(vcpu, rd, user_val); +} + /* * cpufeature ID register user accessors * @@ -2162,6 +2209,15 @@ static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu, .val = mask, \ } +/* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ +#define ID_FILTERED(sysreg, name, mask) { \ + ID_DESC(sysreg), \ + .set_user = set_##name, \ + .visibility = id_visibility, \ + .reset = kvm_read_sanitised_id_reg, \ + .val = (mask), \ +} + /* * sys_reg_desc initialiser for architecturally unallocated cpufeature ID * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 @@ -2365,19 +2421,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 ID registers */ /* CRm=4 */ - { SYS_DESC(SYS_ID_AA64PFR0_EL1), - .access = access_id_reg, - .get_user = get_id_reg, - .set_user = set_id_reg, - .reset = read_sanitised_id_aa64pfr0_el1, - .val = ~(ID_AA64PFR0_EL1_AMU | - ID_AA64PFR0_EL1_MPAM | - ID_AA64PFR0_EL1_SVE | - ID_AA64PFR0_EL1_RAS | - ID_AA64PFR0_EL1_GIC | - ID_AA64PFR0_EL1_AdvSIMD | - ID_AA64PFR0_EL1_FP), }, - ID_WRITABLE(ID_AA64PFR1_EL1, ~(ID_AA64PFR1_EL1_PFAR | + ID_FILTERED(ID_AA64PFR0_EL1, id_aa64pfr0_el1, + ~(ID_AA64PFR0_EL1_AMU | + ID_AA64PFR0_EL1_MPAM | + ID_AA64PFR0_EL1_SVE | + ID_AA64PFR0_EL1_RAS | + ID_AA64PFR0_EL1_GIC | + ID_AA64PFR0_EL1_AdvSIMD | + ID_AA64PFR0_EL1_FP)), + ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, + ~(ID_AA64PFR1_EL1_PFAR | ID_AA64PFR1_EL1_DF2 | ID_AA64PFR1_EL1_MTEX | ID_AA64PFR1_EL1_THE | @@ -2398,11 +2451,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,7), /* CRm=5 */ - { SYS_DESC(SYS_ID_AA64DFR0_EL1), - .access = access_id_reg, - .get_user = get_id_reg, - .set_user = 
set_id_aa64dfr0_el1, - .reset = read_sanitised_id_aa64dfr0_el1, /* * Prior to FEAT_Debugv8.9, the architecture defines context-aware * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs). @@ -2415,10 +2463,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { * See DDI0487K.a, section D2.8.3 Breakpoint types and linking * of breakpoints for more details. */ - .val = ID_AA64DFR0_EL1_DoubleLock_MASK | - ID_AA64DFR0_EL1_WRPs_MASK | - ID_AA64DFR0_EL1_PMUVer_MASK | - ID_AA64DFR0_EL1_DebugVer_MASK, }, + ID_FILTERED(ID_AA64DFR0_EL1, id_aa64dfr0_el1, + ID_AA64DFR0_EL1_DoubleLock_MASK | + ID_AA64DFR0_EL1_WRPs_MASK | + ID_AA64DFR0_EL1_PMUVer_MASK | + ID_AA64DFR0_EL1_DebugVer_MASK), ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), ID_UNALLOCATED(5,3), diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 56e0c8767a..4693a00f71 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -258,6 +258,7 @@ #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 #define H_ILLAN_ATTRIBUTES 0x244 +#define H_ADD_LOGICAL_LAN_BUFFERS 0x248 #define H_MODIFY_HEA_QP 0x250 #define H_QUERY_HEA_QP 0x254 #define H_QUERY_HEA 0x258 diff --git a/crypto/xts.c b/crypto/xts.c index 6c12f30dbd..9f90121b69 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -203,12 +203,12 @@ static void xts_encrypt_done(struct crypto_async_request *areq, int err) if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); - rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, true); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_encrypt); - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; } } @@ -223,12 +223,12 @@ static void xts_decrypt_done(struct crypto_async_request *areq, int err) if (!err) { struct xts_request_ctx *rctx = skcipher_request_ctx(req); - rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = xts_xor_tweak_post(req, false); if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { err = xts_cts_final(req, crypto_skcipher_decrypt); - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; } } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 04192190be..7f94e84d09 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -39,8 +39,6 @@ #include "ibmveth.h" static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, - bool reuse); static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; @@ -213,95 +211,170 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length) static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struct ibmveth_buff_pool *pool) { - u32 i; - u32 count = pool->size - atomic_read(&pool->available); - u32 buffers_added = 0; - struct sk_buff *skb; - unsigned int free_index, index; - u64 correlator; + union ibmveth_buf_desc descs[IBMVETH_MAX_RX_PER_HCALL] = {0}; + u32 remaining = pool->size - atomic_read(&pool->available); + u64 correlators[IBMVETH_MAX_RX_PER_HCALL] = {0}; unsigned long lpar_rc; + u32 buffers_added = 0; + u32 i, filled, batch; + struct vio_dev *vdev; dma_addr_t dma_addr; + struct device *dev; + u32 index; + + vdev 
= adapter->vdev; + dev = &vdev->dev; mb(); - for (i = 0; i < count; ++i) { - union ibmveth_buf_desc desc; + batch = adapter->rx_buffers_per_hcall; - free_index = pool->consumer_index; - index = pool->free_map[free_index]; - skb = NULL; + while (remaining > 0) { + unsigned int free_index = pool->consumer_index; - BUG_ON(index == IBM_VETH_INVALID_MAP); + /* Fill a batch of descriptors */ + for (filled = 0; filled < min(remaining, batch); filled++) { + index = pool->free_map[free_index]; + if (WARN_ON(index == IBM_VETH_INVALID_MAP)) { + adapter->replenish_add_buff_failure++; + netdev_info(adapter->netdev, + "Invalid map index %u, reset\n", + index); + schedule_work(&adapter->work); + break; + } - /* are we allocating a new buffer or recycling an old one */ - if (pool->skbuff[index]) - goto reuse; + if (!pool->skbuff[index]) { + struct sk_buff *skb = NULL; - skb = netdev_alloc_skb(adapter->netdev, pool->buff_size); + skb = netdev_alloc_skb(adapter->netdev, + pool->buff_size); + if (!skb) { + adapter->replenish_no_mem++; + adapter->replenish_add_buff_failure++; + break; + } - if (!skb) { - netdev_dbg(adapter->netdev, - "replenish: unable to allocate skb\n"); - adapter->replenish_no_mem++; + dma_addr = dma_map_single(dev, skb->data, + pool->buff_size, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + break; + } + + pool->dma_addr[index] = dma_addr; + pool->skbuff[index] = skb; + } else { + /* re-use case */ + dma_addr = pool->dma_addr[index]; + } + + if (rx_flush) { + unsigned int len; + + len = adapter->netdev->mtu + IBMVETH_BUFF_OH; + len = min(pool->buff_size, len); + ibmveth_flush_buffer(pool->skbuff[index]->data, + len); + } + + descs[filled].fields.flags_len = IBMVETH_BUF_VALID | + pool->buff_size; + descs[filled].fields.address = dma_addr; + + correlators[filled] = ((u64)pool->index << 32) | index; + *(u64 *)pool->skbuff[index]->data = correlators[filled]; + + free_index++; + if (free_index >= pool->size) + free_index = 0; + } + + if (!filled) break; - } - - dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, - pool->buff_size, DMA_FROM_DEVICE); - - if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) - goto failure; - - pool->dma_addr[index] = dma_addr; - pool->skbuff[index] = skb; - - if (rx_flush) { - unsigned int len = min(pool->buff_size, - adapter->netdev->mtu + - IBMVETH_BUFF_OH); - ibmveth_flush_buffer(skb->data, len); - } -reuse: - dma_addr = pool->dma_addr[index]; - desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size; - desc.fields.address = dma_addr; - - correlator = ((u64)pool->index << 32) | index; - *(u64 *)pool->skbuff[index]->data = correlator; - - lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, - desc.desc); + /* single buffer case*/ + if (filled == 1) + lpar_rc = h_add_logical_lan_buffer(vdev->unit_address, + descs[0].desc); + else + /* Multi-buffer hcall */ + lpar_rc = h_add_logical_lan_buffers(vdev->unit_address, + descs[0].desc, + descs[1].desc, + descs[2].desc, + descs[3].desc, + descs[4].desc, + descs[5].desc, + descs[6].desc, + descs[7].desc); if (lpar_rc != H_SUCCESS) { - netdev_warn(adapter->netdev, - "%sadd_logical_lan failed %lu\n", - skb ? 
"" : "When recycling: ", lpar_rc); - goto failure; + dev_warn_ratelimited(dev, + "RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n", + filled, lpar_rc, batch); + goto hcall_failure; } - pool->free_map[free_index] = IBM_VETH_INVALID_MAP; - pool->consumer_index++; - if (pool->consumer_index >= pool->size) - pool->consumer_index = 0; + /* Only update pool state after hcall succeeds */ + for (i = 0; i < filled; i++) { + free_index = pool->consumer_index; + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; - buffers_added++; - adapter->replenish_add_buff_success++; + pool->consumer_index++; + if (pool->consumer_index >= pool->size) + pool->consumer_index = 0; + } + + buffers_added += filled; + adapter->replenish_add_buff_success += filled; + remaining -= filled; + + memset(&descs, 0, sizeof(descs)); + memset(&correlators, 0, sizeof(correlators)); + continue; + +hcall_failure: + for (i = 0; i < filled; i++) { + index = correlators[i] & 0xffffffffUL; + dma_addr = pool->dma_addr[index]; + + if (pool->skbuff[index]) { + if (dma_addr && + !dma_mapping_error(dev, dma_addr)) + dma_unmap_single(dev, dma_addr, + pool->buff_size, + DMA_FROM_DEVICE); + + dev_kfree_skb_any(pool->skbuff[index]); + pool->skbuff[index] = NULL; + } + } + adapter->replenish_add_buff_failure += filled; + + /* + * If multi rx buffers hcall is no longer supported by FW + * e.g. in the case of Live Parttion Migration + */ + if (batch > 1 && lpar_rc == H_FUNCTION) { + /* + * Instead of retry submit single buffer individually + * here just set the max rx buffer per hcall to 1 + * buffers will be respleshed next time + * when ibmveth_replenish_buffer_pool() is called again + * with single-buffer case + */ + netdev_info(adapter->netdev, + "RX Multi buffers not supported by FW, rc=%lu\n", + lpar_rc); + adapter->rx_buffers_per_hcall = 1; + netdev_info(adapter->netdev, + "Next rx replesh will fall back to single-buffer hcall\n"); + } + break; } - mb(); - atomic_add(buffers_added, &(pool->available)); - return; - -failure: - - if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr)) - dma_unmap_single(&adapter->vdev->dev, - pool->dma_addr[index], pool->buff_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(pool->skbuff[index]); - pool->skbuff[index] = NULL; - adapter->replenish_add_buff_failure++; - mb(); atomic_add(buffers_added, &(pool->available)); } @@ -370,20 +443,36 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, } } -/* remove a buffer from a pool */ -static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, - u64 correlator, bool reuse) +/** + * ibmveth_remove_buffer_from_pool - remove a buffer from a pool + * @adapter: adapter instance + * @correlator: identifies pool and index + * @reuse: whether to reuse buffer + * + * Return: + * * %0 - success + * * %-EINVAL - correlator maps to pool or index out of range + * * %-EFAULT - pool and index map to null skb + */ +static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator, bool reuse) { unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; unsigned int free_index; struct sk_buff *skb; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return -EINVAL; + } skb = adapter->rx_buff_pool[pool].skbuff[index]; - BUG_ON(skb == NULL); + if (WARN_ON(!skb)) { + 
schedule_work(&adapter->work); + return -EFAULT; + } /* if we are going to reuse the buffer then keep the pointers around * but mark index as available. replenish will see the skb pointer and @@ -411,6 +500,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, mb(); atomic_dec(&(adapter->rx_buff_pool[pool].available)); + + return 0; } /* get the current buffer on the rx queue */ @@ -420,24 +511,44 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return NULL; + } return adapter->rx_buff_pool[pool].skbuff[index]; } -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, - bool reuse) +/** + * ibmveth_rxq_harvest_buffer - Harvest buffer from pool + * + * @adapter: pointer to adapter + * @reuse: whether to reuse buffer + * + * Context: called from ibmveth_poll + * + * Return: + * * %0 - success + * * other - non-zero return from ibmveth_remove_buffer_from_pool + */ +static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse) { u64 cor; + int rc; cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; - ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + if (unlikely(rc)) + return rc; if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } + + return 0; } static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) @@ -709,6 +820,35 @@ static int ibmveth_close(struct net_device *netdev) return 0; } +/** + * ibmveth_reset - Handle scheduled reset work + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: This routine acquires rtnl_mutex and disables its NAPI through + * ibmveth_close. It can't be called directly in a context that has + * already acquired rtnl_mutex or disabled its NAPI, or directly from + * a poll routine. 
+ * + * Return: void + */ +static void ibmveth_reset(struct work_struct *w) +{ + struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work); + struct net_device *netdev = adapter->netdev; + + netdev_dbg(netdev, "reset starting\n"); + + rtnl_lock(); + + dev_close(adapter->netdev); + dev_open(adapter->netdev, NULL); + + rtnl_unlock(); + + netdev_dbg(netdev, "reset complete\n"); +} + static int ibmveth_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { @@ -1324,7 +1464,8 @@ restart_poll: wmb(); /* suggested by larson1 */ adapter->rx_invalid_buffer++; netdev_dbg(netdev, "recycling invalid buffer\n"); - ibmveth_rxq_harvest_buffer(adapter, true); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; } else { struct sk_buff *skb, *new_skb; int length = ibmveth_rxq_frame_length(adapter); @@ -1334,6 +1475,8 @@ restart_poll: __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); + if (unlikely(!skb)) + break; /* if the large packet bit is set in the rx queue * descriptor, the mss will be written by PHYP eight @@ -1357,10 +1500,12 @@ restart_poll: if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); - ibmveth_rxq_harvest_buffer(adapter, true); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; skb = new_skb; } else { - ibmveth_rxq_harvest_buffer(adapter, false); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false))) + break; skb_reserve(skb, offset); } @@ -1407,7 +1552,10 @@ restart_poll: * then check once more to make sure we are done. */ lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); - BUG_ON(lpar_rc != H_SUCCESS); + if (WARN_ON(lpar_rc != H_SUCCESS)) { + schedule_work(&adapter->work); + goto out; + } if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, @@ -1428,7 +1576,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) if (napi_schedule_prep(&adapter->napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - BUG_ON(lpar_rc != H_SUCCESS); + WARN_ON(lpar_rc != H_SUCCESS); __napi_schedule(&adapter->napi); } return IRQ_HANDLED; @@ -1670,6 +1818,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; + INIT_WORK(&adapter->work, ibmveth_reset); adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); ibmveth_init_link_settings(netdev); @@ -1705,6 +1854,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netdev->features |= NETIF_F_FRAGLIST; } + if (ret == H_SUCCESS && + (ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT)) { + adapter->rx_buffers_per_hcall = IBMVETH_MAX_RX_PER_HCALL; + netdev_dbg(netdev, + "RX Multi-buffer hcall supported by FW, batch set to %u\n", + adapter->rx_buffers_per_hcall); + } else { + adapter->rx_buffers_per_hcall = 1; + netdev_dbg(netdev, + "RX Single-buffer hcall mode, batch set to %u\n", + adapter->rx_buffers_per_hcall); + } + netdev->min_mtu = IBMVETH_MIN_MTU; netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; @@ -1762,6 +1924,8 @@ static void ibmveth_remove(struct vio_dev *dev) struct ibmveth_adapter *adapter = netdev_priv(netdev); int i; + cancel_work_sync(&adapter->work); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) kobject_put(&adapter->rx_buff_pool[i].kobj); diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 8468e2c59d..47b9051840 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ 
b/drivers/net/ethernet/ibm/ibmveth.h @@ -28,6 +28,7 @@ #define IbmVethMcastRemoveFilter 0x2UL #define IbmVethMcastClearFilterTable 0x3UL +#define IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT 0x0000000000040000UL #define IBMVETH_ILLAN_LRG_SR_ENABLED 0x0000000000010000UL #define IBMVETH_ILLAN_LRG_SND_SUPPORT 0x0000000000008000UL #define IBMVETH_ILLAN_PADDED_PKT_CSUM 0x0000000000002000UL @@ -46,6 +47,24 @@ #define h_add_logical_lan_buffer(ua, buf) \ plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf) +static inline long h_add_logical_lan_buffers(unsigned long unit_address, + unsigned long desc1, + unsigned long desc2, + unsigned long desc3, + unsigned long desc4, + unsigned long desc5, + unsigned long desc6, + unsigned long desc7, + unsigned long desc8) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + return plpar_hcall9(H_ADD_LOGICAL_LAN_BUFFERS, + retbuf, unit_address, + desc1, desc2, desc3, desc4, + desc5, desc6, desc7, desc8); +} + /* FW allows us to send 6 descriptors but we only use one so mark * the other 5 as unused (0) */ @@ -101,6 +120,7 @@ static inline long h_illan_attributes(unsigned long unit_address, #define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64) #define IBMVETH_MAX_QUEUES 16U #define IBMVETH_DEFAULT_QUEUES 8U +#define IBMVETH_MAX_RX_PER_HCALL 8U static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 512, 256, 256, 256 }; @@ -137,6 +157,7 @@ struct ibmveth_adapter { struct vio_dev *vdev; struct net_device *netdev; struct napi_struct napi; + struct work_struct work; unsigned int mcastFilterSize; void * buffer_list_addr; void * filter_list_addr; @@ -150,6 +171,7 @@ struct ibmveth_adapter { int rx_csum; int large_send; bool is_active_trunk; + unsigned int rx_buffers_per_hcall; u64 fw_ipv6_csum_support; u64 fw_ipv4_csum_support; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1d5af8e88c..ef0eb0a342 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1862,7 +1862,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1878,6 +1878,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -1944,7 +1969,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, diff --git 
a/fs/inode.c b/fs/inode.c index 30161102f3..bd7f8b3356 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -191,8 +191,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_wb_frn_history = 0; #endif - if (security_inode_alloc(inode)) - goto out; spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); @@ -229,11 +227,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif inode->i_flctx = NULL; + + if (unlikely(security_inode_alloc(inode))) + return -ENOMEM; this_cpu_inc(nr_inodes); return 0; -out: - return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index da44280002..fe47de4201 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -969,13 +969,6 @@ out: nfs_inode_find_state_and_recover(inode, stateid); } -void nfs_remove_bad_delegation(struct inode *inode, - const nfs4_stateid *stateid) -{ - nfs_revoke_delegation(inode, stateid); -} -EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); - void nfs_delegation_mark_returned(struct inode *inode, const nfs4_stateid *stateid) { @@ -1012,6 +1005,24 @@ out_rcu_unlock: nfs_inode_find_state_and_recover(inode, stateid); } +/** + * nfs_remove_bad_delegation - handle delegations that are unusable + * @inode: inode to process + * @stateid: the delegation's stateid + * + * If the server ACK-ed our FREE_STATEID then clean + * up the delegation, else mark and keep the revoked state. + */ +void nfs_remove_bad_delegation(struct inode *inode, + const nfs4_stateid *stateid) +{ + if (stateid && stateid->type == NFS4_FREED_STATEID_TYPE) + nfs_delegation_mark_returned(inode, stateid); + else + nfs_revoke_delegation(inode, stateid); +} +EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); + /** * nfs_expire_unused_delegation_types * @clp: client to process diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7b84c1e685..42aedb47e2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -66,8 +66,7 @@ struct nfs4_minor_version_ops { void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, - const nfs4_stateid *, - const struct cred *); + nfs4_stateid *, const struct cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); void (*session_trunk)(struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bbd0a59bf6..daebf73973 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -105,7 +105,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, bool is_privileged); static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *, const struct cred *); -static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, +static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, const struct cred *, bool); #endif @@ -2883,16 +2883,14 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st } static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, - const nfs4_stateid *stateid, - const struct cred *cred) + nfs4_stateid *stateid, const struct cred *cred) { return -NFS4ERR_BAD_STATEID; } #if defined(CONFIG_NFS_V4_1) static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, - const nfs4_stateid *stateid, - const struct cred *cred) + nfs4_stateid *stateid, const struct cred *cred) { int status; @@ -2901,6 +2899,7 @@ static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, break; case 
NFS4_INVALID_STATEID_TYPE: case NFS4_SPECIAL_STATEID_TYPE: + case NFS4_FREED_STATEID_TYPE: return -NFS4ERR_BAD_STATEID; case NFS4_REVOKED_STATEID_TYPE: goto out_free; @@ -10568,7 +10567,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { * Note: this function is always asynchronous. */ static int nfs41_free_stateid(struct nfs_server *server, - const nfs4_stateid *stateid, + nfs4_stateid *stateid, const struct cred *cred, bool privileged) { @@ -10608,6 +10607,7 @@ static int nfs41_free_stateid(struct nfs_server *server, if (IS_ERR(task)) return PTR_ERR(task); rpc_put_task(task); + stateid->type = NFS4_FREED_STATEID_TYPE; return 0; } diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index f9df88091c..501e8b94cd 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -70,6 +70,7 @@ struct nfs4_stateid_struct { NFS4_LAYOUT_STATEID_TYPE, NFS4_PNFS_DS_STATEID_TYPE, NFS4_REVOKED_STATEID_TYPE, + NFS4_FREED_STATEID_TYPE, } type; }; diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index bbf5b84a70..6708ee3d43 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -271,7 +272,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) return false; - if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN)) return false; return true; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index aef501b090..ba6113db45 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -630,27 +630,6 @@ svc_destroy(struct svc_serv **servp) } EXPORT_SYMBOL_GPL(svc_destroy); -static bool -svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) -{ - unsigned long pages, ret; - - /* bc_xprt uses fore channel allocated buffers */ - if (svc_is_backchannel(rqstp)) - return true; - - pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. 
- * We assume one is at most one page - */ - WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); - if (pages > RPCSVC_MAXPAGES) - pages = RPCSVC_MAXPAGES; - - ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages, - rqstp->rq_pages); - return ret == pages; -} - /* * Release an RPC server buffer */ @@ -690,9 +669,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp->rq_resp) goto out_enomem; - if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) - goto out_enomem; - return rqstp; out_enomem: svc_rqst_free(rqstp); diff --git a/redhat/kernel.changelog-9.6 b/redhat/kernel.changelog-9.6 index c5fa278a04..838b23df53 100644 --- a/redhat/kernel.changelog-9.6 +++ b/redhat/kernel.changelog-9.6 @@ -1,3 +1,19 @@ +* Sat Oct 25 2025 CKI KWF Bot [5.14.0-570.60.1.el9_6] +- ibmveth: Add multi buffers rx replenishment hcall support (Mamatha Inamdar) [RHEL-117437] +- net: ibmveth: Reset the adapter when unexpected states are detected (Mamatha Inamdar) [RHEL-117437] +- crypto: xts - Handle EBUSY correctly (CKI Backport Bot) [RHEL-119235] {CVE-2023-53494} +Resolves: RHEL-117437, RHEL-119235 + +* Thu Oct 23 2025 CKI KWF Bot [5.14.0-570.59.1.el9_6] +- nvme-tcp: fix premature queue removal and I/O failover (Maurizio Lombardi) [RHEL-105111] +- KVM: arm64: Disable MPAM visibility by default and ignore VMM writes (Gavin Shan) [RHEL-120964] +- KVM: arm64: Add a macro for creating filtered sys_reg_descs entries (Gavin Shan) [RHEL-120964] +- NFSv4: Allow FREE_STATEID to clean up delegations (Benjamin Coddington) [RHEL-118857] +- SUNRPC: Cleanup/fix initial rq_pages allocation (Benjamin Coddington) [RHEL-108160] +- fs: fix UAF/GPF bug in nilfs_mdt_destroy (CKI Backport Bot) [RHEL-116666] {CVE-2022-50367} +- ipv6: sr: Fix MAC comparison to be constant-time (CKI Backport Bot) [RHEL-116384] {CVE-2025-39702} +Resolves: RHEL-105111, RHEL-108160, RHEL-116384, RHEL-116666, RHEL-118857, RHEL-120964 + * Tue Oct 21 2025 CKI KWF Bot [5.14.0-570.58.1.el9_6] - pstore/ram: Check start of empty przs during init (CKI Backport Bot) [RHEL-122067] {CVE-2023-53331} - vsock/virtio: Validate length in packet header before skb_put() (Jon Maloy) [RHEL-114299] {CVE-2025-39718}
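
Editor's note on the seg6_hmac hunk above (CVE-2025-39702): the fix replaces memcmp() with crypto_memneq() because memcmp() may return as soon as the first differing byte is found, so its running time can leak how many leading bytes of an attacker-supplied HMAC were correct. The following is a minimal, user-space sketch of the constant-time comparison idea only; ct_memneq() and the 32-byte buffers are illustrative stand-ins, not the kernel's crypto_memneq() implementation.

/*
 * Illustrative only: why a MAC check should not use memcmp().
 * A constant-time comparison accumulates differences over the full
 * length, so the runtime does not depend on where the mismatch is.
 */
#include <stddef.h>
#include <stdio.h>

/* Returns nonzero if the buffers differ; runtime independent of where. */
static int ct_memneq(const void *a, const void *b, size_t len)
{
	const unsigned char *pa = a, *pb = b;
	unsigned char diff = 0;
	size_t i;

	for (i = 0; i < len; i++)
		diff |= pa[i] ^ pb[i];	/* always touches every byte */

	return diff != 0;
}

int main(void)
{
	unsigned char expected[32] = { 0xde, 0xad, 0xbe, 0xef };
	unsigned char received[32] = { 0xde, 0xad, 0xbe, 0xee };

	/* Timing-safe check: reject without revealing the matching prefix. */
	if (ct_memneq(expected, received, sizeof(expected)))
		puts("HMAC mismatch, drop packet");
	else
		puts("HMAC ok");

	return 0;
}

Inside the kernel, the crypto_memneq() helper used in the seg6_hmac_validate_skb() hunk provides this guarantee; the sketch above only mirrors the technique for readers reviewing the backport.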