diff --git a/COPYING-5.14.0-570.37.1.el9 b/COPYING-5.14.0-570.39.1.el9 similarity index 100% rename from COPYING-5.14.0-570.37.1.el9 rename to COPYING-5.14.0-570.39.1.el9 diff --git a/Makefile.rhelver b/Makefile.rhelver index 3640050bf8..18fe1a6366 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 6 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 570.37.1 +RHEL_RELEASE = 570.39.1 # # ZSTREAM diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 207998e64f..7507d57787 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -122,6 +122,7 @@ struct zpci_dev { struct rcu_head rcu; struct hotplug_slot hotplug_slot; + struct mutex state_lock; /* protect state changes */ enum zpci_state state; u32 fid; /* function ID, used by sclp */ u32 fh; /* function handle, used by insn's */ @@ -146,7 +147,6 @@ struct zpci_dev { u8 reserved : 1; unsigned int devfn; /* DEVFN part of the RID*/ - struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ @@ -174,6 +174,7 @@ struct zpci_dev { u64 dma_mask; /* DMA address space mask */ /* Function measurement block */ + struct mutex fmb_lock; struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index db7585b78d..2a3a85899c 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); +static DEFINE_MUTEX(zpci_add_remove_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -68,6 +70,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb); struct airq_iv *zpci_aif_sbv; EXPORT_SYMBOL_GPL(zpci_aif_sbv); +void zpci_zdev_put(struct zpci_dev *zdev) +{ + if (!zdev) + return; + mutex_lock(&zpci_add_remove_lock); + kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); +} + struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; @@ -728,12 +739,12 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); * equivalent to its state during boot when first probing a driver. * Consequently after reset the PCI function requires re-initialization via the * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors() - * and enabling the function via e.g.pci_enablde_device_flags().The caller + * and enabling the function via e.g. pci_enable_device_flags(). The caller * must guard against concurrent reset attempts. * * In most cases this function should not be called directly but through * pci_reset_function() or pci_reset_bus() which handle the save/restore and - * locking. + * locking - asserted by lockdep. * * Return: 0 on success and an error value otherwise */ @@ -742,6 +753,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) u8 status; int rc; + lockdep_assert_held(&zdev->state_lock); zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); if (zdev_enabled(zdev)) { /* Disables device access, DMAs and IRQs (reset state) */ @@ -803,7 +815,8 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) goto error; zdev->state = state; - mutex_init(&zdev->lock); + mutex_init(&zdev->state_lock); + mutex_init(&zdev->fmb_lock); mutex_init(&zdev->kzdev_lock); return zdev; @@ -829,6 +842,7 @@ int zpci_add_device(struct zpci_dev *zdev) { int rc; + mutex_lock(&zpci_add_remove_lock); zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); rc = zpci_init_iommu(zdev); if (rc) @@ -842,12 +856,14 @@ int zpci_add_device(struct zpci_dev *zdev) spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); return 0; error_destroy_iommu: zpci_destroy_iommu(zdev); error: zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc); + mutex_unlock(&zpci_add_remove_lock); return rc; } @@ -890,6 +906,10 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) { int rc; + lockdep_assert_held(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + return 0; + if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); @@ -913,59 +933,44 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) * @zdev: the zpci_dev that was reserved * * Handle the case that a given zPCI function was reserved by another system. - * After a call to this function the zpci_dev can not be found via - * get_zdev_by_fid() anymore but may still be accessible via existing - * references though it will not be functional anymore. */ void zpci_device_reserved(struct zpci_dev *zdev) { - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - /* - * Remove device from zpci_list as it is going away. This also - * makes sure we ignore subsequent zPCI events for this device. - */ - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); + lockdep_assert_held(&zdev->state_lock); + /* We may declare the device reserved multiple times */ + if (zdev->state == ZPCI_FN_STATE_RESERVED) + return; zdev->state = ZPCI_FN_STATE_RESERVED; zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + /* + * The underlying device is gone. Allow the zdev to be freed + * as soon as all other references are gone by accounting for + * the removal as a dropped reference. + */ zpci_zdev_put(zdev); } void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); - int ret; - if (zdev->zbus->bus) - zpci_bus_remove_device(zdev, false); + lockdep_assert_held(&zpci_add_remove_lock); + WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); + /* + * We already hold zpci_list_lock thanks to kref_put_lock(). + * This makes sure no new reference can be taken from the list. + */ + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); - if (zdev_enabled(zdev)) - zpci_disable_device(zdev); + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); - switch (zdev->state) { - case ZPCI_FN_STATE_CONFIGURED: - ret = sclp_pci_deconfigure(zdev->fid); - zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); - fallthrough; - case ZPCI_FN_STATE_STANDBY: - if (zdev->has_hp_slot) - zpci_exit_slot(zdev); - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - zpci_dbg(3, "rsv fid:%x\n", zdev->fid); - fallthrough; - case ZPCI_FN_STATE_RESERVED: - if (zdev->has_resources) - zpci_cleanup_bus_resources(zdev); - zpci_bus_device_unregister(zdev); - zpci_destroy_iommu(zdev); - fallthrough; - default: - break; - } + if (zdev->has_resources) + zpci_cleanup_bus_resources(zdev); + + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree_rcu(zdev, rcu); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 1b74a000ff..23c27c6320 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void) static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && - zpci_is_device_configured(zdev) && !zdev->vfn; } @@ -332,6 +331,20 @@ error: return rc; } +static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_dev *pdev; + + if (!zdev->vfn) + return false; + + pdev = zpci_iov_find_parent_pf(zbus, zdev); + if (!pdev) + return true; + pci_dev_put(pdev); + return false; +} + int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) { bool topo_is_tid = zdev->tid_avail; @@ -346,6 +359,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) topo = topo_is_tid ? zdev->tid : zdev->pchid; zbus = zpci_bus_get(topo, topo_is_tid); + /* + * An isolated VF gets its own domain/bus even if there exists + * a matching domain/bus already + */ + if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) { + zpci_bus_put(zbus); + zbus = NULL; + } + if (!zbus) { zbus = zpci_bus_alloc(topo, topo_is_tid); if (!zbus) diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index af9f0ac79a..3febb3b297 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -17,11 +17,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); -static inline void zpci_zdev_put(struct zpci_dev *zdev) -{ - if (zdev) - kref_put(&zdev->kref, zpci_release_device); -} + +void zpci_zdev_put(struct zpci_dev *zdev); static inline void zpci_zdev_get(struct zpci_dev *zdev) { diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 0202ed4556..f16ab3ab86 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -423,6 +423,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index ac4d172f27..38014206c1 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -97,9 +97,9 @@ static int pci_perf_show(struct seq_file *m, void *v) if (!zdev) return 0; - mutex_lock(&zdev->lock); + mutex_lock(&zdev->fmb_lock); if (!zdev->fmb) { - mutex_unlock(&zdev->lock); + mutex_unlock(&zdev->fmb_lock); seq_puts(m, "FMB statistics disabled\n"); return 0; } @@ -136,7 +136,7 @@ static int pci_perf_show(struct seq_file *m, void *v) } pci_sw_counter_show(m); - mutex_unlock(&zdev->lock); + mutex_unlock(&zdev->fmb_lock); return 0; } @@ -154,7 +154,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, if (rc) return rc; - mutex_lock(&zdev->lock); + mutex_lock(&zdev->fmb_lock); switch (val) { case 0: rc = zpci_fmb_disable_device(zdev); @@ -163,7 +163,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, rc = zpci_fmb_enable_device(zdev); break; } - mutex_unlock(&zdev->lock); + mutex_unlock(&zdev->fmb_lock); return rc ? rc : count; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 6e0a8228e6..2fbee3887d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -280,6 +280,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) zpci_err_hex(ccdf, sizeof(*ccdf)); if (zdev) { + mutex_lock(&zdev->state_lock); zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); @@ -308,6 +309,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) } pci_dev_put(pdev); no_pdev: + if (zdev) + mutex_unlock(&zdev->state_lock); zpci_zdev_put(zdev); } @@ -332,6 +335,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) zdev->state = ZPCI_FN_STATE_STANDBY; } +static void zpci_event_reappear(struct zpci_dev *zdev) +{ + lockdep_assert_held(&zdev->state_lock); + /* + * The zdev is in the reserved state. This means that it was presumed to + * go away but there are still undropped references. Now, the platform + * announced its availability again. Bring back the lingering zdev + * to standby. This is safe because we hold a temporary reference + * now so that it won't go away. Account for the re-appearance of the + * underlying device by incrementing the reference count. + */ + zdev->state = ZPCI_FN_STATE_STANDBY; + zpci_zdev_get(zdev); + zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); +} + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); @@ -340,6 +359,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); + + if (existing_zdev) + mutex_lock(&zdev->state_lock); + switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { @@ -351,8 +374,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); /* the configuration request may be stale */ - if (zdev->state != ZPCI_FN_STATE_STANDBY) + else if (zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } @@ -368,6 +393,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); zpci_update_fh(zdev, ccdf->fh); } break; @@ -408,8 +435,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } - if (existing_zdev) + if (existing_zdev) { + mutex_unlock(&zdev->state_lock); zpci_zdev_put(zdev); + } } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index ead062bf2b..191e56a623 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in return 0; } -int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +/** + * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function + * @zbus: The bus that the PCI function is on, or would be added on + * @zdev: The PCI function + * + * Finds the parent PF, if it exists and is configured, of the given PCI function + * and increments its refcount. Th PF is searched for on the provided bus so the + * caller has to ensure that this is the correct bus to search. This function may + * be used before adding the PCI function to a zbus. + * + * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not + * found. If the function is not a VF or has no RequesterID information, + * NULL is returned as well. + */ +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) { - int i, cand_devfn; - struct zpci_dev *zdev; + int i, vfid, devfn, cand_devfn; struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the + return NULL; + /* Non-VFs and VFs without RID available don't have a parent */ + if (!zdev->vfn || !zdev->rid_available) + return NULL; + /* Linux vfid starts at 0 vfn at 1 */ + vfid = zdev->vfn - 1; + devfn = zdev->rid & ZPCI_RID_MASK_DEVFN; + /* + * If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what * devfn the VF would have if it belonged to that PF using the PF's @@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn if (!pdev) continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } + if (cand_devfn == devfn) + return pdev; /* balance pci_get_slot() */ pci_dev_put(pdev); } } + return NULL; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + struct zpci_dev *zdev = to_zpci(virtfn); + struct pci_dev *pdev_pf; + int rc = 0; + + pdev_pf = zpci_iov_find_parent_pf(zbus, zdev); + if (pdev_pf) { + /* Linux' vfids start at 0 while zdev->vfn starts at 1 */ + rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1); + pci_dev_put(pdev_pf); + } return rc; } diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h index b2c828003b..05df728f98 100644 --- a/arch/s390/pci/pci_iov.h +++ b/arch/s390/pci/pci_iov.h @@ -17,6 +17,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev); int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev); + #else /* CONFIG_PCI_IOV */ static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} @@ -26,5 +28,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v { return 0; } + +static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + return NULL; +} #endif /* CONFIG_PCI_IOV */ #endif /* __S390_PCI_IOV_h */ diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 3a992eda4d..65db2522b5 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -50,6 +50,39 @@ static ssize_t mio_enabled_show(struct device *dev, } static DEVICE_ATTR_RO(mio_enabled); +static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) +{ + u8 status; + int ret; + + pci_stop_and_remove_bus_device(pdev); + if (zdev_enabled(zdev)) { + ret = zpci_disable_device(zdev); + /* + * Due to a z/VM vs LPAR inconsistency in the error + * state the FH may indicate an enabled device but + * disable says the device is already disabled don't + * treat it as an error here. + */ + if (ret == -EINVAL) + ret = 0; + if (ret) + return ret; + } + + ret = zpci_enable_device(zdev); + if (ret) + return ret; + + if (zdev->dma_table) { + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + virt_to_phys(zdev->dma_table), &status); + if (ret) + zpci_disable_device(zdev); + } + return ret; +} + static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -57,7 +90,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); int ret = 0; - u8 status; /* Can't use device_remove_self() here as that would lead us to lock * the pci_rescan_remove_lock while holding the device' kernfs lock. @@ -71,6 +103,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, */ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); + + /* Device needs to be configured and state must not change */ + mutex_lock(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + goto out; + /* device_remove_file() serializes concurrent calls ignoring all but * the first */ @@ -83,35 +121,13 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, */ pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { - pci_stop_and_remove_bus_device(pdev); - if (zdev_enabled(zdev)) { - ret = zpci_disable_device(zdev); - /* - * Due to a z/VM vs LPAR inconsistency in the error - * state the FH may indicate an enabled device but - * disable says the device is already disabled don't - * treat it as an error here. - */ - if (ret == -EINVAL) - ret = 0; - if (ret) - goto out; - } - - ret = zpci_enable_device(zdev); - if (ret) - goto out; - - if (zdev->dma_table) { - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (ret) - zpci_disable_device(zdev); - } + ret = _do_recover(pdev, zdev); } -out: pci_rescan_bus(zdev->zbus->bus); pci_unlock_rescan_remove(); + +out: + mutex_unlock(&zdev->state_lock); if (kn) sysfs_unbreak_active_protection(kn); return ret ? ret : count; diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index d09126e855..5bc3d7a102 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -367,12 +367,9 @@ EXPORT_SYMBOL(iw_cm_disconnect); /* * CM_ID <-- DESTROYING * - * Clean up all resources associated with the connection and release - * the initial reference taken by iw_create_cm_id. - * - * Returns true if and only if the last cm_id_priv reference has been dropped. + * Clean up all resources associated with the connection. */ -static bool destroy_cm_id(struct iw_cm_id *cm_id) +static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; @@ -441,20 +438,22 @@ static bool destroy_cm_id(struct iw_cm_id *cm_id) iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); } - - return iwcm_deref_id(cm_id_priv); } /* - * This function is only called by the application thread and cannot - * be called by the event thread. The function will wait for all - * references to be released on the cm_id and then kfree the cm_id - * object. + * Destroy cm_id. If the cm_id still has other references, wait for all + * references to be released on the cm_id and then release the initial + * reference taken by iw_create_cm_id. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { - if (!destroy_cm_id(cm_id)) + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + destroy_cm_id(cm_id); + if (refcount_read(&cm_id_priv->refcount) > 1) flush_workqueue(iwcm_wq); + iwcm_deref_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -1034,8 +1033,10 @@ static void cm_work_handler(struct work_struct *_work) if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); - if (ret) - WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); + if (ret) { + destroy_cm_id(&cm_id_priv->id); + WARN_ON_ONCE(iwcm_deref_id(cm_id_priv)); + } } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index e8031f1a9b..2f5a850148 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -817,10 +817,11 @@ int i40e_pf_reset(struct i40e_hw *hw) void i40e_clear_hw(struct i40e_hw *hw) { u32 num_queues, base_queue; - u32 num_pf_int; - u32 num_vf_int; + s32 num_pf_int; + s32 num_vf_int; u32 num_vfs; - u32 i, j; + s32 i; + u32 j; u32 val; u32 eol = 0x7ff; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb..eb2e082811 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -5003,7 +5003,7 @@ int i40e_get_vf_stats(struct net_device *netdev, int vf_id, vf_stats->broadcast = stats->rx_broadcast; vf_stats->multicast = stats->rx_multicast; vf_stats->rx_dropped = stats->rx_discards + stats->rx_discards_other; - vf_stats->tx_dropped = stats->tx_discards; + vf_stats->tx_dropped = stats->tx_errors; return 0; } diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 4dd43ac5a3..48b899834d 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -51,32 +51,6 @@ static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, return 0; } -static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, - const struct gdma_resp_hdr *resp_msg) -{ - struct hwc_caller_ctx *ctx; - int err; - - if (!test_bit(resp_msg->response.hwc_msg_id, - hwc->inflight_msg_res.map)) { - dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", - resp_msg->response.hwc_msg_id); - return; - } - - ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; - err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); - if (err) - goto out; - - ctx->status_code = resp_msg->status; - - memcpy(ctx->output_buf, resp_msg, resp_len); -out: - ctx->error = err; - complete(&ctx->comp_event); -} - static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, struct hwc_work_request *req) { @@ -100,6 +74,40 @@ static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, return err; } +static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, + struct hwc_work_request *rx_req) +{ + const struct gdma_resp_hdr *resp_msg = rx_req->buf_va; + struct hwc_caller_ctx *ctx; + int err; + + if (!test_bit(resp_msg->response.hwc_msg_id, + hwc->inflight_msg_res.map)) { + dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", + resp_msg->response.hwc_msg_id); + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); + return; + } + + ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; + err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); + if (err) + goto out; + + ctx->status_code = resp_msg->status; + + memcpy(ctx->output_buf, resp_msg, resp_len); +out: + ctx->error = err; + + /* Must post rx wqe before complete(), otherwise the next rx may + * hit no_wqe error. + */ + mana_hwc_post_rx_wqe(hwc->rxq, rx_req); + + complete(&ctx->comp_event); +} + static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, struct gdma_event *event) { @@ -234,14 +242,12 @@ static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, return; } - mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req); - /* Do no longer use 'resp', because the buffer is posted to the HW - * in the below mana_hwc_post_rx_wqe(). + /* Can no longer use 'resp', because the buffer is posted to the HW + * in mana_hwc_handle_resp() above. */ resp = NULL; - - mana_hwc_post_rx_wqe(hwc_rxq, rx_req); } static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index a89b7de72d..970a57c64d 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -26,58 +26,78 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) hotplug_slot); int rc; - if (zdev->state != ZPCI_FN_STATE_STANDBY) - return -EIO; + mutex_lock(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_STANDBY) { + rc = -EIO; + goto out; + } rc = sclp_pci_configure(zdev->fid); zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc); if (rc) - return rc; + goto out; zdev->state = ZPCI_FN_STATE_CONFIGURED; - return zpci_scan_configured_device(zdev, zdev->fh); + rc = zpci_scan_configured_device(zdev, zdev->fh); +out: + mutex_unlock(&zdev->state_lock); + return rc; } static int disable_slot(struct hotplug_slot *hotplug_slot) { struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); - struct pci_dev *pdev; + struct pci_dev *pdev = NULL; + int rc; - if (zdev->state != ZPCI_FN_STATE_CONFIGURED) - return -EIO; + mutex_lock(&zdev->state_lock); + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) { + rc = -EIO; + goto out; + } pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); - return -EBUSY; + rc = -EBUSY; + goto out; } - pci_dev_put(pdev); - return zpci_deconfigure_device(zdev); + rc = zpci_deconfigure_device(zdev); +out: + if (pdev) + pci_dev_put(pdev); + mutex_unlock(&zdev->state_lock); + return rc; } static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) { struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); + int rc = -EIO; - if (zdev->state != ZPCI_FN_STATE_CONFIGURED) - return -EIO; /* - * We can't take the zdev->lock as reset_slot may be called during - * probing and/or device removal which already happens under the - * zdev->lock. Instead the user should use the higher level - * pci_reset_function() or pci_bus_reset() which hold the PCI device - * lock preventing concurrent removal. If not using these functions - * holding the PCI device lock is required. + * If we can't get the zdev->state_lock the device state is + * currently undergoing a transition and we bail out - just + * the same as if the device's state is not configured at all. */ + if (!mutex_trylock(&zdev->state_lock)) + return rc; - /* As long as the function is configured we can reset */ - if (probe) - return 0; + /* We can reset only if the function is configured */ + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + goto out; - return zpci_hot_reset_device(zdev); + if (probe) { + rc = 0; + goto out; + } + + rc = zpci_hot_reset_device(zdev); +out: + mutex_unlock(&zdev->state_lock); + return rc; } static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c4f0c55459..bbc1a40b2d 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -974,13 +974,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server) msleep(125); if (cifs_rdma_enabled(server)) smbd_destroy(server); - if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; - - /* Release netns reference for the socket. */ - put_net(cifs_net_ns(server)); } if (!list_empty(&server->pending_mid_q)) { @@ -1028,7 +1024,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server) */ } - /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); kfree(server->leaf_fullpath); kfree(server->hostname); @@ -1674,8 +1669,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; - - /* Grab netns reference for this server. */ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId); @@ -1804,7 +1797,6 @@ smbd_connected: out_err_crypto_release: cifs_crypto_secmech_release(tcp_ses); - /* Release netns reference for this server. */ put_net(cifs_net_ns(tcp_ses)); out_err: @@ -1813,10 +1805,8 @@ out_err: cifs_put_tcp_session(tcp_ses->primary_server, false); kfree(tcp_ses->hostname); kfree(tcp_ses->leaf_fullpath); - if (tcp_ses->ssocket) { + if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); - put_net(cifs_net_ns(tcp_ses)); - } kfree(tcp_ses); } return ERR_PTR(rc); @@ -3107,8 +3097,8 @@ generic_ip_connect(struct TCP_Server_Info *server) socket = server->ssocket; } else { struct net *net = cifs_net_ns(server); + struct sock *sk; - rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket); rc = __sock_create(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket, 1); if (rc < 0) { @@ -3116,13 +3106,11 @@ generic_ip_connect(struct TCP_Server_Info *server) return rc; } - /* - * Grab netns reference for the socket. - * - * It'll be released here, on error, or in clean_demultiplex_info() upon server - * teardown. - */ - get_net(net); + sk = server->ssocket->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); /* BB other socket options to set KEEPALIVE, NODELAY? */ cifs_dbg(FYI, "Socket created\n"); @@ -3136,10 +3124,8 @@ generic_ip_connect(struct TCP_Server_Info *server) } rc = bind_socket(server); - if (rc < 0) { - put_net(cifs_net_ns(server)); + if (rc < 0) return rc; - } /* * Eventually check for other socket options to change from @@ -3176,7 +3162,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc); - put_net(cifs_net_ns(server)); sock_release(socket); server->ssocket = NULL; return rc; @@ -3185,9 +3170,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); - if (rc < 0) - put_net(cifs_net_ns(server)); - return rc; } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b158..c9098a95fc 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -91,11 +91,15 @@ static bool watchdog_check_timestamp(void) __this_cpu_write(last_timestamp, now); return true; } -#else -static inline bool watchdog_check_timestamp(void) + +static void watchdog_init_timestamp(void) { - return true; + __this_cpu_write(nmi_rearmed, 0); + __this_cpu_write(last_timestamp, ktime_get_mono_fast_ns()); } +#else +static inline bool watchdog_check_timestamp(void) { return true; } +static inline void watchdog_init_timestamp(void) { } #endif static struct perf_event_attr wd_hw_attr = { @@ -196,6 +200,7 @@ void hardlockup_detector_perf_enable(void) if (!atomic_fetch_inc(&watchdog_cpus)) pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); + watchdog_init_timestamp(); perf_event_enable(this_cpu_read(watchdog_ev)); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a1d27bc039..a3b1c212ac 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -334,17 +334,22 @@ out: return q; } -static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, + struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; - if (cops == NULL) - return NULL; + if (cops == NULL) { + NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); + return ERR_PTR(-EOPNOTSUPP); + } cl = cops->find(p, classid); - if (cl == 0) - return NULL; + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return ERR_PTR(-ENOENT); + } return cops->leaf(p, cl); } @@ -779,15 +784,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { - bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; - if (n == 0 && len == 0) - return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { @@ -796,17 +798,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) if (sch->flags & TCQ_F_NOPARENT) break; - /* Notify parent qdisc only if child qdisc becomes empty. - * - * If child was empty even before update then backlog - * counter is screwed and we skip notification because - * parent class is already passive. - * - * If the original child was offloaded then it is allowed - * to be seem as empty, so the parent is notified anyway. - */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n && - !qdisc_is_offloaded); + /* Notify parent qdisc only if child qdisc becomes empty. */ + notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -815,6 +808,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { + /* Note that qlen_notify must be idempotent as it may get called + * multiple times. + */ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } @@ -1535,7 +1531,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } @@ -1546,6 +1542,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } + if (IS_ERR(q)) + return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); @@ -1639,7 +1637,9 @@ replay: NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); + if (IS_ERR(q)) + return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 3e8d4fe4d9..e1f6e7618d 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -65,10 +65,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) &q->stats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->stats.drop_count && sch->q.qlen) { + if (q->stats.drop_count) { qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; q->stats.drop_len = 0; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index c69b999fae..e0a81d313a 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -105,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOBUFS; gnet_stats_basic_sync_init(&cl->bstats); + INIT_LIST_HEAD(&cl->alist); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -229,7 +230,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - list_del(&cl->alist); + list_del_init(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -390,7 +391,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) if (unlikely(skb == NULL)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); @@ -431,7 +432,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); qdisc_reset(cl->qdisc); } } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index cc9baa719c..1ca098bf64 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -649,6 +649,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); + for (i = nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + list_del_init(&q->classes[i].alist); + qdisc_purge_queue(q->classes[i].qdisc); + } + WRITE_ONCE(q->nbands, nbands); for (i = nstrict; i < q->nstrict; i++) { if (q->classes[i].qdisc->q.qlen) { @@ -656,11 +662,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } - for (i = q->nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del_init(&q->classes[i].alist); - qdisc_tree_flush_backlog(q->classes[i].qdisc); - } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4f908c11ba..778f6e5966 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -314,10 +314,8 @@ begin: } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->cstats.drop_count && sch->q.qlen) { + + if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 8b7252b3cb..5a7745170e 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1648,10 +1648,16 @@ hfsc_dequeue(struct Qdisc *sch) if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); - if (realtime) - update_ed(cl, next_len); - else - update_d(cl, next_len); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ + if (cl->qdisc->q.qlen != 0) { + if (realtime) + update_ed(cl, next_len); + else + update_d(cl, next_len); + } } else { /* the class becomes passive */ eltree_remove(cl); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index ff3de37874..b2494d24a5 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1738,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1947,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d584c0c258..d1e6f9fcc0 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -347,7 +347,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) struct qfq_aggregate *agg = cl->agg; - list_del(&cl->alist); /* remove from RR queue of the aggregate */ + list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } @@ -477,6 +477,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; + INIT_LIST_HEAD(&cl->alist); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); @@ -985,7 +986,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ - list_del(&cl->alist); + list_del_init(&cl->alist); else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); @@ -1418,6 +1419,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; + if (list_empty(&cl->alist)) + return; qfq_deactivate_class(q, cl); } diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d91acdd528..6e06babe9c 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index eeee724b28..77b8564de7 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -406,6 +406,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -463,6 +465,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -478,12 +482,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -498,8 +505,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -518,6 +533,9 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 02ce60d84a..8b9026e209 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -834,7 +834,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } - if (p.collect_md) { + if (p.collect_md || xi->p.collect_md) { NL_SET_ERR_MSG(extack, "collect_md can't be changed"); return -EINVAL; } @@ -845,11 +845,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], } else { if (xi->dev != dev) return -EEXIST; - if (xi->p.collect_md) { - NL_SET_ERR_MSG(extack, - "device can't be changed to collect_md"); - return -EINVAL; - } } return xfrmi_update(xi, &p); diff --git a/redhat/kernel.changelog-9.6 b/redhat/kernel.changelog-9.6 index 3faa1d52ce..ab6a19d5fe 100644 --- a/redhat/kernel.changelog-9.6 +++ b/redhat/kernel.changelog-9.6 @@ -1,3 +1,44 @@ +* Sat Aug 23 2025 CKI KWF Bot [5.14.0-570.39.1.el9_6] +- xfrm: interface: fix use-after-free after changing collect_md xfrm interface (CKI Backport Bot) [RHEL-109529] {CVE-2025-38500} +- Merge: net: mana: Fix race of mana_hwc_post_rx_wqe and new hwc response [rhel-9.6.z] (Maxim Levitsky) [RHEL-58904] +- s390/pci: Serialize device addition and removal (Mete Durlu) [RHEL-102036] +- s390/pci: Allow re-add of a reserved but not yet removed device (Mete Durlu) [RHEL-102036] +- s390/pci: Prevent self deletion in disable_slot() (Mete Durlu) [RHEL-102036] +- s390/pci: Remove redundant bus removal and disable from zpci_release_device() (Mete Durlu) [RHEL-102036] +- s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs (Thomas Huth) [RHEL-102036] {CVE-2025-37946} +- s390/pci: Fix missing check for zpci_create_device() error return (Mete Durlu) [RHEL-102036] {CVE-2025-37974} +- s390/pci: Fix potential double remove of hotplug slot (Thomas Huth) [RHEL-102036] {CVE-2024-56699} +- s390/pci: remove hotplug slot when releasing the device (Thomas Huth) [RHEL-102036] +- s390/pci: introduce lock to synchronize state of zpci_dev's (Thomas Huth) [RHEL-102036] +- s390/pci: rename lock member in struct zpci_dev (Thomas Huth) [RHEL-102036] +- net/sched: Abort __tc_modify_qdisc if parent class does not exist (CKI Backport Bot) [RHEL-107895] +- i40e: report VF tx_dropped with tx_errors instead of tx_discards (Dennis Chen) [RHEL-105137] +- s390/pci: Fix zpci_bus_is_isolated_vf() for non-VFs (Mete Durlu) [RHEL-94815] +- s390/pci: Fix handling of isolated VFs (CKI Backport Bot) [RHEL-85387] +- s390/pci: Pull search for parent PF out of zpci_iov_setup_virtfn() (CKI Backport Bot) [RHEL-85387] +- s390/pci: Fix SR-IOV for PFs initially in standby (CKI Backport Bot) [RHEL-85387] +- tipc: Fix use-after-free in tipc_conn_close(). (CKI Backport Bot) [RHEL-106651] {CVE-2025-38464} +- Revert "smb: client: fix TCP timers deadlock after rmmod" (Paulo Alcantara) [RHEL-106415] {CVE-2025-22077} +- Revert "smb: client: Fix netns refcount imbalance causing leaks and use-after-free" (Paulo Alcantara) [RHEL-106415] +- smb: client: Fix netns refcount imbalance causing leaks and use-after-free (Paulo Alcantara) [RHEL-106415] +- watchdog/perf: properly initialize the turbo mode timestamp and rearm counter (David Arcari) [RHEL-103555] +Resolves: RHEL-102036, RHEL-103555, RHEL-105137, RHEL-106415, RHEL-106651, RHEL-107895, RHEL-109529, RHEL-58904, RHEL-85387, RHEL-94815 + +* Wed Aug 20 2025 CKI KWF Bot [5.14.0-570.38.1.el9_6] +- net/sched: ets: use old 'nbands' while purging unused classes (CKI Backport Bot) [RHEL-107537] {CVE-2025-38350} +- net/sched: Always pass notifications when child class becomes empty (Ivan Vecera) [RHEL-93387] {CVE-2025-38350} +- net_sched: ets: fix a race in ets_qdisc_change() (Ivan Vecera) [RHEL-107537] {CVE-2025-38107} +- sch_htb: make htb_deactivate() idempotent (Ivan Vecera) [RHEL-93387] {CVE-2025-37953} +- codel: remove sch->q.qlen check before qdisc_tree_reduce_backlog() (Ivan Vecera) [RHEL-93387] {CVE-2025-37798} +- sch_qfq: make qfq_qlen_notify() idempotent (Ivan Vecera) [RHEL-93387] {CVE-2025-38350} +- sch_drr: make drr_qlen_notify() idempotent (Ivan Vecera) [RHEL-93387] {CVE-2025-38350} +- sch_htb: make htb_qlen_notify() idempotent (Ivan Vecera) [RHEL-93387] {CVE-2025-37932} +- net_sched: hfsc: Fix a potential UAF in hfsc_dequeue() too (CKI Backport Bot) [RHEL-107630] {CVE-2025-37823} +- i40e: fix MMIO write access to an invalid page in i40e_clear_hw (CKI Backport Bot) [RHEL-106046] {CVE-2025-38200} +- vsock: Fix transport_* TOCTOU (CKI Backport Bot) [RHEL-106003] {CVE-2025-38461} +- RDMA/iwcm: Fix use-after-free of work objects after cm_id destruction (CKI Backport Bot) [RHEL-104273] {CVE-2025-38211} +Resolves: RHEL-104273, RHEL-106003, RHEL-106046, RHEL-107537, RHEL-107630, RHEL-93387 + * Sat Aug 16 2025 CKI KWF Bot [5.14.0-570.37.1.el9_6] - ice: fix eswitch code memory leak in reset scenario (CKI Backport Bot) [RHEL-108152] {CVE-2025-38417} - ftrace: Clean up hash direct_functions on register failures (Gregory Bell) [RHEL-105151]