diff --git a/COPYING-5.14.0-570.51.1.el9 b/COPYING-5.14.0-570.52.1.el9 similarity index 100% rename from COPYING-5.14.0-570.51.1.el9 rename to COPYING-5.14.0-570.52.1.el9 diff --git a/Makefile.rhelver b/Makefile.rhelver index 11ba8f8396..fdd8a18af9 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 6 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 570.51.1 +RHEL_RELEASE = 570.52.1 # # ZSTREAM diff --git a/crypto/seqiv.c b/crypto/seqiv.c index 86bb33644d..ae7256cfc7 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -23,7 +23,7 @@ static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err) struct aead_request *subreq = aead_request_ctx(req); struct crypto_aead *geniv; - if (err == -EINPROGRESS) + if (err == -EINPROGRESS || err == -EBUSY) return; if (err) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 106112a50a..6fee890cd9 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -45,6 +45,38 @@ static int hid_ignore_special_drivers = 0; module_param_named(ignore_special_drivers, hid_ignore_special_drivers, int, 0600); MODULE_PARM_DESC(ignore_special_drivers, "Ignore any special drivers and handle all devices by generic driver"); +/* + * Convert a signed n-bit integer to signed 32-bit integer. + */ + +static s32 snto32(__u32 value, unsigned int n) +{ + if (!value || !n) + return 0; + + if (n > 32) + n = 32; + + return sign_extend32(value, n - 1); +} + +/* + * Convert a signed 32-bit integer to a signed n-bit integer. + */ + +static u32 s32ton(__s32 value, unsigned int n) +{ + s32 a; + + if (!value || !n) + return 0; + + a = value >> (n - 1); + if (a && a != -1) + return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; + return value & ((1 << n) - 1); +} + /* * Register a new report for a device. */ @@ -425,7 +457,7 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) * both this and the standard encoding. */ raw_value = item_sdata(item); if (!(raw_value & 0xfffffff0)) - parser->global.unit_exponent = hid_snto32(raw_value, 4); + parser->global.unit_exponent = snto32(raw_value, 4); else parser->global.unit_exponent = raw_value; return 0; @@ -1315,46 +1347,6 @@ alloc_err: } EXPORT_SYMBOL_GPL(hid_open_report); -/* - * Convert a signed n-bit integer to signed 32-bit integer. Common - * cases are done through the compiler, the screwed things has to be - * done by hand. - */ - -static s32 snto32(__u32 value, unsigned n) -{ - if (!value || !n) - return 0; - - if (n > 32) - n = 32; - - switch (n) { - case 8: return ((__s8)value); - case 16: return ((__s16)value); - case 32: return ((__s32)value); - } - return value & (1 << (n - 1)) ? value | (~0U << n) : value; -} - -s32 hid_snto32(__u32 value, unsigned n) -{ - return snto32(value, n); -} -EXPORT_SYMBOL_GPL(hid_snto32); - -/* - * Convert a signed 32-bit integer to a signed n-bit integer. - */ - -static u32 s32ton(__s32 value, unsigned n) -{ - s32 a = value >> (n - 1); - if (a && a != -1) - return value < 0 ? 1 << (n - 1) : (1 << (n - 1)) - 1; - return value & ((1 << n) - 1); -} - /* * Extract/implement a data field from/to a little endian report (bit array). * diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 400d70e6db..30ad42aac8 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -3296,13 +3296,13 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size) 120); } - v = hid_snto32(hid_field_extract(hdev, data+3, 0, 12), 12); + v = sign_extend32(hid_field_extract(hdev, data + 3, 0, 12), 11); input_report_rel(hidpp->input, REL_X, v); - v = hid_snto32(hid_field_extract(hdev, data+3, 12, 12), 12); + v = sign_extend32(hid_field_extract(hdev, data + 3, 12, 12), 11); input_report_rel(hidpp->input, REL_Y, v); - v = hid_snto32(data[6], 8); + v = sign_extend32(data[6], 7); if (v != 0) hidpp_scroll_counter_handle_scroll(hidpp->input, &hidpp->vertical_wheel_counter, v); diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 50437272b7..181717fd4f 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -52,6 +52,38 @@ static const struct ib_device_ops mana_ib_dev_ops = { ib_ind_table), }; +static int mana_ib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb); + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct gdma_context *gc = dev->gdma_dev->gdma_context; + struct mana_context *mc = gc->mana.driver_data; + struct net_device *ndev; + + /* Only process events from our parent device */ + if (event_dev != mc->ports[0]) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); + /* + * RDMA core will setup GID based on updated netdev. + * It's not possible to race with the core as rtnl lock is being + * held. + */ + ib_device_set_netdev(&dev->ib_dev, ndev, 1); + + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + static int mana_ib_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -85,10 +117,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues; dev->ib_dev.dev.parent = mdev->gdma_context->dev; - rcu_read_lock(); /* required to get primary netdev */ - ndev = mana_get_primary_netdev_rcu(mc, 0); + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); if (!ndev) { - rcu_read_unlock(); ret = -ENODEV; ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); goto free_ib_device; @@ -96,7 +126,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, ether_addr_copy(mac_addr, ndev->dev_addr); addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1); - rcu_read_unlock(); + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); if (ret) { ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); goto free_ib_device; @@ -110,17 +141,25 @@ static int mana_ib_probe(struct auxiliary_device *adev, } dev->gdma_dev = &mdev->gdma_context->mana_ib; + dev->nb.notifier_call = mana_ib_netdev_event; + ret = register_netdevice_notifier(&dev->nb); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", + ret); + goto deregister_device; + } + ret = mana_ib_gd_query_adapter_caps(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } ret = mana_ib_create_eqs(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } ret = mana_ib_gd_create_rnic_adapter(dev); @@ -149,6 +188,8 @@ destroy_rnic: mana_ib_gd_destroy_rnic_adapter(dev); destroy_eqs: mana_ib_destroy_eqs(dev); +deregister_net_notifier: + unregister_netdevice_notifier(&dev->nb); deregister_device: mana_gd_deregister_device(dev->gdma_dev); free_ib_device: @@ -164,6 +205,7 @@ static void mana_ib_remove(struct auxiliary_device *adev) xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); mana_ib_destroy_eqs(dev); + unregister_netdevice_notifier(&dev->nb); mana_gd_deregister_device(dev->gdma_dev); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index d13abc954d..739f60f3ce 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; - req.alignment = 1; + req.alignment = PAGE_SIZE / MANA_PAGE_SIZE; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; @@ -358,7 +358,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem unsigned int tail = 0; u64 *page_addr_list; void *request_buf; - int err; + int err = 0; gc = mdev_to_gc(dev); hwc = gc->hwc.driver_data; @@ -383,7 +383,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem create_req->length = umem->length; create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz); - create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT; + create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT; create_req->page_count = num_pages_total; ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n", @@ -511,13 +511,13 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) PAGE_SHIFT; prot = pgprot_writecombine(vma->vm_page_prot); - ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, + ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot, NULL); if (ret) ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); else - ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", - pfn, gc->db_page_size, ret); + ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n", + pfn, PAGE_SIZE, ret); return ret; } @@ -634,7 +634,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V3; + req.hdr.resp.msg_version = GDMA_MESSAGE_V4; req.hdr.dev_id = dev->gdma_dev->dev_id; err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), @@ -663,6 +663,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) caps->max_inline_data_size = resp.max_inline_data_size; caps->max_send_sge_count = resp.max_send_sge_count; caps->max_recv_sge_count = resp.max_recv_sge_count; + caps->feature_flags = resp.feature_flags; return 0; } @@ -762,6 +763,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev) req.hdr.dev_id = gc->mana_ib.dev_id; req.notify_eq_id = mdev->fatal_err_eq->id; + if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT) + req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err); diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index b53a5b4de9..d10175eaa5 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -48,6 +48,7 @@ struct mana_ib_adapter_caps { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; struct mana_ib_queue { @@ -64,6 +65,8 @@ struct mana_ib_dev { struct gdma_queue **eqs; struct xarray qp_table_wq; struct mana_ib_adapter_caps adapter_caps; + netdevice_tracker dev_tracker; + struct notifier_block nb; }; struct mana_ib_wq { @@ -156,6 +159,10 @@ struct mana_ib_query_adapter_caps_req { struct gdma_req_hdr hdr; }; /*HW Data */ +enum mana_ib_adapter_features { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4), +}; + struct mana_ib_query_adapter_caps_resp { struct gdma_resp_hdr hdr; u32 max_sq_id; @@ -176,8 +183,13 @@ struct mana_ib_query_adapter_caps_resp { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; /* HW Data */ +enum mana_ib_adapter_features_request { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1), +}; /*HW Data */ + struct mana_rnic_create_adapter_req { struct gdma_req_hdr hdr; u32 notify_eq_id; diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 73d67c853b..48fef98931 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -561,7 +561,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, req.ah_attr.dest_port = ROCE_V2_UDP_DPORT; req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, ibqp->qp_num, attr->dest_qp_num); - req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class; + req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2; req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 8acffe8b14..1479d16cd0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -547,6 +547,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) adapter->rx_pool[i].active = 0; } +static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter) +{ + if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) { + netdev_info(adapter->netdev, + "set max ind descs from %u to safe limit %u\n", + adapter->cur_max_ind_descs, + IBMVNIC_SAFE_IND_DESC); + adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC; + } +} + static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { @@ -633,7 +644,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); /* if send_subcrq_indirect queue is full, flush to VIOS */ - if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + if (ind_bufp->index == adapter->cur_max_ind_descs || i == count - 1) { lpar_rc = send_subcrq_indirect(adapter, handle, @@ -652,6 +663,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); + + /* Detect platform limit H_PARAMETER */ + if (lpar_rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ for (i = ind_bufp->index - 1; i >= 0; --i) { struct ibmvnic_rx_buff *rx_buff; @@ -2174,16 +2193,28 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, rc = send_subcrq_direct(adapter, handle, (u64 *)ind_bufp->indir_arr); - if (rc) + if (rc) { + dev_err_ratelimited(&adapter->vdev->dev, + "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n", + rc, entries, &dma_addr, handle); + /* Detect platform limit H_PARAMETER */ + if (rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); - else + } else { ind_bufp->index = 0; + } return rc; } static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); + u32 cur_max_ind_descs = adapter->cur_max_ind_descs; int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; @@ -2377,7 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.n_crq_elem = num_entries; tx_buff->num_entries = num_entries; /* flush buffer if current entry can not fit */ - if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + if (num_entries + ind_bufp->index > cur_max_ind_descs) { lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_flush_err; @@ -2390,7 +2421,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ind_bufp->index += num_entries; if (__netdev_tx_sent_queue(txq, skb->len, netdev_xmit_more() && - ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + ind_bufp->index < cur_max_ind_descs)) { lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; @@ -3842,7 +3873,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } dma_free_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, scrq->ind_buf.indir_arr, scrq->ind_buf.indir_dma); @@ -3899,7 +3930,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->ind_buf.indir_arr = dma_alloc_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, &scrq->ind_buf.indir_dma, GFP_KERNEL); @@ -6204,6 +6235,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) rc = reset_sub_crq_queues(adapter); } } else { + if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + /* After an LPM, reset the max number of indirect + * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT + * hcall to the default max (e.g POWER8 -> POWER10) + * + * If the new destination platform does not support + * the higher limit max (e.g. POWER10-> POWER8 LPM) + * H_PARAMETER will trigger automatic fallback to the + * safe minimum limit. + */ + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; + } + rc = init_sub_crqs(adapter); } @@ -6355,6 +6399,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->wait_for_reset = false; adapter->last_reset_time = jiffies; + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; rc = register_netdev(netdev); if (rc) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index a145c0d11f..3f7902b3db 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -29,8 +29,9 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 16 -#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_SAFE_IND_DESC 16 +#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 @@ -885,6 +886,7 @@ struct ibmvnic_adapter { dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; u32 priv_flags; + u32 cur_max_ind_descs; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 0df906f2cd..79b25843a2 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -8,6 +8,7 @@ #include #include +#include #include struct dentry *mana_debugfs_root; @@ -32,6 +33,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + gc->phys_db_page_base = gc->bar0_pa + + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); sriov_base_va = gc->bar0_va + sriov_base_off; @@ -64,6 +68,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev) mana_gd_init_vf_regs(pdev); } +/* Suppress logging when we set timeout to zero */ +bool mana_need_log(struct gdma_context *gc, int err) +{ + struct hw_channel_context *hwc; + + if (err != -ETIMEDOUT) + return true; + + if (!gc) + return true; + + hwc = gc->hwc.driver_data; + if (hwc && hwc->hwc_timeout == 0) + return false; + + return true; +} + static int mana_gd_query_max_resources(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); @@ -135,9 +157,10 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) struct gdma_list_devices_resp resp = {}; struct gdma_general_req req = {}; struct gdma_dev_id dev; - u32 i, max_num_devs; + int found_dev = 0; u16 dev_type; int err; + u32 i; mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), sizeof(resp)); @@ -149,12 +172,17 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) return err ? err : -EPROTO; } - max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); - - for (i = 0; i < max_num_devs; i++) { + for (i = 0; i < GDMA_DEV_LIST_SIZE && + found_dev < resp.num_of_devs; i++) { dev = resp.devs[i]; dev_type = dev.type; + /* Skip empty devices */ + if (dev.as_uint32 == 0) + continue; + + found_dev++; + /* HWC is already detected in mana_hwc_create_channel(). */ if (dev_type == GDMA_DEVICE_HWC) continue; @@ -261,8 +289,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { - dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, - resp.hdr.status); + if (mana_need_log(gc, err)) + dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, + resp.hdr.status); return err ? err : -EPROTO; } @@ -345,11 +374,113 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) head, arm_bit); } +#define MANA_SERVICE_PERIOD 10 + +static void mana_serv_fpga(struct pci_dev *pdev) +{ + struct pci_bus *bus, *parent; + + pci_lock_rescan_remove(); + + bus = pdev->bus; + if (!bus) { + dev_err(&pdev->dev, "MANA service: no bus\n"); + goto out; + } + + parent = bus->parent; + if (!parent) { + dev_err(&pdev->dev, "MANA service: no parent bus\n"); + goto out; + } + + pci_stop_and_remove_bus_device(bus->self); + + msleep(MANA_SERVICE_PERIOD * 1000); + + pci_rescan_bus(parent); + +out: + pci_unlock_rescan_remove(); +} + +static void mana_serv_reset(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct hw_channel_context *hwc; + + if (!gc) { + dev_err(&pdev->dev, "MANA service: no GC\n"); + return; + } + + hwc = gc->hwc.driver_data; + if (!hwc) { + dev_err(&pdev->dev, "MANA service: no HWC\n"); + goto out; + } + + /* HWC is not responding in this case, so don't wait */ + hwc->hwc_timeout = 0; + + dev_info(&pdev->dev, "MANA reset cycle start\n"); + + mana_gd_suspend(pdev, PMSG_SUSPEND); + + msleep(MANA_SERVICE_PERIOD * 1000); + + mana_gd_resume(pdev); + + dev_info(&pdev->dev, "MANA reset cycle completed\n"); + +out: + gc->in_service = false; +} + +struct mana_serv_work { + struct work_struct serv_work; + struct pci_dev *pdev; + enum gdma_eqe_type type; +}; + +static void mana_serv_func(struct work_struct *w) +{ + struct mana_serv_work *mns_wk; + struct pci_dev *pdev; + + mns_wk = container_of(w, struct mana_serv_work, serv_work); + pdev = mns_wk->pdev; + + if (!pdev) + goto out; + + switch (mns_wk->type) { + case GDMA_EQE_HWC_FPGA_RECONFIG: + mana_serv_fpga(pdev); + break; + + case GDMA_EQE_HWC_RESET_REQUEST: + mana_serv_reset(pdev); + break; + + default: + dev_err(&pdev->dev, "MANA service: unknown type %d\n", + mns_wk->type); + break; + } + +out: + pci_dev_put(pdev); + kfree(mns_wk); + module_put(THIS_MODULE); +} + static void mana_gd_process_eqe(struct gdma_queue *eq) { u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); struct gdma_context *gc = eq->gdma_dev->gdma_context; struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; + struct mana_serv_work *mns_wk; union gdma_eqe_info eqe_info; enum gdma_eqe_type type; struct gdma_event event; @@ -393,6 +524,35 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) eq->eq.callback(eq->eq.context, eq, &event); break; + case GDMA_EQE_HWC_FPGA_RECONFIG: + case GDMA_EQE_HWC_RESET_REQUEST: + dev_info(gc->dev, "Recv MANA service type:%d\n", type); + + if (gc->in_service) { + dev_info(gc->dev, "Already in service\n"); + break; + } + + if (!try_module_get(THIS_MODULE)) { + dev_info(gc->dev, "Module is unloading\n"); + break; + } + + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC); + if (!mns_wk) { + module_put(THIS_MODULE); + break; + } + + dev_info(gc->dev, "Start MANA service type:%d\n", type); + gc->in_service = true; + mns_wk->pdev = to_pci_dev(gc->dev); + mns_wk->type = type; + pci_dev_get(mns_wk->pdev); + INIT_WORK(&mns_wk->serv_work, mana_serv_func); + schedule_work(&mns_wk->serv_work); + break; + default: break; } @@ -534,7 +694,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { - dev_err(dev, "test_eq failed: %d\n", err); + if (mana_need_log(gc, err)) + dev_err(dev, "test_eq failed: %d\n", err); goto out; } @@ -569,7 +730,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, if (flush_evenets) { err = mana_gd_test_eq(gc, queue); - if (err) + if (err && mana_need_log(gc, err)) dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); } @@ -667,8 +828,11 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } queue->head = 0; queue->tail = 0; @@ -689,6 +853,8 @@ int mana_gd_create_hwc_queue(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -710,8 +876,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { - dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", - err, resp.hdr.status); + if (mana_need_log(gc, err)) + dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", + err, resp.hdr.status); return -EPROTO; } @@ -771,7 +938,13 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, } gmi->dma_region_handle = resp.dma_region_handle; + dev_dbg(gc->dev, "Created DMA region handle 0x%llx\n", + gmi->dma_region_handle); out: + if (err) + dev_dbg(gc->dev, + "Failed to create DMA region of length: %u, page_type: %d, status: 0x%x, err: %d\n", + length, req->gdma_page_type, resp.hdr.status, err); kfree(req); return err; } @@ -794,8 +967,11 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, gdma memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -816,6 +992,8 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -842,8 +1020,11 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, gmi = &queue->mem_info; err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); - if (err) + if (err) { + dev_err(gc->dev, "GDMA queue type: %d, size: %u, memory allocation err: %d\n", + spec->type, spec->queue_size, err); goto free_q; + } err = mana_gd_create_dma_region(gd, gmi); if (err) @@ -863,6 +1044,8 @@ int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, *queue_ptr = queue; return 0; out: + dev_err(gc->dev, "Failed to create queue type %d of size: %u, err: %d\n", + spec->type, spec->queue_size, err); mana_gd_free_memory(gmi); free_q: kfree(queue); @@ -994,8 +1177,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd) err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err || resp.hdr.status) { - dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", - err, resp.hdr.status); + if (mana_need_log(gc, err)) + dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", + err, resp.hdr.status); if (!err) err = -EPROTO; } @@ -1158,8 +1342,11 @@ int mana_gd_post_and_ring(struct gdma_queue *queue, int err; err = mana_gd_post_work_request(queue, wqe_req, wqe_info); - if (err) + if (err) { + dev_err(gc->dev, "Failed to post work req from queue type %d of size %u (err=%d)\n", + queue->type, queue->queue_size, err); return err; + } mana_gd_wq_ring_doorbell(gc, queue); @@ -1436,8 +1623,10 @@ static int mana_gd_setup(struct pci_dev *pdev) mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); err = mana_gd_setup_irqs(pdev); - if (err) + if (err) { + dev_err(gc->dev, "Failed to setup IRQs: %d\n", err); return err; + } err = mana_hwc_create_channel(gc); if (err) @@ -1455,12 +1644,14 @@ static int mana_gd_setup(struct pci_dev *pdev) if (err) goto destroy_hwc; + dev_dbg(&pdev->dev, "mana gdma setup successful\n"); return 0; destroy_hwc: mana_hwc_destroy_channel(gc); remove_irq: mana_gd_remove_irqs(pdev); + dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err); return err; } @@ -1471,6 +1662,7 @@ static void mana_gd_cleanup(struct pci_dev *pdev) mana_hwc_destroy_channel(gc); mana_gd_remove_irqs(pdev); + dev_dbg(&pdev->dev, "mana gdma cleanup successful\n"); } static bool mana_is_pf(unsigned short dev_id) @@ -1489,8 +1681,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); err = pci_enable_device(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, "Failed to enable pci device (err=%d)\n", err); return -ENXIO; + } pci_set_master(pdev); @@ -1499,9 +1693,10 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto disable_dev; err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) + if (err) { + dev_err(&pdev->dev, "DMA set mask failed: %d\n", err); goto release_region; - + } dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; @@ -1579,10 +1774,12 @@ static void mana_gd_remove(struct pci_dev *pdev) pci_release_regions(pdev); pci_disable_device(pdev); + + dev_dbg(&pdev->dev, "mana gdma remove successful\n"); } /* The 'state' parameter is not used. */ -static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) +int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) { struct gdma_context *gc = pci_get_drvdata(pdev); @@ -1597,7 +1794,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) * fail -- if this happens, it's safer to just report an error than try to undo * what has been done. */ -static int mana_gd_resume(struct pci_dev *pdev) +int mana_gd_resume(struct pci_dev *pdev) { struct gdma_context *gc = pci_get_drvdata(pdev); int err; diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 48b899834d..b68e665234 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021, Microsoft Corporation. */ #include +#include #include static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) @@ -439,7 +440,8 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); if (err) { - dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n", + buf_size, err); goto out; } @@ -528,6 +530,9 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, out: if (err) mana_hwc_destroy_wq(hwc, hwc_wq); + + dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n", + queue_size, q_type, err); return err; } @@ -855,7 +860,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, if (!wait_for_completion_timeout(&ctx->comp_event, (msecs_to_jiffies(hwc->hwc_timeout)))) { - dev_err(hwc->dev, "HWC: Request timed out!\n"); + if (hwc->hwc_timeout != 0) + dev_err(hwc->dev, "HWC: Request timed out!\n"); + err = -ETIMEDOUT; goto out; } @@ -866,8 +873,13 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, } if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { - dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", - ctx->status_code); + if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) { + err = -EOPNOTSUPP; + goto out; + } + if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT) + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); err = -EPROTO; goto out; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c index 23b1521c0d..d30721d451 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -91,7 +91,7 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, goto out; xdp_init_buff(xdp, PAGE_SIZE, &rxq->xdp_rxq); - xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, false); + xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, true); act = bpf_prog_run_xdp(prog, xdp); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ae796e64d2..c59e1b9bea 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -45,16 +45,27 @@ static const struct file_operations mana_dbg_q_fops = { .read = mana_dbg_q_read, }; +static bool mana_en_need_log(struct mana_port_context *apc, int err) +{ + if (apc && apc->ac && apc->ac->gdma_dev && + apc->ac->gdma_dev->gdma_context) + return mana_need_log(apc->ac->gdma_dev->gdma_context, err); + else + return true; +} + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); int err; - err = mana_alloc_queues(ndev); - if (err) + + if (err) { + netdev_err(ndev, "%s failed to allocate queues: %d\n", __func__, err); return err; + } apc->port_is_up = true; @@ -63,7 +74,7 @@ static int mana_open(struct net_device *ndev) netif_carrier_on(ndev); netif_tx_wake_all_queues(ndev); - + netdev_dbg(ndev, "%s successful\n", __func__); return 0; } @@ -175,6 +186,9 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return 0; frag_err: + if (net_ratelimit()) + netdev_err(apc->ndev, "Failed to map skb of size %u to DMA\n", + skb->len); for (i = sg_i - 1; i >= hsg; i--) dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], DMA_TO_DEVICE); @@ -244,10 +258,10 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct netdev_queue *net_txq; struct mana_stats_tx *tx_stats; struct gdma_queue *gdma_sq; + int err, len, num_gso_seg; unsigned int csum_type; struct mana_txq *txq; struct mana_cq *cq; - int err, len; if (unlikely(!apc->port_is_up)) goto tx_drop; @@ -255,6 +269,9 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (skb_cow_head(skb, MANA_HEADROOM)) goto tx_drop_count; + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto tx_drop_count; + txq = &apc->tx_qp[txq_idx].txq; gdma_sq = txq->gdma_sq; cq = &apc->tx_qp[txq_idx].tx_cq; @@ -397,6 +414,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_queue_tail(&txq->pending_skbs, skb); len = skb->len; + num_gso_seg = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; net_txq = netdev_get_tx_queue(ndev, txq_idx); err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, @@ -421,10 +439,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* skb may be freed after mana_gd_post_work_request. Do not use it. */ skb = NULL; + /* Populated the packet and bytes counters based on post GSO packet + * calculations + */ tx_stats = &txq->stats; u64_stats_update_begin(&tx_stats->syncp); - tx_stats->packets++; - tx_stats->bytes += len; + tx_stats->packets += num_gso_seg; + tx_stats->bytes += len + ((num_gso_seg - 1) * gso_hs); u64_stats_update_end(&tx_stats->syncp); tx_busy: @@ -614,7 +635,11 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } @@ -647,30 +672,16 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu mpc->rxbpre_total = 0; for (i = 0; i < num_rxb; i++) { - if (mpc->rxbpre_alloc_size > PAGE_SIZE) { - va = netdev_alloc_frag(mpc->rxbpre_alloc_size); - if (!va) - goto error; + page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size)); + if (!page) + goto error; - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < - get_order(mpc->rxbpre_alloc_size)) { - put_page(page); - goto error; - } - } else { - page = dev_alloc_page(); - if (!page) - goto error; - - va = page_to_virt(page); - } + va = page_to_virt(page); da = dma_map_single(dev, va + mpc->rxbpre_headroom, mpc->rxbpre_datasize, DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { - put_page(virt_to_head_page(va)); + put_page(page); goto error; } @@ -682,6 +693,7 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu return 0; error: + netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d queues\n", num_queues); mana_pre_dealloc_rxbufs(mpc); return -ENOMEM; } @@ -773,8 +785,13 @@ static int mana_send_request(struct mana_context *ac, void *in_buf, err = mana_gd_send_request(gc, in_len, in_buf, out_len, out_buf); if (err || resp->status) { - dev_err(dev, "Failed to send mana message: %d, 0x%x\n", - err, resp->status); + if (err == -EOPNOTSUPP) + return err; + + if (req->req.msg_type != MANA_QUERY_PHY_STAT && + mana_need_log(gc, err)) + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); return err ? err : -EPROTO; } @@ -849,8 +866,10 @@ static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) err = mana_send_request(apc->ac, &req, sizeof(req), &resp, sizeof(resp)); if (err) { - netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", - err); + if (mana_en_need_log(apc, err)) + netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", + err); + return; } @@ -905,8 +924,10 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) err = mana_send_request(apc->ac, &req, sizeof(req), &resp, sizeof(resp)); if (err) { - netdev_err(apc->ndev, "Failed to unregister filter: %d\n", - err); + if (mana_en_need_log(apc, err)) + netdev_err(apc->ndev, "Failed to unregister filter: %d\n", + err); + return; } @@ -920,7 +941,7 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc) static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, u32 proto_minor_ver, u32 proto_micro_ver, - u16 *max_num_vports) + u16 *max_num_vports, u8 *bm_hostmode) { struct gdma_context *gc = ac->gdma_dev->gdma_context; struct mana_query_device_cfg_resp resp = {}; @@ -931,7 +952,7 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; req.proto_major_ver = proto_major_ver; req.proto_minor_ver = proto_minor_ver; @@ -955,11 +976,16 @@ static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, *max_num_vports = resp.max_num_vports; - if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V2) gc->adapter_mtu = resp.adapter_mtu; else gc->adapter_mtu = ETH_FRAME_LEN; + if (resp.hdr.response.msg_version >= GDMA_MESSAGE_V3) + *bm_hostmode = resp.bm_hostmode; + else + *bm_hostmode = 0; + debugfs_create_u16("adapter-MTU", 0400, gc->mana_pci_debugfs, &gc->adapter_mtu); return 0; @@ -1131,7 +1157,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, err = mana_send_request(apc->ac, req, req_buf_size, &resp, sizeof(resp)); if (err) { - netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + if (mana_en_need_log(apc, err)) + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; } @@ -1226,7 +1254,9 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, err = mana_send_request(apc->ac, &req, sizeof(req), &resp, sizeof(resp)); if (err) { - netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + if (mana_en_need_log(apc, err)) + netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + return; } @@ -1299,8 +1329,10 @@ static int mana_create_eq(struct mana_context *ac) for (i = 0; i < gc->max_num_queues; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); - if (err) + if (err) { + dev_err(gc->dev, "Failed to create EQ %d : %d\n", i, err); goto out; + } mana_create_eq_debugfs(ac, i); } @@ -1542,8 +1574,12 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, return NULL; if (xdp->data_hard_start) { + u32 metasize = xdp->data - xdp->data_meta; + skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); return skb; } @@ -1655,7 +1691,7 @@ drop: } static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, - dma_addr_t *da, bool *from_pool, bool is_napi) + dma_addr_t *da, bool *from_pool) { struct page *page; void *va; @@ -1666,21 +1702,6 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, if (rxq->xdp_save_va) { va = rxq->xdp_save_va; rxq->xdp_save_va = NULL; - } else if (rxq->alloc_size > PAGE_SIZE) { - if (is_napi) - va = napi_alloc_frag(rxq->alloc_size); - else - va = netdev_alloc_frag(rxq->alloc_size); - - if (!va) - return NULL; - - page = virt_to_head_page(va); - /* Check if the frag falls back to single page */ - if (compound_order(page) < get_order(rxq->alloc_size)) { - put_page(page); - return NULL; - } } else { page = page_pool_dev_alloc_pages(rxq->page_pool); if (!page) @@ -1713,7 +1734,7 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, dma_addr_t da; void *va; - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; @@ -1829,7 +1850,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1840,16 +1860,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; + cq->work_done_since_doorbell += w; - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } @@ -2070,6 +2097,8 @@ static int mana_create_txq(struct mana_port_context *apc, return 0; out: + netdev_err(net, "Failed to create %d TX queues, %d\n", + apc->num_queues, err); mana_destroy_txq(apc); return err; } @@ -2147,7 +2176,7 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, if (mpc->rxbufs_pre) va = mana_get_rxbuf_pre(rxq, &da); else - va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false); + va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return -ENOMEM; @@ -2233,6 +2262,7 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; + pprm.order = get_order(rxq->alloc_size); rxq->page_pool = page_pool_create(&pprm); @@ -2406,6 +2436,7 @@ static int mana_add_rx_queues(struct mana_port_context *apc, rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); if (!rxq) { err = -ENOMEM; + netdev_err(ndev, "Failed to create rxq %d : %d\n", i, err); goto out; } @@ -2439,7 +2470,7 @@ static void mana_destroy_vport(struct mana_port_context *apc) mana_destroy_txq(apc); mana_uncfg_vport(apc); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_hw_vport(apc); } @@ -2451,7 +2482,7 @@ static int mana_create_vport(struct mana_port_context *apc, apc->default_rxobj = INVALID_MANA_HANDLE; - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_hw_vport(apc); if (err) return err; @@ -2604,6 +2635,88 @@ void mana_query_gf_stats(struct mana_port_context *apc) apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma; } +void mana_query_phy_stats(struct mana_port_context *apc) +{ + struct mana_query_phy_stat_resp resp = {}; + struct mana_query_phy_stat_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT, + sizeof(req), sizeof(resp)); + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) + return; + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, + "Failed to query PHY stats: %d, resp:0x%x\n", + err, resp.hdr.status); + return; + } + + /* Aggregate drop counters */ + apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy; + apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy; + apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy; + apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy; + apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy; + apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy; + apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy; + apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy; + apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy; + apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy; + apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy; + apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy; + apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy; + apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy; + apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy; + apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy; + apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy; + + /* Per TC byte Counters */ + apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy; + apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy; + apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy; + apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy; + apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy; + apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy; + apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy; + apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy; + apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy; + apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy; + apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy; + apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy; + apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy; + apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy; + apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy; + apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy; + + /* Per TC pause Counters */ + apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy; + apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy; + apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy; + apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy; + apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy; + apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy; + apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy; + apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy; + apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy; + apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy; + apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy; + apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy; + apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy; + apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy; + apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy; + apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy; +} + static int mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); @@ -2652,12 +2765,18 @@ int mana_alloc_queues(struct net_device *ndev) int err; err = mana_create_vport(apc, ndev); - if (err) + if (err) { + netdev_err(ndev, "Failed to create vPort %u : %d\n", apc->port_idx, err); return err; + } err = netif_set_real_num_tx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_tx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } err = mana_add_rx_queues(apc, ndev); if (err) @@ -2666,16 +2785,22 @@ int mana_alloc_queues(struct net_device *ndev) apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; err = netif_set_real_num_rx_queues(ndev, apc->num_queues); - if (err) + if (err) { + netdev_err(ndev, + "netif_set_real_num_rx_queues () failed for ndev with num_queues %u : %d\n", + apc->num_queues, err); goto destroy_vport; + } mana_rss_table_init(apc); err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); - if (err) + if (err) { + netdev_err(ndev, "Failed to configure RSS table: %d\n", err); goto destroy_vport; + } - if (gd->gdma_context->is_pf) { + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) { err = mana_pf_register_filter(apc); if (err) goto destroy_vport; @@ -2737,7 +2862,7 @@ static int mana_dealloc_queues(struct net_device *ndev) mana_chn_setxdp(apc, NULL); - if (gd->gdma_context->is_pf) + if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); /* No packet can be transmitted now since apc->port_is_up is false. @@ -2786,11 +2911,10 @@ static int mana_dealloc_queues(struct net_device *ndev) apc->rss_state = TRI_STATE_FALSE; err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); - if (err) { + if (err && mana_en_need_log(apc, err)) netdev_err(ndev, "Failed to disable vPort: %d\n", err); - return err; - } + /* Even in err case, still need to cleanup the vPort */ mana_destroy_vport(apc); return 0; @@ -2814,8 +2938,10 @@ int mana_detach(struct net_device *ndev, bool from_close) if (apc->port_st_save) { err = mana_dealloc_queues(ndev); - if (err) + if (err) { + netdev_err(ndev, "%s failed to deallocate queues: %d\n", __func__, err); return err; + } } if (!from_close) { @@ -2864,6 +2990,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->dev_port = port_idx; SET_NETDEV_DEV(ndev, gc->dev); + netif_set_tso_max_size(ndev, GSO_MAX_SIZE); + netif_carrier_off(ndev); netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); @@ -2959,6 +3087,8 @@ static int add_adev(struct gdma_dev *gd) goto add_fail; gd->adev = adev; + dev_dbg(gd->gdma_context->dev, + "Auxiliary device added successfully\n"); return 0; add_fail: @@ -2978,6 +3108,7 @@ int mana_probe(struct gdma_dev *gd, bool resuming) struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; struct device *dev = gc->dev; + u8 bm_hostmode = 0; u16 num_ports = 0; int err; int i; @@ -3000,13 +3131,17 @@ int mana_probe(struct gdma_dev *gd, bool resuming) } err = mana_create_eq(ac); + if (err) { + dev_err(dev, "Failed to create EQs: %d\n", err); + goto out; + } + + err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, + MANA_MICRO_VERSION, &num_ports, &bm_hostmode); if (err) goto out; - err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, - MANA_MICRO_VERSION, &num_ports); - if (err) - goto out; + ac->bm_hostmode = bm_hostmode; if (!resuming) { ac->num_ports = num_ports; @@ -3057,8 +3192,14 @@ int mana_probe(struct gdma_dev *gd, bool resuming) err = add_adev(gd); out: - if (err) + if (err) { mana_remove(gd, false); + } else { + dev_dbg(dev, "gd=%p, id=%u, num_ports=%d, type=%u, instance=%u\n", + gd, gd->dev_id.as_uint32, ac->num_ports, + gd->dev_id.type, gd->dev_id.instance); + dev_dbg(dev, "%s succeeded\n", __func__); + } return err; } @@ -3120,23 +3261,30 @@ out: gd->driver_data = NULL; gd->gdma_context = NULL; kfree(ac); + dev_dbg(dev, "%s succeeded\n", __func__); } -struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index) +struct net_device *mana_get_primary_netdev(struct mana_context *ac, + u32 port_index, + netdevice_tracker *tracker) { struct net_device *ndev; - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "Taking primary netdev without holding the RCU read lock"); if (port_index >= ac->num_ports) return NULL; - /* When mana is used in netvsc, the upper netdevice should be returned. */ - if (ac->ports[port_index]->flags & IFF_SLAVE) - ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); - else + rcu_read_lock(); + + /* If mana is used in netvsc, the upper netdevice should be returned. */ + ndev = netdev_master_upper_dev_get_rcu(ac->ports[port_index]); + + /* If there is no upper device, use the parent Ethernet device */ + if (!ndev) ndev = ac->ports[port_index]; + netdev_hold(ndev, tracker, GFP_ATOMIC); + rcu_read_unlock(); + return ndev; } -EXPORT_SYMBOL_NS(mana_get_primary_netdev_rcu, NET_MANA); +EXPORT_SYMBOL_NS(mana_get_primary_netdev, NET_MANA); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index c419626073..4fb3a04994 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -7,10 +7,12 @@ #include -static const struct { +struct mana_stats_desc { char name[ETH_GSTRING_LEN]; u16 offset; -} mana_eth_stats[] = { +}; + +static const struct mana_stats_desc mana_eth_stats[] = { {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, {"hc_rx_discards_no_wqe", offsetof(struct mana_ethtool_stats, @@ -75,6 +77,59 @@ static const struct { rx_cqe_unknown_type)}, }; +static const struct mana_stats_desc mana_phy_stats[] = { + { "hc_rx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_drop_phy) }, + { "hc_tx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_drop_phy) }, + { "hc_tc0_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc0_phy) }, + { "hc_tc0_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc0_phy) }, + { "hc_tc0_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc0_phy) }, + { "hc_tc0_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc0_phy) }, + { "hc_tc1_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc1_phy) }, + { "hc_tc1_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc1_phy) }, + { "hc_tc1_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc1_phy) }, + { "hc_tc1_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc1_phy) }, + { "hc_tc2_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc2_phy) }, + { "hc_tc2_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc2_phy) }, + { "hc_tc2_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc2_phy) }, + { "hc_tc2_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc2_phy) }, + { "hc_tc3_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc3_phy) }, + { "hc_tc3_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc3_phy) }, + { "hc_tc3_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc3_phy) }, + { "hc_tc3_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc3_phy) }, + { "hc_tc4_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc4_phy) }, + { "hc_tc4_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc4_phy) }, + { "hc_tc4_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc4_phy) }, + { "hc_tc4_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc4_phy) }, + { "hc_tc5_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc5_phy) }, + { "hc_tc5_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc5_phy) }, + { "hc_tc5_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc5_phy) }, + { "hc_tc5_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc5_phy) }, + { "hc_tc6_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc6_phy) }, + { "hc_tc6_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc6_phy) }, + { "hc_tc6_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc6_phy) }, + { "hc_tc6_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc6_phy) }, + { "hc_tc7_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc7_phy) }, + { "hc_tc7_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc7_phy) }, + { "hc_tc7_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc7_phy) }, + { "hc_tc7_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc7_phy) }, + { "hc_tc0_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc0_phy) }, + { "hc_tc0_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc0_phy) }, + { "hc_tc1_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc1_phy) }, + { "hc_tc1_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc1_phy) }, + { "hc_tc2_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc2_phy) }, + { "hc_tc2_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc2_phy) }, + { "hc_tc3_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc3_phy) }, + { "hc_tc3_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc3_phy) }, + { "hc_tc4_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc4_phy) }, + { "hc_tc4_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc4_phy) }, + { "hc_tc5_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc5_phy) }, + { "hc_tc5_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc5_phy) }, + { "hc_tc6_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc6_phy) }, + { "hc_tc6_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc6_phy) }, + { "hc_tc7_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc7_phy) }, + { "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) }, +}; + static int mana_get_sset_count(struct net_device *ndev, int stringset) { struct mana_port_context *apc = netdev_priv(ndev); @@ -83,8 +138,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * - (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); + return ARRAY_SIZE(mana_eth_stats) + ARRAY_SIZE(mana_phy_stats) + + num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -99,6 +154,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) ethtool_puts(&data, mana_eth_stats[i].name); + for (i = 0; i < ARRAY_SIZE(mana_phy_stats); i++) + ethtool_puts(&data, mana_phy_stats[i].name); + for (i = 0; i < num_queues; i++) { ethtool_sprintf(&data, "rx_%d_packets", i); ethtool_sprintf(&data, "rx_%d_bytes", i); @@ -128,6 +186,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; void *eth_stats = &apc->eth_stats; + void *phy_stats = &apc->phy_stats; struct mana_stats_rx *rx_stats; struct mana_stats_tx *tx_stats; unsigned int start; @@ -151,9 +210,18 @@ static void mana_get_ethtool_stats(struct net_device *ndev, /* we call mana function to update stats from GDMA */ mana_query_gf_stats(apc); + /* We call this mana function to get the phy stats from GDMA and includes + * aggregate tx/rx drop counters, Per-TC(Traffic Channel) tx/rx and pause + * counters. + */ + mana_query_phy_stats(apc); + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); + for (q = 0; q < ARRAY_SIZE(mana_phy_stats); q++) + data[i++] = *(u64 *)(phy_stats + mana_phy_stats[q].offset); + for (q = 0; q < num_queues; q++) { rx_stats = &apc->rxqs[q]->stats; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 2e868d3522..fd23530e65 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1049,6 +1049,7 @@ struct net_device_context { struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; struct delayed_work vf_takeover; + struct delayed_work vfns_work; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@ -1063,6 +1064,8 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +void netvsc_vfns_work(struct work_struct *w); + /* Azure hosts don't support non-TCP port numbers in hashing for fragmented * packets. We can use ethtool to change UDP hash level when necessary. */ @@ -1165,6 +1168,8 @@ struct netvsc_device { u32 max_chn; u32 num_chn; + u32 netvsc_gso_max_size; + atomic_t open_chn; struct work_struct subchan_work; wait_queue_head_t subchan_open; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2b6ec979a6..9afb08dbc3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -712,8 +712,13 @@ void netvsc_device_remove(struct hv_device *device) for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ /* only disable enabled NAPI channel */ - if (i < ndev->real_num_rx_queues) + if (i < ndev->real_num_rx_queues) { + netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_TX, + NULL); + netif_queue_set_napi(ndev, i, NETDEV_QUEUE_TYPE_RX, + NULL); napi_disable(&net_device->chan_table[i].napi); + } netif_napi_del(&net_device->chan_table[i].napi); } @@ -1787,6 +1792,10 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netdev_dbg(ndev, "hv_netvsc channel opened successfully\n"); napi_enable(&net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, + &net_device->chan_table[0].napi); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, + &net_device->chan_table[0].napi); /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device, net_device, device_info); @@ -1805,6 +1814,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, close: RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_TX, NULL); + netif_queue_set_napi(ndev, 0, NETDEV_QUEUE_TYPE_RX, NULL); napi_disable(&net_device->chan_table[0].napi); /* Now, we can close the channel safely */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 23180f7b67..86c3bdad0f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2350,8 +2350,11 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) if (!ndev) return NOTIFY_DONE; - /* set slave flag before open to prevent IPv6 addrconf */ + /* Set slave flag and no addrconf flag before open + * to prevent IPv6 addrconf. + */ vf_netdev->flags |= IFF_SLAVE; + vf_netdev->priv_flags |= IFF_NO_ADDRCONF; return NOTIFY_DONE; } @@ -2461,6 +2464,21 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) } else { netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); + + /* In Azure, when accelerated networking in enabled, other NICs + * like MANA, MLX, are configured as a bonded nic with + * Netvsc(failover) NIC. For bonded NICs, the min of the max + * pkt aggregate size of the members is propagated in the stack. + * In order to allow these NICs (MANA/MLX) to use up to + * GSO_MAX_SIZE gso packet size, we need to allow Netvsc NIC to + * also support this in the guest. + * This value is only increased for netvsc NIC when datapath is + * switched over to the VF + */ + if (vf_is_up) + netif_set_tso_max_size(ndev, vf_netdev->tso_max_size); + else + netif_set_tso_max_size(ndev, netvsc_dev->netvsc_gso_max_size); } return NOTIFY_OK; @@ -2547,6 +2565,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); @@ -2689,6 +2708,8 @@ static void netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); + nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev) { cancel_work_sync(&nvdev->subchan_work); @@ -2730,6 +2751,7 @@ static int netvsc_suspend(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev == NULL) { @@ -2823,6 +2845,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev) } } +void netvsc_vfns_work(struct work_struct *w) +{ + struct net_device_context *ndev_ctx = + container_of(w, struct net_device_context, vfns_work.work); + struct net_device *ndev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vfns_work, 1); + return; + } + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (!ndev) + goto out; + + netvsc_event_set_vf_ns(ndev); + +out: + rtnl_unlock(); +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. The synthetic interface is presented first @@ -2833,10 +2876,12 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net_device_context *ndev_ctx; int ret = 0; if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { - netvsc_event_set_vf_ns(event_dev); + ndev_ctx = netdev_priv(event_dev); + schedule_delayed_work(&ndev_ctx->vfns_work, 0); return NOTIFY_DONE; } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index ecc2128ca9..82747dfacd 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1269,10 +1269,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); - if (ret == 0) + if (ret == 0) { napi_enable(&nvchan->napi); - else + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_RX, + &nvchan->napi); + netif_queue_set_napi(ndev, chn_index, NETDEV_QUEUE_TYPE_TX, + &nvchan->napi); + } else { netdev_notice(ndev, "sub channel open failed: %d\n", ret); + } if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn) wake_up(&nvscdev->subchan_open); @@ -1351,9 +1356,10 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, struct net_device_context *net_device_ctx = netdev_priv(net); struct ndis_offload hwcaps; struct ndis_offload_params offloads; - unsigned int gso_max_size = GSO_LEGACY_MAX_SIZE; int ret; + nvdev->netvsc_gso_max_size = GSO_LEGACY_MAX_SIZE; + /* Find HW offload capabilities */ ret = rndis_query_hwcaps(rndis_device, nvdev, &hwcaps); if (ret != 0) @@ -1385,8 +1391,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO; - if (hwcaps.lsov2.ip4_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip4_maxsz; + if (hwcaps.lsov2.ip4_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip4_maxsz; } if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { @@ -1406,8 +1412,8 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; net->hw_features |= NETIF_F_TSO6; - if (hwcaps.lsov2.ip6_maxsz < gso_max_size) - gso_max_size = hwcaps.lsov2.ip6_maxsz; + if (hwcaps.lsov2.ip6_maxsz < nvdev->netvsc_gso_max_size) + nvdev->netvsc_gso_max_size = hwcaps.lsov2.ip6_maxsz; } if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) { @@ -1433,7 +1439,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, */ net->features &= ~NETVSC_SUPPORTED_HW_FEATURES | net->hw_features; - netif_set_tso_max_size(net, gso_max_size); + netif_set_tso_max_size(net, nvdev->netvsc_gso_max_size); ret = rndis_filter_set_offload_params(net, nvdev, &offloads); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index eb5884a7f0..f354667e22 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -217,6 +217,7 @@ struct eventpoll { /* used to optimize loop detection check */ u64 gen; struct hlist_head refs; + u8 loop_check_depth; /* * usage count, used together with epitem->dying to @@ -1966,23 +1967,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } /** - * ep_loop_check_proc - verify that adding an epoll file inside another - * epoll structure does not violate the constraints, in - * terms of closed loops, or too deep chains (which can - * result in excessive stack usage). + * ep_loop_check_proc - verify that adding an epoll file @ep inside another + * epoll file does not create closed loops, and + * determine the depth of the subtree starting at @ep * * @ep: the &struct eventpoll to be currently checked. * @depth: Current depth of the path being checked. * - * Return: %zero if adding the epoll @file inside current epoll - * structure @ep does not violate the constraints, or %-1 otherwise. + * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep. */ static int ep_loop_check_proc(struct eventpoll *ep, int depth) { - int error = 0; + int result = 0; struct rb_node *rbp; struct epitem *epi; + if (ep->gen == loop_check_gen) + return ep->loop_check_depth; + mutex_lock_nested(&ep->mtx, depth + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { @@ -1990,13 +1992,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) if (unlikely(is_file_epoll(epi->ffd.file))) { struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; - if (ep_tovisit->gen == loop_check_gen) - continue; if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) - error = -1; + result = INT_MAX; else - error = ep_loop_check_proc(ep_tovisit, depth + 1); - if (error != 0) + result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1); + if (result > EP_MAX_NESTS) break; } else { /* @@ -2010,9 +2010,27 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) list_file(epi->ffd.file); } } + ep->loop_check_depth = result; mutex_unlock(&ep->mtx); - return error; + return result; +} + +/** + * ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards + */ +static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth) +{ + int result = 0; + struct epitem *epi; + + if (ep->gen == loop_check_gen) + return ep->loop_check_depth; + hlist_for_each_entry_rcu(epi, &ep->refs, fllink) + result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1); + ep->gen = loop_check_gen; + ep->loop_check_depth = result; + return result; } /** @@ -2028,8 +2046,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth) */ static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) { + int depth, upwards_depth; + inserting_into = ep; - return ep_loop_check_proc(to, 0); + /* + * Check how deep down we can get from @to, and whether it is possible + * to loop up to @ep. + */ + depth = ep_loop_check_proc(to, 0); + if (depth > EP_MAX_NESTS) + return -1; + /* Check how far up we can go from @ep. */ + rcu_read_lock(); + upwards_depth = ep_get_upwards_depth_proc(ep, 0); + rcu_read_unlock(); + + return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0; } static void clear_tfile_check_list(void) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 99755b607b..26f52ca17c 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -329,7 +329,7 @@ static struct ntlmssp2_name *find_next_av(struct cifs_ses *ses, len = AV_LEN(av); if (AV_TYPE(av) == NTLMSSP_AV_EOL) return NULL; - if (!len || (u8 *)av + sizeof(*av) + len > end) + if ((u8 *)av + sizeof(*av) + len > end) return NULL; return av; } @@ -349,7 +349,7 @@ static int find_av_name(struct cifs_ses *ses, u16 type, char **name, u16 maxlen) av_for_each_entry(ses, av) { len = AV_LEN(av); - if (AV_TYPE(av) != type) + if (AV_TYPE(av) != type || !len) continue; if (!IS_ALIGNED(len, sizeof(__le16))) { cifs_dbg(VFS | ONCE, "%s: bad length(%u) for type %u\n", @@ -518,17 +518,67 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_ return rc; } +/* + * Set up NTLMv2 response blob with SPN (cifs/) appended to the + * existing list of AV pairs. + */ +static int set_auth_key_response(struct cifs_ses *ses) +{ + size_t baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); + size_t len, spnlen, tilen = 0, num_avs = 2 /* SPN + EOL */; + struct TCP_Server_Info *server = ses->server; + char *spn __free(kfree) = NULL; + struct ntlmssp2_name *av; + char *rsp = NULL; + int rc; + + spnlen = strlen(server->hostname); + len = sizeof("cifs/") + spnlen; + spn = kmalloc(len, GFP_KERNEL); + if (!spn) { + rc = -ENOMEM; + goto out; + } + + spnlen = scnprintf(spn, len, "cifs/%.*s", + (int)spnlen, server->hostname); + + av_for_each_entry(ses, av) + tilen += sizeof(*av) + AV_LEN(av); + + len = baselen + tilen + spnlen * sizeof(__le16) + num_avs * sizeof(*av); + rsp = kmalloc(len, GFP_KERNEL); + if (!rsp) { + rc = -ENOMEM; + goto out; + } + + memcpy(rsp + baselen, ses->auth_key.response, tilen); + av = (void *)(rsp + baselen + tilen); + av->type = cpu_to_le16(NTLMSSP_AV_TARGET_NAME); + av->length = cpu_to_le16(spnlen * sizeof(__le16)); + cifs_strtoUTF16((__le16 *)av->data, spn, spnlen, ses->local_nls); + av = (void *)((__u8 *)av + sizeof(*av) + AV_LEN(av)); + av->type = cpu_to_le16(NTLMSSP_AV_EOL); + av->length = 0; + + rc = 0; + ses->auth_key.len = len; +out: + ses->auth_key.response = rsp; + return rc; +} + int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { struct shash_desc *hmacmd5 = NULL; - int rc; - int baselen; - unsigned int tilen; + unsigned char *tiblob = NULL; /* target info blob */ struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; - unsigned char *tiblob = NULL; /* target info blob */ __le64 rsp_timestamp; + __u64 cc; + int rc; if (nls_cp == NULL) { cifs_dbg(VFS, "%s called with nls_cp==NULL\n", __func__); @@ -574,32 +624,25 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) * (as Windows 7 does) */ rsp_timestamp = find_timestamp(ses); + get_random_bytes(&cc, sizeof(cc)); + + cifs_server_lock(ses->server); - baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); - tilen = ses->auth_key.len; tiblob = ses->auth_key.response; - - ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); - if (!ses->auth_key.response) { - rc = -ENOMEM; + rc = set_auth_key_response(ses); + if (rc) { ses->auth_key.len = 0; - goto setup_ntlmv2_rsp_ret; + goto unlock; } - ses->auth_key.len += baselen; ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); ntlmv2->blob_signature = cpu_to_le32(0x00000101); ntlmv2->reserved = 0; ntlmv2->time = rsp_timestamp; - - get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); + ntlmv2->client_chal = cc; ntlmv2->reserved2 = 0; - memcpy(ses->auth_key.response + baselen, tiblob, tilen); - - cifs_server_lock(ses->server); - rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); if (rc) { cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3..6dd0eb6cda 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -972,7 +972,6 @@ const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv); bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator); -s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 90f56656b5..5fb7e87708 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -10,6 +10,7 @@ #include "shm_channel.h" #define GDMA_STATUS_MORE_ENTRIES 0x00000105 +#define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff /* Structures labeled with "HW DATA" are exchanged with the hardware. All of * them are naturally aligned and hence don't need __packed. @@ -58,8 +59,9 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, GDMA_EQE_HWC_INIT_DATA = 130, GDMA_EQE_HWC_INIT_DONE = 131, - GDMA_EQE_HWC_SOC_RECONFIG = 132, + GDMA_EQE_HWC_FPGA_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_HWC_RESET_REQUEST = 135, GDMA_EQE_RNIC_QP_FATAL = 176, }; @@ -152,6 +154,7 @@ struct gdma_general_req { #define GDMA_MESSAGE_V1 1 #define GDMA_MESSAGE_V2 2 #define GDMA_MESSAGE_V3 3 +#define GDMA_MESSAGE_V4 4 struct gdma_general_resp { struct gdma_resp_hdr hdr; @@ -387,6 +390,8 @@ struct gdma_context { u32 test_event_eq_id; bool is_pf; + bool in_service; + phys_addr_t bar0_pa; void __iomem *bar0_va; void __iomem *shm_base; @@ -408,8 +413,6 @@ struct gdma_context { struct gdma_dev mana_ib; }; -#define MAX_NUM_GDMA_DEVICES 4 - static inline bool mana_gd_is_mana(struct gdma_dev *gd) { return gd->dev_id.type == GDMA_DEVICE_MANA; @@ -556,11 +559,23 @@ enum { #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) #define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) +/* Driver can handle holes (zeros) in the device list */ +#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) + +/* Driver can self reset on EQE notification */ +#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14) + +/* Driver can self reset on FPGA Reconfig EQE notification */ +#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ - GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ + GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \ + GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE) #define GDMA_DRV_CAP_FLAGS2 0 @@ -621,11 +636,12 @@ struct gdma_query_max_resources_resp { }; /* HW DATA */ /* GDMA_LIST_DEVICES */ +#define GDMA_DEV_LIST_SIZE 64 struct gdma_list_devices_resp { struct gdma_resp_hdr hdr; u32 num_of_devs; u32 reserved; - struct gdma_dev_id devs[64]; + struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE]; }; /* HW DATA */ /* GDMA_REGISTER_DEVICE */ @@ -883,4 +899,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle); void mana_register_debugfs(void); void mana_unregister_debugfs(void); +int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state); +int mana_gd_resume(struct pci_dev *pdev); + +bool mana_need_log(struct gdma_context *gc, int err); + #endif /* _GDMA_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index ceac201caa..5a2dccc011 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -287,6 +287,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; @@ -401,10 +402,70 @@ struct mana_ethtool_stats { u64 rx_cqe_unknown_type; }; +struct mana_ethtool_phy_stats { + /* Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; + struct mana_context { struct gdma_dev *gdma_dev; u16 num_ports; + u8 bm_hostmode; struct mana_eq *eqs; struct dentry *mana_eqs_debugfs; @@ -470,6 +531,8 @@ struct mana_port_context { struct mana_ethtool_stats eth_stats; + struct mana_ethtool_phy_stats phy_stats; + /* Debugfs */ struct dentry *mana_port_debugfs; }; @@ -494,6 +557,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); @@ -520,6 +584,7 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + MANA_QUERY_PHY_STAT = 0x2000c, /* Privileged commands for the PF mode */ MANA_REGISTER_FILTER = 0x28000, @@ -554,7 +619,8 @@ struct mana_query_device_cfg_resp { u64 pf_cap_flags4; u16 max_num_vports; - u16 reserved; + u8 bm_hostmode; /* response v3: Bare Metal Host Mode */ + u8 reserved; u32 max_num_eqs; /* response v2: */ @@ -681,6 +747,74 @@ struct mana_query_gf_stat_resp { u64 tx_err_gdma; }; /* HW DATA */ +/* Query phy stats */ +struct mana_query_phy_stat_req { + struct gdma_req_hdr hdr; + u64 req_stats; +}; /* HW DATA */ + +struct mana_query_phy_stat_resp { + struct gdma_resp_hdr hdr; + u64 reported_stats; + + /* Aggregate Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC(Traffic class) traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC(Traffic Class) pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; /* HW DATA */ + /* Configure vPort Rx Steering */ struct mana_cfg_rx_steer_req_v2 { struct gdma_req_hdr hdr; @@ -824,5 +958,7 @@ int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, u32 doorbell_pg_id); void mana_uncfg_vport(struct mana_port_context *apc); -struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index); +struct net_device *mana_get_primary_netdev(struct mana_context *ac, + u32 port_index, + netdevice_tracker *tracker); #endif /* _MANA_H */ diff --git a/redhat/kernel.changelog-9.6 b/redhat/kernel.changelog-9.6 index fbb9644943..7d7c07b8e3 100644 --- a/redhat/kernel.changelog-9.6 +++ b/redhat/kernel.changelog-9.6 @@ -1,3 +1,44 @@ +* Sat Oct 04 2025 CKI KWF Bot [5.14.0-570.52.1.el9_6] +- crypto: seqiv - Handle EBUSY correctly (CKI Backport Bot) [RHEL-117232] {CVE-2023-53373} +- ibmvnic: Increase max subcrq indirect entries with fallback (Mamatha Inamdar) [RHEL-116186] +- hv_netvsc: Fix panic during namespace deletion with VF (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: Fix DSCP value in modify QP (Maxim Levitsky) [RHEL-115069] +- net: mana: Handle Reset Request from MANA NIC (Maxim Levitsky) [RHEL-115069] +- net: mana: Set tx_packets to post gso processing packet count (Maxim Levitsky) [RHEL-115069] +- net: mana: Handle unsupported HWC commands (Maxim Levitsky) [RHEL-115069] +- net: mana: Add handler for hardware servicing events (Maxim Levitsky) [RHEL-115069] +- net: mana: Expose additional hardware counters for drop and TC via ethtool. (Maxim Levitsky) [RHEL-115069] +- hv_netvsc: Set VF priv_flags to IFF_NO_ADDRCONF before open to prevent IPv6 addrconf (Maxim Levitsky) [RHEL-115069] +- net: mana: Record doorbell physical address in PF mode (Maxim Levitsky) [RHEL-115069] +- net: mana: Add support for Multi Vports on Bare metal (Maxim Levitsky) [RHEL-115069] +- net: mana: Switch to page pool for jumbo frames (Maxim Levitsky) [RHEL-115069] +- net: mana: Add metadata support for xdp mode (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: Handle net event for pointing to the current netdev (Maxim Levitsky) [RHEL-115069] +- net: mana: Change the function signature of mana_get_primary_netdev_rcu (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: Ensure variable err is initialized (Maxim Levitsky) [RHEL-115069] +- net: mana: Add debug logs in MANA network driver (Maxim Levitsky) [RHEL-115069] +- hv_netvsc: Use VF's tso_max_size value when data path is VF (Maxim Levitsky) [RHEL-115069] +- net: mana: Allow tso_max_size to go up-to GSO_MAX_SIZE (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: request error CQEs when supported (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: Query feature_flags bitmask from FW (Maxim Levitsky) [RHEL-115069] +- net: mana: Support holes in device list reply msg (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: Allocate PAGE aligned doorbell index (Maxim Levitsky) [RHEL-115069] +- hv_netvsc: Link queues to NAPIs (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: use the correct page size for mapping user-mode doorbell page (Maxim Levitsky) [RHEL-115069] +- RDMA/mana_ib: use the correct page table index based on hardware page size (Maxim Levitsky) [RHEL-115069] +- net: mana: Fix doorbell out of order violation and avoid unnecessary doorbell rings (Maxim Levitsky) [RHEL-115069] +- net: mana: Fix RX buf alloc_size alignment and atomic op panic (Maxim Levitsky) [RHEL-115069] +- ALSA: usb-audio: Validate UAC3 power domain descriptors, too (CKI Backport Bot) [RHEL-114688] {CVE-2025-38729} +- ALSA: usb-audio: Fix size validation in convert_chmap_v3() (CKI Backport Bot) [RHEL-114688] +- ALSA: usb-audio: Validate UAC3 cluster segment descriptors (CKI Backport Bot) [RHEL-114688] {CVE-2025-39757} +- HID: core: Harden s32ton() against conversion to 0 bits (CKI Backport Bot) [RHEL-111036] {CVE-2025-38556} +- HID: stop exporting hid_snto32() (CKI Backport Bot) [RHEL-111036] {CVE-2025-38556} +- HID: simplify snto32() (CKI Backport Bot) [RHEL-111036] {CVE-2025-38556} +- eventpoll: Fix semi-unbounded recursion (CKI Backport Bot) [RHEL-111052] {CVE-2025-38614} +- smb: client: fix session setup against servers that require SPN (Paulo Alcantara) [RHEL-107109] +- smb: client: allow parsing zero-length AV pairs (Paulo Alcantara) [RHEL-107109] +Resolves: RHEL-107109, RHEL-111036, RHEL-111052, RHEL-114688, RHEL-115069, RHEL-116186, RHEL-117232 + * Sat Sep 27 2025 CKI KWF Bot [5.14.0-570.51.1.el9_6] - wifi: ath12k: Decrement TID on RX peer frag setup error handling (CKI Backport Bot) [RHEL-114705] {CVE-2025-39761} - RDMA/cxgb4: Notify rdma stack for IB_EVENT_QP_LAST_WQE_REACHED event (CKI Backport Bot) [RHEL-100798] diff --git a/sound/usb/stream.c b/sound/usb/stream.c index c1ea8844a4..410f33c077 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -341,20 +341,28 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor len = le16_to_cpu(cluster->wLength); c = 0; - p += sizeof(struct uac3_cluster_header_descriptor); + p += sizeof(*cluster); + len -= sizeof(*cluster); - while (((p - (void *)cluster) < len) && (c < channels)) { + while (len > 0 && (c < channels)) { struct uac3_cluster_segment_descriptor *cs_desc = p; u16 cs_len; u8 cs_type; + if (len < sizeof(*cs_desc)) + break; cs_len = le16_to_cpu(cs_desc->wLength); + if (len < cs_len) + break; cs_type = cs_desc->bSegmentType; if (cs_type == UAC3_CHANNEL_INFORMATION) { struct uac3_cluster_information_segment_descriptor *is = p; unsigned char map; + if (cs_len < sizeof(*is)) + break; + /* * TODO: this conversion is not complete, update it * after adding UAC3 values to asound.h @@ -456,6 +464,7 @@ snd_pcm_chmap_elem *convert_chmap_v3(struct uac3_cluster_header_descriptor chmap->map[c++] = map; } p += cs_len; + len -= cs_len; } if (channels < c) @@ -880,7 +889,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, u64 badd_formats = 0; unsigned int num_channels; struct audioformat *fp; - u16 cluster_id, wLength; + u16 cluster_id, wLength, cluster_wLength; int clock = 0; int err; @@ -1008,6 +1017,16 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, return ERR_PTR(-EIO); } + cluster_wLength = le16_to_cpu(cluster->wLength); + if (cluster_wLength < sizeof(*cluster) || + cluster_wLength > wLength) { + dev_err(&dev->dev, + "%u:%d : invalid Cluster Descriptor size\n", + iface_no, altno); + kfree(cluster); + return ERR_PTR(-EIO); + } + num_channels = cluster->bNrChannels; chmap = convert_chmap_v3(cluster); kfree(cluster); diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 6fe206f6e9..4f4e8e87a1 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -221,6 +221,17 @@ static bool validate_uac3_feature_unit(const void *p, return d->bLength >= sizeof(*d) + 4 + 2; } +static bool validate_uac3_power_domain_unit(const void *p, + const struct usb_desc_validator *v) +{ + const struct uac3_power_domain_descriptor *d = p; + + if (d->bLength < sizeof(*d)) + return false; + /* baEntities[] + wPDomainDescrStr */ + return d->bLength >= sizeof(*d) + d->bNrEntities + 2; +} + static bool validate_midi_out_jack(const void *p, const struct usb_desc_validator *v) { @@ -285,6 +296,7 @@ static const struct usb_desc_validator audio_validators[] = { struct uac3_clock_multiplier_descriptor), /* UAC_VERSION_3, UAC3_SAMPLE_RATE_CONVERTER: not implemented yet */ /* UAC_VERSION_3, UAC3_CONNECTORS: not implemented yet */ + FUNC(UAC_VERSION_3, UAC3_POWER_DOMAIN, validate_uac3_power_domain_unit), { } /* terminator */ };