diff --git a/SOURCES/0001-Apply-GCP-related-changes.patch b/SOURCES/0001-Apply-GCP-related-changes.patch index 38530a0..7c124c0 100644 --- a/SOURCES/0001-Apply-GCP-related-changes.patch +++ b/SOURCES/0001-Apply-GCP-related-changes.patch @@ -1,14 +1,23 @@ -From c0523819a6cdb726b3f56e0e51fcedb033401822 Mon Sep 17 00:00:00 2001 +From bd7d191b7e9bff566c6547b529c1c75665da0895 Mon Sep 17 00:00:00 2001 From: Andrew Lukoshko -Date: Tue, 2 May 2023 13:39:22 +0200 +Date: Tue, 2 May 2023 18:56:47 +0200 Subject: [PATCH] Apply GCP related changes --- - drivers/net/ethernet/google/Kconfig | 2 +- - drivers/net/ethernet/google/gve/gve_main.c | 6 ++---- - drivers/net/ethernet/google/gve/gve_rx.c | 4 +--- - drivers/net/ethernet/google/gve/gve_tx.c | 6 ------ - 4 files changed, 4 insertions(+), 14 deletions(-) + drivers/net/ethernet/google/Kconfig | 2 +- + drivers/net/ethernet/google/gve/gve.h | 27 +- + drivers/net/ethernet/google/gve/gve_adminq.c | 21 +- + drivers/net/ethernet/google/gve/gve_adminq.h | 51 ++ + .../net/ethernet/google/gve/gve_desc_dqo.h | 5 + + drivers/net/ethernet/google/gve/gve_ethtool.c | 2 + + drivers/net/ethernet/google/gve/gve_main.c | 61 +- + drivers/net/ethernet/google/gve/gve_rx.c | 546 ++++++++++-------- + drivers/net/ethernet/google/gve/gve_rx_dqo.c | 4 +- + drivers/net/ethernet/google/gve/gve_tx.c | 6 - + drivers/net/ethernet/google/gve/gve_tx_dqo.c | 24 +- + drivers/net/ethernet/google/gve/gve_utils.c | 30 +- + drivers/net/ethernet/google/gve/gve_utils.h | 2 +- + 13 files changed, 489 insertions(+), 292 deletions(-) diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig index 8641a00f8..b8f04d052 100644 @@ -23,11 +32,311 @@ index 8641a00f8..b8f04d052 100644 help This driver supports Google Virtual NIC (gVNIC)" +diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h +index 160735484..64eb0442c 100644 +--- a/drivers/net/ethernet/google/gve/gve.h ++++ b/drivers/net/ethernet/google/gve/gve.h +@@ -60,7 +60,8 @@ struct gve_rx_slot_page_info { + void *page_address; + u32 page_offset; /* offset to write to in page */ + int pagecnt_bias; /* expected pagecnt if only the driver has a ref */ +- u8 can_flip; ++ u16 pad; /* adjustment for rx padding */ ++ u8 can_flip; /* tracks if the networking stack is using the page */ + }; + + /* A list of pages registered with the device during setup and used by a queue +@@ -149,10 +150,17 @@ struct gve_rx_ctx { + /* head and tail of skb chain for the current packet or NULL if none */ + struct sk_buff *skb_head; + struct sk_buff *skb_tail; +- u16 total_expected_size; +- u8 expected_frag_cnt; +- u8 curr_frag_cnt; +- u8 reuse_frags; ++ u32 total_size; ++ u8 frag_cnt; ++ bool drop_pkt; ++}; ++ ++struct gve_rx_cnts { ++ u32 ok_pkt_bytes; ++ u16 ok_pkt_cnt; ++ u16 total_pkt_cnt; ++ u16 cont_pkt_cnt; ++ u16 desc_err_pkt_cnt; + }; + + /* Contains datapath state used to represent an RX queue. */ +@@ -167,6 +175,10 @@ struct gve_rx_ring { + /* threshold for posting new buffs and descs */ + u32 db_threshold; + u16 packet_buffer_size; ++ ++ u32 qpl_copy_pool_mask; ++ u32 qpl_copy_pool_head; ++ struct gve_rx_slot_page_info *qpl_copy_pool; + }; + + /* DQO fields. */ +@@ -216,7 +228,9 @@ struct gve_rx_ring { + u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ + u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ + u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ +- u64 rx_frag_copy_cnt; /* free-running count of rx segments copied into skb linear portion */ ++ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ ++ u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */ ++ + u32 q_num; /* queue index */ + u32 ntfy_id; /* notification block index */ + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ +@@ -549,6 +563,7 @@ struct gve_priv { + u32 adminq_report_stats_cnt; + u32 adminq_report_link_speed_cnt; + u32 adminq_get_ptype_map_cnt; ++ u32 adminq_verify_driver_compatibility_cnt; + + /* Global stats */ + u32 interface_up_cnt; /* count of times interface turned up since last reset */ +diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c +index f7621ab67..60061288a 100644 +--- a/drivers/net/ethernet/google/gve/gve_adminq.c ++++ b/drivers/net/ethernet/google/gve/gve_adminq.c +@@ -289,7 +289,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) + case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED: + return -ENOMEM; + case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED: +- return -ENOTSUPP; ++ return -EOPNOTSUPP; + default: + dev_err(&priv->pdev->dev, "parse_aq_err: unknown status code %d\n", status); + return -EINVAL; +@@ -407,6 +407,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, + case GVE_ADMINQ_GET_PTYPE_MAP: + priv->adminq_get_ptype_map_cnt++; + break; ++ case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY: ++ priv->adminq_verify_driver_compatibility_cnt++; ++ break; + default: + dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); + } +@@ -878,6 +881,22 @@ int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, + return gve_adminq_execute_cmd(priv, &cmd); + } + ++int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, ++ u64 driver_info_len, ++ dma_addr_t driver_info_addr) ++{ ++ union gve_adminq_command cmd; ++ ++ memset(&cmd, 0, sizeof(cmd)); ++ cmd.opcode = cpu_to_be32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY); ++ cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) { ++ .driver_info_len = cpu_to_be64(driver_info_len), ++ .driver_info_addr = cpu_to_be64(driver_info_addr), ++ }; ++ ++ return gve_adminq_execute_cmd(priv, &cmd); ++} ++ + int gve_adminq_report_link_speed(struct gve_priv *priv) + { + union gve_adminq_command gvnic_cmd; +diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h +index 83c0b40cd..cf29662e6 100644 +--- a/drivers/net/ethernet/google/gve/gve_adminq.h ++++ b/drivers/net/ethernet/google/gve/gve_adminq.h +@@ -24,6 +24,7 @@ enum gve_adminq_opcodes { + GVE_ADMINQ_REPORT_STATS = 0xC, + GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, + GVE_ADMINQ_GET_PTYPE_MAP = 0xE, ++ GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, + }; + + /* Admin queue status codes */ +@@ -146,6 +147,51 @@ enum gve_sup_feature_mask { + + #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 + ++#define GVE_VERSION_STR_LEN 128 ++ ++enum gve_driver_capbility { ++ gve_driver_capability_gqi_qpl = 0, ++ gve_driver_capability_gqi_rda = 1, ++ gve_driver_capability_dqo_qpl = 2, /* reserved for future use */ ++ gve_driver_capability_dqo_rda = 3, ++ gve_driver_capability_alt_miss_compl = 4, ++}; ++ ++#define GVE_CAP1(a) BIT((int)a) ++#define GVE_CAP2(a) BIT(((int)a) - 64) ++#define GVE_CAP3(a) BIT(((int)a) - 128) ++#define GVE_CAP4(a) BIT(((int)a) - 192) ++ ++#define GVE_DRIVER_CAPABILITY_FLAGS1 \ ++ (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ ++ GVE_CAP1(gve_driver_capability_gqi_rda) | \ ++ GVE_CAP1(gve_driver_capability_dqo_rda) | \ ++ GVE_CAP1(gve_driver_capability_alt_miss_compl)) ++ ++#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 ++#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 ++#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 ++ ++struct gve_driver_info { ++ u8 os_type; /* 0x01 = Linux */ ++ u8 driver_major; ++ u8 driver_minor; ++ u8 driver_sub; ++ __be32 os_version_major; ++ __be32 os_version_minor; ++ __be32 os_version_sub; ++ __be64 driver_capability_flags[4]; ++ u8 os_version_str1[GVE_VERSION_STR_LEN]; ++ u8 os_version_str2[GVE_VERSION_STR_LEN]; ++}; ++ ++struct gve_adminq_verify_driver_compatibility { ++ __be64 driver_info_len; ++ __be64 driver_info_addr; ++}; ++ ++static_assert(sizeof(struct gve_adminq_verify_driver_compatibility) == 16); ++ + struct gve_adminq_configure_device_resources { + __be64 counter_array; + __be64 irq_db_addr; +@@ -345,6 +391,8 @@ union gve_adminq_command { + struct gve_adminq_report_stats report_stats; + struct gve_adminq_report_link_speed report_link_speed; + struct gve_adminq_get_ptype_map get_ptype_map; ++ struct gve_adminq_verify_driver_compatibility ++ verify_driver_compatibility; + }; + }; + u8 reserved[64]; +@@ -372,6 +420,9 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); + int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); + int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, + dma_addr_t stats_report_addr, u64 interval); ++int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, ++ u64 driver_info_len, ++ dma_addr_t driver_info_addr); + int gve_adminq_report_link_speed(struct gve_priv *priv); + + struct gve_ptype_lut; +diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h +index e8fe9adef..f79cd0591 100644 +--- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h ++++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h +@@ -176,6 +176,11 @@ static_assert(sizeof(struct gve_tx_compl_desc) == 8); + #define GVE_COMPL_TYPE_DQO_MISS 0x1 /* Miss path completion */ + #define GVE_COMPL_TYPE_DQO_REINJECTION 0x3 /* Re-injection completion */ + ++/* The most significant bit in the completion tag can change the completion ++ * type from packet completion to miss path completion. ++ */ ++#define GVE_ALT_MISS_COMPL_BIT BIT(15) ++ + /* Descriptor to post buffers to HW on buffer queue. */ + struct gve_rx_desc_dqo { + __le16 buf_id; /* ID returned in Rx completion descriptor */ +diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c +index 50b384910..ce574d097 100644 +--- a/drivers/net/ethernet/google/gve/gve_ethtool.c ++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c +@@ -45,6 +45,7 @@ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { + static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { + "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]", + "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", ++ "rx_frag_alloc_cnt[%u]", + "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", + "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", + "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", +@@ -271,6 +272,7 @@ gve_get_ethtool_stats(struct net_device *netdev, + data[i++] = rx->rx_cont_packet_cnt; + data[i++] = rx->rx_frag_flip_cnt; + data[i++] = rx->rx_frag_copy_cnt; ++ data[i++] = rx->rx_frag_alloc_cnt; + /* rx dropped packets */ + data[i++] = tmp_rx_skb_alloc_fail + + tmp_rx_buf_alloc_fail + diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c -index 54e51c822..6cafee55e 100644 +index 54e51c822..5ffb9b200 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c -@@ -857,8 +857,7 @@ static int gve_alloc_qpls(struct gve_priv *priv) +@@ -12,6 +12,8 @@ + #include + #include + #include ++#include ++#include + #include + #include "gve.h" + #include "gve_dqo.h" +@@ -30,6 +32,49 @@ + const char gve_version_str[] = GVE_VERSION; + static const char gve_version_prefix[] = GVE_VERSION_PREFIX; + ++static int gve_verify_driver_compatibility(struct gve_priv *priv) ++{ ++ int err; ++ struct gve_driver_info *driver_info; ++ dma_addr_t driver_info_bus; ++ ++ driver_info = dma_alloc_coherent(&priv->pdev->dev, ++ sizeof(struct gve_driver_info), ++ &driver_info_bus, GFP_KERNEL); ++ if (!driver_info) ++ return -ENOMEM; ++ ++ *driver_info = (struct gve_driver_info) { ++ .os_type = 1, /* Linux */ ++ .os_version_major = cpu_to_be32(6), ++ .os_version_minor = cpu_to_be32(2), ++ .os_version_sub = cpu_to_be32(0), ++ .driver_capability_flags = { ++ cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), ++ cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), ++ cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), ++ cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), ++ }, ++ }; ++ strscpy(driver_info->os_version_str1, utsname()->release, ++ sizeof(driver_info->os_version_str1)); ++ strscpy(driver_info->os_version_str2, utsname()->version, ++ sizeof(driver_info->os_version_str2)); ++ ++ err = gve_adminq_verify_driver_compatibility(priv, ++ sizeof(struct gve_driver_info), ++ driver_info_bus); ++ ++ /* It's ok if the device doesn't support this */ ++ if (err == -EOPNOTSUPP) ++ err = 0; ++ ++ dma_free_coherent(&priv->pdev->dev, ++ sizeof(struct gve_driver_info), ++ driver_info, driver_info_bus); ++ return err; ++} ++ + static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct gve_priv *priv = netdev_priv(dev); +@@ -526,8 +571,7 @@ static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, + { + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + +- netif_napi_add(priv->dev, &block->napi, gve_poll, +- NAPI_POLL_WEIGHT); ++ netif_napi_add(priv->dev, &block->napi, gve_poll, 64); + } + + static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) +@@ -857,8 +901,7 @@ static int gve_alloc_qpls(struct gve_priv *priv) int i, j; int err; @@ -37,7 +346,7 @@ index 54e51c822..6cafee55e 100644 return 0; priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL); -@@ -901,8 +900,7 @@ static void gve_free_qpls(struct gve_priv *priv) +@@ -901,8 +944,7 @@ static void gve_free_qpls(struct gve_priv *priv) int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); int i; @@ -47,28 +356,767 @@ index 54e51c822..6cafee55e 100644 return; kvfree(priv->qpl_cfg.qpl_id_map); +@@ -1371,6 +1413,13 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) + return err; + } + ++ err = gve_verify_driver_compatibility(priv); ++ if (err) { ++ dev_err(&priv->pdev->dev, ++ "Could not verify driver compatibility: err=%d\n", err); ++ goto err; ++ } ++ + if (skip_describe_device) + goto setup_device; + diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c -index cf3fe5ad5..021bbf308 100644 +index cf3fe5ad5..1f5513772 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c -@@ -439,7 +439,7 @@ static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx) - if (frag_size > rx->packet_buffer_size) { - packet_size_error = true; - netdev_warn(priv->dev, -- "RX fragment error: packet_buffer_size=%d, frag_size=%d, droping packet.", -+ "RX fragment error: packet_buffer_size=%d, frag_size=%d, dropping packet.", - rx->packet_buffer_size, be16_to_cpu(desc->len)); +@@ -35,6 +35,12 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) + rx->data.page_info[i].pagecnt_bias - 1); + gve_unassign_qpl(priv, rx->data.qpl->id); + rx->data.qpl = NULL; ++ ++ for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) { ++ page_ref_sub(rx->qpl_copy_pool[i].page, ++ rx->qpl_copy_pool[i].pagecnt_bias - 1); ++ put_page(rx->qpl_copy_pool[i].page); ++ } + } + kvfree(rx->data.page_info); + rx->data.page_info = NULL; +@@ -63,6 +69,10 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx) + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; ++ ++ kvfree(rx->qpl_copy_pool); ++ rx->qpl_copy_pool = NULL; ++ + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); + } + +@@ -101,6 +111,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) + u32 slots; + int err; + int i; ++ int j; + + /* Allocate one page per Rx queue slot. Each page is split into two + * packet buffers, when possible we "page flip" between the two. +@@ -135,7 +146,33 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) + goto alloc_err; + } + ++ if (!rx->data.raw_addressing) { ++ for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) { ++ struct page *page = alloc_page(GFP_KERNEL); ++ ++ if (!page) { ++ err = -ENOMEM; ++ goto alloc_err_qpl; ++ } ++ ++ rx->qpl_copy_pool[j].page = page; ++ rx->qpl_copy_pool[j].page_offset = 0; ++ rx->qpl_copy_pool[j].page_address = page_address(page); ++ ++ /* The page already has 1 ref. */ ++ page_ref_add(page, INT_MAX - 1); ++ rx->qpl_copy_pool[j].pagecnt_bias = INT_MAX; ++ } ++ } ++ + return slots; ++ ++alloc_err_qpl: ++ while (j--) { ++ page_ref_sub(rx->qpl_copy_pool[j].page, ++ rx->qpl_copy_pool[j].pagecnt_bias - 1); ++ put_page(rx->qpl_copy_pool[j].page); ++ } + alloc_err: + while (i--) + gve_rx_free_buffer(&priv->pdev->dev, +@@ -146,12 +183,11 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) + + static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) + { +- ctx->curr_frag_cnt = 0; +- ctx->total_expected_size = 0; +- ctx->expected_frag_cnt = 0; + ctx->skb_head = NULL; + ctx->skb_tail = NULL; +- ctx->reuse_frags = false; ++ ctx->total_size = 0; ++ ctx->frag_cnt = 0; ++ ctx->drop_pkt = false; + } + + static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) +@@ -181,10 +217,22 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) + GFP_KERNEL); + if (!rx->data.data_ring) + return -ENOMEM; ++ ++ rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1; ++ rx->qpl_copy_pool_head = 0; ++ rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1, ++ sizeof(rx->qpl_copy_pool[0]), ++ GFP_KERNEL); ++ ++ if (!rx->qpl_copy_pool) { ++ err = -ENOMEM; ++ goto abort_with_slots; ++ } ++ + filled_pages = gve_prefill_rx_pages(rx); + if (filled_pages < 0) { + err = -ENOMEM; +- goto abort_with_slots; ++ goto abort_with_copy_pool; + } + rx->fill_cnt = filled_pages; + /* Ensure data ring slots (packet buffers) are visible. */ +@@ -236,6 +284,9 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) + rx->q_resources = NULL; + abort_filled: + gve_rx_unfill_pages(priv, rx); ++abort_with_copy_pool: ++ kvfree(rx->qpl_copy_pool); ++ rx->qpl_copy_pool = NULL; + abort_with_slots: + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus); +@@ -292,30 +343,47 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) + return PKT_HASH_TYPE_L2; + } + +-static u16 gve_rx_ctx_padding(struct gve_rx_ctx *ctx) +-{ +- return (ctx->curr_frag_cnt == 0) ? GVE_RX_PAD : 0; +-} +- + static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, + u16 packet_buffer_size, u16 len, + struct gve_rx_ctx *ctx) + { +- u32 offset = page_info->page_offset + gve_rx_ctx_padding(ctx); +- struct sk_buff *skb; ++ u32 offset = page_info->page_offset + page_info->pad; ++ struct sk_buff *skb = ctx->skb_tail; ++ int num_frags = 0; + +- if (!ctx->skb_head) +- ctx->skb_head = napi_get_frags(napi); ++ if (!skb) { ++ skb = napi_get_frags(napi); ++ if (unlikely(!skb)) ++ return NULL; + +- if (unlikely(!ctx->skb_head)) +- return NULL; ++ ctx->skb_head = skb; ++ ctx->skb_tail = skb; ++ } else { ++ num_frags = skb_shinfo(ctx->skb_tail)->nr_frags; ++ if (num_frags == MAX_SKB_FRAGS) { ++ skb = napi_alloc_skb(napi, 0); ++ if (!skb) ++ return NULL; + +- skb = ctx->skb_head; +- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page_info->page, ++ // We will never chain more than two SKBs: 2 * 16 * 2k > 64k ++ // which is why we do not need to chain by using skb->next ++ skb_shinfo(ctx->skb_tail)->frag_list = skb; ++ ++ ctx->skb_tail = skb; ++ num_frags = 0; ++ } ++ } ++ ++ if (skb != ctx->skb_head) { ++ ctx->skb_head->len += len; ++ ctx->skb_head->data_len += len; ++ ctx->skb_head->truesize += packet_buffer_size; ++ } ++ skb_add_rx_frag(skb, num_frags, page_info->page, + offset, len, packet_buffer_size); + +- return skb; ++ return ctx->skb_head; + } + + static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr) +@@ -363,6 +431,92 @@ gve_rx_raw_addressing(struct device *dev, struct net_device *netdev, + return skb; + } + ++static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, ++ struct gve_rx_slot_page_info *page_info, ++ u16 len, struct napi_struct *napi) ++{ ++ u32 pool_idx = rx->qpl_copy_pool_head & rx->qpl_copy_pool_mask; ++ void *src = page_info->page_address + page_info->page_offset; ++ struct gve_rx_slot_page_info *copy_page_info; ++ struct gve_rx_ctx *ctx = &rx->ctx; ++ bool alloc_page = false; ++ struct sk_buff *skb; ++ void *dst; ++ ++ copy_page_info = &rx->qpl_copy_pool[pool_idx]; ++ if (!copy_page_info->can_flip) { ++ int recycle = gve_rx_can_recycle_buffer(copy_page_info); ++ ++ if (unlikely(recycle < 0)) { ++ gve_schedule_reset(rx->gve); ++ return NULL; ++ } ++ alloc_page = !recycle; ++ } ++ ++ if (alloc_page) { ++ struct gve_rx_slot_page_info alloc_page_info; ++ struct page *page; ++ ++ /* The least recently used page turned out to be ++ * still in use by the kernel. Ignoring it and moving ++ * on alleviates head-of-line blocking. ++ */ ++ rx->qpl_copy_pool_head++; ++ ++ page = alloc_page(GFP_ATOMIC); ++ if (!page) ++ return NULL; ++ ++ alloc_page_info.page = page; ++ alloc_page_info.page_offset = 0; ++ alloc_page_info.page_address = page_address(page); ++ alloc_page_info.pad = page_info->pad; ++ ++ memcpy(alloc_page_info.page_address, src, page_info->pad + len); ++ skb = gve_rx_add_frags(napi, &alloc_page_info, ++ rx->packet_buffer_size, ++ len, ctx); ++ ++ u64_stats_update_begin(&rx->statss); ++ rx->rx_frag_copy_cnt++; ++ rx->rx_frag_alloc_cnt++; ++ u64_stats_update_end(&rx->statss); ++ ++ return skb; ++ } ++ ++ dst = copy_page_info->page_address + copy_page_info->page_offset; ++ memcpy(dst, src, page_info->pad + len); ++ copy_page_info->pad = page_info->pad; ++ ++ skb = gve_rx_add_frags(napi, copy_page_info, ++ rx->packet_buffer_size, len, ctx); ++ if (unlikely(!skb)) ++ return NULL; ++ ++ gve_dec_pagecnt_bias(copy_page_info); ++ copy_page_info->page_offset += rx->packet_buffer_size; ++ copy_page_info->page_offset &= (PAGE_SIZE - 1); ++ ++ if (copy_page_info->can_flip) { ++ /* We have used both halves of this copy page, it ++ * is time for it to go to the back of the queue. ++ */ ++ copy_page_info->can_flip = false; ++ rx->qpl_copy_pool_head++; ++ prefetch(rx->qpl_copy_pool[rx->qpl_copy_pool_head & rx->qpl_copy_pool_mask].page); ++ } else { ++ copy_page_info->can_flip = true; ++ } ++ ++ u64_stats_update_begin(&rx->statss); ++ rx->rx_frag_copy_cnt++; ++ u64_stats_update_end(&rx->statss); ++ ++ return skb; ++} ++ + static struct sk_buff * + gve_rx_qpl(struct device *dev, struct net_device *netdev, + struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info, +@@ -377,7 +531,7 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, + * choice is to copy the data out of it so that we can return it to the + * device. + */ +- if (ctx->reuse_frags) { ++ if (page_info->can_flip) { + skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx); + /* No point in recycling if we didn't get the skb */ + if (skb) { +@@ -386,116 +540,23 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, + gve_rx_flip_buff(page_info, &data_slot->qpl_offset); } - page_info = &rx->data.page_info[idx]; -@@ -766,8 +766,6 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + } else { +- const u16 padding = gve_rx_ctx_padding(ctx); +- +- skb = gve_rx_copy(netdev, napi, page_info, len, padding, ctx); +- if (skb) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_frag_copy_cnt++; +- u64_stats_update_end(&rx->statss); +- } ++ skb = gve_rx_copy_to_pool(rx, page_info, len, napi); + } + return skb; + } + +-#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x)) +-static u16 gve_rx_get_fragment_size(struct gve_rx_ctx *ctx, struct gve_rx_desc *desc) +-{ +- return be16_to_cpu(desc->len) - gve_rx_ctx_padding(ctx); +-} +- +-static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx) +-{ +- bool qpl_mode = !rx->data.raw_addressing, packet_size_error = false; +- bool buffer_error = false, desc_error = false, seqno_error = false; +- struct gve_rx_slot_page_info *page_info; +- struct gve_priv *priv = rx->gve; +- u32 idx = rx->cnt & rx->mask; +- bool reuse_frags, can_flip; +- struct gve_rx_desc *desc; +- u16 packet_size = 0; +- u16 n_frags = 0; +- int recycle; +- +- /** In QPL mode, we only flip buffers when all buffers containing the packet +- * can be flipped. RDA can_flip decisions will be made later, per frag. +- */ +- can_flip = qpl_mode; +- reuse_frags = can_flip; +- do { +- u16 frag_size; +- +- n_frags++; +- desc = &rx->desc.desc_ring[idx]; +- desc_error = unlikely(desc->flags_seq & GVE_RXF_ERR) || desc_error; +- if (GVE_SEQNO(desc->flags_seq) != rx->desc.seqno) { +- seqno_error = true; +- netdev_warn(priv->dev, +- "RX seqno error: want=%d, got=%d, dropping packet and scheduling reset.", +- rx->desc.seqno, GVE_SEQNO(desc->flags_seq)); +- } +- frag_size = be16_to_cpu(desc->len); +- packet_size += frag_size; +- if (frag_size > rx->packet_buffer_size) { +- packet_size_error = true; +- netdev_warn(priv->dev, +- "RX fragment error: packet_buffer_size=%d, frag_size=%d, droping packet.", +- rx->packet_buffer_size, be16_to_cpu(desc->len)); +- } +- page_info = &rx->data.page_info[idx]; +- if (can_flip) { +- recycle = gve_rx_can_recycle_buffer(page_info); +- reuse_frags = reuse_frags && recycle > 0; +- buffer_error = buffer_error || unlikely(recycle < 0); +- } +- idx = (idx + 1) & rx->mask; +- rx->desc.seqno = gve_next_seqno(rx->desc.seqno); +- } while (GVE_PKTCONT_BIT_IS_SET(desc->flags_seq)); +- +- prefetch(rx->desc.desc_ring + idx); +- +- ctx->curr_frag_cnt = 0; +- ctx->total_expected_size = packet_size - GVE_RX_PAD; +- ctx->expected_frag_cnt = n_frags; +- ctx->skb_head = NULL; +- ctx->reuse_frags = reuse_frags; +- +- if (ctx->expected_frag_cnt > 1) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_cont_packet_cnt++; +- u64_stats_update_end(&rx->statss); +- } +- if (ctx->total_expected_size > priv->rx_copybreak && !ctx->reuse_frags && qpl_mode) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_copied_pkt++; +- u64_stats_update_end(&rx->statss); +- } +- +- if (unlikely(buffer_error || seqno_error || packet_size_error)) { +- gve_schedule_reset(priv); +- return false; +- } +- +- if (unlikely(desc_error)) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_desc_err_dropped_pkt++; +- u64_stats_update_end(&rx->statss); +- return false; +- } +- return true; +-} +- + static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_slot_page_info *page_info, struct napi_struct *napi, +- u16 len, union gve_rx_data_slot *data_slot) ++ u16 len, union gve_rx_data_slot *data_slot, ++ bool is_only_frag) + { + struct net_device *netdev = priv->dev; + struct gve_rx_ctx *ctx = &rx->ctx; + struct sk_buff *skb = NULL; + +- if (len <= priv->rx_copybreak && ctx->expected_frag_cnt == 1) { ++ if (len <= priv->rx_copybreak && is_only_frag) { + /* Just copy small packets */ +- skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD, ctx); ++ skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD); + if (skb) { + u64_stats_update_begin(&rx->statss); + rx->rx_copied_pkt++; +@@ -504,29 +565,25 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, + u64_stats_update_end(&rx->statss); + } + } else { +- if (rx->data.raw_addressing) { +- int recycle = gve_rx_can_recycle_buffer(page_info); ++ int recycle = gve_rx_can_recycle_buffer(page_info); + +- if (unlikely(recycle < 0)) { +- gve_schedule_reset(priv); +- return NULL; +- } +- page_info->can_flip = recycle; +- if (page_info->can_flip) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_frag_flip_cnt++; +- u64_stats_update_end(&rx->statss); +- } ++ if (unlikely(recycle < 0)) { ++ gve_schedule_reset(priv); ++ return NULL; ++ } ++ page_info->can_flip = recycle; ++ if (page_info->can_flip) { ++ u64_stats_update_begin(&rx->statss); ++ rx->rx_frag_flip_cnt++; ++ u64_stats_update_end(&rx->statss); ++ } ++ ++ if (rx->data.raw_addressing) { + skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev, + page_info, len, napi, + data_slot, + rx->packet_buffer_size, ctx); + } else { +- if (ctx->reuse_frags) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_frag_flip_cnt++; +- u64_stats_update_end(&rx->statss); +- } + skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx, + page_info, len, napi, data_slot); + } +@@ -534,101 +591,113 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, + return skb; + } + +-static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, +- u64 *packet_size_bytes, u32 *work_done) ++#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x)) ++static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, ++ struct gve_rx_desc *desc, u32 idx, ++ struct gve_rx_cnts *cnts) + { ++ bool is_last_frag = !GVE_PKTCONT_BIT_IS_SET(desc->flags_seq); + struct gve_rx_slot_page_info *page_info; ++ u16 frag_size = be16_to_cpu(desc->len); + struct gve_rx_ctx *ctx = &rx->ctx; + union gve_rx_data_slot *data_slot; + struct gve_priv *priv = rx->gve; +- struct gve_rx_desc *first_desc; + struct sk_buff *skb = NULL; +- struct gve_rx_desc *desc; +- struct napi_struct *napi; + dma_addr_t page_bus; +- u32 work_cnt = 0; + void *va; +- u32 idx; +- u16 len; + +- idx = rx->cnt & rx->mask; +- first_desc = &rx->desc.desc_ring[idx]; +- desc = first_desc; +- napi = &priv->ntfy_blocks[rx->ntfy_id].napi; ++ struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi; ++ bool is_first_frag = ctx->frag_cnt == 0; + +- if (unlikely(!gve_rx_ctx_init(ctx, rx))) +- goto skb_alloc_fail; ++ bool is_only_frag = is_first_frag && is_last_frag; + +- while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) { +- /* Prefetch two packet buffers ahead, we will need it soon. */ +- page_info = &rx->data.page_info[(idx + 2) & rx->mask]; +- va = page_info->page_address + page_info->page_offset; ++ if (unlikely(ctx->drop_pkt)) ++ goto finish_frag; + +- prefetch(page_info->page); /* Kernel page struct. */ +- prefetch(va); /* Packet header. */ +- prefetch(va + 64); /* Next cacheline too. */ ++ if (desc->flags_seq & GVE_RXF_ERR) { ++ ctx->drop_pkt = true; ++ cnts->desc_err_pkt_cnt++; ++ napi_free_frags(napi); ++ goto finish_frag; ++ } + +- len = gve_rx_get_fragment_size(ctx, desc); ++ if (unlikely(frag_size > rx->packet_buffer_size)) { ++ netdev_warn(priv->dev, "Unexpected frag size %d, can't exceed %d, scheduling reset", ++ frag_size, rx->packet_buffer_size); ++ ctx->drop_pkt = true; ++ napi_free_frags(napi); ++ gve_schedule_reset(rx->gve); ++ goto finish_frag; ++ } + +- page_info = &rx->data.page_info[idx]; +- data_slot = &rx->data.data_ring[idx]; +- page_bus = rx->data.raw_addressing ? +- be64_to_cpu(data_slot->addr) - page_info->page_offset : +- rx->data.qpl->page_buses[idx]; +- dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, PAGE_SIZE, DMA_FROM_DEVICE); +- +- skb = gve_rx_skb(priv, rx, page_info, napi, len, data_slot); +- if (!skb) { +- u64_stats_update_begin(&rx->statss); +- rx->rx_skb_alloc_fail++; +- u64_stats_update_end(&rx->statss); +- goto skb_alloc_fail; ++ /* Prefetch two packet buffers ahead, we will need it soon. */ ++ page_info = &rx->data.page_info[(idx + 2) & rx->mask]; ++ va = page_info->page_address + page_info->page_offset; ++ prefetch(page_info->page); /* Kernel page struct. */ ++ prefetch(va); /* Packet header. */ ++ prefetch(va + 64); /* Next cacheline too. */ ++ ++ page_info = &rx->data.page_info[idx]; ++ data_slot = &rx->data.data_ring[idx]; ++ page_bus = (rx->data.raw_addressing) ? ++ be64_to_cpu(data_slot->addr) - page_info->page_offset : ++ rx->data.qpl->page_buses[idx]; ++ dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, ++ PAGE_SIZE, DMA_FROM_DEVICE); ++ page_info->pad = is_first_frag ? GVE_RX_PAD : 0; ++ frag_size -= page_info->pad; ++ ++ skb = gve_rx_skb(priv, rx, page_info, napi, frag_size, ++ data_slot, is_only_frag); ++ if (!skb) { ++ u64_stats_update_begin(&rx->statss); ++ rx->rx_skb_alloc_fail++; ++ u64_stats_update_end(&rx->statss); ++ ++ napi_free_frags(napi); ++ ctx->drop_pkt = true; ++ goto finish_frag; ++ } ++ ctx->total_size += frag_size; ++ ++ if (is_first_frag) { ++ if (likely(feat & NETIF_F_RXCSUM)) { ++ /* NIC passes up the partial sum */ ++ if (desc->csum) ++ skb->ip_summed = CHECKSUM_COMPLETE; ++ else ++ skb->ip_summed = CHECKSUM_NONE; ++ skb->csum = csum_unfold(desc->csum); + } + +- ctx->curr_frag_cnt++; +- rx->cnt++; +- idx = rx->cnt & rx->mask; +- work_cnt++; +- desc = &rx->desc.desc_ring[idx]; ++ /* parse flags & pass relevant info up */ ++ if (likely(feat & NETIF_F_RXHASH) && ++ gve_needs_rss(desc->flags_seq)) ++ skb_set_hash(skb, be32_to_cpu(desc->rss_hash), ++ gve_rss_type(desc->flags_seq)); + } + +- if (likely(feat & NETIF_F_RXCSUM)) { +- /* NIC passes up the partial sum */ +- if (first_desc->csum) +- skb->ip_summed = CHECKSUM_COMPLETE; ++ if (is_last_frag) { ++ skb_record_rx_queue(skb, rx->q_num); ++ if (skb_is_nonlinear(skb)) ++ napi_gro_frags(napi); + else +- skb->ip_summed = CHECKSUM_NONE; +- skb->csum = csum_unfold(first_desc->csum); ++ napi_gro_receive(napi, skb); ++ goto finish_ok_pkt; + } + +- /* parse flags & pass relevant info up */ +- if (likely(feat & NETIF_F_RXHASH) && +- gve_needs_rss(first_desc->flags_seq)) +- skb_set_hash(skb, be32_to_cpu(first_desc->rss_hash), +- gve_rss_type(first_desc->flags_seq)); +- +- *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0); +- *work_done = work_cnt; +- skb_record_rx_queue(skb, rx->q_num); +- if (skb_is_nonlinear(skb)) +- napi_gro_frags(napi); +- else +- napi_gro_receive(napi, skb); +- +- gve_rx_ctx_clear(ctx); +- return true; +- +-skb_alloc_fail: +- if (napi->skb) +- napi_free_frags(napi); +- *packet_size_bytes = 0; +- *work_done = ctx->expected_frag_cnt; +- while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) { +- rx->cnt++; +- ctx->curr_frag_cnt++; ++ goto finish_frag; ++ ++finish_ok_pkt: ++ cnts->ok_pkt_bytes += ctx->total_size; ++ cnts->ok_pkt_cnt++; ++finish_frag: ++ ctx->frag_cnt++; ++ if (is_last_frag) { ++ cnts->total_pkt_cnt++; ++ cnts->cont_pkt_cnt += (ctx->frag_cnt > 1); ++ gve_rx_ctx_clear(ctx); + } +- gve_rx_ctx_clear(ctx); +- return false; + } + + bool gve_rx_work_pending(struct gve_rx_ring *rx) +@@ -704,36 +773,39 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) + static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat) + { +- u32 work_done = 0, total_packet_cnt = 0, ok_packet_cnt = 0; ++ struct gve_rx_ctx *ctx = &rx->ctx; + struct gve_priv *priv = rx->gve; ++ struct gve_rx_cnts cnts = {0}; ++ struct gve_rx_desc *next_desc; + u32 idx = rx->cnt & rx->mask; +- struct gve_rx_desc *desc; +- u64 bytes = 0; ++ u32 work_done = 0; ++ ++ struct gve_rx_desc *desc = &rx->desc.desc_ring[idx]; + +- desc = &rx->desc.desc_ring[idx]; ++ // Exceed budget only if (and till) the inflight packet is consumed. + while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) && +- work_done < budget) { +- u64 packet_size_bytes = 0; +- u32 work_cnt = 0; +- bool dropped; +- +- netif_info(priv, rx_status, priv->dev, +- "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n", +- rx->q_num, idx, desc, desc->flags_seq); +- netif_info(priv, rx_status, priv->dev, +- "[%d] seqno=%d rx->desc.seqno=%d\n", +- rx->q_num, GVE_SEQNO(desc->flags_seq), +- rx->desc.seqno); +- +- dropped = !gve_rx(rx, feat, &packet_size_bytes, &work_cnt); +- if (!dropped) { +- bytes += packet_size_bytes; +- ok_packet_cnt++; +- } +- total_packet_cnt++; ++ (work_done < budget || ctx->frag_cnt)) { ++ next_desc = &rx->desc.desc_ring[(idx + 1) & rx->mask]; ++ prefetch(next_desc); ++ ++ gve_rx(rx, feat, desc, idx, &cnts); ++ ++ rx->cnt++; + idx = rx->cnt & rx->mask; + desc = &rx->desc.desc_ring[idx]; +- work_done += work_cnt; ++ rx->desc.seqno = gve_next_seqno(rx->desc.seqno); ++ work_done++; ++ } ++ ++ // The device will only send whole packets. ++ if (unlikely(ctx->frag_cnt)) { ++ struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi; ++ ++ napi_free_frags(napi); ++ gve_rx_ctx_clear(&rx->ctx); ++ netdev_warn(priv->dev, "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset", ++ GVE_SEQNO(desc->flags_seq), rx->desc.seqno); ++ gve_schedule_reset(rx->gve); + } + + if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold) +@@ -741,8 +813,10 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + + if (work_done) { + u64_stats_update_begin(&rx->statss); +- rx->rpackets += ok_packet_cnt; +- rx->rbytes += bytes; ++ rx->rpackets += cnts.ok_pkt_cnt; ++ rx->rbytes += cnts.ok_pkt_bytes; ++ rx->rx_cont_packet_cnt += cnts.cont_pkt_cnt; ++ rx->rx_desc_err_dropped_pkt += cnts.desc_err_pkt_cnt; + u64_stats_update_end(&rx->statss); + } + +@@ -766,10 +840,8 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget, } } - /* restock desc ring slots */ - dma_wmb(); /* Ensure descs are visible before ringing doorbell */ gve_rx_write_doorbell(priv, rx); - return total_packet_cnt; +- return total_packet_cnt; ++ return cnts.total_pkt_cnt; } + + int gve_rx_poll(struct gve_notify_block *block, int budget) +diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c +index 8c939628e..630f42a30 100644 +--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c ++++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c +@@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv, + int err; + + err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page, +- &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL); ++ &buf_state->addr, DMA_FROM_DEVICE, GFP_ATOMIC); + if (err) + return err; + +@@ -568,7 +568,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, + + if (eop && buf_len <= priv->rx_copybreak) { + rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, +- &buf_state->page_info, buf_len, 0, NULL); ++ &buf_state->page_info, buf_len, 0); + if (unlikely(!rx->ctx.skb_head)) + goto error; + rx->ctx.skb_tail = rx->ctx.skb_head; diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index a75430117..4888bf05f 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c @@ -93,6 +1141,123 @@ index a75430117..4888bf05f 100644 /* Give packets to NIC. Even if this packet failed to send the doorbell * might need to be rung because of xmit_more. */ +diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c +index ec394d991..9ae567077 100644 +--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c ++++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c +@@ -598,9 +598,9 @@ static int gve_num_buffer_descs_needed(const struct sk_buff *skb) + */ + static bool gve_can_send_tso(const struct sk_buff *skb) + { +- const int header_len = skb_checksum_start_offset(skb) + tcp_hdrlen(skb); + const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1; + const struct skb_shared_info *shinfo = skb_shinfo(skb); ++ const int header_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + const int gso_size = shinfo->gso_size; + int cur_seg_num_bufs; + int cur_seg_size; +@@ -795,7 +795,7 @@ static void gve_handle_packet_completion(struct gve_priv *priv, + GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) { + /* No outstanding miss completion but packet allocated + * implies packet receives a re-injection completion +- * without a a prior miss completion. Return without ++ * without a prior miss completion. Return without + * completing the packet. + */ + net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n", +@@ -953,12 +953,18 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + atomic_set_release(&tx->dqo_compl.hw_tx_head, tx_head); + } else if (type == GVE_COMPL_TYPE_DQO_PKT) { + u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); +- +- gve_handle_packet_completion(priv, tx, !!napi, +- compl_tag, +- &pkt_compl_bytes, +- &pkt_compl_pkts, +- /*is_reinjection=*/false); ++ if (compl_tag & GVE_ALT_MISS_COMPL_BIT) { ++ compl_tag &= ~GVE_ALT_MISS_COMPL_BIT; ++ gve_handle_miss_completion(priv, tx, compl_tag, ++ &miss_compl_bytes, ++ &miss_compl_pkts); ++ } else { ++ gve_handle_packet_completion(priv, tx, !!napi, ++ compl_tag, ++ &pkt_compl_bytes, ++ &pkt_compl_pkts, ++ false); ++ } + } else if (type == GVE_COMPL_TYPE_DQO_MISS) { + u16 compl_tag = le16_to_cpu(compl_desc->completion_tag); + +@@ -972,7 +978,7 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + compl_tag, + &reinject_compl_bytes, + &reinject_compl_pkts, +- /*is_reinjection=*/true); ++ true); + } + + tx->dqo_compl.head = +diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c +index d57508bc4..6ba46adaa 100644 +--- a/drivers/net/ethernet/google/gve/gve_utils.c ++++ b/drivers/net/ethernet/google/gve/gve_utils.c +@@ -50,34 +50,18 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) + + struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, u16 len, +- u16 padding, struct gve_rx_ctx *ctx) ++ u16 padding) + { + void *va = page_info->page_address + padding + page_info->page_offset; +- int skb_linear_offset = 0; +- bool set_protocol = false; + struct sk_buff *skb; + +- if (ctx) { +- if (!ctx->skb_head) +- ctx->skb_head = napi_alloc_skb(napi, ctx->total_expected_size); +- +- if (unlikely(!ctx->skb_head)) +- return NULL; +- skb = ctx->skb_head; +- skb_linear_offset = skb->len; +- set_protocol = ctx->curr_frag_cnt == ctx->expected_frag_cnt - 1; +- } else { +- skb = napi_alloc_skb(napi, len); +- +- if (unlikely(!skb)) +- return NULL; +- set_protocol = true; +- } +- __skb_put(skb, len); +- skb_copy_to_linear_data_offset(skb, skb_linear_offset, va, len); ++ skb = napi_alloc_skb(napi, len); ++ if (unlikely(!skb)) ++ return NULL; + +- if (set_protocol) +- skb->protocol = eth_type_trans(skb, dev); ++ __skb_put(skb, len); ++ skb_copy_to_linear_data_offset(skb, 0, va, len); ++ skb->protocol = eth_type_trans(skb, dev); + + return skb; + } +diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h +index 6d98e69fd..79595940b 100644 +--- a/drivers/net/ethernet/google/gve/gve_utils.h ++++ b/drivers/net/ethernet/google/gve/gve_utils.h +@@ -19,7 +19,7 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx); + + struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, u16 len, +- u16 pad, struct gve_rx_ctx *ctx); ++ u16 pad); + + /* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */ + void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info); -- 2.39.2