diff --git a/kernel.spec b/kernel.spec
index f52a9c88c..cbedb1e7b 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -62,7 +62,7 @@ Summary: The Linux kernel
 # For non-released -rc kernels, this will be appended after the rcX and
 # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
 #
-%global baserelease 1
+%global baserelease 2
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -746,6 +746,11 @@ Patch22059: uvcvideo-Reset-bytesused-field-when-recycling-erroneous-buffer.patch
 Patch22062: cfg80211-add-channel-flag-to-prohibit-OFDM-operation.patch
 Patch22063: brcmsmac-use-channel-flags-to-restrict-OFDM.patch
 
+#rhbz 845558 844714
+Patch22070: net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch
+Patch22071: sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch
+Patch22072: tcp-Apply-device-TSO-segment-limit-earlier.patch
+
 # END OF PATCH DEFINITIONS
 
 %endif
@@ -1437,6 +1442,11 @@ ApplyPatch uvcvideo-Reset-bytesused-field-when-recycling-erroneous-buffer.patch
 ApplyPatch cfg80211-add-channel-flag-to-prohibit-OFDM-operation.patch
 ApplyPatch brcmsmac-use-channel-flags-to-restrict-OFDM.patch
 
+#rhbz 845558 844714
+ApplyPatch net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch
+ApplyPatch sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch
+ApplyPatch tcp-Apply-device-TSO-segment-limit-earlier.patch
+
 # END OF PATCH APPLICATIONS
 
 %endif
@@ -2294,6 +2304,9 @@ fi
 # ||----w |
 # || ||
 %changelog
+* Fri Aug 03 2012 Josh Boyer <jwboyer@redhat.com> - 3.6.0-0.rc1.git0.2
+- CVE-2012-3412 sfc: potential rDOS through TCP MSS option (rhbz 844714 845558)
+
 * Fri Aug 03 2012 Josh Boyer <jwboyer@redhat.com> - 3.6.0-0.rc1.git0.1
 - Linux v3.6-rc1
 - Disable debugging options.
diff --git a/net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch b/net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch
new file mode 100644
index 000000000..bebccf3d0
--- /dev/null
+++ b/net-Allow-driver-to-limit-number-of-GSO-segments-per-skb.patch
@@ -0,0 +1,70 @@
+From 30b678d844af3305cda5953467005cebb5d7b687 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <bhutchings@solarflare.com>
+Date: Mon, 30 Jul 2012 15:57:00 +0000
+Subject: [PATCH] net: Allow driver to limit number of GSO segments per skb
+
+A peer (or local user) may cause TCP to use a nominal MSS of as little
+as 88 (actual MSS of 76 with timestamps). Given that we have a
+sufficiently prodigious local sender and the peer ACKs quickly enough,
+it is nevertheless possible to grow the window for such a connection
+to the point that we will try to send just under 64K at once. This
+results in a single skb that expands to 861 segments.
+
+In some drivers with TSO support, such an skb will require hundreds of
+DMA descriptors; a substantial fraction of a TX ring or even more than
+a full ring. The TX queue selected for the skb may stall and trigger
+the TX watchdog repeatedly (since the problem skb will be retried
+after the TX reset). This particularly affects sfc, for which the
+issue is designated as CVE-2012-3412.
+
+Therefore:
+1. Add the field net_device::gso_max_segs holding the device-specific
+   limit.
+2. In netif_skb_features(), if the number of segments is too high then
+   mask out GSO features to force fall back to software GSO.
+
+Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ include/linux/netdevice.h |    2 ++
+ net/core/dev.c            |    4 ++++
+ 2 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index eb06e58..a9db4f3 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1300,6 +1300,8 @@ struct net_device {
+ 	/* for setting kernel sock attribute on TCP connection setup */
+ #define GSO_MAX_SIZE		65536
+ 	unsigned int		gso_max_size;
++#define GSO_MAX_SEGS		65535
++	u16			gso_max_segs;
+ 
+ #ifdef CONFIG_DCB
+ 	/* Data Center Bridging netlink ops */
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 0cb3fe8..f91abf8 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2134,6 +2134,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
+ 	__be16 protocol = skb->protocol;
+ 	netdev_features_t features = skb->dev->features;
+ 
++	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
++		features &= ~NETIF_F_GSO_MASK;
++
+ 	if (protocol == htons(ETH_P_8021Q)) {
+ 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ 		protocol = veh->h_vlan_encapsulated_proto;
+@@ -5986,6 +5989,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+ 	dev_net_set(dev, &init_net);
+ 
+ 	dev->gso_max_size = GSO_MAX_SIZE;
++	dev->gso_max_segs = GSO_MAX_SEGS;
+ 
+ 	INIT_LIST_HEAD(&dev->napi_list);
+ 	INIT_LIST_HEAD(&dev->unreg_list);
+-- 
+1.7.7.6
+
diff --git a/sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch b/sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch
new file mode 100644
index 000000000..07ec616e0
--- /dev/null
+++ b/sfc-Fix-maximum-number-of-TSO-segments-and-minimum-TX-queue-size.patch
@@ -0,0 +1,156 @@
+From 7e6d06f0de3f74ca929441add094518ae332257c Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <bhutchings@solarflare.com>
+Date: Mon, 30 Jul 2012 15:57:44 +0000
+Subject: [PATCH] sfc: Fix maximum number of TSO segments and minimum TX queue
+ size
+
+Currently an skb requiring TSO may not fit within a minimum-size TX
+queue. The TX queue selected for the skb may stall and trigger the TX
+watchdog repeatedly (since the problem skb will be retried after the
+TX reset). This issue is designated as CVE-2012-3412.
+
+Set the maximum number of TSO segments for our devices to 100. This
+should make no difference to behaviour unless the actual MSS is less
+than about 700. Increase the minimum TX queue size accordingly to
+allow for 2 worst-case skbs, so that there will definitely be space
+to add an skb after we wake a queue.
+
+To avoid invalidating existing configurations, change
+efx_ethtool_set_ringparam() to fix up values that are too small rather
+than returning -EINVAL.
+
+Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/sfc/efx.c     |    6 ++++++
+ drivers/net/ethernet/sfc/efx.h     |   14 ++++++++++----
+ drivers/net/ethernet/sfc/ethtool.c |   16 +++++++++++-----
+ drivers/net/ethernet/sfc/tx.c      |   19 +++++++++++++++++++
+ 4 files changed, 46 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
+index 70554a1..65a8d49 100644
+--- a/drivers/net/ethernet/sfc/efx.c
++++ b/drivers/net/ethernet/sfc/efx.c
+@@ -1503,6 +1503,11 @@ static int efx_probe_all(struct efx_nic *efx)
+ 		goto fail2;
+ 	}
+ 
++	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
++	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
++		rc = -EINVAL;
++		goto fail3;
++	}
+ 	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
+ 
+ 	rc = efx_probe_filters(efx);
+@@ -2070,6 +2075,7 @@ static int efx_register_netdev(struct efx_nic *efx)
+ 	net_dev->irq = efx->pci_dev->irq;
+ 	net_dev->netdev_ops = &efx_netdev_ops;
+ 	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
++	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
+ 
+ 	rtnl_lock();
+ 
+diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
+index be8f915..70755c9 100644
+--- a/drivers/net/ethernet/sfc/efx.h
++++ b/drivers/net/ethernet/sfc/efx.h
+@@ -30,6 +30,7 @@ extern netdev_tx_t
+ efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
+ extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+ extern int efx_setup_tc(struct net_device *net_dev, u8 num_tc);
++extern unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
+ 
+ /* RX */
+ extern int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
+@@ -52,10 +53,15 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
+ #define EFX_MAX_EVQ_SIZE 16384UL
+ #define EFX_MIN_EVQ_SIZE 512UL
+ 
+-/* The smallest [rt]xq_entries that the driver supports. Callers of
+- * efx_wake_queue() assume that they can subsequently send at least one
+- * skb. Falcon/A1 may require up to three descriptors per skb_frag. */
+-#define EFX_MIN_RING_SIZE (roundup_pow_of_two(2 * 3 * MAX_SKB_FRAGS))
++/* Maximum number of TCP segments we support for soft-TSO */
++#define EFX_TSO_MAX_SEGS 100
++
++/* The smallest [rt]xq_entries that the driver supports. RX minimum
++ * is a bit arbitrary. For TX, we must have space for at least 2
++ * TSO skbs.
++ */
++#define EFX_RXQ_MIN_ENT 128U
++#define EFX_TXQ_MIN_ENT(efx) (2 * efx_tx_max_skb_descs(efx))
+ 
+ /* Filters */
+ extern int efx_probe_filters(struct efx_nic *efx);
+diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
+index 10536f9..8cba2df 100644
+--- a/drivers/net/ethernet/sfc/ethtool.c
++++ b/drivers/net/ethernet/sfc/ethtool.c
+@@ -680,21 +680,27 @@ static int efx_ethtool_set_ringparam(struct net_device *net_dev,
+ 				     struct ethtool_ringparam *ring)
+ {
+ 	struct efx_nic *efx = netdev_priv(net_dev);
++	u32 txq_entries;
+ 
+ 	if (ring->rx_mini_pending || ring->rx_jumbo_pending ||
+ 	    ring->rx_pending > EFX_MAX_DMAQ_SIZE ||
+ 	    ring->tx_pending > EFX_MAX_DMAQ_SIZE)
+ 		return -EINVAL;
+ 
+-	if (ring->rx_pending < EFX_MIN_RING_SIZE ||
+-	    ring->tx_pending < EFX_MIN_RING_SIZE) {
++	if (ring->rx_pending < EFX_RXQ_MIN_ENT) {
+ 		netif_err(efx, drv, efx->net_dev,
+-			  "TX and RX queues cannot be smaller than %ld\n",
+-			  EFX_MIN_RING_SIZE);
++			  "RX queues cannot be smaller than %u\n",
++			  EFX_RXQ_MIN_ENT);
+ 		return -EINVAL;
+ 	}
+ 
+-	return efx_realloc_channels(efx, ring->rx_pending, ring->tx_pending);
++	txq_entries = max(ring->tx_pending, EFX_TXQ_MIN_ENT(efx));
++	if (txq_entries != ring->tx_pending)
++		netif_warn(efx, drv, efx->net_dev,
++			   "increasing TX queue size to minimum of %u\n",
++			   txq_entries);
++
++	return efx_realloc_channels(efx, ring->rx_pending, txq_entries);
+ }
+ 
+ static int efx_ethtool_set_pauseparam(struct net_device *net_dev,
+diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
+index 9b225a7..1871343 100644
+--- a/drivers/net/ethernet/sfc/tx.c
++++ b/drivers/net/ethernet/sfc/tx.c
+@@ -119,6 +119,25 @@ efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
+ 	return len;
+ }
+ 
++unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
++{
++	/* Header and payload descriptor for each output segment, plus
++	 * one for every input fragment boundary within a segment
++	 */
++	unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
++
++	/* Possibly one more per segment for the alignment workaround */
++	if (EFX_WORKAROUND_5391(efx))
++		max_descs += EFX_TSO_MAX_SEGS;
++
++	/* Possibly more for PCIe page boundaries within input fragments */
++	if (PAGE_SIZE > EFX_PAGE_SIZE)
++		max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
++				   DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
++
++	return max_descs;
++}
++
+ /*
+  * Add a socket buffer to a TX queue
+  *
+-- 
+1.7.7.6
+
diff --git a/tcp-Apply-device-TSO-segment-limit-earlier.patch b/tcp-Apply-device-TSO-segment-limit-earlier.patch
new file mode 100644
index 000000000..223862326
--- /dev/null
+++ b/tcp-Apply-device-TSO-segment-limit-earlier.patch
@@ -0,0 +1,137 @@
+From 1485348d2424e1131ea42efc033cbd9366462b01 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <bhutchings@solarflare.com>
+Date: Mon, 30 Jul 2012 16:11:42 +0000
+Subject: [PATCH] tcp: Apply device TSO segment limit earlier
+
+Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to
+limit the size of TSO skbs. This avoids the need to fall back to
+software GSO for local TCP senders.
+
+Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ include/net/sock.h    |    2 ++
+ net/core/sock.c       |    1 +
+ net/ipv4/tcp.c        |    4 +++-
+ net/ipv4/tcp_cong.c   |    3 ++-
+ net/ipv4/tcp_output.c |   21 ++++++++++++---------
+ 5 files changed, 20 insertions(+), 11 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index b373023..72132ae 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -218,6 +218,7 @@ struct cg_proto;
+  * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
+  * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
+  * @sk_gso_max_size: Maximum GSO segment size to build
++ * @sk_gso_max_segs: Maximum number of GSO segments
+  * @sk_lingertime: %SO_LINGER l_linger setting
+  * @sk_backlog: always used with the per-socket spinlock held
+  * @sk_callback_lock: used with the callbacks in the end of this struct
+@@ -338,6 +339,7 @@ struct sock {
+ 	netdev_features_t	sk_route_nocaps;
+ 	int			sk_gso_type;
+ 	unsigned int		sk_gso_max_size;
++	u16			sk_gso_max_segs;
+ 	int			sk_rcvlowat;
+ 	unsigned long		sk_lingertime;
+ 	struct sk_buff_head	sk_error_queue;
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 6b654b3..8f67ced 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1458,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+ 		} else {
+ 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ 			sk->sk_gso_max_size = dst->dev->gso_max_size;
++			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
+ 		}
+ 	}
+ }
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index e7e6eea..2109ff4 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
+ 			   old_size_goal + mss_now > xmit_size_goal)) {
+ 			xmit_size_goal = old_size_goal;
+ 		} else {
+-			tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
++			tp->xmit_size_goal_segs =
++				min_t(u16, xmit_size_goal / mss_now,
++				      sk->sk_gso_max_segs);
+ 			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
+ 		}
+ 	}
+diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
+index 4d4db16..1432cdb 100644
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+ 	left = tp->snd_cwnd - in_flight;
+ 	if (sk_can_gso(sk) &&
+ 	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
+-	    left * tp->mss_cache < sk->sk_gso_max_size)
++	    left * tp->mss_cache < sk->sk_gso_max_size &&
++	    left < sk->sk_gso_max_segs)
+ 		return true;
+ 	return left <= tcp_max_tso_deferred_mss(tp);
+ }
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 3f1bcff..a7b3ec9 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1522,21 +1522,21 @@ static void tcp_cwnd_validate(struct sock *sk)
+  * when we would be allowed to send the split-due-to-Nagle skb fully.
+  */
+ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
+-					unsigned int mss_now, unsigned int cwnd)
++					unsigned int mss_now, unsigned int max_segs)
+ {
+ 	const struct tcp_sock *tp = tcp_sk(sk);
+-	u32 needed, window, cwnd_len;
++	u32 needed, window, max_len;
+ 
+ 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
+-	cwnd_len = mss_now * cwnd;
++	max_len = mss_now * max_segs;
+ 
+-	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+-		return cwnd_len;
++	if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
++		return max_len;
+ 
+ 	needed = min(skb->len, window);
+ 
+-	if (cwnd_len <= needed)
+-		return cwnd_len;
++	if (max_len <= needed)
++		return max_len;
+ 
+ 	return needed - needed % mss_now;
+ }
+@@ -1765,7 +1765,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+ 	limit = min(send_win, cong_win);
+ 
+ 	/* If a full-sized TSO skb can be sent, do it. */
+-	if (limit >= sk->sk_gso_max_size)
++	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
++			   sk->sk_gso_max_segs * tp->mss_cache))
+ 		goto send_now;
+ 
+ 	/* Middle in queue won't get any more data, full sendable already? */
+@@ -1999,7 +2000,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+ 		limit = mss_now;
+ 		if (tso_segs > 1 && !tcp_urg_mode(tp))
+ 			limit = tcp_mss_split_point(sk, skb, mss_now,
+-						    cwnd_quota);
++						    min_t(unsigned int,
++							  cwnd_quota,
++							  sk->sk_gso_max_segs));
+ 
+ 		if (skb->len > limit &&
+ 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+-- 
+1.7.7.6
+
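
The arithmetic behind CVE-2012-3412 can be checked by hand. The sketch below is a standalone userspace C model, not kernel code, of the math in the three patches above: the segment count produced by the minimal 76-byte MSS, the worst-case descriptor count from efx_tx_max_skb_descs(), and the software-GSO fallback test added to netif_skb_features(). EFX_TSO_MAX_SEGS and GSO_MAX_SIZE are taken from the patches; MAX_SKB_FRAGS = 18 (65536/PAGE_SIZE + 2 with 4 KiB pages), the 65424-byte payload, and the old 128-entry minimum ring are illustrative assumptions.

/*
 * Standalone sketch of the CVE-2012-3412 arithmetic (userspace C, not
 * kernel code).  Build with: cc -o tso-math tso-math.c
 */
#include <stdio.h>

#define GSO_MAX_SIZE     65536   /* from include/linux/netdevice.h */
#define EFX_TSO_MAX_SEGS 100     /* from the sfc patch */
#define MAX_SKB_FRAGS    18      /* assumption: 65536/PAGE_SIZE + 2, 4 KiB pages */

/* Segments TSO produces for a payload at a given MSS (ceiling division,
 * mirroring how the kernel fills skb_shinfo(skb)->gso_segs). */
static unsigned int gso_segs(unsigned int len, unsigned int mss)
{
	return (len + mss - 1) / mss;
}

/* Worst-case DMA descriptors per TSO skb, as in efx_tx_max_skb_descs():
 * header + payload descriptor per output segment, plus one per input
 * fragment boundary (the two conditional workarounds are omitted). */
static unsigned int worst_case_descs(unsigned int segs)
{
	return segs * 2 + MAX_SKB_FRAGS;
}

int main(void)
{
	/* The attack: minimal MSS (76 bytes of payload once timestamps
	 * are counted) and a window grown to just under 64K. */
	unsigned int mss  = 76;
	unsigned int len  = 65424;                 /* just under 64K */
	unsigned int segs = gso_segs(len, mss);    /* 861 */

	/* Old minimum TX ring: roundup_pow_of_two(2 * 3 * MAX_SKB_FRAGS),
	 * i.e. 128 entries for MAX_SKB_FRAGS = 18. */
	unsigned int old_min_ring = 128;

	printf("segments for %u bytes at MSS %u: %u\n", len, mss, segs);
	printf("worst-case descriptors: %u (old minimum TX ring: %u)\n",
	       worst_case_descs(segs), old_min_ring);

	/* Generic fix: netif_skb_features() masks out hardware GSO and
	 * falls back to software segmentation for oversized skbs. */
	if (segs > EFX_TSO_MAX_SEGS)
		printf("gso_segs %u > gso_max_segs %u: software GSO fallback\n",
		       segs, EFX_TSO_MAX_SEGS);

	/* Driver fix: the minimum TX ring must hold two worst-case skbs
	 * (EFX_TXQ_MIN_ENT), which is only bounded once segments are capped. */
	printf("new minimum TX ring: %u entries\n",
	       2 * worst_case_descs(EFX_TSO_MAX_SEGS));
	printf("MSS below %u bytes can exceed the segment cap\n",
	       GSO_MAX_SIZE / EFX_TSO_MAX_SEGS);
	return 0;
}

The 861-segment skb needs on the order of 1740 descriptors, far more than a minimum-size ring, which is why the queue stalls and the TX watchdog fires forever. The 100-segment cap is safe for normal traffic because an skb is already limited to GSO_MAX_SIZE bytes: it can only exceed 100 segments when the MSS drops below 65536 / 100 ≈ 655 bytes, consistent with the sfc commit's statement that behaviour is unchanged unless the actual MSS is less than about 700.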