From a0eee8681a9bc21d3f83260a8ceb3fa30236b189 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 May 2019 17:17:22 -0700 Subject: [PATCH] tcp: limit payload size of sacked skbs commit 3b4929f65b0d8249f19a50245cd88ed1a2f78cff upstream. Jonathan Looney reported that TCP can trigger the following crash in tcp_shifted_skb() : BUG_ON(tcp_skb_pcount(skb) < pcount); This can happen if the remote peer has advertized the smallest MSS that linux TCP accepts : 48 An skb can hold 17 fragments, and each fragment can hold 32KB on x86, or 64KB on PowerPC. This means that the 16bit witdh of TCP_SKB_CB(skb)->tcp_gso_segs can overflow. Note that tcp_sendmsg() builds skbs with less than 64KB of payload, so this problem needs SACK to be enabled. SACK blocks allow TCP to coalesce multiple skbs in the retransmit queue, thus filling the 17 fragments to maximal capacity. CVE-2019-11477 -- u16 overflow of TCP_SKB_CB(skb)->tcp_gso_segs Backport notes, provided by Joao Martins v4.15 or since commit 737ff314563 ("tcp: use sequence distance to detect reordering") had switched from the packet-based FACK tracking and switched to sequence-based. v4.14 and older still have the old logic and hence on tcp_skb_shift_data() needs to retain its original logic and have @fack_count in sync. In other words, we keep the increment of pcount with tcp_skb_pcount(skb) to later used that to update fack_count. To make it more explicit we track the new skb that gets incremented to pcount in @next_pcount, and we get to avoid the constant invocation of tcp_skb_pcount(skb) all together. Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing") Signed-off-by: Eric Dumazet Reported-by: Jonathan Looney Acked-by: Neal Cardwell Reviewed-by: Tyler Hicks Cc: Yuchung Cheng Cc: Bruce Curtis Cc: Jonathan Lemon Signed-off-by: David S. Miller [Salvatore Bonaccorso: Adjust for context changes to backport to 4.9.168] [bwh: Backported to 3.16: adjust context] Signed-off-by: Ben Hutchings --- include/linux/tcp.h | 3 +++ include/net/tcp.h | 2 ++ net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 27 ++++++++++++++++++++++----- net/ipv4/tcp_output.c | 4 ++-- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7f59ee946983..3f0ede97ffa3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -497,4 +497,7 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) #endif /* __KERNEL__ */ +int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, + int shiftlen); + #endif /* _LINUX_TCP_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 3ff241edf62b..2ed5c1818bf7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -52,6 +52,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER (128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 +#define TCP_MIN_SND_MSS 48 +#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) /* * Never offer a window over 32767 without using window scaling. Some diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9d0a94bfe5dc..a803bfc22fd1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3163,6 +3163,7 @@ void __init tcp_init(void) int i, max_rshare, max_wshare, cnt; unsigned long jiffy = jiffies; + BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE); BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); percpu_counter_init(&tcp_sockets_allocated, 0); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 73a4355c7431..7db22978114c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1431,7 +1431,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, TCP_SKB_CB(skb)->seq += shifted; skb_shinfo(prev)->gso_segs += pcount; - BUG_ON(skb_shinfo(skb)->gso_segs < pcount); + WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); skb_shinfo(skb)->gso_segs -= pcount; /* When we're adding to gso_segs == 1, gso_size will be zero, @@ -1499,6 +1499,21 @@ static int skb_can_shift(const struct sk_buff *skb) return !skb_headlen(skb) && skb_is_nonlinear(skb); } +int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, + int pcount, int shiftlen) +{ + /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE) + * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need + * to make sure not storing more than 65535 * 8 bytes per skb, + * even if current MSS is bigger. + */ + if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)) + return 0; + if (unlikely(tcp_skb_pcount(to) + pcount > 65535)) + return 0; + return skb_shift(to, from, shiftlen); +} + /* Try collapsing SACK blocks spanning across multiple skbs to a single * skb. */ @@ -1510,6 +1525,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *prev; int mss; + int next_pcount; int pcount = 0; int len; int in_sack; @@ -1604,7 +1620,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una)) goto fallback; - if (!skb_shift(prev, skb, len)) + if (!tcp_skb_shift(prev, skb, pcount, len)) goto fallback; if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) goto out; @@ -1623,9 +1639,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, goto out; len = skb->len; - if (skb_shift(prev, skb, len)) { - pcount += tcp_skb_pcount(skb); - tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); + next_pcount = tcp_skb_pcount(skb); + if (tcp_skb_shift(prev, skb, next_pcount, len)) { + pcount += next_pcount; + tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0); } out: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index da11cd75e6f6..cb1130cbf0cf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1172,8 +1172,8 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - if (mss_now < 48) - mss_now = 48; + if (mss_now < TCP_MIN_SND_MSS) + mss_now = TCP_MIN_SND_MSS; /* Now subtract TCP options size, not including SACKs */ mss_now -= tp->tcp_header_len - sizeof(struct tcphdr); -- 2.39.2