tcp: reduce out_of_order memory use

author Eric Dumazet <eric.dumazet@gmail.com>

Sun, 18 Mar 2012 11:07:47 +0000 (11:07 +0000)

committer David S. Miller <davem@davemloft.net>

Mon, 19 Mar 2012 20:53:08 +0000 (16:53 -0400)
author Eric Dumazet <eric.dumazet@gmail.com>
Sun, 18 Mar 2012 11:07:47 +0000 (11:07 +0000)
committer David S. Miller <davem@davemloft.net>
Mon, 19 Mar 2012 20:53:08 +0000 (16:53 -0400)
diff --git a/include/linux/snmp.h b/include/linux/snmp.h

index 8ee8af4..2e68f5b 100644 (file)
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -233,6 +233,7 @@ enum
         LINUX_MIB_TCPREQQFULLDOCOOKIES,         /* TCPReqQFullDoCookies */
         LINUX_MIB_TCPREQQFULLDROP,              /* TCPReqQFullDrop */
         LINUX_MIB_TCPRETRANSFAIL,               /* TCPRetransFail */
+       LINUX_MIB_TCPRCVCOALESCE,                       /* TCPRcvCoalesce */
         __LINUX_MIB_MAX
  };
  
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c

index 02d6107..8af0d44 100644 (file)
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = {
         SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
         SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
         SNMP_MIB_ITEM("TCPRetransFail", LINUX_MIB_TCPRETRANSFAIL),
+       SNMP_MIB_ITEM("TCPRcvCoalesce", LINUX_MIB_TCPRCVCOALESCE),
         SNMP_MIB_SENTINEL
  };
  
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index fa7de12..e886e2f 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4484,7 +4484,24 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
         end_seq = TCP_SKB_CB(skb)->end_seq;
  
         if (seq == TCP_SKB_CB(skb1)->end_seq) {
-               __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+               /* Packets in ofo can stay in queue a long time.
+                * Better try to coalesce them right now
+                * to avoid future tcp_collapse_ofo_queue(),
+                * probably the most expensive function in tcp stack.
+                */
+               if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+                       NET_INC_STATS_BH(sock_net(sk),
+                                        LINUX_MIB_TCPRCVCOALESCE);
+                       BUG_ON(skb_copy_bits(skb, 0,
+                                            skb_put(skb1, skb->len),
+                                            skb->len));
+                       TCP_SKB_CB(skb1)->end_seq = end_seq;
+                       TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+                       __kfree_skb(skb);
+                       skb = NULL;
+               } else {
+                       __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+               }
  
                 if (!tp->rx_opt.num_sacks ||
                     tp->selective_acks[0].end_seq != seq)
author	Eric Dumazet <eric.dumazet@gmail.com>
	Sun, 18 Mar 2012 11:07:47 +0000 (11:07 +0000)
committer	David S. Miller <davem@davemloft.net>
	Mon, 19 Mar 2012 20:53:08 +0000 (16:53 -0400)
include/linux/snmp.h		patch | blob | history
net/ipv4/proc.c		patch | blob | history
net/ipv4/tcp_input.c		patch | blob | history