net: implement tcp coalescing in tcp_queue_rcv()
author Eric Dumazet <edumazet@google.com>
Wed, 2 May 2012 09:58:29 +0000 (09:58 +0000)
committer David S. Miller <davem@davemloft.net>
Thu, 3 May 2012 01:11:11 +0000 (21:11 -0400)
Extend tcp coalescing by implementing it in tcp_queue_rcv(), the main
receiver function when the application is not blocked in recvmsg().

Function tcp_queue_rcv() is moved earlier in the file so that it can be
called from tcp_data_queue().

This gives good results, especially if GRO could not kick in, and if the
skb head is a fragment.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_input.c

index 5283aa4..c826ed7 100644 (file)
@@ -439,7 +439,8 @@ extern int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_connect_init(struct sock *sk);
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
-void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen);
+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+                              int hdrlen, bool *fragstolen);
 
 /* From syncookies.c */
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
index 6802c89..c2cff8b 100644 (file)
@@ -981,8 +981,8 @@ static inline int select_size(const struct sock *sk, bool sg)
 static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
        struct sk_buff *skb;
-       struct tcp_skb_cb *cb;
        struct tcphdr *th;
+       bool fragstolen;
 
        skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
        if (!skb)
@@ -995,14 +995,14 @@ static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
        if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
                goto err_free;
 
-       cb = TCP_SKB_CB(skb);
-
        TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
        TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
 
-       tcp_queue_rcv(sk, skb, sizeof(*th));
-
+       if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
+               WARN_ON_ONCE(fragstolen); /* should not happen */
+               __kfree_skb(skb);
+       }
        return size;
 
 err_free:
index a882937..2f696ef 100644 (file)
@@ -4739,6 +4739,22 @@ end:
                skb_set_owner_r(skb, sk);
 }
 
+int tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
+                 bool *fragstolen)
+{
+       int eaten;
+       struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
+
+       __skb_pull(skb, hdrlen);
+       eaten = (tail &&
+                tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+       tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+       if (!eaten) {
+               __skb_queue_tail(&sk->sk_receive_queue, skb);
+               skb_set_owner_r(skb, sk);
+       }
+       return eaten;
+}
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
@@ -4785,20 +4801,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
                }
 
                if (eaten <= 0) {
-                       struct sk_buff *tail;
 queue_and_out:
                        if (eaten < 0 &&
                            tcp_try_rmem_schedule(sk, skb->truesize))
                                goto drop;
 
-                       tail = skb_peek_tail(&sk->sk_receive_queue);
-                       eaten = (tail &&
-                                tcp_try_coalesce(sk, tail, skb,
-                                                 &fragstolen)) ? 1 : 0;
-                       if (eaten <= 0) {
-                               skb_set_owner_r(skb, sk);
-                               __skb_queue_tail(&sk->sk_receive_queue, skb);
-                       }
+                       eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
                }
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                if (skb->len)
@@ -5493,14 +5501,6 @@ discard:
        return 0;
 }
 
-void tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen)
-{
-       __skb_pull(skb, hdrlen);
-       __skb_queue_tail(&sk->sk_receive_queue, skb);
-       skb_set_owner_r(skb, sk);
-       tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-}
-
 /*
  *     TCP receive function for the ESTABLISHED state.
  *
@@ -5609,6 +5609,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                } else {
                        int eaten = 0;
                        int copied_early = 0;
+                       bool fragstolen = false;
 
                        if (tp->copied_seq == tp->rcv_nxt &&
                            len - tcp_header_len <= tp->ucopy.len) {
@@ -5666,7 +5667,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
                                /* Bulk data transfer: receiver */
-                               tcp_queue_rcv(sk, skb, tcp_header_len);
+                               eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+                                                     &fragstolen);
                        }
 
                        tcp_event_data_recv(sk, skb);
@@ -5688,7 +5690,7 @@ no_ack:
                        else
 #endif
                        if (eaten)
-                               __kfree_skb(skb);
+                               kfree_skb_partial(skb, fragstolen);
                        else
                                sk->sk_data_ready(sk, 0);
                        return 0;