tcp: fix tcp_ack() performance problem
[pandora-kernel.git] net/ipv4/tcp_input.c
index aa38f98..a12b455 100644
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/kernel.h>
+#include <linux/prefetch.h>
 #include <net/dst.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
-#include <net/netdma.h>
 #include <linux/errqueue.h>
 
 int sysctl_tcp_timestamps __read_mostly = 1;
@@ -3030,6 +3030,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
        return packets_acked;
 }
 
+static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
+                          u32 prior_snd_una)
+{
+       const struct skb_shared_info *shinfo;
+
+       /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
+       if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
+               return;
+
+       shinfo = skb_shinfo(skb);
+       if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+           between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
+               __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+}
+
 /* Remove acknowledged frames from the retransmission queue. If our packet
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
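
The tcp_ack_tstamp() helper added above moves the skb_shinfo() dereference behind a test of sk->sk_tsflags, which is already hot in cache, so the common case (no ACK timestamping requested) no longer pulls in the shared-info cache line for every acked skb. For reference only, not part of the patch: a minimal userspace sketch of how an application would opt in to this path, assuming a kernel that supports SOF_TIMESTAMPING_TX_ACK; error handling and the rest of the connection setup are omitted.

    #include <sys/socket.h>
    #include <linux/net_tstamp.h>

    /* Ask for ACK timestamps on an already-connected TCP socket "fd".
     * Only sockets that set SOF_TIMESTAMPING_TX_ACK fail the likely()
     * test in tcp_ack_tstamp() and take the timestamping path; the
     * timestamps themselves are read back from the socket error queue
     * (recvmsg() with MSG_ERRQUEUE).
     */
    static int enable_ack_timestamps(int fd)
    {
            int val = SOF_TIMESTAMPING_TX_ACK |   /* stamp when data is acked */
                      SOF_TIMESTAMPING_SOFTWARE;  /* report software stamps   */

            return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
    }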
@@ -3053,14 +3068,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
        first_ackt.v64 = 0;
 
        while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
-               struct skb_shared_info *shinfo = skb_shinfo(skb);
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
                u8 sacked = scb->sacked;
                u32 acked_pcount;
 
-               if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-                   between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
-                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+               tcp_ack_tstamp(sk, skb, prior_snd_una);
 
                /* Determine how many packets and what bytes were acked, tso and else */
                if (after(scb->end_seq, tp->snd_una)) {
@@ -3074,10 +3086,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
                        fully_acked = false;
                } else {
+                       /* Speedup tcp_unlink_write_queue() and next loop */
+                       prefetchw(skb->next);
                        acked_pcount = tcp_skb_pcount(skb);
                }
 
-               if (sacked & TCPCB_RETRANS) {
+               if (unlikely(sacked & TCPCB_RETRANS)) {
                        if (sacked & TCPCB_SACKED_RETRANS)
                                tp->retrans_out -= acked_pcount;
                        flag |= FLAG_RETRANS_DATA_ACKED;
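
Not part of the patch, just context for the prefetchw() added above: prefetchw() (declared in <linux/prefetch.h>, included at the top of this patch) hints that the cache line at skb->next is about to be written, so tcp_unlink_write_queue() and the next loop iteration do not stall on a cold line once the current skb is known to be fully acked. Outside the kernel the same pattern is usually written with the GCC/Clang builtin; a minimal sketch with a made-up node type:

    #include <stdlib.h>

    struct node {
            struct node *next;
            /* payload ... */
    };

    /* Free a singly linked list, prefetching the next node for write
     * before the current one is released -- the same idea as
     * prefetchw(skb->next) in the hunk above.
     */
    static void free_list(struct node *head)
    {
            while (head) {
                    struct node *next = head->next;

                    if (next)
                            __builtin_prefetch(next, 1, 3); /* rw=1 (write), high locality */
                    free(head);
                    head = next;
            }
    }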
@@ -3108,7 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 * connection startup slow start one packet too
                 * quickly.  This is severely frowned upon behavior.
                 */
-               if (!(scb->tcp_flags & TCPHDR_SYN)) {
+               if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
                        flag |= FLAG_DATA_ACKED;
                } else {
                        flag |= FLAG_SYN_ACKED;
@@ -3120,9 +3134,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
                tcp_unlink_write_queue(skb, sk);
                sk_wmem_free_skb(sk, skb);
-               if (skb == tp->retransmit_skb_hint)
+               if (unlikely(skb == tp->retransmit_skb_hint))
                        tp->retransmit_skb_hint = NULL;
-               if (skb == tp->lost_skb_hint)
+               if (unlikely(skb == tp->lost_skb_hint))
                        tp->lost_skb_hint = NULL;
        }
 
@@ -3133,7 +3147,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                flag |= FLAG_SACK_RENEGING;
 
        skb_mstamp_get(&now);
-       if (first_ackt.v64) {
+       if (likely(first_ackt.v64)) {
                seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
                ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
        }
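
The likely()/unlikely() annotations in this and the previous hunks are branch-prediction hints: the annotated conditions (retransmitted data, SYN segments, stale hint skbs) are expected to be rare on the hot path, so marking them unlikely, and the common ones likely, lets the compiler keep the ACK-processing fast path straight-line. They expand to __builtin_expect(); roughly, simplified from include/linux/compiler.h (the branch-profiling variants are left out):

    /* !!(x) normalizes the expression to 0/1 before it is compared
     * against the expected value, so likely(ptr) works as well as
     * likely(a == b).
     */
    #define likely(x)       __builtin_expect(!!(x), 1)
    #define unlikely(x)     __builtin_expect(!!(x), 0)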
@@ -3395,6 +3409,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        int acked = 0; /* Number of packets newly acked */
        long sack_rtt_us = -1L;
 
+       /* We very likely will need to access write queue head. */
+       prefetchw(sk->sk_write_queue.next);
+
        /* If the ack is older than previous acks
         * then we can probably ignore it.
         */
@@ -4958,53 +4975,6 @@ static inline bool tcp_checksum_complete_user(struct sock *sk,
               __tcp_checksum_complete_user(sk, skb);
 }
 
-#ifdef CONFIG_NET_DMA
-static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
-                                 int hlen)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       int chunk = skb->len - hlen;
-       int dma_cookie;
-       bool copied_early = false;
-
-       if (tp->ucopy.wakeup)
-               return false;
-
-       if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-               tp->ucopy.dma_chan = net_dma_find_channel();
-
-       if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
-
-               dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
-                                                        skb, hlen,
-                                                        tp->ucopy.iov, chunk,
-                                                        tp->ucopy.pinned_list);
-
-               if (dma_cookie < 0)
-                       goto out;
-
-               tp->ucopy.dma_cookie = dma_cookie;
-               copied_early = true;
-
-               tp->ucopy.len -= chunk;
-               tp->copied_seq += chunk;
-               tcp_rcv_space_adjust(sk);
-
-               if ((tp->ucopy.len == 0) ||
-                   (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
-                   (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
-                       tp->ucopy.wakeup = 1;
-                       sk->sk_data_ready(sk);
-               }
-       } else if (chunk > 0) {
-               tp->ucopy.wakeup = 1;
-               sk->sk_data_ready(sk);
-       }
-out:
-       return copied_early;
-}
-#endif /* CONFIG_NET_DMA */
-
 /* Does PAWS and seqno based validation of an incoming segment, flags will
  * play significant role here.
  */
@@ -5184,27 +5154,15 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                        }
                } else {
                        int eaten = 0;
-                       int copied_early = 0;
                        bool fragstolen = false;
 
-                       if (tp->copied_seq == tp->rcv_nxt &&
-                           len - tcp_header_len <= tp->ucopy.len) {
-#ifdef CONFIG_NET_DMA
-                               if (tp->ucopy.task == current &&
-                                   sock_owned_by_user(sk) &&
-                                   tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
-                                       copied_early = 1;
-                                       eaten = 1;
-                               }
-#endif
-                               if (tp->ucopy.task == current &&
-                                   sock_owned_by_user(sk) && !copied_early) {
-                                       __set_current_state(TASK_RUNNING);
+                       if (tp->ucopy.task == current &&
+                           tp->copied_seq == tp->rcv_nxt &&
+                           len - tcp_header_len <= tp->ucopy.len &&
+                           sock_owned_by_user(sk)) {
+                               __set_current_state(TASK_RUNNING);
 
-                                       if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
-                                               eaten = 1;
-                               }
-                               if (eaten) {
+                               if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
                                        /* Predicted packet is in window by definition.
                                         * seq == rcv_nxt and rcv_wup <= rcv_nxt.
                                         * Hence, check seq<=rcv_wup reduces to:
@@ -5220,9 +5178,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                        __skb_pull(skb, tcp_header_len);
                                        tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
+                                       eaten = 1;
                                }
-                               if (copied_early)
-                                       tcp_cleanup_rbuf(sk, skb->len);
                        }
                        if (!eaten) {
                                if (tcp_checksum_complete_user(sk, skb))
@@ -5259,14 +5216,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                        goto no_ack;
                        }
 
-                       if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
-                               __tcp_ack_snd_check(sk, 0);
+                       __tcp_ack_snd_check(sk, 0);
 no_ack:
-#ifdef CONFIG_NET_DMA
-                       if (copied_early)
-                               __skb_queue_tail(&sk->sk_async_wait_queue, skb);
-                       else
-#endif
                        if (eaten)
                                kfree_skb_partial(skb, fragstolen);
                        sk->sk_data_ready(sk);