tcp: must unclone packets before mangling them

[pandora-kernel.git] / net / ipv4 / tcp_output.c
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 63170e2..0d5a118 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -835,11 +835,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                                                            &md5);
         tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
  
-       if (tcp_packets_in_flight(tp) == 0) {
+       if (tcp_packets_in_flight(tp) == 0)
                 tcp_ca_event(sk, CA_EVENT_TX_START);
-               skb->ooo_okay = 1;
-       } else
-               skb->ooo_okay = 0;
+
+       /* If no packet is in the qdisc/device queue, then allow XPS to select
+        * another queue.
+        */
+       skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
  
         skb_push(skb, tcp_header_size);
         skb_reset_transport_header(skb);
@@ -931,6 +933,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
  static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
                                  unsigned int mss_now)
  {
+       /* Make sure we own this skb before messing with gso_size/gso_segs */
+       WARN_ON_ONCE(skb_cloned(skb));
+
         if (skb->len <= mss_now || !sk_can_gso(sk) ||
             skb->ip_summed == CHECKSUM_NONE) {
                 /* Avoid the costly divide in the normal
@@ -1012,9 +1017,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         if (nsize < 0)
                 nsize = 0;
  
-       if (skb_cloned(skb) &&
-           skb_is_nonlinear(skb) &&
-           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+       if (skb_unclone(skb, GFP_ATOMIC))
                 return -ENOMEM;
  
         /* Get a new skb... force flag on. */
@@ -1093,6 +1096,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
  {
         int i, k, eat;
  
+       eat = min_t(int, len, skb_headlen(skb));
+       if (eat) {
+               __skb_pull(skb, eat);
+               len -= eat;
+               if (!len)
+                       return;
+       }
         eat = len;
         k = 0;
         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -1124,11 +1134,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
         if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
                 return -ENOMEM;
  
-       /* If len == headlen, we avoid __skb_pull to preserve alignment. */
-       if (unlikely(len < skb_headlen(skb)))
-               __skb_pull(skb, len);
-       else
-               __pskb_trim_head(skb, len - skb_headlen(skb));
+       __pskb_trim_head(skb, len);
  
         TCP_SKB_CB(skb)->seq += len;
         skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1138,11 +1144,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
         sk_mem_uncharge(sk, len);
         sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
  
-       /* Any change of skb->len requires recalculation of tso
-        * factor and mss.
-        */
+       /* Any change of skb->len requires recalculation of tso factor. */
         if (tcp_skb_pcount(skb) > 1)
-               tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
+               tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
  
         return 0;
  }
@@ -1316,21 +1320,21 @@ static void tcp_cwnd_validate(struct sock *sk)
   * when we would be allowed to send the split-due-to-Nagle skb fully.
   */
  static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
-                                       unsigned int mss_now, unsigned int cwnd)
+                                       unsigned int mss_now, unsigned int max_segs)
  {
         const struct tcp_sock *tp = tcp_sk(sk);
-       u32 needed, window, cwnd_len;
+       u32 needed, window, max_len;
  
         window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
-       cwnd_len = mss_now * cwnd;
+       max_len = mss_now * max_segs;
  
-       if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
-               return cwnd_len;
+       if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+               return max_len;
  
         needed = min(skb->len, window);
  
-       if (cwnd_len <= needed)
-               return cwnd_len;
+       if (max_len <= needed)
+               return max_len;
  
         return needed - needed % mss_now;
  }
@@ -1558,7 +1562,8 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
         limit = min(send_win, cong_win);
  
         /* If a full-sized TSO skb can be sent, do it. */
-       if (limit >= sk->sk_gso_max_size)
+       if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+                          sk->sk_gso_max_segs * tp->mss_cache))
                 goto send_now;
  
         /* Middle in queue won't get any more data, full sendable already? */
@@ -1585,8 +1590,11 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
                         goto send_now;
         }
  
-       /* Ok, it looks like it is advisable to defer.  */
-       tp->tso_deferred = 1 | (jiffies << 1);
+       /* Ok, it looks like it is advisable to defer.  Do not rearm the
+        * timer if it is already set, so as not to break TCP ACK clocking.
+        */
+       if (!tp->tso_deferred)
+               tp->tso_deferred = 1 | (jiffies << 1);
  
         return 1;
  
@@ -1784,7 +1792,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                 limit = mss_now;
                 if (tso_segs > 1 && !tcp_urg_mode(tp))
                         limit = tcp_mss_split_point(sk, skb, mss_now,
-                                                   cwnd_quota);
+                                                   min_t(unsigned int,
+                                                         cwnd_quota,
+                                                         sk->sk_gso_max_segs));
  
                 if (skb->len > limit &&
                     unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2059,7 +2069,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
                 /* Punt if not enough space exists in the first SKB for
                  * the data in the second
                  */
-               if (skb->len > skb_tailroom(to))
+               if (skb->len > skb_availroom(to))
                         break;
  
                 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
@@ -2120,6 +2130,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                 int oldpcount = tcp_skb_pcount(skb);
  
                 if (unlikely(oldpcount > 1)) {
+                       if (skb_unclone(skb, GFP_ATOMIC))
+                               return -ENOMEM;
                         tcp_init_tso_segs(sk, skb, cur_mss);
                         tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
                 }