tcp_yeah: don't set ssthresh below 2

[pandora-kernel.git] / net / ipv4 / tcp_output.c
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 63170e2..e614810 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -835,11 +835,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                                                            &md5);
         tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
  
-       if (tcp_packets_in_flight(tp) == 0) {
+       if (tcp_packets_in_flight(tp) == 0)
                 tcp_ca_event(sk, CA_EVENT_TX_START);
-               skb->ooo_okay = 1;
-       } else
-               skb->ooo_okay = 0;
+
+       /* if no packet is in qdisc/device queue, then allow XPS to select
+        * another queue.
+        */
+       skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
  
         skb_push(skb, tcp_header_size);
         skb_reset_transport_header(skb);
@@ -931,6 +933,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
  static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
                                  unsigned int mss_now)
  {
+       /* Make sure we own this skb before messing with gso_size/gso_segs */
+       WARN_ON_ONCE(skb_cloned(skb));
+
         if (skb->len <= mss_now || !sk_can_gso(sk) ||
             skb->ip_summed == CHECKSUM_NONE) {
                 /* Avoid the costly divide in the normal
@@ -1012,9 +1017,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         if (nsize < 0)
                 nsize = 0;
  
-       if (skb_cloned(skb) &&
-           skb_is_nonlinear(skb) &&
-           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+       if (skb_unclone(skb, GFP_ATOMIC))
                 return -ENOMEM;
  
         /* Get a new skb... force flag on. */
@@ -1093,6 +1096,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
  {
         int i, k, eat;
  
+       eat = min_t(int, len, skb_headlen(skb));
+       if (eat) {
+               __skb_pull(skb, eat);
+               len -= eat;
+               if (!len)
+                       return;
+       }
         eat = len;
         k = 0;
         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -1124,11 +1134,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
         if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
                 return -ENOMEM;
  
-       /* If len == headlen, we avoid __skb_pull to preserve alignment. */
-       if (unlikely(len < skb_headlen(skb)))
-               __skb_pull(skb, len);
-       else
-               __pskb_trim_head(skb, len - skb_headlen(skb));
+       __pskb_trim_head(skb, len);
  
         TCP_SKB_CB(skb)->seq += len;
         skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1138,11 +1144,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
         sk_mem_uncharge(sk, len);
         sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
  
-       /* Any change of skb->len requires recalculation of tso
-        * factor and mss.
-        */
+       /* Any change of skb->len requires recalculation of tso factor. */
         if (tcp_skb_pcount(skb) > 1)
-               tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
+               tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
  
         return 0;
  }
@@ -1585,8 +1589,11 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
                         goto send_now;
         }
  
-       /* Ok, it looks like it is advisable to defer.  */
-       tp->tso_deferred = 1 | (jiffies << 1);
+       /* Ok, it looks like it is advisable to defer.
+        * Do not rearm the timer if it is already set, so as not to break TCP ACK clocking.
+        */
+       if (!tp->tso_deferred)
+               tp->tso_deferred = 1 | (jiffies << 1);
  
         return 1;
  
@@ -2059,7 +2066,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
                 /* Punt if not enough space exists in the first SKB for
                  * the data in the second
                  */
-               if (skb->len > skb_tailroom(to))
+               if (skb->len > skb_availroom(to))
                         break;
  
                 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
@@ -2120,6 +2127,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                 int oldpcount = tcp_skb_pcount(skb);
  
                 if (unlikely(oldpcount > 1)) {
+                       if (skb_unclone(skb, GFP_ATOMIC))
+                               return -ENOMEM;
                         tcp_init_tso_segs(sk, skb, cur_mss);
                         tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
                 }
@@ -2311,33 +2320,40 @@ begin_fwd:
         }
  }
  
-/* Send a fin.  The caller locks the socket for us.  This cannot be
- * allowed to fail queueing a FIN frame under any circumstances.
+/* Send a FIN. The caller locks the socket for us.
+ * We should try to send a FIN packet really hard, but eventually give up.
   */
  void tcp_send_fin(struct sock *sk)
  {
+       struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
         struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb = tcp_write_queue_tail(sk);
-       int mss_now;
  
-       /* Optimization, tack on the FIN if we have a queue of
-        * unsent frames.  But be careful about outgoing SACKS
-        * and IP options.
+       /* Optimization, tack on the FIN if we have one skb in write queue and
+        * this skb was not yet sent, or we are under memory pressure.
+        * Note: in the latter case, FIN packet will be sent after a timeout,
+        * as TCP stack thinks it has already been transmitted.
          */
-       mss_now = tcp_current_mss(sk);
-
-       if (tcp_send_head(sk) != NULL) {
-               TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
-               TCP_SKB_CB(skb)->end_seq++;
+       if (tskb && (tcp_send_head(sk) || tcp_memory_pressure)) {
+coalesce:
+               TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
+               TCP_SKB_CB(tskb)->end_seq++;
                 tp->write_seq++;
+               if (!tcp_send_head(sk)) {
+                       /* This means tskb was already sent.
+                        * Pretend we included the FIN on previous transmit.
+                        * We need to set tp->snd_nxt to the value it would have
+                        * if FIN had been sent. This is because retransmit path
+                        * does not change tp->snd_nxt.
+                        */
+                       tp->snd_nxt++;
+                       return;
+               }
         } else {
-               /* Socket is locked, keep trying until memory is available. */
-               for (;;) {
-                       skb = alloc_skb_fclone(MAX_TCP_HEADER,
-                                              sk->sk_allocation);
-                       if (skb)
-                               break;
-                       yield();
+               skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+               if (unlikely(!skb)) {
+                       if (tskb)
+                               goto coalesce;
+                       return;
                 }
  
                 /* Reserve space for headers and prepare control bits. */
@@ -2347,7 +2363,7 @@ void tcp_send_fin(struct sock *sk)
                                      TCPHDR_ACK | TCPHDR_FIN);
                 tcp_queue_skb(sk, skb);
         }
-       __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
+       __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
  }
  
  /* We get here when a process closes a file descriptor (either due to
@@ -2623,13 +2639,10 @@ int tcp_connect(struct sock *sk)
  
         tcp_connect_init(sk);
  
-       buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
-       if (unlikely(buff == NULL))
+       buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+       if (unlikely(!buff))
                 return -ENOBUFS;
  
-       /* Reserve space for headers. */
-       skb_reserve(buff, MAX_TCP_HEADER);
-
         tp->snd_nxt = tp->write_seq;
         tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
         TCP_ECN_send_syn(sk, buff);