[TCP]: Make fackets_out accurate
[pandora-kernel.git] / net / ipv4 / tcp_output.c
index 3c8c8e7..cbe8bf6 100644 (file)
@@ -61,6 +61,18 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+static inline void tcp_packets_out_inc(struct sock *sk,
+                                      const struct sk_buff *skb)
+{      /* Count skb in packets_out; reset the retransmit timer if it was 0. */
+       struct tcp_sock *tp = tcp_sk(sk);
+       int orig = tp->packets_out;     /* value before skb is counted */
+
+       tp->packets_out += tcp_skb_pcount(skb); /* add skb's pcount */
+       if (!orig)      /* counter was zero before this skb was added */
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
+}
+
 static void update_send_head(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
@@ -634,16 +646,32 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
                skb_shinfo(skb)->gso_size = 0;
                skb_shinfo(skb)->gso_type = 0;
        } else {
-               unsigned int factor;
-
-               factor = skb->len + (mss_now - 1);
-               factor /= mss_now;
-               skb_shinfo(skb)->gso_segs = factor;
+               skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
                skb_shinfo(skb)->gso_size = mss_now;
                skb_shinfo(skb)->gso_type = sk->sk_gso_type;
        }
 }
 
+/* Subtract decr from tp->fackets_out, but only when skb is counted in it,
+ * i.e. skb's seq is not after tp->highest_sack. Does nothing while
+ * tp->sacked_out is zero. The SACK fastpath count hint is lowered as well
+ * when the hinted skb lies after skb, as it would otherwise overwrite us.
+ */
+static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
+                                  int decr)
+{
+       if (!tp->sacked_out)
+               return; /* sacked_out is zero: leave both counters alone */
+
+       if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq))
+               tp->fackets_out -= decr;
+
+       /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
+       if (tp->fastpath_skb_hint != NULL &&
+           after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
+               tp->fastpath_cnt_hint -= decr;
+}
+
 /* Function to create two new TCP segments.  Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list.  This won't be called frequently, I hope.
@@ -684,6 +712,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
        TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
 
+       if (tp->sacked_out && (TCP_SKB_CB(skb)->seq == tp->highest_sack))
+               tp->highest_sack = TCP_SKB_CB(buff)->seq;
+
        /* PSH and FIN should only be set in the second packet. */
        flags = TCP_SKB_CB(skb)->flags;
        TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
@@ -732,26 +763,15 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
                if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
                        tp->retrans_out -= diff;
 
-               if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+               if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
                        tp->lost_out -= diff;
-                       tp->left_out -= diff;
-               }
-
-               if (diff > 0) {
-                       /* Adjust Reno SACK estimate. */
-                       if (!tp->rx_opt.sack_ok) {
-                               tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
-                               tcp_sync_left_out(tp);
-                       }
 
-                       tcp_dec_pcount_approx_int(&tp->fackets_out, diff);
-                       /* SACK fastpath might overwrite it unless dealt with */
-                       if (tp->fastpath_skb_hint != NULL &&
-                           after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq,
-                                 TCP_SKB_CB(skb)->seq)) {
-                               tcp_dec_pcount_approx_int(&tp->fastpath_cnt_hint, diff);
-                       }
+               /* Adjust Reno SACK estimate. */
+               if (tcp_is_reno(tp) && diff > 0) {
+                       tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
+                       tcp_verify_left_out(tp);
                }
+               tcp_adjust_fackets_out(tp, skb, diff);
        }
 
        /* Link BUFF into the send queue. */
@@ -1717,6 +1737,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
                /* Update sequence range on original skb. */
                TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
+               if (WARN_ON(tp->sacked_out &&
+                   (TCP_SKB_CB(next_skb)->seq == tp->highest_sack)))
+                       return;
+
                /* Merge over control information. */
                flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
                TCP_SKB_CB(skb)->flags = flags;
@@ -1727,21 +1751,14 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
                TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
                if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
                        tp->retrans_out -= tcp_skb_pcount(next_skb);
-               if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
+               if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
                        tp->lost_out -= tcp_skb_pcount(next_skb);
-                       tp->left_out -= tcp_skb_pcount(next_skb);
-               }
                /* Reno case is special. Sigh... */
-               if (!tp->rx_opt.sack_ok && tp->sacked_out) {
+               if (tcp_is_reno(tp) && tp->sacked_out)
                        tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
-                       tp->left_out -= tcp_skb_pcount(next_skb);
-               }
 
-               /* Not quite right: it can be > snd.fack, but
-                * it is better to underestimate fackets.
-                */
-               tcp_dec_pcount_approx(&tp->fackets_out, next_skb);
-               tcp_packets_out_dec(tp, next_skb);
+               tcp_adjust_fackets_out(tp, skb, tcp_skb_pcount(next_skb));
+               tp->packets_out -= tcp_skb_pcount(next_skb);
                sk_stream_free_skb(sk, next_skb);
        }
 }
@@ -1780,7 +1797,7 @@ void tcp_simple_retransmit(struct sock *sk)
        if (!lost)
                return;
 
-       tcp_sync_left_out(tp);
+       tcp_verify_left_out(tp);
 
        /* Don't muck with the congestion window here.
         * Reason is that we do not increase amount of _data_
@@ -1982,7 +1999,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                return;
 
        /* No forward retransmissions in Reno are possible. */
-       if (!tp->rx_opt.sack_ok)
+       if (tcp_is_reno(tp))
                return;
 
        /* Yeah, we have to make difficult choice between forward transmission