tcp: don't clear retransmit_skb_hint when not necessary
[pandora-kernel.git] / net / ipv4 / tcp_output.c
index 8165f5a..239cea7 100644 (file)
@@ -1824,6 +1824,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
 
        /* changed transmit queue under us so clear hints */
        tcp_clear_retrans_hints_partial(tp);
+       if (next_skb == tp->retransmit_skb_hint)
+               tp->retransmit_skb_hint = skb;
 
        sk_wmem_free_skb(sk, next_skb);
 }
@@ -1838,7 +1840,7 @@ void tcp_simple_retransmit(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        unsigned int mss = tcp_current_mss(sk, 0);
-       int lost = 0;
+       u32 prior_lost = tp->lost_out;
 
        tcp_for_write_queue(skb, sk) {
                if (skb == tcp_send_head(sk))
@@ -1849,17 +1851,13 @@ void tcp_simple_retransmit(struct sock *sk)
                                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                                tp->retrans_out -= tcp_skb_pcount(skb);
                        }
-                       if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
-                               TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                               tp->lost_out += tcp_skb_pcount(skb);
-                               lost = 1;
-                       }
+                       tcp_skb_mark_lost_uncond_verify(tp, skb);
                }
        }
 
-       tcp_clear_all_retrans_hints(tp);
+       tcp_clear_retrans_hints_partial(tp);
 
-       if (!lost)
+       if (prior_lost == tp->lost_out)
                return;
 
        if (tcp_is_reno(tp))
@@ -1996,86 +1994,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
        return err;
 }
 
-/* This gets called after a retransmit timeout, and the initially
- * retransmitted data is acknowledged.  It tries to continue
- * resending the rest of the retransmit queue, until either
- * we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessarily retransmissions.
- */
-void tcp_xmit_retransmit_queue(struct sock *sk)
+static int tcp_can_forward_retransmit(struct sock *sk)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb;
-       int packet_cnt;
-
-       if (tp->retransmit_skb_hint) {
-               skb = tp->retransmit_skb_hint;
-               packet_cnt = tp->retransmit_cnt_hint;
-       } else {
-               skb = tcp_write_queue_head(sk);
-               packet_cnt = 0;
-       }
-
-       /* First pass: retransmit lost packets. */
-       if (tp->lost_out) {
-               tcp_for_write_queue_from(skb, sk) {
-                       __u8 sacked = TCP_SKB_CB(skb)->sacked;
-
-                       if (skb == tcp_send_head(sk))
-                               break;
-                       /* we could do better than to assign each time */
-                       tp->retransmit_skb_hint = skb;
-                       tp->retransmit_cnt_hint = packet_cnt;
-
-                       /* Assume this retransmit will generate
-                        * only one packet for congestion window
-                        * calculation purposes.  This works because
-                        * tcp_retransmit_skb() will chop up the
-                        * packet to be MSS sized and all the
-                        * packet counting works out.
-                        */
-                       if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-                               return;
-
-                       if (sacked & TCPCB_LOST) {
-                               if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-                                       int mib_idx;
-
-                                       if (tcp_retransmit_skb(sk, skb)) {
-                                               tp->retransmit_skb_hint = NULL;
-                                               return;
-                                       }
-                                       if (icsk->icsk_ca_state != TCP_CA_Loss)
-                                               mib_idx = LINUX_MIB_TCPFASTRETRANS;
-                                       else
-                                               mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
-                                       NET_INC_STATS_BH(sock_net(sk), mib_idx);
-
-                                       if (skb == tcp_write_queue_head(sk))
-                                               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-                                                                         inet_csk(sk)->icsk_rto,
-                                                                         TCP_RTO_MAX);
-                               }
-
-                               packet_cnt += tcp_skb_pcount(skb);
-                               if (packet_cnt >= tp->lost_out)
-                                       break;
-                       }
-               }
-       }
-
-       /* OK, demanded retransmission is finished. */
 
        /* Forward retransmissions are possible only during Recovery. */
        if (icsk->icsk_ca_state != TCP_CA_Recovery)
-               return;
+               return 0;
 
        /* No forward retransmissions in Reno are possible. */
        if (tcp_is_reno(tp))
-               return;
+               return 0;
 
        /* Yeah, we have to make difficult choice between forward transmission
         * and retransmission... Both ways have their merits...
@@ -2086,43 +2016,96 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
         */
 
        if (tcp_may_send_now(sk))
-               return;
+               return 0;
 
-       /* If nothing is SACKed, highest_sack in the loop won't be valid */
-       if (!tp->sacked_out)
-               return;
+       return 1;
+}
 
-       if (tp->forward_skb_hint)
-               skb = tp->forward_skb_hint;
+/* This gets called after a retransmit timeout, and the initially
+ * retransmitted data is acknowledged.  It tries to continue
+ * resending the rest of the retransmit queue, until either
+ * we've sent it all or the congestion window limit is reached.
+ * If doing SACK, the first ACK which comes back for a timeout
+ * based retransmit packet might feed us FACK information again.
+ * If so, we use it to avoid unnecessary retransmissions.
+ */
+void tcp_xmit_retransmit_queue(struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *skb;
+       struct sk_buff *hole = NULL;
+       int mib_idx;
+       int fwd_rexmitting = 0;
+
+       if (!tp->lost_out)
+               tp->retransmit_high = tp->snd_una;
+
+       if (tp->retransmit_skb_hint)
+               skb = tp->retransmit_skb_hint;
        else
                skb = tcp_write_queue_head(sk);
 
+       /* First pass: retransmit lost packets. */
        tcp_for_write_queue_from(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
-               tp->forward_skb_hint = skb;
+               __u8 sacked = TCP_SKB_CB(skb)->sacked;
 
-               if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+               if (skb == tcp_send_head(sk))
                        break;
+               /* we could do better than to assign each time */
+               if (hole == NULL)
+                       tp->retransmit_skb_hint = skb;
 
+               /* Assume this retransmit will generate
+                * only one packet for congestion window
+                * calculation purposes.  This works because
+                * tcp_retransmit_skb() will chop up the
+                * packet to be MSS sized and all the
+                * packet counting works out.
+                */
                if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-                       break;
+                       return;
 
-               if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
+               if (fwd_rexmitting) {
+begin_fwd:
+                       if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+                               break;
+                       mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
+
+               } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
+                       if (!tcp_can_forward_retransmit(sk))
+                               break;
+                       /* Backtrack if necessary to non-L'ed skb */
+                       if (hole != NULL) {
+                               skb = hole;
+                               hole = NULL;
+                       }
+                       fwd_rexmitting = 1;
+                       goto begin_fwd;
+
+               } else if (!(sacked & TCPCB_LOST)) {
+                       if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS))
+                               hole = skb;
                        continue;
 
-               /* Ok, retransmit it. */
-               if (tcp_retransmit_skb(sk, skb)) {
-                       tp->forward_skb_hint = NULL;
-                       break;
+               } else {
+                       if (icsk->icsk_ca_state != TCP_CA_Loss)
+                               mib_idx = LINUX_MIB_TCPFASTRETRANS;
+                       else
+                               mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
                }
 
+               if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
+                       continue;
+
+               if (tcp_retransmit_skb(sk, skb))
+                       return;
+               NET_INC_STATS_BH(sock_net(sk), mib_idx);
+
                if (skb == tcp_write_queue_head(sk))
                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                                  inet_csk(sk)->icsk_rto,
                                                  TCP_RTO_MAX);
-
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
        }
 }