tcp: fix tcp_grow_window() for large incoming frames
[pandora-kernel.git] / net / ipv4 / tcp_input.c
index 976034f..3ff3640 100644 (file)
@@ -61,6 +61,8 @@
  *             Pasi Sarolahti:         F-RTO for dealing with spurious RTOs
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -333,6 +335,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
                        incr = __tcp_grow_window(sk, skb);
 
                if (incr) {
+                       incr = max_t(int, incr, 2 * skb->len);
                        tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
                                               tp->window_clamp);
                        inet_csk(sk)->icsk_ack.quick |= 1;
@@ -472,8 +475,11 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
                if (!win_dep) {
                        m -= (new_sample >> 3);
                        new_sample += m;
-               } else if (m < new_sample)
-                       new_sample = m << 3;
+               } else {
+                       m <<= 3;
+                       if (m < new_sample)
+                               new_sample = m;
+               }
        } else {
                /* No previous measure. */
                new_sample = m << 3;
@@ -1307,25 +1313,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
        return in_sack;
 }
 
-static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
-                         struct tcp_sacktag_state *state,
+/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
+static u8 tcp_sacktag_one(struct sock *sk,
+                         struct tcp_sacktag_state *state, u8 sacked,
+                         u32 start_seq, u32 end_seq,
                          int dup_sack, int pcount)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       u8 sacked = TCP_SKB_CB(skb)->sacked;
        int fack_count = state->fack_count;
 
        /* Account D-SACK for retransmitted packet. */
        if (dup_sack && (sacked & TCPCB_RETRANS)) {
                if (tp->undo_marker && tp->undo_retrans &&
-                   after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+                   after(end_seq, tp->undo_marker))
                        tp->undo_retrans--;
                if (sacked & TCPCB_SACKED_ACKED)
                        state->reord = min(fack_count, state->reord);
        }
 
        /* Nothing to do; acked frame is about to be dropped (was ACKed). */
-       if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+       if (!after(end_seq, tp->snd_una))
                return sacked;
 
        if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1344,13 +1351,13 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
                                /* New sack for not retransmitted frame,
                                 * which was in hole. It is reordering.
                                 */
-                               if (before(TCP_SKB_CB(skb)->seq,
+                               if (before(start_seq,
                                           tcp_highest_sack_seq(tp)))
                                        state->reord = min(fack_count,
                                                           state->reord);
 
                                /* SACK enhanced F-RTO (RFC4138; Appendix B) */
-                               if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+                               if (!after(end_seq, tp->frto_highmark))
                                        state->flag |= FLAG_ONLY_ORIG_SACKED;
                        }
 
@@ -1368,8 +1375,7 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 
                /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
                if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
-                   before(TCP_SKB_CB(skb)->seq,
-                          TCP_SKB_CB(tp->lost_skb_hint)->seq))
+                   before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
                        tp->lost_cnt_hint += pcount;
 
                if (fack_count > tp->fackets_out)
@@ -1388,6 +1394,9 @@ static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
        return sacked;
 }
 
+/* Shift newly-SACKed bytes from this skb to the immediately previous
+ * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
+ */
 static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
                           struct tcp_sacktag_state *state,
                           unsigned int pcount, int shifted, int mss,
@@ -1395,9 +1404,20 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+       u32 start_seq = TCP_SKB_CB(skb)->seq;   /* start of newly-SACKed */
+       u32 end_seq = start_seq + shifted;      /* end of newly-SACKed */
 
        BUG_ON(!pcount);
 
+       /* Adjust counters and hints for the newly sacked sequence
+        * range but discard the return value since prev is already
+        * marked. We must tag the range first because the seq
+        * advancement below implicitly advances
+        * tcp_highest_sack_seq() when skb is highest_sack.
+        */
+       tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+                       start_seq, end_seq, dup_sack, pcount);
+
        if (skb == tp->lost_skb_hint)
                tp->lost_cnt_hint += pcount;
 
@@ -1424,9 +1444,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
                skb_shinfo(skb)->gso_type = 0;
        }
 
-       /* We discard results */
-       tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
-
        /* Difference in this won't matter, both ACKed by the same cumul. ACK */
        TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
@@ -1574,6 +1591,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                }
        }
 
+       /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+       if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+               goto fallback;
+
        if (!skb_shift(prev, skb, len))
                goto fallback;
        if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
@@ -1664,10 +1685,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                        break;
 
                if (in_sack) {
-                       TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
-                                                                 state,
-                                                                 dup_sack,
-                                                                 tcp_skb_pcount(skb));
+                       TCP_SKB_CB(skb)->sacked =
+                               tcp_sacktag_one(sk,
+                                               state,
+                                               TCP_SKB_CB(skb)->sacked,
+                                               TCP_SKB_CB(skb)->seq,
+                                               TCP_SKB_CB(skb)->end_seq,
+                                               dup_sack,
+                                               tcp_skb_pcount(skb));
 
                        if (!before(TCP_SKB_CB(skb)->seq,
                                    tcp_highest_sack_seq(tp)))
@@ -2554,6 +2579,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
                if (cnt > packets) {
                        if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+                           (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                            (oldcnt >= packets))
                                break;
 
@@ -3847,9 +3873,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
                                        opt_rx->wscale_ok = 1;
                                        if (snd_wscale > 14) {
                                                if (net_ratelimit())
-                                                       printk(KERN_INFO "tcp_parse_options: Illegal window "
-                                                              "scaling value %d >14 received.\n",
-                                                              snd_wscale);
+                                                       pr_info("%s: Illegal window scaling value %d >14 received\n",
+                                                               __func__,
+                                                               snd_wscale);
                                                snd_wscale = 14;
                                        }
                                        opt_rx->snd_wscale = snd_wscale;
@@ -4171,7 +4197,7 @@ static void tcp_fin(struct sock *sk)
                /* Only TCP_LISTEN and TCP_CLOSE are left, in these
                 * cases we should never reach this piece of code.
                 */
-               printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+               pr_err("%s: Impossible, sk->sk_state=%d\n",
                       __func__, sk->sk_state);
                break;
        }
@@ -4424,6 +4450,137 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
        return 0;
 }
 
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *skb1;
+       u32 seq, end_seq;
+
+       TCP_ECN_check_ce(tp, skb);
+
+       if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+               /* Scheduling failed: drop. TODO: should increment a counter */
+               __kfree_skb(skb);
+               return;
+       }
+
+       /* Disable header prediction. */
+       tp->pred_flags = 0;
+       inet_csk_schedule_ack(sk);
+
+       SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+                  tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+       skb1 = skb_peek_tail(&tp->out_of_order_queue);
+       if (!skb1) {
+               /* Queue is empty: first out of order segment, build 1 SACK. */
+               if (tcp_is_sack(tp)) {
+                       tp->rx_opt.num_sacks = 1;
+                       tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+                       tp->selective_acks[0].end_seq =
+                                               TCP_SKB_CB(skb)->end_seq;
+               }
+               __skb_queue_head(&tp->out_of_order_queue, skb);
+               goto end;
+       }
+
+       seq = TCP_SKB_CB(skb)->seq;
+       end_seq = TCP_SKB_CB(skb)->end_seq;
+
+       if (seq == TCP_SKB_CB(skb1)->end_seq) {
+               /* Packets can stay in the ofo queue for a long time.
+                * Better to try to coalesce them right now
+                * to avoid a future tcp_collapse_ofo_queue(),
+                * probably the most expensive function in the TCP stack.
+                */
+               if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+                       NET_INC_STATS_BH(sock_net(sk),
+                                        LINUX_MIB_TCPRCVCOALESCE);
+                       BUG_ON(skb_copy_bits(skb, 0,
+                                            skb_put(skb1, skb->len),
+                                            skb->len));
+                       TCP_SKB_CB(skb1)->end_seq = end_seq;
+                       TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+                       __kfree_skb(skb);
+                       skb = NULL;
+               } else {
+                       __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+               }
+
+               if (!tp->rx_opt.num_sacks ||
+                   tp->selective_acks[0].end_seq != seq)
+                       goto add_sack;
+
+               /* Common case: data arrives in order after a hole. */
+               tp->selective_acks[0].end_seq = end_seq;
+               goto end;
+       }
+
+       /* Walk back from the tail to find where to insert this segment. */
+       while (1) {
+               if (!after(TCP_SKB_CB(skb1)->seq, seq))
+                       break;
+               if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+                       skb1 = NULL;
+                       break;
+               }
+               skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+       }
+
+       /* Does skb overlap the previous one? */
+       if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+               if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+                       /* All the bits are present. Drop. */
+                       __kfree_skb(skb);
+                       skb = NULL;
+                       tcp_dsack_set(sk, seq, end_seq);
+                       goto add_sack;
+               }
+               if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+                       /* Partial overlap. */
+                       tcp_dsack_set(sk, seq,
+                                     TCP_SKB_CB(skb1)->end_seq);
+               } else {
+                       if (skb_queue_is_first(&tp->out_of_order_queue,
+                                              skb1))
+                               skb1 = NULL;
+                       else
+                               skb1 = skb_queue_prev(
+                                       &tp->out_of_order_queue,
+                                       skb1);
+               }
+       }
+       if (!skb1)
+               __skb_queue_head(&tp->out_of_order_queue, skb);
+       else
+               __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+       /* And remove following segments wholly covered by the new one. */
+       while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+               skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+               if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+                       break;
+               if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+                       tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+                                        end_seq);
+                       break;
+               }
+               __skb_unlink(skb1, &tp->out_of_order_queue);
+               tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+                                TCP_SKB_CB(skb1)->end_seq);
+               __kfree_skb(skb1);
+       }
+
+add_sack:
+       if (tcp_is_sack(tp))
+               tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+       if (skb)
+               skb_set_owner_r(skb, sk);
+}
+
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
        const struct tcphdr *th = tcp_hdr(skb);
@@ -4539,105 +4696,7 @@ drop:
                goto queue_and_out;
        }
 
-       TCP_ECN_check_ce(tp, skb);
-
-       if (tcp_try_rmem_schedule(sk, skb->truesize))
-               goto drop;
-
-       /* Disable header prediction. */
-       tp->pred_flags = 0;
-       inet_csk_schedule_ack(sk);
-
-       SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-                  tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
-       skb_set_owner_r(skb, sk);
-
-       if (!skb_peek(&tp->out_of_order_queue)) {
-               /* Initial out of order segment, build 1 SACK. */
-               if (tcp_is_sack(tp)) {
-                       tp->rx_opt.num_sacks = 1;
-                       tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-                       tp->selective_acks[0].end_seq =
-                                               TCP_SKB_CB(skb)->end_seq;
-               }
-               __skb_queue_head(&tp->out_of_order_queue, skb);
-       } else {
-               struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
-               u32 seq = TCP_SKB_CB(skb)->seq;
-               u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-               if (seq == TCP_SKB_CB(skb1)->end_seq) {
-                       __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-                       if (!tp->rx_opt.num_sacks ||
-                           tp->selective_acks[0].end_seq != seq)
-                               goto add_sack;
-
-                       /* Common case: data arrive in order after hole. */
-                       tp->selective_acks[0].end_seq = end_seq;
-                       return;
-               }
-
-               /* Find place to insert this segment. */
-               while (1) {
-                       if (!after(TCP_SKB_CB(skb1)->seq, seq))
-                               break;
-                       if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-                               skb1 = NULL;
-                               break;
-                       }
-                       skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-               }
-
-               /* Do skb overlap to previous one? */
-               if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-                       if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-                               /* All the bits are present. Drop. */
-                               __kfree_skb(skb);
-                               tcp_dsack_set(sk, seq, end_seq);
-                               goto add_sack;
-                       }
-                       if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-                               /* Partial overlap. */
-                               tcp_dsack_set(sk, seq,
-                                             TCP_SKB_CB(skb1)->end_seq);
-                       } else {
-                               if (skb_queue_is_first(&tp->out_of_order_queue,
-                                                      skb1))
-                                       skb1 = NULL;
-                               else
-                                       skb1 = skb_queue_prev(
-                                               &tp->out_of_order_queue,
-                                               skb1);
-                       }
-               }
-               if (!skb1)
-                       __skb_queue_head(&tp->out_of_order_queue, skb);
-               else
-                       __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-               /* And clean segments covered by new one as whole. */
-               while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-                       skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-                       if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-                               break;
-                       if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-                               tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-                                                end_seq);
-                               break;
-                       }
-                       __skb_unlink(skb1, &tp->out_of_order_queue);
-                       tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-                                        TCP_SKB_CB(skb1)->end_seq);
-                       __kfree_skb(skb1);
-               }
-
-add_sack:
-               if (tcp_is_sack(tp))
-                       tcp_sack_new_ofo_skb(sk, seq, end_seq);
-       }
+       tcp_data_queue_ofo(sk, skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -5170,7 +5229,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
                return 0;
 
        if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-               tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+               tp->ucopy.dma_chan = net_dma_find_channel();
 
        if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {