tcp: limit payload size of sacked skbs

[pandora-kernel.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index d73aab3..7db2297 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -83,9 +83,12 @@ int sysctl_tcp_ecn __read_mostly = 2;
  EXPORT_SYMBOL(sysctl_tcp_ecn);
  int sysctl_tcp_dsack __read_mostly = 1;
  int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 2;
+int sysctl_tcp_adv_win_scale __read_mostly = 1;
  EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
  
+/* rfc5961 challenge ack rate limiting */
+int sysctl_tcp_challenge_ack_limit = 1000;
+
  int sysctl_tcp_stdurg __read_mostly;
  int sysctl_tcp_rfc1337 __read_mostly;
  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
@@ -112,6 +115,7 @@ int sysctl_tcp_abc __read_mostly;
  #define FLAG_DSACKING_ACK      0x800 /* SACK blocks contained D-SACK info */
  #define FLAG_NONHEAD_RETRANS_ACKED     0x1000 /* Non-head rexmitted data was ACKed */
  #define FLAG_SACK_RENEGING     0x2000 /* snd_una advanced to a sacked seq */
+#define FLAG_UPDATE_TS_RECENT  0x4000 /* tcp_replace_ts_recent() */
  
  #define FLAG_ACKED             (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
  #define FLAG_NOT_DUP           (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -206,7 +210,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
                 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
  }
  
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
  {
         if (tcp_hdr(skb)->cwr)
                 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -217,32 +221,41 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
         tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
  }
  
-static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
  {
-       if (tp->ecn_flags & TCP_ECN_OK) {
-               if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
-                       tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+       if (!(tp->ecn_flags & TCP_ECN_OK))
+               return;
+
+       switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+       case INET_ECN_NOT_ECT:
                 /* Funny extension: if ECT is not set on a segment,
-                * it is surely retransmit. It is not in ECN RFC,
-                * but Linux follows this rule. */
-               else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
+                * and we have already seen ECT on a previous segment,
+                * it is probably a retransmit.
+                */
+               if (tp->ecn_flags & TCP_ECN_SEEN)
                         tcp_enter_quickack_mode((struct sock *)tp);
+               break;
+       case INET_ECN_CE:
+               tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+               /* fall through */
+       default:
+               tp->ecn_flags |= TCP_ECN_SEEN;
         }
  }
  
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
  {
         if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
                 tp->ecn_flags &= ~TCP_ECN_OK;
  }
  
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
  {
         if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
                 tp->ecn_flags &= ~TCP_ECN_OK;
  }
  
-static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
+static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
  {
         if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
                 return 1;
@@ -256,14 +269,11 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
  
  static void tcp_fixup_sndbuf(struct sock *sk)
  {
-       int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
-                    sizeof(struct sk_buff);
+       int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
  
-       if (sk->sk_sndbuf < 3 * sndmem) {
-               sk->sk_sndbuf = 3 * sndmem;
-               if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
-                       sk->sk_sndbuf = sysctl_tcp_wmem[2];
-       }
+       sndmem *= TCP_INIT_CWND;
+       if (sk->sk_sndbuf < sndmem)
+               sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
  }
  
  /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -309,7 +319,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
         return 0;
  }
  
-static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -328,6 +338,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
                         incr = __tcp_grow_window(sk, skb);
  
                 if (incr) {
+                       incr = max_t(int, incr, 2 * skb->len);
                         tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
                                                tp->window_clamp);
                         inet_csk(sk)->icsk_ack.quick |= 1;
@@ -339,17 +350,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
  
  static void tcp_fixup_rcvbuf(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+       u32 mss = tcp_sk(sk)->advmss;
+       u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+       int rcvmem;
  
-       /* Try to select rcvbuf so that 4 mss-sized segments
-        * will fit to window and corresponding skbs will fit to our rcvbuf.
-        * (was 3; 4 is minimum to allow fast retransmit to work.)
+       /* Limit to 10 segments if mss <= 1460,
+        * or 14600/mss segments, with a minimum of two segments.
          */
-       while (tcp_win_from_space(rcvmem) < tp->advmss)
+       if (mss > 1460)
+               icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+
+       rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
+       while (tcp_win_from_space(rcvmem) < mss)
                 rcvmem += 128;
-       if (sk->sk_rcvbuf < 4 * rcvmem)
-               sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
+
+       rcvmem *= icwnd;
+
+       if (sk->sk_rcvbuf < rcvmem)
+               sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
  }
  
  /* 4. Try to fixup all. It is made immediately after connection enters
@@ -416,7 +434,7 @@ static void tcp_clamp_window(struct sock *sk)
   */
  void tcp_initialize_rcv_mss(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
         unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
  
         hint = min(hint, tp->rcv_wnd / 2);
@@ -460,8 +478,11 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
                 if (!win_dep) {
                         m -= (new_sample >> 3);
                         new_sample += m;
-               } else if (m < new_sample)
-                       new_sample = m << 3;
+               } else {
+                       m <<= 3;
+                       if (m < new_sample)
+                               new_sample = m;
+               }
         } else {
                 /* No previous measure. */
                 new_sample = m << 3;
@@ -531,8 +552,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
                         space /= tp->advmss;
                         if (!space)
                                 space = 1;
-                       rcvmem = (tp->advmss + MAX_TCP_HEADER +
-                                 16 + sizeof(struct sk_buff));
+                       rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
                         while (tcp_win_from_space(rcvmem) < tp->advmss)
                                 rcvmem += 128;
                         space *= rcvmem;
@@ -812,7 +832,7 @@ void tcp_update_metrics(struct sock *sk)
         }
  }
  
-__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
  {
         __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
  
@@ -1204,7 +1224,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
                 tp->lost_retrans_low = new_low_seq;
  }
  
-static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
                            struct tcp_sack_block_wire *sp, int num_sacks,
                            u32 prior_snd_una)
  {
@@ -1283,13 +1303,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
                  */
                 if (pkt_len > mss) {
                         unsigned int new_len = (pkt_len / mss) * mss;
-                       if (!in_sack && new_len < pkt_len) {
+                       if (!in_sack && new_len < pkt_len)
                                 new_len += mss;
-                               if (new_len > skb->len)
-                                       return 0;
-                       }
                         pkt_len = new_len;
                 }
+
+               if (pkt_len >= skb->len && !in_sack)
+                       return 0;
+
                 err = tcp_fragment(sk, skb, pkt_len, mss);
                 if (err < 0)
                         return err;
@@ -1298,25 +1319,26 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
         return in_sack;
  }
  
-static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
-                         struct tcp_sacktag_state *state,
+/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
+static u8 tcp_sacktag_one(struct sock *sk,
+                         struct tcp_sacktag_state *state, u8 sacked,
+                         u32 start_seq, u32 end_seq,
                           int dup_sack, int pcount)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       u8 sacked = TCP_SKB_CB(skb)->sacked;
         int fack_count = state->fack_count;
  
         /* Account D-SACK for retransmitted packet. */
         if (dup_sack && (sacked & TCPCB_RETRANS)) {
                 if (tp->undo_marker && tp->undo_retrans &&
-                   after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+                   after(end_seq, tp->undo_marker))
                         tp->undo_retrans--;
                 if (sacked & TCPCB_SACKED_ACKED)
                         state->reord = min(fack_count, state->reord);
         }
  
         /* Nothing to do; acked frame is about to be dropped (was ACKed). */
-       if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+       if (!after(end_seq, tp->snd_una))
                 return sacked;
  
         if (!(sacked & TCPCB_SACKED_ACKED)) {
@@ -1335,13 +1357,13 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
                                 /* New sack for not retransmitted frame,
                                  * which was in hole. It is reordering.
                                  */
-                               if (before(TCP_SKB_CB(skb)->seq,
+                               if (before(start_seq,
                                            tcp_highest_sack_seq(tp)))
                                         state->reord = min(fack_count,
                                                            state->reord);
  
                                 /* SACK enhanced F-RTO (RFC4138; Appendix B) */
-                               if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+                               if (!after(end_seq, tp->frto_highmark))
                                         state->flag |= FLAG_ONLY_ORIG_SACKED;
                         }
  
@@ -1359,8 +1381,7 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
  
                 /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
                 if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
-                   before(TCP_SKB_CB(skb)->seq,
-                          TCP_SKB_CB(tp->lost_skb_hint)->seq))
+                   before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
                         tp->lost_cnt_hint += pcount;
  
                 if (fack_count > tp->fackets_out)
@@ -1379,6 +1400,9 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
         return sacked;
  }
  
+/* Shift newly-SACKed bytes from this skb to the immediately previous
+ * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
+ */
  static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
                            struct tcp_sacktag_state *state,
                            unsigned int pcount, int shifted, int mss,
@@ -1386,9 +1410,20 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
  {
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
+       u32 start_seq = TCP_SKB_CB(skb)->seq;   /* start of newly-SACKed */
+       u32 end_seq = start_seq + shifted;      /* end of newly-SACKed */
  
         BUG_ON(!pcount);
  
+       /* Adjust counters and hints for the newly sacked sequence
+        * range but discard the return value since prev is already
+        * marked. We must tag the range first because the seq
+        * advancement below implicitly advances
+        * tcp_highest_sack_seq() when skb is highest_sack.
+        */
+       tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+                       start_seq, end_seq, dup_sack, pcount);
+
         if (skb == tp->lost_skb_hint)
                 tp->lost_cnt_hint += pcount;
  
@@ -1396,7 +1431,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
         TCP_SKB_CB(skb)->seq += shifted;
  
         skb_shinfo(prev)->gso_segs += pcount;
-       BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+       WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
         skb_shinfo(skb)->gso_segs -= pcount;
  
         /* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1415,9 +1450,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
                 skb_shinfo(skb)->gso_type = 0;
         }
  
-       /* We discard results */
-       tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
-
         /* Difference in this won't matter, both ACKed by the same cumul. ACK */
         TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
  
@@ -1438,7 +1470,10 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
                 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
         }
  
-       TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+       TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+       if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+               TCP_SKB_CB(prev)->end_seq++;
+
         if (skb == tcp_highest_sack(sk))
                 tcp_advance_highest_sack(sk, skb);
  
@@ -1453,17 +1488,32 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
  /* I wish gso_size would have a bit more sane initialization than
   * something-or-zero which complicates things
   */
-static int tcp_skb_seglen(struct sk_buff *skb)
+static int tcp_skb_seglen(const struct sk_buff *skb)
  {
         return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
  }
  
  /* Shifting pages past head area doesn't work */
-static int skb_can_shift(struct sk_buff *skb)
+static int skb_can_shift(const struct sk_buff *skb)
  {
         return !skb_headlen(skb) && skb_is_nonlinear(skb);
  }
  
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+                 int pcount, int shiftlen)
+{
+       /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+        * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+        * to make sure not storing more than 65535 * 8 bytes per skb,
+        * even if current MSS is bigger.
+        */
+       if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+               return 0;
+       if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+               return 0;
+       return skb_shift(to, from, shiftlen);
+}
+
  /* Try collapsing SACK blocks spanning across multiple skbs to a single
   * skb.
   */
@@ -1475,6 +1525,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *prev;
         int mss;
+       int next_pcount;
         int pcount = 0;
         int len;
         int in_sack;
@@ -1565,7 +1616,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 }
         }
  
-       if (!skb_shift(prev, skb, len))
+       /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+       if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+               goto fallback;
+
+       if (!tcp_skb_shift(prev, skb, pcount, len))
                 goto fallback;
         if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
                 goto out;
@@ -1584,9 +1639,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 goto out;
  
         len = skb->len;
-       if (skb_shift(prev, skb, len)) {
-               pcount += tcp_skb_pcount(skb);
-               tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+       next_pcount = tcp_skb_pcount(skb);
+       if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+               pcount += next_pcount;
+               tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
         }
  
  out:
@@ -1655,10 +1711,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                         break;
  
                 if (in_sack) {
-                       TCP_SKB_CB(skb)->sacked = tcp_sacktag_one(skb, sk,
-                                                                 state,
-                                                                 dup_sack,
-                                                                 tcp_skb_pcount(skb));
+                       TCP_SKB_CB(skb)->sacked =
+                               tcp_sacktag_one(sk,
+                                               state,
+                                               TCP_SKB_CB(skb)->sacked,
+                                               TCP_SKB_CB(skb)->seq,
+                                               TCP_SKB_CB(skb)->end_seq,
+                                               dup_sack,
+                                               tcp_skb_pcount(skb));
  
                         if (!before(TCP_SKB_CB(skb)->seq,
                                     tcp_highest_sack_seq(tp)))
@@ -1708,19 +1768,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
         return skb;
  }
  
-static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
  {
         return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
  }
  
  static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                         u32 prior_snd_una)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
-       unsigned char *ptr = (skb_transport_header(ack_skb) +
-                             TCP_SKB_CB(ack_skb)->sacked);
+       const unsigned char *ptr = (skb_transport_header(ack_skb) +
+                                   TCP_SKB_CB(ack_skb)->sacked);
         struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
         struct tcp_sack_block sp[TCP_NUM_SACKS];
         struct tcp_sack_block *cache;
@@ -2227,11 +2287,8 @@ void tcp_enter_loss(struct sock *sk, int how)
         if (tcp_is_reno(tp))
                 tcp_reset_reno_sack(tp);
  
-       if (!how) {
-               /* Push undo marker, if it was plain RTO and nothing
-                * was retransmitted. */
-               tp->undo_marker = tp->snd_una;
-       } else {
+       tp->undo_marker = tp->snd_una;
+       if (how) {
                 tp->sacked_out = 0;
                 tp->fackets_out = 0;
         }
@@ -2284,7 +2341,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
         return 0;
  }
  
-static inline int tcp_fackets_out(struct tcp_sock *tp)
+static inline int tcp_fackets_out(const struct tcp_sock *tp)
  {
         return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
  }
@@ -2304,19 +2361,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
   * they differ. Since neither occurs due to loss, TCP should really
   * ignore them.
   */
-static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
  {
         return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
  }
  
-static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_skb_timedout(const struct sock *sk,
+                                  const struct sk_buff *skb)
  {
         return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
  }
  
-static inline int tcp_head_timedout(struct sock *sk)
+static inline int tcp_head_timedout(const struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
  
         return tp->packets_out &&
                tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2543,6 +2601,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
  
                 if (cnt > packets) {
                         if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+                           (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
                             (oldcnt >= packets))
                                 break;
  
@@ -2627,7 +2686,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
  /* Nothing was retransmitted or returned timestamp is less
   * than timestamp of the first retransmission.
   */
-static inline int tcp_packet_delayed(struct tcp_sock *tp)
+static inline int tcp_packet_delayed(const struct tcp_sock *tp)
  {
         return !tp->retrans_stamp ||
                 (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2688,7 +2747,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
         tp->snd_cwnd_stamp = tcp_time_stamp;
  }
  
-static inline int tcp_may_undo(struct tcp_sock *tp)
+static inline int tcp_may_undo(const struct tcp_sock *tp)
  {
         return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
  }
@@ -2752,9 +2811,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
   * that successive retransmissions of a segment must not advance
   * retrans_stamp under any conditions.
   */
-static int tcp_any_retrans_done(struct sock *sk)
+static int tcp_any_retrans_done(const struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
  
         if (tp->retrans_out)
@@ -2828,10 +2887,17 @@ static int tcp_try_undo_loss(struct sock *sk)
  static inline void tcp_complete_cwr(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       /* Do not moderate cwnd if it's already undone in cwr or recovery */
-       if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
-               tp->snd_cwnd = tp->snd_ssthresh;
-               tp->snd_cwnd_stamp = tcp_time_stamp;
+
+       /* Do not moderate cwnd if it's already undone in cwr or recovery. */
+       if (tp->undo_marker) {
+               if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
+                       tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+                       tp->snd_cwnd_stamp = tcp_time_stamp;
+               } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
+                       /* PRR algorithm. */
+                       tp->snd_cwnd = tp->snd_ssthresh;
+                       tp->snd_cwnd_stamp = tcp_time_stamp;
+               }
         }
         tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
  }
@@ -2948,6 +3014,38 @@ void tcp_simple_retransmit(struct sock *sk)
  }
  EXPORT_SYMBOL(tcp_simple_retransmit);
  
+/* This function implements the PRR algorithm, specifically the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *     cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *     losses and/or application stalls), do not perform any further cwnd
+ *     reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+                                       int fast_rexmit, int flag)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+       int sndcnt = 0;
+       int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+       if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+               u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+                              tp->prior_cwnd - 1;
+               sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+       } else {
+               sndcnt = min_t(int, delta,
+                              max_t(int, tp->prr_delivered - tp->prr_out,
+                                    newly_acked_sacked) + 1);
+       }
+
+       sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+       tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
  /* Process an event, which can update packets-in-flight not trivially.
   * Main goal of this function is to calculate new estimate for left_out,
   * taking into account both packets sitting in receiver's buffer and
@@ -2959,7 +3057,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
   * It does _not_ decide what to send, it is made in function
   * tcp_xmit_retransmit_queue().
   */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+                                 int newly_acked_sacked, int flag)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
@@ -3109,13 +3208,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
  
                 tp->bytes_acked = 0;
                 tp->snd_cwnd_cnt = 0;
+               tp->prior_cwnd = tp->snd_cwnd;
+               tp->prr_delivered = 0;
+               tp->prr_out = 0;
                 tcp_set_ca_state(sk, TCP_CA_Recovery);
                 fast_rexmit = 1;
         }
  
         if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
                 tcp_update_scoreboard(sk, fast_rexmit);
-       tcp_cwnd_down(sk, flag);
+       tp->prr_delivered += newly_acked_sacked;
+       tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
         tcp_xmit_retransmit_queue(sk);
  }
  
@@ -3192,7 +3295,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
   */
  static void tcp_rearm_rto(struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
  
         if (!tp->packets_out) {
                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -3296,7 +3399,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                  * connection startup slow start one packet too
                  * quickly.  This is severely frowned upon behavior.
                  */
-               if (!(scb->flags & TCPHDR_SYN)) {
+               if (!(scb->tcp_flags & TCPHDR_SYN)) {
                         flag |= FLAG_DATA_ACKED;
                 } else {
                         flag |= FLAG_SYN_ACKED;
@@ -3339,7 +3442,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                         int delta;
  
                         /* Non-retransmitted hole got filled? That's reordering */
-                       if (reord < prior_fackets)
+                       if (reord < prior_fackets && reord <= tp->fackets_out)
                                 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
  
                         delta = tcp_is_fack(tp) ? pkts_acked :
@@ -3444,7 +3547,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
   * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
   * and in FreeBSD. NetBSD's one is even worse.) is wrong.
   */
-static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
                                  u32 ack_seq)
  {
         struct tcp_sock *tp = tcp_sk(sk);
@@ -3571,6 +3674,11 @@ static int tcp_process_frto(struct sock *sk, int flag)
                 }
         } else {
                 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+                       if (!tcp_packets_in_flight(tp)) {
+                               tcp_enter_frto_loss(sk, 2, flag);
+                               return true;
+                       }
+
                         /* Prevent sending of new data. */
                         tp->snd_cwnd = min(tp->snd_cwnd,
                                            tcp_packets_in_flight(tp));
@@ -3619,8 +3727,54 @@ static int tcp_process_frto(struct sock *sk, int flag)
         return 0;
  }
  
+/* RFC 5961 7 [ACK Throttling] */
+static void tcp_send_challenge_ack(struct sock *sk)
+{
+       /* unprotected vars, we don't care about overwrites */
+       static u32 challenge_timestamp;
+       static unsigned int challenge_count;
+       u32 count, now = jiffies / HZ;
+
+       if (now != challenge_timestamp) {
+               u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+
+               challenge_timestamp = now;
+               ACCESS_ONCE(challenge_count) =
+                       half + (u32)(
+                       ((u64) random32() * sysctl_tcp_challenge_ack_limit)
+                       >> 32);
+       }
+       count = ACCESS_ONCE(challenge_count);
+       if (count > 0) {
+               ACCESS_ONCE(challenge_count) = count - 1;
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+               tcp_send_ack(sk);
+       }
+}
+
+static void tcp_store_ts_recent(struct tcp_sock *tp)
+{
+       tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
+       tp->rx_opt.ts_recent_stamp = get_seconds();
+}
+
+static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
+{
+       if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
+               /* PAWS bug workaround wrt. ACK frames, the PAWS discard
+                * extra check below makes sure this can only happen
+                * for pure ACK frames.  -DaveM
+                *
+                * Not only, also it occurs for expired timestamps.
+                */
+
+               if (tcp_paws_check(&tp->rx_opt, 0))
+                       tcp_store_ts_recent(tp);
+       }
+}
+
  /* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
@@ -3630,13 +3784,21 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         u32 prior_in_flight;
         u32 prior_fackets;
         int prior_packets;
+       int prior_sacked = tp->sacked_out;
+       int newly_acked_sacked = 0;
         int frto_cwnd = 0;
  
         /* If the ack is older than previous acks
          * then we can probably ignore it.
          */
-       if (before(ack, prior_snd_una))
+       if (before(ack, prior_snd_una)) {
+               /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
+               if (before(ack, prior_snd_una - tp->max_window)) {
+                       tcp_send_challenge_ack(sk);
+                       return -1;
+               }
                 goto old_ack;
+       }
  
         /* If the ack includes data we haven't sent yet, discard
          * this segment (RFC793 Section 3.9).
@@ -3659,6 +3821,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         prior_fackets = tp->fackets_out;
         prior_in_flight = tcp_packets_in_flight(tp);
  
+       /* ts_recent update must be made after we are sure that the packet
+        * is in window.
+        */
+       if (flag & FLAG_UPDATE_TS_RECENT)
+               tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
         if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
                 /* Window is constant, pure forward advance.
                  * No more checks are required.
@@ -3701,6 +3869,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         /* See if we can take anything off of the retransmit queue. */
         flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
  
+       newly_acked_sacked = (prior_packets - prior_sacked) -
+                            (tp->packets_out - tp->sacked_out);
+
         if (tp->frto_counter)
                 frto_cwnd = tcp_process_frto(sk, flag);
         /* Guarantee sacktag reordering detection against wrap-arounds */
@@ -3713,7 +3884,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
                     tcp_may_raise_cwnd(sk, flag))
                         tcp_cong_avoid(sk, ack, prior_in_flight);
                 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
-                                     flag);
+                                     newly_acked_sacked, flag);
         } else {
                 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
                         tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3752,14 +3923,14 @@ old_ack:
   * But, this can also be called on packets in the established flow when
   * the fast version below fails.
   */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-                      u8 **hvpp, int estab)
+void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
+                      const u8 **hvpp, int estab)
  {
-       unsigned char *ptr;
-       struct tcphdr *th = tcp_hdr(skb);
+       const unsigned char *ptr;
+       const struct tcphdr *th = tcp_hdr(skb);
         int length = (th->doff * 4) - sizeof(struct tcphdr);
  
-       ptr = (unsigned char *)(th + 1);
+       ptr = (const unsigned char *)(th + 1);
         opt_rx->saw_tstamp = 0;
  
         while (length > 0) {
@@ -3870,9 +4041,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
  }
  EXPORT_SYMBOL(tcp_parse_options);
  
-static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
  {
-       __be32 *ptr = (__be32 *)(th + 1);
+       const __be32 *ptr = (const __be32 *)(th + 1);
  
         if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
                           | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
@@ -3889,8 +4060,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
  /* Fast parse options. This hopes to only see timestamps.
   * If it is wrong it falls back on tcp_parse_options().
   */
-static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-                                 struct tcp_sock *tp, u8 **hvpp)
+static int tcp_fast_parse_options(const struct sk_buff *skb,
+                                 const struct tcphdr *th,
+                                 struct tcp_sock *tp, const u8 **hvpp)
  {
         /* In the spirit of fast parsing, compare doff directly to constant
          * values.  Because equality is used, short doff can be ignored here.
@@ -3911,10 +4083,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
  /*
   * Parse MD5 Signature option
   */
-u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
  {
-       int length = (th->doff << 2) - sizeof (*th);
-       u8 *ptr = (u8*)(th + 1);
+       int length = (th->doff << 2) - sizeof(*th);
+       const u8 *ptr = (const u8 *)(th + 1);
  
         /* If the TCP option is too short, we can short cut */
         if (length < TCPOLEN_MD5SIG)
@@ -3945,27 +4117,6 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th)
  EXPORT_SYMBOL(tcp_parse_md5sig_option);
  #endif
  
-static inline void tcp_store_ts_recent(struct tcp_sock *tp)
-{
-       tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-       tp->rx_opt.ts_recent_stamp = get_seconds();
-}
-
-static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
-{
-       if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
-               /* PAWS bug workaround wrt. ACK frames, the PAWS discard
-                * extra check below makes sure this can only happen
-                * for pure ACK frames.  -DaveM
-                *
-                * Not only, also it occurs for expired timestamps.
-                */
-
-               if (tcp_paws_check(&tp->rx_opt, 0))
-                       tcp_store_ts_recent(tp);
-       }
-}
-
  /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
   *
   * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
@@ -3991,8 +4142,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
  
  static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct tcphdr *th = tcp_hdr(skb);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcphdr *th = tcp_hdr(skb);
         u32 seq = TCP_SKB_CB(skb)->seq;
         u32 ack = TCP_SKB_CB(skb)->ack_seq;
  
@@ -4031,7 +4182,7 @@ static inline int tcp_paws_discard(const struct sock *sk,
   * (borrowed from freebsd)
   */
  
-static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
  {
         return  !before(end_seq, tp->rcv_wup) &&
                 !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
@@ -4076,7 +4227,7 @@ static void tcp_reset(struct sock *sk)
   *
   *     If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
   */
-static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+static void tcp_fin(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -4188,7 +4339,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
                 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
  }
  
-static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -4347,7 +4498,7 @@ static void tcp_ofo_queue(struct sock *sk)
                 __skb_queue_tail(&sk->sk_receive_queue, skb);
                 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                 if (tcp_hdr(skb)->fin)
-                       tcp_fin(skb, sk, tcp_hdr(skb));
+                       tcp_fin(sk);
         }
  }
  
@@ -4375,7 +4526,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
  
  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
  {
-       struct tcphdr *th = tcp_hdr(skb);
+       const struct tcphdr *th = tcp_hdr(skb);
         struct tcp_sock *tp = tcp_sk(sk);
         int eaten = -1;
  
@@ -4429,7 +4580,7 @@ queue_and_out:
                 if (skb->len)
                         tcp_event_data_recv(sk, skb);
                 if (th->fin)
-                       tcp_fin(skb, sk, th);
+                       tcp_fin(sk);
  
                 if (!skb_queue_empty(&tp->out_of_order_queue)) {
                         tcp_ofo_queue(sk);
@@ -4859,9 +5010,9 @@ void tcp_cwnd_application_limited(struct sock *sk)
         tp->snd_cwnd_stamp = tcp_time_stamp;
  }
  
-static int tcp_should_expand_sndbuf(struct sock *sk)
+static int tcp_should_expand_sndbuf(const struct sock *sk)
  {
-       struct tcp_sock *tp = tcp_sk(sk);
+       const struct tcp_sock *tp = tcp_sk(sk);
  
         /* If the user specified a specific send buffer setting, do
          * not modify it.
@@ -4895,8 +5046,10 @@ static void tcp_new_space(struct sock *sk)
         struct tcp_sock *tp = tcp_sk(sk);
  
         if (tcp_should_expand_sndbuf(sk)) {
-               int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-                       MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+               int sndmem = SKB_TRUESIZE(max_t(u32,
+                                               tp->rx_opt.mss_clamp,
+                                               tp->mss_cache) +
+                                         MAX_TCP_HEADER);
                 int demanded = max_t(unsigned int, tp->snd_cwnd,
                                      tp->reordering + 1);
                 sndmem *= 2 * demanded;
@@ -4968,7 +5121,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
   *     either form (or just set the sysctl tcp_stdurg).
   */
  
-static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
+static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         u32 ptr = ntohs(th->urg_ptr);
@@ -5034,7 +5187,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
  }
  
  /* This is the 'fast' part of urgent handling. */
-static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -5154,10 +5307,10 @@ out:
  /* Does PAWS and seqno based validation of an incoming segment, flags will
   * play significant role here.
   */
-static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
-                             struct tcphdr *th, int syn_inerr)
+static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
+                                 const struct tcphdr *th, int syn_inerr)
  {
-       u8 *hash_location;
+       const u8 *hash_location;
         struct tcp_sock *tp = tcp_sk(sk);
  
         /* RFC1323: H1. Apply PAWS check first. */
@@ -5180,38 +5333,48 @@ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
                  * an acknowledgment should be sent in reply (unless the RST
                  * bit is set, if so drop the segment and return)".
                  */
-               if (!th->rst)
+               if (!th->rst) {
+                       if (th->syn)
+                               goto syn_challenge;
                         tcp_send_dupack(sk, skb);
+               }
                 goto discard;
         }
  
         /* Step 2: check RST bit */
         if (th->rst) {
-               tcp_reset(sk);
+               /* RFC 5961 3.2 :
+                * If sequence number exactly matches RCV.NXT, then
+                *     RESET the connection
+                * else
+                *     Send a challenge ACK
+                */
+               if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
+                       tcp_reset(sk);
+               else
+                       tcp_send_challenge_ack(sk);
                 goto discard;
         }
  
-       /* ts_recent update must be made after we are sure that the packet
-        * is in window.
-        */
-       tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
-
         /* step 3: check security and precedence [ignored] */
  
-       /* step 4: Check for a SYN in window. */
-       if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+       /* step 4: Check for a SYN
+        * RFC 5961 4.2 : Send a challenge ack
+        */
+       if (th->syn) {
+syn_challenge:
                 if (syn_inerr)
                         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
-               tcp_reset(sk);
-               return -1;
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
+               tcp_send_challenge_ack(sk);
+               goto discard;
         }
  
-       return 1;
+       return true;
  
  discard:
         __kfree_skb(skb);
-       return 0;
+       return false;
  }
  
  /*
@@ -5238,10 +5401,9 @@ discard:
   *     tcp_data_queue when everything is OK.
   */
  int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-                       struct tcphdr *th, unsigned len)
+                       const struct tcphdr *th, unsigned int len)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       int res;
  
         /*
          *      Header prediction.
@@ -5326,7 +5488,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                         if (tp->copied_seq == tp->rcv_nxt &&
                             len - tcp_header_len <= tp->ucopy.len) {
  #ifdef CONFIG_NET_DMA
-                               if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+                               if (tp->ucopy.task == current &&
+                                   sock_owned_by_user(sk) &&
+                                   tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
                                         copied_early = 1;
                                         eaten = 1;
                                 }
@@ -5362,6 +5526,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                 if (tcp_checksum_complete_user(sk, skb))
                                         goto csum_error;
  
+                               if ((int)skb->truesize > sk->sk_forward_alloc)
+                                       goto step5;
+
                                 /* Predicted packet is in window by definition.
                                  * seq == rcv_nxt and rcv_wup <= rcv_nxt.
                                  * Hence, check seq<=rcv_wup reduces to:
@@ -5373,9 +5540,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
  
                                 tcp_rcv_rtt_measure_ts(sk, skb);
  
-                               if ((int)skb->truesize > sk->sk_forward_alloc)
-                                       goto step5;
-
                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
  
                                 /* Bulk data transfer: receiver */
@@ -5419,12 +5583,12 @@ slow_path:
          *      Standard slow path.
          */
  
-       res = tcp_validate_incoming(sk, skb, th, 1);
-       if (res <= 0)
-               return -res;
+       if (!tcp_validate_incoming(sk, skb, th, 1))
+               return 0;
  
  step5:
-       if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
+       if (th->ack &&
+           tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
                 goto discard;
  
         tcp_rcv_rtt_measure_ts(sk, skb);
@@ -5449,9 +5613,9 @@ discard:
  EXPORT_SYMBOL(tcp_rcv_established);
  
  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-                                        struct tcphdr *th, unsigned len)
+                                        const struct tcphdr *th, unsigned int len)
  {
-       u8 *hash_location;
+       const u8 *hash_location;
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct tcp_cookie_values *cvp = tp->cookie_values;
@@ -5672,6 +5836,7 @@ discard:
                 }
  
                 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+               tp->copied_seq = tp->rcv_nxt;
                 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
  
                 /* RFC1323: The window in SYN & SYN/ACK segments is
@@ -5726,12 +5891,11 @@ reset_and_undo:
   */
  
  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-                         struct tcphdr *th, unsigned len)
+                         const struct tcphdr *th, unsigned int len)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
         int queued = 0;
-       int res;
  
         tp->rx_opt.saw_tstamp = 0;
  
@@ -5747,6 +5911,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                         goto discard;
  
                 if (th->syn) {
+                       if (th->fin)
+                               goto discard;
                         if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
                                 return 1;
  
@@ -5784,13 +5950,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 return 0;
         }
  
-       res = tcp_validate_incoming(sk, skb, th, 0);
-       if (res <= 0)
-               return -res;
+       if (!tcp_validate_incoming(sk, skb, th, 0))
+               return 0;
  
         /* step 5: check the ACK field */
         if (th->ack) {
-               int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
+               int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
+                                                 FLAG_UPDATE_TS_RECENT) > 0;
  
                 switch (sk->sk_state) {
                 case TCP_SYN_RECV: