net: Kill link between CSUM and SG features.
[pandora-kernel.git] / net / ipv4 / tcp_ipv4.c
index eadb693..8cdee12 100644 (file)
@@ -657,7 +657,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
                 * no RST generated if md5 hash doesn't match.
                 */
                sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
-                                            &tcp_hashinfo, ip_hdr(skb)->daddr,
+                                            &tcp_hashinfo, ip_hdr(skb)->saddr,
+                                            th->source, ip_hdr(skb)->daddr,
                                             ntohs(th->source), inet_iif(skb));
                /* don't send rst if it can't find key */
                if (!sk1)
@@ -725,7 +726,7 @@ release_sk1:
  */
 
 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
-                           u32 win, u32 ts, int oif,
+                           u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
                            int reply_flags, u8 tos)
 {
@@ -746,12 +747,12 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 
        arg.iov[0].iov_base = (unsigned char *)&rep;
        arg.iov[0].iov_len  = sizeof(rep.th);
-       if (ts) {
+       if (tsecr) {
                rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                   (TCPOPT_TIMESTAMP << 8) |
                                   TCPOLEN_TIMESTAMP);
-               rep.opt[1] = htonl(tcp_time_stamp);
-               rep.opt[2] = htonl(ts);
+               rep.opt[1] = htonl(tsval);
+               rep.opt[2] = htonl(tsecr);
                arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
        }
 
@@ -766,7 +767,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 
 #ifdef CONFIG_TCP_MD5SIG
        if (key) {
-               int offset = (ts) ? 3 : 0;
+               int offset = (tsecr) ? 3 : 0;
 
                rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
@@ -801,6 +802,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 
        tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+                       tcp_time_stamp + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent,
                        tw->tw_bound_dev_if,
                        tcp_twsk_md5_key(tcptw),
@@ -820,6 +822,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
        tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+                       tcp_time_stamp,
                        req->ts_recent,
                        0,
                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -951,7 +954,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_key *key;
-       struct hlist_node *pos;
        unsigned int size = sizeof(struct in_addr);
        struct tcp_md5sig_info *md5sig;
 
@@ -965,7 +967,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
        if (family == AF_INET6)
                size = sizeof(struct in6_addr);
 #endif
-       hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
+       hlist_for_each_entry_rcu(key, &md5sig->head, node) {
                if (key->family != family)
                        continue;
                if (!memcmp(&key->addr, addr, size))
@@ -1066,14 +1068,14 @@ static void tcp_clear_md5_list(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_md5sig_key *key;
-       struct hlist_node *pos, *n;
+       struct hlist_node *n;
        struct tcp_md5sig_info *md5sig;
 
        md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 
        if (!hlist_empty(&md5sig->head))
                tcp_free_md5sig_pool();
-       hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
+       hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
                hlist_del_rcu(&key->node);
                atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
                kfree_rcu(key, rcu);
@@ -1570,7 +1572,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                goto drop_and_free;
 
        if (!want_cookie || tmp_opt.tstamp_ok)
-               TCP_ECN_create_request(req, skb);
+               TCP_ECN_create_request(req, skb, sock_net(sk));
 
        if (want_cookie) {
                isn = cookie_v4_init_sequence(sk, skb, &req->mss);
@@ -1948,6 +1950,50 @@ void tcp_v4_early_demux(struct sk_buff *skb)
        }
 }
 
+/* Packet is added to VJ-style prequeue for processing in process
+ * context, if a reader task is waiting. Apparently, this exciting
+ * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
+ * failed somewhere. Latency? Burstiness? Well, at least now we will
+ * see, why it failed. 8)8)                              --ANK
+ *
+ */
+bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (sysctl_tcp_low_latency || !tp->ucopy.task)
+               return false;
+
+       if (skb->len <= tcp_hdrlen(skb) &&
+           skb_queue_len(&tp->ucopy.prequeue) == 0)
+               return false;
+
+       __skb_queue_tail(&tp->ucopy.prequeue, skb);
+       tp->ucopy.memory += skb->truesize;
+       if (tp->ucopy.memory > sk->sk_rcvbuf) {
+               struct sk_buff *skb1;
+
+               BUG_ON(sock_owned_by_user(sk));
+
+               while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+                       sk_backlog_rcv(sk, skb1);
+                       NET_INC_STATS_BH(sock_net(sk),
+                                        LINUX_MIB_TCPPREQUEUEDROPPED);
+               }
+
+               tp->ucopy.memory = 0;
+       } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
+               wake_up_interruptible_sync_poll(sk_sleep(sk),
+                                          POLLIN | POLLRDNORM | POLLRDBAND);
+               if (!inet_csk_ack_scheduled(sk))
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                                 (3 * tcp_rto_min(sk)) / 4,
+                                                 TCP_RTO_MAX);
+       }
+       return true;
+}
+EXPORT_SYMBOL(tcp_prequeue);
+
 /*
  *     From tcp_input.c
  */
@@ -2077,6 +2123,7 @@ do_time_wait:
        case TCP_TW_SYN: {
                struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
                                                        &tcp_hashinfo,
+                                                       iph->saddr, th->source,
                                                        iph->daddr, th->dest,
                                                        inet_iif(skb));
                if (sk2) {
@@ -2612,7 +2659,7 @@ EXPORT_SYMBOL(tcp_proc_register);
 
 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 {
-       proc_net_remove(net, afinfo->name);
+       remove_proc_entry(afinfo->name, net->proc_net);
 }
 EXPORT_SYMBOL(tcp_proc_unregister);
 
@@ -2891,6 +2938,7 @@ EXPORT_SYMBOL(tcp_prot);
 
 static int __net_init tcp_sk_init(struct net *net)
 {
+       net->ipv4.sysctl_tcp_ecn = 2;
        return 0;
 }