1 /*
2  *      TCP over IPv6
3  *      Linux INET6 implementation 
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9  *
10  *      Based on: 
11  *      linux/net/ipv4/tcp.c
12  *      linux/net/ipv4/tcp_input.c
13  *      linux/net/ipv4/tcp_output.c
14  *
15  *      Fixes:
16  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
17  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
18  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
19  *                                      to a single port at the same time.
20  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  */
27
28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/jiffies.h>
35 #include <linux/in.h>
36 #include <linux/in6.h>
37 #include <linux/netdevice.h>
38 #include <linux/init.h>
39 #include <linux/jhash.h>
40 #include <linux/ipsec.h>
41 #include <linux/times.h>
42
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
46
47 #include <net/tcp.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/addrconf.h>
60 #include <net/snmp.h>
61 #include <net/dsfield.h>
62 #include <net/timewait_sock.h>
63
64 #include <asm/uaccess.h>
65
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
68
69 /* Socket used for sending RSTs and ACKs */
70 static struct socket *tcp6_socket;
71
72 static void     tcp_v6_send_reset(struct sk_buff *skb);
73 static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
74 static void     tcp_v6_send_check(struct sock *sk, int len, 
75                                   struct sk_buff *skb);
76
77 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78
79 static struct inet_connection_sock_af_ops ipv6_mapped;
80 static struct inet_connection_sock_af_ops ipv6_specific;
81
82 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
83 {
84         return inet_csk_get_port(&tcp_hashinfo, sk, snum,
85                                  inet6_csk_bind_conflict);
86 }
87
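/* Hash the socket into the TCP lookup tables.  Sockets using the v4-mapped
 * af_ops are handed to the IPv4 hash routine; native IPv6 sockets go into
 * the IPv6 hash with bottom halves disabled.
 */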
88 static void tcp_v6_hash(struct sock *sk)
89 {
90         if (sk->sk_state != TCP_CLOSE) {
91                 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
92                         tcp_prot.hash(sk);
93                         return;
94                 }
95                 local_bh_disable();
96                 __inet6_hash(&tcp_hashinfo, sk);
97                 local_bh_enable();
98         }
99 }
100
101 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
102                                    struct in6_addr *saddr, 
103                                    struct in6_addr *daddr, 
104                                    unsigned long base)
105 {
106         return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
107 }
108
109 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
110 {
111         if (skb->protocol == htons(ETH_P_IPV6)) {
112                 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
113                                                     skb->nh.ipv6h->saddr.s6_addr32,
114                                                     skb->h.th->dest,
115                                                     skb->h.th->source);
116         } else {
117                 return secure_tcp_sequence_number(skb->nh.iph->daddr,
118                                                   skb->nh.iph->saddr,
119                                                   skb->h.th->dest,
120                                                   skb->h.th->source);
121         }
122 }
123
124 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
125                           int addr_len)
126 {
127         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
128         struct inet_sock *inet = inet_sk(sk);
129         struct inet_connection_sock *icsk = inet_csk(sk);
130         struct ipv6_pinfo *np = inet6_sk(sk);
131         struct tcp_sock *tp = tcp_sk(sk);
132         struct in6_addr *saddr = NULL, *final_p = NULL, final;
133         struct flowi fl;
134         struct dst_entry *dst;
135         int addr_type;
136         int err;
137
138         if (addr_len < SIN6_LEN_RFC2133) 
139                 return -EINVAL;
140
141         if (usin->sin6_family != AF_INET6) 
142                 return -EAFNOSUPPORT;
143
144         memset(&fl, 0, sizeof(fl));
145
146         if (np->sndflow) {
147                 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
148                 IP6_ECN_flow_init(fl.fl6_flowlabel);
149                 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
150                         struct ip6_flowlabel *flowlabel;
151                         flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
152                         if (flowlabel == NULL)
153                                 return -EINVAL;
154                         ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
155                         fl6_sock_release(flowlabel);
156                 }
157         }
158
159         /*
160          *      connect() to INADDR_ANY means loopback (BSD'ism).
161          */
162         
163         if(ipv6_addr_any(&usin->sin6_addr))
164                 usin->sin6_addr.s6_addr[15] = 0x1; 
165
166         addr_type = ipv6_addr_type(&usin->sin6_addr);
167
168         if(addr_type & IPV6_ADDR_MULTICAST)
169                 return -ENETUNREACH;
170
171         if (addr_type&IPV6_ADDR_LINKLOCAL) {
172                 if (addr_len >= sizeof(struct sockaddr_in6) &&
173                     usin->sin6_scope_id) {
174                         /* If interface is set while binding, indices
175                          * must coincide.
176                          */
177                         if (sk->sk_bound_dev_if &&
178                             sk->sk_bound_dev_if != usin->sin6_scope_id)
179                                 return -EINVAL;
180
181                         sk->sk_bound_dev_if = usin->sin6_scope_id;
182                 }
183
184                 /* Connecting to a link-local address requires an interface */
185                 if (!sk->sk_bound_dev_if)
186                         return -EINVAL;
187         }
188
189         if (tp->rx_opt.ts_recent_stamp &&
190             !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
191                 tp->rx_opt.ts_recent = 0;
192                 tp->rx_opt.ts_recent_stamp = 0;
193                 tp->write_seq = 0;
194         }
195
196         ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
197         np->flow_label = fl.fl6_flowlabel;
198
199         /*
200          *      TCP over IPv4
201          */
202
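        /* A v4-mapped destination (::ffff:a.b.c.d) is handled by switching the
         * socket over to the mapped af_ops and letting tcp_v4_connect() do the
         * real work; if that fails, the native IPv6 ops are restored below
         * before the error is returned.
         *
         * Illustrative only: userspace reaches this path simply by connecting
         * an AF_INET6 socket to a mapped address, roughly:
         *
         *      int fd = socket(AF_INET6, SOCK_STREAM, 0);
         *      struct sockaddr_in6 a = { .sin6_family = AF_INET6,
         *                                .sin6_port   = htons(80) };
         *      inet_pton(AF_INET6, "::ffff:192.0.2.1", &a.sin6_addr);
         *      connect(fd, (struct sockaddr *)&a, sizeof(a));
         */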
203         if (addr_type == IPV6_ADDR_MAPPED) {
204                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
205                 struct sockaddr_in sin;
206
207                 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
208
209                 if (__ipv6_only_sock(sk))
210                         return -ENETUNREACH;
211
212                 sin.sin_family = AF_INET;
213                 sin.sin_port = usin->sin6_port;
214                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
215
216                 icsk->icsk_af_ops = &ipv6_mapped;
217                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
218
219                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
220
221                 if (err) {
222                         icsk->icsk_ext_hdr_len = exthdrlen;
223                         icsk->icsk_af_ops = &ipv6_specific;
224                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
225                         goto failure;
226                 } else {
227                         ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
228                                       inet->saddr);
229                         ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
230                                       inet->rcv_saddr);
231                 }
232
233                 return err;
234         }
235
236         if (!ipv6_addr_any(&np->rcv_saddr))
237                 saddr = &np->rcv_saddr;
238
239         fl.proto = IPPROTO_TCP;
240         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
241         ipv6_addr_copy(&fl.fl6_src,
242                        (saddr ? saddr : &np->saddr));
243         fl.oif = sk->sk_bound_dev_if;
244         fl.fl_ip_dport = usin->sin6_port;
245         fl.fl_ip_sport = inet->sport;
246
247         if (np->opt && np->opt->srcrt) {
248                 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
249                 ipv6_addr_copy(&final, &fl.fl6_dst);
250                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
251                 final_p = &final;
252         }
253
254         err = ip6_dst_lookup(sk, &dst, &fl);
255         if (err)
256                 goto failure;
257         if (final_p)
258                 ipv6_addr_copy(&fl.fl6_dst, final_p);
259
260         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
261                 goto failure;
262
263         if (saddr == NULL) {
264                 saddr = &fl.fl6_src;
265                 ipv6_addr_copy(&np->rcv_saddr, saddr);
266         }
267
268         /* set the source address */
269         ipv6_addr_copy(&np->saddr, saddr);
270         inet->rcv_saddr = LOOPBACK4_IPV6;
271
272         ip6_dst_store(sk, dst, NULL);
273         sk->sk_route_caps = dst->dev->features &
274                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
275
276         icsk->icsk_ext_hdr_len = 0;
277         if (np->opt)
278                 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
279                                           np->opt->opt_nflen);
280
281         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
282
283         inet->dport = usin->sin6_port;
284
285         tcp_set_state(sk, TCP_SYN_SENT);
286         err = inet6_hash_connect(&tcp_death_row, sk);
287         if (err)
288                 goto late_failure;
289
290         if (!tp->write_seq)
291                 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
292                                                              np->daddr.s6_addr32,
293                                                              inet->sport,
294                                                              inet->dport);
295
296         err = tcp_connect(sk);
297         if (err)
298                 goto late_failure;
299
300         return 0;
301
302 late_failure:
303         tcp_set_state(sk, TCP_CLOSE);
304         __sk_dst_reset(sk);
305 failure:
306         inet->dport = 0;
307         sk->sk_route_caps = 0;
308         return err;
309 }
310
311 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
312                 int type, int code, int offset, __u32 info)
313 {
314         struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
315         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
316         struct ipv6_pinfo *np;
317         struct sock *sk;
318         int err;
319         struct tcp_sock *tp; 
320         __u32 seq;
321
322         sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
323                           th->source, skb->dev->ifindex);
324
325         if (sk == NULL) {
326                 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
327                 return;
328         }
329
330         if (sk->sk_state == TCP_TIME_WAIT) {
331                 inet_twsk_put((struct inet_timewait_sock *)sk);
332                 return;
333         }
334
335         bh_lock_sock(sk);
336         if (sock_owned_by_user(sk))
337                 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
338
339         if (sk->sk_state == TCP_CLOSE)
340                 goto out;
341
342         tp = tcp_sk(sk);
343         seq = ntohl(th->seq); 
344         if (sk->sk_state != TCP_LISTEN &&
345             !between(seq, tp->snd_una, tp->snd_nxt)) {
346                 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
347                 goto out;
348         }
349
350         np = inet6_sk(sk);
351
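        /* Path MTU discovery: if the socket's cached PMTU now exceeds the
         * MTU of the (freshly checked) route, lower the MSS and retransmit
         * the outstanding segments immediately; otherwise the normal
         * retransmit timer will take care of it.
         */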
352         if (type == ICMPV6_PKT_TOOBIG) {
353                 struct dst_entry *dst = NULL;
354
355                 if (sock_owned_by_user(sk))
356                         goto out;
357                 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
358                         goto out;
359
360                 /* icmp should have updated the destination cache entry */
361                 dst = __sk_dst_check(sk, np->dst_cookie);
362
363                 if (dst == NULL) {
364                         struct inet_sock *inet = inet_sk(sk);
365                         struct flowi fl;
366
367                         /* BUGGG_FUTURE: Again, it is not clear how
368                            to handle the rthdr case. Ignore this complexity
369                            for now.
370                          */
371                         memset(&fl, 0, sizeof(fl));
372                         fl.proto = IPPROTO_TCP;
373                         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
374                         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
375                         fl.oif = sk->sk_bound_dev_if;
376                         fl.fl_ip_dport = inet->dport;
377                         fl.fl_ip_sport = inet->sport;
378
379                         if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
380                                 sk->sk_err_soft = -err;
381                                 goto out;
382                         }
383
384                         if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
385                                 sk->sk_err_soft = -err;
386                                 goto out;
387                         }
388
389                 } else
390                         dst_hold(dst);
391
392                 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
393                         tcp_sync_mss(sk, dst_mtu(dst));
394                         tcp_simple_retransmit(sk);
395                 } /* else let the usual retransmit timer handle it */
396                 dst_release(dst);
397                 goto out;
398         }
399
400         icmpv6_err_convert(type, code, &err);
401
402         /* Might be for a request_sock */
403         switch (sk->sk_state) {
404                 struct request_sock *req, **prev;
405         case TCP_LISTEN:
406                 if (sock_owned_by_user(sk))
407                         goto out;
408
409                 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
410                                            &hdr->saddr, inet6_iif(skb));
411                 if (!req)
412                         goto out;
413
414                 /* ICMPs are not backlogged, hence we cannot get
415                  * an established socket here.
416                  */
417                 BUG_TRAP(req->sk == NULL);
418
419                 if (seq != tcp_rsk(req)->snt_isn) {
420                         NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
421                         goto out;
422                 }
423
424                 inet_csk_reqsk_queue_drop(sk, req, prev);
425                 goto out;
426
427         case TCP_SYN_SENT:
428         case TCP_SYN_RECV:  /* Cannot happen.
429                                It can, if SYNs are crossed. --ANK */
430                 if (!sock_owned_by_user(sk)) {
431                         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
432                         sk->sk_err = err;
433                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
434
435                         tcp_done(sk);
436                 } else
437                         sk->sk_err_soft = err;
438                 goto out;
439         }
440
441         if (!sock_owned_by_user(sk) && np->recverr) {
442                 sk->sk_err = err;
443                 sk->sk_error_report(sk);
444         } else
445                 sk->sk_err_soft = err;
446
447 out:
448         bh_unlock_sock(sk);
449         sock_put(sk);
450 }
451
452
453 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
454                               struct dst_entry *dst)
455 {
456         struct inet6_request_sock *treq = inet6_rsk(req);
457         struct ipv6_pinfo *np = inet6_sk(sk);
458         struct sk_buff * skb;
459         struct ipv6_txoptions *opt = NULL;
460         struct in6_addr * final_p = NULL, final;
461         struct flowi fl;
462         int err = -1;
463
464         memset(&fl, 0, sizeof(fl));
465         fl.proto = IPPROTO_TCP;
466         ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
467         ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
468         fl.fl6_flowlabel = 0;
469         fl.oif = treq->iif;
470         fl.fl_ip_dport = inet_rsk(req)->rmt_port;
471         fl.fl_ip_sport = inet_sk(sk)->sport;
472
473         if (dst == NULL) {
474                 opt = np->opt;
475                 if (opt == NULL &&
476                     np->rxopt.bits.osrcrt == 2 &&
477                     treq->pktopts) {
478                         struct sk_buff *pktopts = treq->pktopts;
479                         struct inet6_skb_parm *rxopt = IP6CB(pktopts);
480                         if (rxopt->srcrt)
481                                 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
482                 }
483
484                 if (opt && opt->srcrt) {
485                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
486                         ipv6_addr_copy(&final, &fl.fl6_dst);
487                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
488                         final_p = &final;
489                 }
490
491                 err = ip6_dst_lookup(sk, &dst, &fl);
492                 if (err)
493                         goto done;
494                 if (final_p)
495                         ipv6_addr_copy(&fl.fl6_dst, final_p);
496                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
497                         goto done;
498         }
499
500         skb = tcp_make_synack(sk, dst, req);
501         if (skb) {
502                 struct tcphdr *th = skb->h.th;
503
504                 th->check = tcp_v6_check(th, skb->len,
505                                          &treq->loc_addr, &treq->rmt_addr,
506                                          csum_partial((char *)th, skb->len, skb->csum));
507
508                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
509                 err = ip6_xmit(sk, skb, &fl, opt, 0);
510                 if (err == NET_XMIT_CN)
511                         err = 0;
512         }
513
514 done:
515         if (opt && opt != np->opt)
516                 sock_kfree_s(sk, opt, opt->tot_len);
517         dst_release(dst);
518         return err;
519 }
520
521 static void tcp_v6_reqsk_destructor(struct request_sock *req)
522 {
523         if (inet6_rsk(req)->pktopts)
524                 kfree_skb(inet6_rsk(req)->pktopts);
525 }
526
527 static struct request_sock_ops tcp6_request_sock_ops = {
528         .family         =       AF_INET6,
529         .obj_size       =       sizeof(struct tcp6_request_sock),
530         .rtx_syn_ack    =       tcp_v6_send_synack,
531         .send_ack       =       tcp_v6_reqsk_send_ack,
532         .destructor     =       tcp_v6_reqsk_destructor,
533         .send_reset     =       tcp_v6_send_reset
534 };
535
536 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
537         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
538         .twsk_unique    = tcp_twsk_unique,
539 };
540
541 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
542 {
543         struct ipv6_pinfo *np = inet6_sk(sk);
544         struct tcphdr *th = skb->h.th;
545
546         if (skb->ip_summed == CHECKSUM_HW) {
547                 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
548                 skb->csum = offsetof(struct tcphdr, check);
549         } else {
550                 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
551                                             csum_partial((char *)th, th->doff<<2, 
552                                                          skb->csum));
553         }
554 }
555
556
557 static void tcp_v6_send_reset(struct sk_buff *skb)
558 {
559         struct tcphdr *th = skb->h.th, *t1; 
560         struct sk_buff *buff;
561         struct flowi fl;
562
563         if (th->rst)
564                 return;
565
566         if (!ipv6_unicast_destination(skb))
567                 return; 
568
569         /*
570          * We need to grab some memory, and put together an RST,
571          * and then put it into the queue to be sent.
572          */
573
574         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
575                          GFP_ATOMIC);
576         if (buff == NULL) 
577                 return;
578
579         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
580
581         t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
582
583         /* Swap the send and the receive. */
584         memset(t1, 0, sizeof(*t1));
585         t1->dest = th->source;
586         t1->source = th->dest;
587         t1->doff = sizeof(*t1)/4;
588         t1->rst = 1;
589   
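        /* RFC 793 reset generation: if the offending segment carried an ACK,
         * the RST takes its sequence number from that ACK; otherwise the RST
         * itself ACKs every octet the segment occupied (data plus the SYN and
         * FIN flags).
         */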
590         if(th->ack) {
591                 t1->seq = th->ack_seq;
592         } else {
593                 t1->ack = 1;
594                 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
595                                     + skb->len - (th->doff<<2));
596         }
597
598         buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
599
600         memset(&fl, 0, sizeof(fl));
601         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
602         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
603
604         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
605                                     sizeof(*t1), IPPROTO_TCP,
606                                     buff->csum);
607
608         fl.proto = IPPROTO_TCP;
609         fl.oif = inet6_iif(skb);
610         fl.fl_ip_dport = t1->dest;
611         fl.fl_ip_sport = t1->source;
612
613         /* sk = NULL, but it is safe for now. RST socket required. */
614         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
615
616                 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
617                         ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
618                         TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
619                         TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
620                         return;
621                 }
622         }
623
624         kfree_skb(buff);
625 }
626
627 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
628 {
629         struct tcphdr *th = skb->h.th, *t1;
630         struct sk_buff *buff;
631         struct flowi fl;
632         int tot_len = sizeof(struct tcphdr);
633
634         if (ts)
635                 tot_len += 3*4;
636
637         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
638                          GFP_ATOMIC);
639         if (buff == NULL)
640                 return;
641
642         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
643
644         t1 = (struct tcphdr *) skb_push(buff,tot_len);
645
646         /* Swap the send and the receive. */
647         memset(t1, 0, sizeof(*t1));
648         t1->dest = th->source;
649         t1->source = th->dest;
650         t1->doff = tot_len/4;
651         t1->seq = htonl(seq);
652         t1->ack_seq = htonl(ack);
653         t1->ack = 1;
654         t1->window = htons(win);
655         
656         if (ts) {
657                 u32 *ptr = (u32*)(t1 + 1);
658                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
659                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
660                 *ptr++ = htonl(tcp_time_stamp);
661                 *ptr = htonl(ts);
662         }
663
664         buff->csum = csum_partial((char *)t1, tot_len, 0);
665
666         memset(&fl, 0, sizeof(fl));
667         ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
668         ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
669
670         t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
671                                     tot_len, IPPROTO_TCP,
672                                     buff->csum);
673
674         fl.proto = IPPROTO_TCP;
675         fl.oif = inet6_iif(skb);
676         fl.fl_ip_dport = t1->dest;
677         fl.fl_ip_sport = t1->source;
678
679         if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
680                 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
681                         ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
682                         TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
683                         return;
684                 }
685         }
686
687         kfree_skb(buff);
688 }
689
690 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
691 {
692         struct inet_timewait_sock *tw = inet_twsk(sk);
693         const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
694
695         tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
696                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
697                         tcptw->tw_ts_recent);
698
699         inet_twsk_put(tw);
700 }
701
702 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
703 {
704         tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
705 }
706
707
708 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
709 {
710         struct request_sock *req, **prev;
711         const struct tcphdr *th = skb->h.th;
712         struct sock *nsk;
713
714         /* Find possible connection requests. */
715         req = inet6_csk_search_req(sk, &prev, th->source,
716                                    &skb->nh.ipv6h->saddr,
717                                    &skb->nh.ipv6h->daddr, inet6_iif(skb));
718         if (req)
719                 return tcp_check_req(sk, skb, req, prev);
720
721         nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
722                                          th->source, &skb->nh.ipv6h->daddr,
723                                          ntohs(th->dest), inet6_iif(skb));
724
725         if (nsk) {
726                 if (nsk->sk_state != TCP_TIME_WAIT) {
727                         bh_lock_sock(nsk);
728                         return nsk;
729                 }
730                 inet_twsk_put((struct inet_timewait_sock *)nsk);
731                 return NULL;
732         }
733
734 #if 0 /*def CONFIG_SYN_COOKIES*/
735         if (!th->rst && !th->syn && th->ack)
736                 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
737 #endif
738         return sk;
739 }
740
741 /* FIXME: this is substantially similar to the ipv4 code.
742  * Can some kind of merge be done? -- erics
743  */
744 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
745 {
746         struct inet6_request_sock *treq;
747         struct ipv6_pinfo *np = inet6_sk(sk);
748         struct tcp_options_received tmp_opt;
749         struct tcp_sock *tp = tcp_sk(sk);
750         struct request_sock *req = NULL;
751         __u32 isn = TCP_SKB_CB(skb)->when;
752
753         if (skb->protocol == htons(ETH_P_IP))
754                 return tcp_v4_conn_request(sk, skb);
755
756         if (!ipv6_unicast_destination(skb))
757                 goto drop; 
758
759         /*
760          *      There are no SYN attacks on IPv6, yet...        
761          */
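        /* Note that, unlike the IPv4 path, there is no SYN-cookie fallback
         * here (cookie_v6_check is compiled out in tcp_v6_hnd_req), so a
         * full SYN queue simply drops the request.
         */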
762         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
763                 if (net_ratelimit())
764                         printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
765                 goto drop;              
766         }
767
768         if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
769                 goto drop;
770
771         req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
772         if (req == NULL)
773                 goto drop;
774
775         tcp_clear_options(&tmp_opt);
776         tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
777         tmp_opt.user_mss = tp->rx_opt.user_mss;
778
779         tcp_parse_options(skb, &tmp_opt, 0);
780
781         tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
782         tcp_openreq_init(req, &tmp_opt, skb);
783
784         treq = inet6_rsk(req);
785         ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
786         ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
787         TCP_ECN_create_request(req, skb->h.th);
788         treq->pktopts = NULL;
789         if (ipv6_opt_accepted(sk, skb) ||
790             np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
791             np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
792                 atomic_inc(&skb->users);
793                 treq->pktopts = skb;
794         }
795         treq->iif = sk->sk_bound_dev_if;
796
797         /* So that link locals have meaning */
798         if (!sk->sk_bound_dev_if &&
799             ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
800                 treq->iif = inet6_iif(skb);
801
802         if (isn == 0) 
803                 isn = tcp_v6_init_sequence(sk,skb);
804
805         tcp_rsk(req)->snt_isn = isn;
806
807         if (tcp_v6_send_synack(sk, req, NULL))
808                 goto drop;
809
810         inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
811         return 0;
812
813 drop:
814         if (req)
815                 reqsk_free(req);
816
817         TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
818         return 0; /* don't send reset */
819 }
820
821 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
822                                           struct request_sock *req,
823                                           struct dst_entry *dst)
824 {
825         struct inet6_request_sock *treq = inet6_rsk(req);
826         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
827         struct tcp6_sock *newtcp6sk;
828         struct inet_sock *newinet;
829         struct tcp_sock *newtp;
830         struct sock *newsk;
831         struct ipv6_txoptions *opt;
832
833         if (skb->protocol == htons(ETH_P_IP)) {
834                 /*
835                  *      v6 mapped
836                  */
837
838                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
839
840                 if (newsk == NULL) 
841                         return NULL;
842
843                 newtcp6sk = (struct tcp6_sock *)newsk;
844                 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
845
846                 newinet = inet_sk(newsk);
847                 newnp = inet6_sk(newsk);
848                 newtp = tcp_sk(newsk);
849
850                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
851
852                 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
853                               newinet->daddr);
854
855                 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
856                               newinet->saddr);
857
858                 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
859
860                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
861                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
862                 newnp->pktoptions  = NULL;
863                 newnp->opt         = NULL;
864                 newnp->mcast_oif   = inet6_iif(skb);
865                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
866
867                 /*
868                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
869                  * here, tcp_create_openreq_child now does this for us, see the comment in
870                  * that function for the gory details. -acme
871                  */
872
873                 /* This is a tricky place. Until this moment the IPv4 TCP
874                    code has been working with the IPv6 icsk.icsk_af_ops.
875                    Sync it now.
876                  */
877                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
878
879                 return newsk;
880         }
881
882         opt = np->opt;
883
884         if (sk_acceptq_is_full(sk))
885                 goto out_overflow;
886
887         if (np->rxopt.bits.osrcrt == 2 &&
888             opt == NULL && treq->pktopts) {
889                 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
890                 if (rxopt->srcrt)
891                         opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
892         }
893
894         if (dst == NULL) {
895                 struct in6_addr *final_p = NULL, final;
896                 struct flowi fl;
897
898                 memset(&fl, 0, sizeof(fl));
899                 fl.proto = IPPROTO_TCP;
900                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
901                 if (opt && opt->srcrt) {
902                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
903                         ipv6_addr_copy(&final, &fl.fl6_dst);
904                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
905                         final_p = &final;
906                 }
907                 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
908                 fl.oif = sk->sk_bound_dev_if;
909                 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
910                 fl.fl_ip_sport = inet_sk(sk)->sport;
911
912                 if (ip6_dst_lookup(sk, &dst, &fl))
913                         goto out;
914
915                 if (final_p)
916                         ipv6_addr_copy(&fl.fl6_dst, final_p);
917
918                 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
919                         goto out;
920         } 
921
922         newsk = tcp_create_openreq_child(sk, req, skb);
923         if (newsk == NULL)
924                 goto out;
925
926         /*
927          * No need to charge this sock to the relevant IPv6 refcnt debug socks
928          * count here, tcp_create_openreq_child now does this for us, see the
929          * comment in that function for the gory details. -acme
930          */
931
932         ip6_dst_store(newsk, dst, NULL);
933         newsk->sk_route_caps = dst->dev->features &
934                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
935
936         newtcp6sk = (struct tcp6_sock *)newsk;
937         inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
938
939         newtp = tcp_sk(newsk);
940         newinet = inet_sk(newsk);
941         newnp = inet6_sk(newsk);
942
943         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
944
945         ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
946         ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
947         ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
948         newsk->sk_bound_dev_if = treq->iif;
949
950         /* Now IPv6 options... 
951
952            First: no IPv4 options.
953          */
954         newinet->opt = NULL;
955
956         /* Clone RX bits */
957         newnp->rxopt.all = np->rxopt.all;
958
959         /* Clone pktoptions received with SYN */
960         newnp->pktoptions = NULL;
961         if (treq->pktopts != NULL) {
962                 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
963                 kfree_skb(treq->pktopts);
964                 treq->pktopts = NULL;
965                 if (newnp->pktoptions)
966                         skb_set_owner_r(newnp->pktoptions, newsk);
967         }
968         newnp->opt        = NULL;
969         newnp->mcast_oif  = inet6_iif(skb);
970         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
971
972         /* Clone native IPv6 options from listening socket (if any)
973
974            Yes, keeping a reference count would be much more clever,
975            but we do one more thing here: reattach optmem
976            to newsk.
977          */
978         if (opt) {
979                 newnp->opt = ipv6_dup_options(newsk, opt);
980                 if (opt != np->opt)
981                         sock_kfree_s(sk, opt, opt->tot_len);
982         }
983
984         inet_csk(newsk)->icsk_ext_hdr_len = 0;
985         if (newnp->opt)
986                 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
987                                                      newnp->opt->opt_flen);
988
989         tcp_mtup_init(newsk);
990         tcp_sync_mss(newsk, dst_mtu(dst));
991         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
992         tcp_initialize_rcv_mss(newsk);
993
994         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
995
996         __inet6_hash(&tcp_hashinfo, newsk);
997         inet_inherit_port(&tcp_hashinfo, sk, newsk);
998
999         return newsk;
1000
1001 out_overflow:
1002         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1003 out:
1004         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1005         if (opt && opt != np->opt)
1006                 sock_kfree_s(sk, opt, opt->tot_len);
1007         dst_release(dst);
1008         return NULL;
1009 }
1010
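/* Validate the checksum of an incoming segment.  If the NIC already summed
 * the data (CHECKSUM_HW) and the pseudo-header check passes, the segment is
 * marked CHECKSUM_UNNECESSARY.  Otherwise skb->csum is seeded with the
 * pseudo-header sum; short segments (<= 76 bytes) are verified immediately,
 * longer ones are left to be completed later, e.g. while the payload is
 * copied to user space.
 */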
1011 static int tcp_v6_checksum_init(struct sk_buff *skb)
1012 {
1013         if (skb->ip_summed == CHECKSUM_HW) {
1014                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1015                                   &skb->nh.ipv6h->daddr,skb->csum)) {
1016                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1017                         return 0;
1018                 }
1019         }
1020
1021         skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1022                                   &skb->nh.ipv6h->daddr, 0);
1023
1024         if (skb->len <= 76) {
1025                 return __skb_checksum_complete(skb);
1026         }
1027         return 0;
1028 }
1029
1030 /* The socket must have its spinlock held when we get
1031  * here.
1032  *
1033  * We have a potential double-lock case here, so even when
1034  * doing backlog processing we use the BH locking scheme.
1035  * This is because we cannot sleep with the original spinlock
1036  * held.
1037  */
1038 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1039 {
1040         struct ipv6_pinfo *np = inet6_sk(sk);
1041         struct tcp_sock *tp;
1042         struct sk_buff *opt_skb = NULL;
1043
1044         /* Imagine: socket is IPv6. IPv4 packet arrives,
1045            goes to the IPv4 receive handler and is backlogged.
1046            From the backlog it always goes here. Kerboom...
1047            Fortunately, tcp_rcv_established and rcv_established
1048            handle them correctly, but that is not the case with
1049            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1050          */
1051
1052         if (skb->protocol == htons(ETH_P_IP))
1053                 return tcp_v4_do_rcv(sk, skb);
1054
1055         if (sk_filter(sk, skb, 0))
1056                 goto discard;
1057
1058         /*
1059          *      socket locking is here for SMP purposes as backlog rcv
1060          *      is currently called with bh processing disabled.
1061          */
1062
1063         /* Do Stevens' IPV6_PKTOPTIONS.
1064
1065            Yes, guys, it is the only place in our code where we
1066            can do this without affecting IPv4.
1067            The rest of the code is protocol independent,
1068            and I do not like the idea of uglifying IPv4.
1069
1070            Actually, the whole idea behind IPV6_PKTOPTIONS
1071            does not look very well thought out. For now we latch
1072            the options received in the last packet enqueued
1073            by tcp. Feel free to propose a better solution.
1074                                                --ANK (980728)
1075          */
1076         if (np->rxopt.all)
1077                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1078
1079         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1080                 TCP_CHECK_TIMER(sk);
1081                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1082                         goto reset;
1083                 TCP_CHECK_TIMER(sk);
1084                 if (opt_skb)
1085                         goto ipv6_pktoptions;
1086                 return 0;
1087         }
1088
1089         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1090                 goto csum_err;
1091
1092         if (sk->sk_state == TCP_LISTEN) { 
1093                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1094                 if (!nsk)
1095                         goto discard;
1096
1097                 /*
1098                  * Queue it on the new socket if the new socket is active,
1099                  * otherwise we just short-circuit this and continue with
1100                  * the new socket.
1101                  */
1102                 if(nsk != sk) {
1103                         if (tcp_child_process(sk, nsk, skb))
1104                                 goto reset;
1105                         if (opt_skb)
1106                                 __kfree_skb(opt_skb);
1107                         return 0;
1108                 }
1109         }
1110
1111         TCP_CHECK_TIMER(sk);
1112         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1113                 goto reset;
1114         TCP_CHECK_TIMER(sk);
1115         if (opt_skb)
1116                 goto ipv6_pktoptions;
1117         return 0;
1118
1119 reset:
1120         tcp_v6_send_reset(skb);
1121 discard:
1122         if (opt_skb)
1123                 __kfree_skb(opt_skb);
1124         kfree_skb(skb);
1125         return 0;
1126 csum_err:
1127         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1128         goto discard;
1129
1130
1131 ipv6_pktoptions:
1132         /* What is going on here?  We latch the options only when:
1133
1134            1. the skb was enqueued by tcp,
1135            2. the skb was added to the tail of the read queue, not out of order,
1136            3. the socket is not in a passive state, and
1137            4. it really contains options which the user wants to receive.
1138          */
1139         tp = tcp_sk(sk);
1140         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1141             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1142                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1143                         np->mcast_oif = inet6_iif(opt_skb);
1144                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1145                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1146                 if (ipv6_opt_accepted(sk, opt_skb)) {
1147                         skb_set_owner_r(opt_skb, sk);
1148                         opt_skb = xchg(&np->pktoptions, opt_skb);
1149                 } else {
1150                         __kfree_skb(opt_skb);
1151                         opt_skb = xchg(&np->pktoptions, NULL);
1152                 }
1153         }
1154
1155         if (opt_skb)
1156                 kfree_skb(opt_skb);
1157         return 0;
1158 }
1159
1160 static int tcp_v6_rcv(struct sk_buff **pskb)
1161 {
1162         struct sk_buff *skb = *pskb;
1163         struct tcphdr *th;      
1164         struct sock *sk;
1165         int ret;
1166
1167         if (skb->pkt_type != PACKET_HOST)
1168                 goto discard_it;
1169
1170         /*
1171          *      Count it even if it's bad.
1172          */
1173         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1174
1175         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1176                 goto discard_it;
1177
1178         th = skb->h.th;
1179
1180         if (th->doff < sizeof(struct tcphdr)/4)
1181                 goto bad_packet;
1182         if (!pskb_may_pull(skb, th->doff*4))
1183                 goto discard_it;
1184
1185         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1186              tcp_v6_checksum_init(skb)))
1187                 goto bad_packet;
1188
1189         th = skb->h.th;
1190         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1191         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1192                                     skb->len - th->doff*4);
1193         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1194         TCP_SKB_CB(skb)->when = 0;
1195         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1196         TCP_SKB_CB(skb)->sacked = 0;
1197
1198         sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1199                             &skb->nh.ipv6h->daddr, ntohs(th->dest),
1200                             inet6_iif(skb));
1201
1202         if (!sk)
1203                 goto no_tcp_socket;
1204
1205 process:
1206         if (sk->sk_state == TCP_TIME_WAIT)
1207                 goto do_time_wait;
1208
1209         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1210                 goto discard_and_relse;
1211
1212         if (sk_filter(sk, skb, 0))
1213                 goto discard_and_relse;
1214
1215         skb->dev = NULL;
1216
1217         bh_lock_sock(sk);
1218         ret = 0;
1219         if (!sock_owned_by_user(sk)) {
1220 #ifdef CONFIG_NET_DMA
1221                 struct tcp_sock *tp = tcp_sk(sk);
1222                 if (tp->ucopy.dma_chan)
1223                         ret = tcp_v6_do_rcv(sk, skb);
1224                 else
1225 #endif
1226                 {
1227                         if (!tcp_prequeue(sk, skb))
1228                                 ret = tcp_v6_do_rcv(sk, skb);
1229                 }
1230         } else
1231                 sk_add_backlog(sk, skb);
1232         bh_unlock_sock(sk);
1233
1234         sock_put(sk);
1235         return ret ? -1 : 0;
1236
1237 no_tcp_socket:
1238         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1239                 goto discard_it;
1240
1241         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1242 bad_packet:
1243                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1244         } else {
1245                 tcp_v6_send_reset(skb);
1246         }
1247
1248 discard_it:
1249
1250         /*
1251          *      Discard frame
1252          */
1253
1254         kfree_skb(skb);
1255         return 0;
1256
1257 discard_and_relse:
1258         sock_put(sk);
1259         goto discard_it;
1260
1261 do_time_wait:
1262         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1263                 inet_twsk_put((struct inet_timewait_sock *)sk);
1264                 goto discard_it;
1265         }
1266
1267         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1268                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1269                 inet_twsk_put((struct inet_timewait_sock *)sk);
1270                 goto discard_it;
1271         }
1272
1273         switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1274                                            skb, th)) {
1275         case TCP_TW_SYN:
1276         {
1277                 struct sock *sk2;
1278
1279                 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1280                                             &skb->nh.ipv6h->daddr,
1281                                             ntohs(th->dest), inet6_iif(skb));
1282                 if (sk2 != NULL) {
1283                         struct inet_timewait_sock *tw = inet_twsk(sk);
1284                         inet_twsk_deschedule(tw, &tcp_death_row);
1285                         inet_twsk_put(tw);
1286                         sk = sk2;
1287                         goto process;
1288                 }
1289                 /* Fall through to ACK */
1290         }
1291         case TCP_TW_ACK:
1292                 tcp_v6_timewait_ack(sk, skb);
1293                 break;
1294         case TCP_TW_RST:
1295                 goto no_tcp_socket;
1296         case TCP_TW_SUCCESS:;
1297         }
1298         goto discard_it;
1299 }
1300
1301 static int tcp_v6_remember_stamp(struct sock *sk)
1302 {
1303         /* Alas, not yet... */
1304         return 0;
1305 }
1306
1307 static struct inet_connection_sock_af_ops ipv6_specific = {
1308         .queue_xmit        = inet6_csk_xmit,
1309         .send_check        = tcp_v6_send_check,
1310         .rebuild_header    = inet6_sk_rebuild_header,
1311         .conn_request      = tcp_v6_conn_request,
1312         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1313         .remember_stamp    = tcp_v6_remember_stamp,
1314         .net_header_len    = sizeof(struct ipv6hdr),
1315         .setsockopt        = ipv6_setsockopt,
1316         .getsockopt        = ipv6_getsockopt,
1317         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1318         .sockaddr_len      = sizeof(struct sockaddr_in6),
1319 #ifdef CONFIG_COMPAT
1320         .compat_setsockopt = compat_ipv6_setsockopt,
1321         .compat_getsockopt = compat_ipv6_getsockopt,
1322 #endif
1323 };
1324
1325 /*
1326  *      TCP over IPv4 via INET6 API
1327  */
1328
1329 static struct inet_connection_sock_af_ops ipv6_mapped = {
1330         .queue_xmit        = ip_queue_xmit,
1331         .send_check        = tcp_v4_send_check,
1332         .rebuild_header    = inet_sk_rebuild_header,
1333         .conn_request      = tcp_v6_conn_request,
1334         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1335         .remember_stamp    = tcp_v4_remember_stamp,
1336         .net_header_len    = sizeof(struct iphdr),
1337         .setsockopt        = ipv6_setsockopt,
1338         .getsockopt        = ipv6_getsockopt,
1339         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1340         .sockaddr_len      = sizeof(struct sockaddr_in6),
1341 #ifdef CONFIG_COMPAT
1342         .compat_setsockopt = compat_ipv6_setsockopt,
1343         .compat_getsockopt = compat_ipv6_getsockopt,
1344 #endif
1345 };
1346
1347 /* NOTE: A lot of things are set to zero explicitly by the call to
1348  *       sk_alloc(), so they need not be done here.
1349  */
1350 static int tcp_v6_init_sock(struct sock *sk)
1351 {
1352         struct inet_connection_sock *icsk = inet_csk(sk);
1353         struct tcp_sock *tp = tcp_sk(sk);
1354
1355         skb_queue_head_init(&tp->out_of_order_queue);
1356         tcp_init_xmit_timers(sk);
1357         tcp_prequeue_init(tp);
1358
1359         icsk->icsk_rto = TCP_TIMEOUT_INIT;
1360         tp->mdev = TCP_TIMEOUT_INIT;
1361
1362         /* So many TCP implementations out there (incorrectly) count the
1363          * initial SYN frame in their delayed-ACK and congestion control
1364          * algorithms that we must have the following bandaid to talk
1365          * efficiently to them.  -DaveM
1366          */
1367         tp->snd_cwnd = 2;
1368
1369         /* See draft-stevens-tcpca-spec-01 for discussion of the
1370          * initialization of these values.
1371          */
1372         tp->snd_ssthresh = 0x7fffffff;
1373         tp->snd_cwnd_clamp = ~0;
1374         tp->mss_cache = 536;
1375
1376         tp->reordering = sysctl_tcp_reordering;
1377
1378         sk->sk_state = TCP_CLOSE;
1379
1380         icsk->icsk_af_ops = &ipv6_specific;
1381         icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1382         icsk->icsk_sync_mss = tcp_sync_mss;
1383         sk->sk_write_space = sk_stream_write_space;
1384         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1385
1386         sk->sk_sndbuf = sysctl_tcp_wmem[1];
1387         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1388
1389         atomic_inc(&tcp_sockets_allocated);
1390
1391         return 0;
1392 }
1393
1394 static int tcp_v6_destroy_sock(struct sock *sk)
1395 {
1396         tcp_v4_destroy_sock(sk);
1397         return inet6_destroy_sock(sk);
1398 }
1399
1400 /* Proc filesystem TCPv6 sock list dumping. */
1401 static void get_openreq6(struct seq_file *seq, 
1402                          struct sock *sk, struct request_sock *req, int i, int uid)
1403 {
1404         int ttd = req->expires - jiffies;
1405         struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1406         struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1407
1408         if (ttd < 0)
1409                 ttd = 0;
1410
1411         seq_printf(seq,
1412                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1413                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1414                    i,
1415                    src->s6_addr32[0], src->s6_addr32[1],
1416                    src->s6_addr32[2], src->s6_addr32[3],
1417                    ntohs(inet_sk(sk)->sport),
1418                    dest->s6_addr32[0], dest->s6_addr32[1],
1419                    dest->s6_addr32[2], dest->s6_addr32[3],
1420                    ntohs(inet_rsk(req)->rmt_port),
1421                    TCP_SYN_RECV,
1422                    0,0, /* could print option size, but that is af dependent. */
1423                    1,   /* timers active (only the expire timer) */  
1424                    jiffies_to_clock_t(ttd), 
1425                    req->retrans,
1426                    uid,
1427                    0,  /* non standard timer */  
1428                    0, /* open_requests have no inode */
1429                    0, req);
1430 }
1431
1432 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1433 {
1434         struct in6_addr *dest, *src;
1435         __u16 destp, srcp;
1436         int timer_active;
1437         unsigned long timer_expires;
1438         struct inet_sock *inet = inet_sk(sp);
1439         struct tcp_sock *tp = tcp_sk(sp);
1440         const struct inet_connection_sock *icsk = inet_csk(sp);
1441         struct ipv6_pinfo *np = inet6_sk(sp);
1442
1443         dest  = &np->daddr;
1444         src   = &np->rcv_saddr;
1445         destp = ntohs(inet->dport);
1446         srcp  = ntohs(inet->sport);
1447
1448         if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1449                 timer_active    = 1;
1450                 timer_expires   = icsk->icsk_timeout;
1451         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1452                 timer_active    = 4;
1453                 timer_expires   = icsk->icsk_timeout;
1454         } else if (timer_pending(&sp->sk_timer)) {
1455                 timer_active    = 2;
1456                 timer_expires   = sp->sk_timer.expires;
1457         } else {
1458                 timer_active    = 0;
1459                 timer_expires = jiffies;
1460         }
1461
1462         seq_printf(seq,
1463                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1464                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1465                    i,
1466                    src->s6_addr32[0], src->s6_addr32[1],
1467                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1468                    dest->s6_addr32[0], dest->s6_addr32[1],
1469                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1470                    sp->sk_state, 
1471                    tp->write_seq-tp->snd_una,
1472                    (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1473                    timer_active,
1474                    jiffies_to_clock_t(timer_expires - jiffies),
1475                    icsk->icsk_retransmits,
1476                    sock_i_uid(sp),
1477                    icsk->icsk_probes_out,
1478                    sock_i_ino(sp),
1479                    atomic_read(&sp->sk_refcnt), sp,
1480                    icsk->icsk_rto,
1481                    icsk->icsk_ack.ato,
1482                    (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1483                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1484                    );
1485 }
1486
1487 static void get_timewait6_sock(struct seq_file *seq, 
1488                                struct inet_timewait_sock *tw, int i)
1489 {
1490         struct in6_addr *dest, *src;
1491         __u16 destp, srcp;
1492         struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1493         int ttd = tw->tw_ttd - jiffies;
1494
1495         if (ttd < 0)
1496                 ttd = 0;
1497
1498         dest = &tw6->tw_v6_daddr;
1499         src  = &tw6->tw_v6_rcv_saddr;
1500         destp = ntohs(tw->tw_dport);
1501         srcp  = ntohs(tw->tw_sport);
1502
1503         seq_printf(seq,
1504                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1505                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1506                    i,
1507                    src->s6_addr32[0], src->s6_addr32[1],
1508                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1509                    dest->s6_addr32[0], dest->s6_addr32[1],
1510                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1511                    tw->tw_substate, 0, 0,
1512                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1513                    atomic_read(&tw->tw_refcnt), tw);
1514 }
1515
1516 #ifdef CONFIG_PROC_FS
1517 static int tcp6_seq_show(struct seq_file *seq, void *v)
1518 {
1519         struct tcp_iter_state *st;
1520
1521         if (v == SEQ_START_TOKEN) {
1522                 seq_puts(seq,
1523                          "  sl  "
1524                          "local_address                         "
1525                          "remote_address                        "
1526                          "st tx_queue rx_queue tr tm->when retrnsmt"
1527                          "   uid  timeout inode\n");
1528                 goto out;
1529         }
1530         st = seq->private;
1531
1532         switch (st->state) {
1533         case TCP_SEQ_STATE_LISTENING:
1534         case TCP_SEQ_STATE_ESTABLISHED:
1535                 get_tcp6_sock(seq, v, st->num);
1536                 break;
1537         case TCP_SEQ_STATE_OPENREQ:
1538                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1539                 break;
1540         case TCP_SEQ_STATE_TIME_WAIT:
1541                 get_timewait6_sock(seq, v, st->num);
1542                 break;
1543         }
1544 out:
1545         return 0;
1546 }
1547
1548 static struct file_operations tcp6_seq_fops;
1549 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1550         .owner          = THIS_MODULE,
1551         .name           = "tcp6",
1552         .family         = AF_INET6,
1553         .seq_show       = tcp6_seq_show,
1554         .seq_fops       = &tcp6_seq_fops,
1555 };
1556
1557 int __init tcp6_proc_init(void)
1558 {
1559         return tcp_proc_register(&tcp6_seq_afinfo);
1560 }
1561
1562 void tcp6_proc_exit(void)
1563 {
1564         tcp_proc_unregister(&tcp6_seq_afinfo);
1565 }
1566 #endif
1567
1568 struct proto tcpv6_prot = {
1569         .name                   = "TCPv6",
1570         .owner                  = THIS_MODULE,
1571         .close                  = tcp_close,
1572         .connect                = tcp_v6_connect,
1573         .disconnect             = tcp_disconnect,
1574         .accept                 = inet_csk_accept,
1575         .ioctl                  = tcp_ioctl,
1576         .init                   = tcp_v6_init_sock,
1577         .destroy                = tcp_v6_destroy_sock,
1578         .shutdown               = tcp_shutdown,
1579         .setsockopt             = tcp_setsockopt,
1580         .getsockopt             = tcp_getsockopt,
1581         .sendmsg                = tcp_sendmsg,
1582         .recvmsg                = tcp_recvmsg,
1583         .backlog_rcv            = tcp_v6_do_rcv,
1584         .hash                   = tcp_v6_hash,
1585         .unhash                 = tcp_unhash,
1586         .get_port               = tcp_v6_get_port,
1587         .enter_memory_pressure  = tcp_enter_memory_pressure,
1588         .sockets_allocated      = &tcp_sockets_allocated,
1589         .memory_allocated       = &tcp_memory_allocated,
1590         .memory_pressure        = &tcp_memory_pressure,
1591         .orphan_count           = &tcp_orphan_count,
1592         .sysctl_mem             = sysctl_tcp_mem,
1593         .sysctl_wmem            = sysctl_tcp_wmem,
1594         .sysctl_rmem            = sysctl_tcp_rmem,
1595         .max_header             = MAX_TCP_HEADER,
1596         .obj_size               = sizeof(struct tcp6_sock),
1597         .twsk_prot              = &tcp6_timewait_sock_ops,
1598         .rsk_prot               = &tcp6_request_sock_ops,
1599 #ifdef CONFIG_COMPAT
1600         .compat_setsockopt      = compat_tcp_setsockopt,
1601         .compat_getsockopt      = compat_tcp_getsockopt,
1602 #endif
1603 };
1604
1605 static struct inet6_protocol tcpv6_protocol = {
1606         .handler        =       tcp_v6_rcv,
1607         .err_handler    =       tcp_v6_err,
1608         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1609 };
1610
1611 static struct inet_protosw tcpv6_protosw = {
1612         .type           =       SOCK_STREAM,
1613         .protocol       =       IPPROTO_TCP,
1614         .prot           =       &tcpv6_prot,
1615         .ops            =       &inet6_stream_ops,
1616         .capability     =       -1,
1617         .no_check       =       0,
1618         .flags          =       INET_PROTOSW_PERMANENT |
1619                                 INET_PROTOSW_ICSK,
1620 };
1621
1622 void __init tcpv6_init(void)
1623 {
1624         /* register inet6 protocol */
1625         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
1626                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
1627         inet6_register_protosw(&tcpv6_protosw);
1628
1629         if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW,
1630                                      IPPROTO_TCP) < 0)
1631                 panic("Failed to create the TCPv6 control socket.\n");
1632 }