[NET] Rename open_request to request_sock
[pandora-kernel.git] / net / ipv6 / tcp_ipv6.c
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      to a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void     tcp_v6_send_reset(struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                                  struct sk_buff *skb);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
                                    struct in6_addr *faddr, u16 fport)
{
        int hashent = (lport ^ fport);

        hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
        hashent ^= hashent>>16;
        hashent ^= hashent>>8;
        return (hashent & (tcp_ehash_size - 1));
}
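
/* tcp_v6_sk_hashfn() feeds the socket's own (rcv_saddr, num, daddr, dport)
 * tuple back into tcp_v6_hashfn(), so insertion and lookup agree on the
 * ehash slot.
 */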

static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *laddr = &np->rcv_saddr;
        struct in6_addr *faddr = &np->daddr;
        __u16 lport = inet->num;
        __u16 fport = inet->dport;
        return tcp_v6_hashfn(laddr, lport, faddr, fport);
}
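
/* Two owners of the same port conflict only if they could both receive
 * the same packets: the bound devices match (or either is unbound), the
 * receive addresses overlap, and SO_REUSEADDR is either missing on one
 * side or defeated by an owner sitting in TCP_LISTEN.
 */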

static inline int tcp_v6_bind_conflict(struct sock *sk,
                                       struct tcp_bind_bucket *tb)
{
        struct sock *sk2;
        struct hlist_node *node;

        /* We must walk the whole port owner list in this case. -DaveM */
        sk_for_each_bound(sk2, node, &tb->owners) {
                if (sk != sk2 &&
                    (!sk->sk_bound_dev_if ||
                     !sk2->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
                    (!sk->sk_reuse || !sk2->sk_reuse ||
                     sk2->sk_state == TCP_LISTEN) &&
                     ipv6_rcv_saddr_equal(sk, sk2))
                        break;
        }

        return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;
        struct hlist_node *node;
        int ret;

        local_bh_disable();
        if (snum == 0) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int remaining = (high - low) + 1;
                int rover;

                spin_lock(&tcp_portalloc_lock);
                if (tcp_port_rover < low)
                        rover = low;
                else
                        rover = tcp_port_rover;
                do {    rover++;
                        if (rover > high)
                                rover = low;
                        head = &tcp_bhash[tcp_bhashfn(rover)];
                        spin_lock(&head->lock);
                        tb_for_each(tb, node, &head->chain)
                                if (tb->port == rover)
                                        goto next;
                        break;
                next:
                        spin_unlock(&head->lock);
                } while (--remaining > 0);
                tcp_port_rover = rover;
                spin_unlock(&tcp_portalloc_lock);

                /* Exhausted local port range during search? */
                ret = 1;
                if (remaining <= 0)
                        goto fail;

                /* OK, here is the one we will use. */
                snum = rover;
        } else {
                head = &tcp_bhash[tcp_bhashfn(snum)];
                spin_lock(&head->lock);
                tb_for_each(tb, node, &head->chain)
                        if (tb->port == snum)
                                goto tb_found;
        }
        tb = NULL;
        goto tb_not_found;
tb_found:
        if (tb && !hlist_empty(&tb->owners)) {
                if (tb->fastreuse > 0 && sk->sk_reuse &&
                    sk->sk_state != TCP_LISTEN) {
                        goto success;
                } else {
                        ret = 1;
                        if (tcp_v6_bind_conflict(sk, tb))
                                goto fail_unlock;
                }
        }
tb_not_found:
        ret = 1;
        if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
                goto fail_unlock;
        if (hlist_empty(&tb->owners)) {
                if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
                        tb->fastreuse = 1;
                else
                        tb->fastreuse = 0;
        } else if (tb->fastreuse &&
                   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
                tb->fastreuse = 0;

success:
        if (!tcp_sk(sk)->bind_hash)
                tcp_bind_hash(sk, tb, snum);
        BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
        ret = 0;

fail_unlock:
        spin_unlock(&head->lock);
fail:
        local_bh_enable();
        return ret;
}
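
/* From the bind(2) side (a userspace sketch, not part of this file):
 * binding port 0 lands in the snum == 0 branch above, so an ephemeral
 * port is picked from sysctl_local_port_range; binding an explicit port
 * takes the tb_found path and fails with EADDRINUSE on a conflict.
 *
 *      struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *                                .sin6_addr   = in6addr_any,
 *                                .sin6_port   = 0 };
 *      bind(fd, (struct sockaddr *)&a, sizeof(a));
 */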

static __inline__ void __tcp_v6_hash(struct sock *sk)
{
        struct hlist_head *list;
        rwlock_t *lock;

        BUG_TRAP(sk_unhashed(sk));

        if (sk->sk_state == TCP_LISTEN) {
                list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
                lock = &tcp_lhash_lock;
                tcp_listen_wlock();
        } else {
                sk->sk_hashent = tcp_v6_sk_hashfn(sk);
                list = &tcp_ehash[sk->sk_hashent].chain;
                lock = &tcp_ehash[sk->sk_hashent].lock;
                write_lock(lock);
        }

        __sk_add_node(sk, list);
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(lock);
}

static void tcp_v6_hash(struct sock *sk)
{
        if (sk->sk_state != TCP_CLOSE) {
                struct tcp_sock *tp = tcp_sk(sk);

                if (tp->af_specific == &ipv6_mapped) {
                        tcp_prot.hash(sk);
                        return;
                }
                local_bh_disable();
                __tcp_v6_hash(sk);
                local_bh_enable();
        }
}

static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
        struct sock *sk;
        struct hlist_node *node;
        struct sock *result = NULL;
        int score, hiscore;

        hiscore = 0;
        read_lock(&tcp_lhash_lock);
        sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
                if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        score = 1;
                        if (!ipv6_addr_any(&np->rcv_saddr)) {
                                if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
                                        continue;
                                score++;
                        }
                        if (sk->sk_bound_dev_if) {
                                if (sk->sk_bound_dev_if != dif)
                                        continue;
                                score++;
                        }
                        if (score == 3) {
                                result = sk;
                                break;
                        }
                        if (score > hiscore) {
                                hiscore = score;
                                result = sk;
                        }
                }
        }
        if (result)
                sock_hold(result);
        read_unlock(&tcp_lhash_lock);
        return result;
}
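
/* Scoring above: every listener on the port starts at 1, an exact
 * rcv_saddr match and an exact bound-device match add one point each,
 * so a score of 3 is a perfect hit and ends the walk early.
 */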

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
                                                       struct in6_addr *daddr, u16 hnum,
                                                       int dif)
{
        struct tcp_ehash_bucket *head;
        struct sock *sk;
        struct hlist_node *node;
        __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
        int hash;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.
         */
        hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
        head = &tcp_ehash[hash];
        read_lock(&head->lock);
        sk_for_each(sk, node, &head->chain) {
                /* For IPV6 do the cheaper port and family tests first. */
                if (TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
                        goto hit; /* You sunk my battleship! */
        }
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
                /* FIXME: acme: check this... */
                struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

                if (*((__u32 *)&(tw->tw_dport))  == ports        &&
                    sk->sk_family                == PF_INET6) {
                        if (ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
                            ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
                            (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
                                goto hit;
                }
        }
        read_unlock(&head->lock);
        return NULL;

hit:
        sock_hold(sk);
        read_unlock(&head->lock);
        return sk;
}
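
/* Full demultiplex order in __tcp_v6_lookup() below: established (and
 * TIME-WAIT) entries first, and only on a miss do we fall back to the
 * listener table with its wildcard scoring.
 */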

static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                           struct in6_addr *daddr, u16 hnum,
                                           int dif)
{
        struct sock *sk;

        sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

        if (sk)
                return sk;

        return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                  struct in6_addr *daddr, u16 dport,
                                  int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
        local_bh_enable();

        return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);

/*
 * Open request hash tables.
 */

static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
        u32 a, b, c;

        a = raddr->s6_addr32[0];
        b = raddr->s6_addr32[1];
        c = raddr->s6_addr32[2];

        a += JHASH_GOLDEN_RATIO;
        b += JHASH_GOLDEN_RATIO;
        c += rnd;
        __jhash_mix(a, b, c);

        a += raddr->s6_addr32[3];
        b += (u32) rport;
        __jhash_mix(a, b, c);

        return c & (TCP_SYNQ_HSIZE - 1);
}
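
/* tcp_v6_search_req() rescans that table: it hands back both the
 * matching request and, via *prevp, the link that points at it, so the
 * caller can unlink the entry without a second walk.
 */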

static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
                                              struct request_sock ***prevp,
                                              __u16 rport,
                                              struct in6_addr *raddr,
                                              struct in6_addr *laddr,
                                              int iif)
{
        struct tcp_listen_opt *lopt = tp->listen_opt;
        struct request_sock *req, **prev;

        for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
             (req = *prev) != NULL;
             prev = &req->dl_next) {
                const struct tcp6_request_sock *treq = tcp6_rsk(req);

                if (inet_rsk(req)->rmt_port == rport &&
                    req->rsk_ops->family == AF_INET6 &&
                    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
                    ipv6_addr_equal(&treq->loc_addr, laddr) &&
                    (!treq->iif || treq->iif == iif)) {
                        BUG_TRAP(req->sk == NULL);
                        *prevp = prev;
                        return req;
                }
        }

        return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
                                   struct in6_addr *saddr,
                                   struct in6_addr *daddr,
                                   unsigned long base)
{
        return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IPV6)) {
                return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
                                                    skb->nh.ipv6h->saddr.s6_addr32,
                                                    skb->h.th->dest,
                                                    skb->h.th->source);
        } else {
                return secure_tcp_sequence_number(skb->nh.iph->daddr,
                                                  skb->nh.iph->saddr,
                                                  skb->h.th->dest,
                                                  skb->h.th->source);
        }
}

static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
                                      struct tcp_tw_bucket **twp)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *daddr = &np->rcv_saddr;
        struct in6_addr *saddr = &np->daddr;
        int dif = sk->sk_bound_dev_if;
        u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
        int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
        struct tcp_ehash_bucket *head = &tcp_ehash[hash];
        struct sock *sk2;
        struct hlist_node *node;
        struct tcp_tw_bucket *tw;

        write_lock(&head->lock);

        /* Check TIME-WAIT sockets first. */
        sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
                tw = (struct tcp_tw_bucket *)sk2;

                if (*((__u32 *)&(tw->tw_dport))  == ports        &&
                    sk2->sk_family               == PF_INET6     &&
                    ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
                    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
                    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
                        struct tcp_sock *tp = tcp_sk(sk);

                        if (tw->tw_ts_recent_stamp &&
                            (!twp || (sysctl_tcp_tw_reuse &&
                                      xtime.tv_sec -
                                      tw->tw_ts_recent_stamp > 1))) {
                                /* See comment in tcp_ipv4.c */
                                tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
                                if (!tp->write_seq)
                                        tp->write_seq = 1;
                                tp->rx_opt.ts_recent = tw->tw_ts_recent;
                                tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
                                sock_hold(sk2);
                                goto unique;
                        } else
                                goto not_unique;
                }
        }
        tw = NULL;

        /* And established part... */
        sk_for_each(sk2, node, &head->chain) {
                if (TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
                        goto not_unique;
        }

unique:
        BUG_TRAP(sk_unhashed(sk));
        __sk_add_node(sk, &head->chain);
        sk->sk_hashent = hash;
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(&head->lock);

        if (twp) {
                *twp = tw;
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
                tcp_tw_deschedule(tw);
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

                tcp_tw_put(tw);
        }
        return 0;

not_unique:
        write_unlock(&head->lock);
        return -EADDRNOTAVAIL;
}
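
/* tcpv6_port_offset() below keys the ephemeral port search off the
 * (source, destination, dport) triple, so different peers probe the
 * port space starting at different, hard-to-predict offsets.
 */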

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);

        return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
                                           np->daddr.s6_addr32,
                                           inet->dport);
}

static int tcp_v6_hash_connect(struct sock *sk)
{
        unsigned short snum = inet_sk(sk)->num;
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;
        int ret;

        if (!snum) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int range = high - low;
                int i;
                int port;
                static u32 hint;
                u32 offset = hint + tcpv6_port_offset(sk);
                struct hlist_node *node;
                struct tcp_tw_bucket *tw = NULL;

                local_bh_disable();
                for (i = 1; i <= range; i++) {
                        port = low + (i + offset) % range;
                        head = &tcp_bhash[tcp_bhashfn(port)];
                        spin_lock(&head->lock);

                        /* Does not bother with rcv_saddr checks,
                         * because the established check is already
                         * unique enough.
                         */
                        tb_for_each(tb, node, &head->chain) {
                                if (tb->port == port) {
                                        BUG_TRAP(!hlist_empty(&tb->owners));
                                        if (tb->fastreuse >= 0)
                                                goto next_port;
                                        if (!__tcp_v6_check_established(sk,
                                                                        port,
                                                                        &tw))
                                                goto ok;
                                        goto next_port;
                                }
                        }

                        tb = tcp_bucket_create(head, port);
                        if (!tb) {
                                spin_unlock(&head->lock);
                                break;
                        }
                        tb->fastreuse = -1;
                        goto ok;

                next_port:
                        spin_unlock(&head->lock);
                }
                local_bh_enable();

                return -EADDRNOTAVAIL;

ok:
                hint += i;

                /* Head lock still held and bh's disabled */
                tcp_bind_hash(sk, tb, port);
                if (sk_unhashed(sk)) {
                        inet_sk(sk)->sport = htons(port);
                        __tcp_v6_hash(sk);
                }
                spin_unlock(&head->lock);

                if (tw) {
                        tcp_tw_deschedule(tw);
                        tcp_tw_put(tw);
                }

                ret = 0;
                goto out;
        }

        head = &tcp_bhash[tcp_bhashfn(snum)];
        tb = tcp_sk(sk)->bind_hash;
        spin_lock_bh(&head->lock);

        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
                __tcp_v6_hash(sk);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
                spin_unlock(&head->lock);
                /* No definite answer... Walk to established hash table */
                ret = __tcp_v6_check_established(sk, snum, NULL);
out:
                local_bh_enable();
                return ret;
        }
}
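
/* From the connect(2) side (a userspace sketch, not part of this file):
 * connecting an unbound socket arrives here with snum == 0, so the
 * ephemeral search and uniqueness check run per bucket, and the chosen
 * port is bound and the socket hashed while that bucket lock is still
 * held (see the "Head lock still held" comment above).
 *
 *      int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *      connect(fd, (struct sockaddr *)&dst6, sizeof(dst6));
 */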

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
        return IP6CB(skb)->iif;
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p = NULL, final;
        struct flowi fl;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl, 0, sizeof(fl));

        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                        ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr))
                usin->sin6_addr.s6_addr[15] = 0x1;

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (sk->sk_bound_dev_if &&
                            sk->sk_bound_dev_if != usin->sin6_scope_id)
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                tp->write_seq = 0;
        }

        ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
        np->flow_label = fl.fl6_flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type == IPV6_ADDR_MAPPED) {
                u32 exthdrlen = tp->ext_header_len;
                struct sockaddr_in sin;

                SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                tp->af_specific = &ipv6_mapped;
                sk->sk_backlog_rcv = tcp_v4_do_rcv;

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        tp->ext_header_len = exthdrlen;
                        tp->af_specific = &ipv6_specific;
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
                        goto failure;
                } else {
                        ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->saddr);
                        ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->rcv_saddr);
                }

                return err;
        }

        if (!ipv6_addr_any(&np->rcv_saddr))
                saddr = &np->rcv_saddr;

        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
        ipv6_addr_copy(&fl.fl6_src,
                       (saddr ? saddr : &np->saddr));
        fl.oif = sk->sk_bound_dev_if;
        fl.fl_ip_dport = usin->sin6_port;
        fl.fl_ip_sport = inet->sport;

        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
                ipv6_addr_copy(&final, &fl.fl6_dst);
                ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                final_p = &final;
        }

        err = ip6_dst_lookup(sk, &dst, &fl);
        if (err)
                goto failure;
        if (final_p)
                ipv6_addr_copy(&fl.fl6_dst, final_p);

        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                dst_release(dst);
                goto failure;
        }

        if (saddr == NULL) {
                saddr = &fl.fl6_src;
                ipv6_addr_copy(&np->rcv_saddr, saddr);
        }

        /* set the source address */
        ipv6_addr_copy(&np->saddr, saddr);
        inet->rcv_saddr = LOOPBACK4_IPV6;

        ip6_dst_store(sk, dst, NULL);
        sk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = tcp_v6_hash_connect(sk);
        if (err)
                goto late_failure;

        if (!tp->write_seq)
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             np->daddr.s6_addr32,
                                                             inet->sport,
                                                             inet->dport);

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        __sk_dst_reset(sk);
failure:
        inet->dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

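/* tcp_v6_err() is the ICMPv6 error handler for TCP: it maps the packet
 * quoted in the error back to a socket (or to a pending request_sock on
 * a listener), applies PMTU updates for ICMPV6_PKT_TOOBIG, and reports
 * other errors according to socket state.
 */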
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                int type, int code, int offset, __u32 info)
{
        struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
        struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct ipv6_pinfo *np;
        struct sock *sk;
        int err;
        struct tcp_sock *tp;
        __u32 seq;

        sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

        if (sk == NULL) {
                ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
                return;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                tcp_tw_put((struct tcp_tw_bucket *)sk);
                return;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        tp = tcp_sk(sk);
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = inet6_sk(sk);

        if (type == ICMPV6_PKT_TOOBIG) {
                struct dst_entry *dst = NULL;

                if (sock_owned_by_user(sk))
                        goto out;
                if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                        goto out;

                /* icmp should have updated the destination cache entry */
                dst = __sk_dst_check(sk, np->dst_cookie);

                if (dst == NULL) {
                        struct inet_sock *inet = inet_sk(sk);
                        struct flowi fl;

                        /* BUGGG_FUTURE: Again, it is not clear how
                           to handle rthdr case. Ignore this complexity
                           for now.
                         */
                        memset(&fl, 0, sizeof(fl));
                        fl.proto = IPPROTO_TCP;
                        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
                        ipv6_addr_copy(&fl.fl6_src, &np->saddr);
                        fl.oif = sk->sk_bound_dev_if;
                        fl.fl_ip_dport = inet->dport;
                        fl.fl_ip_sport = inet->sport;

                        if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                } else
                        dst_hold(dst);

                if (tp->pmtu_cookie > dst_mtu(dst)) {
                        tcp_sync_mss(sk, dst_mtu(dst));
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
                goto out;
        }

        icmpv6_err_convert(type, code, &err);

        /* Might be for a request_sock */
        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
                                        &hdr->saddr, tcp_v6_iif(skb));
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                BUG_TRAP(req->sk == NULL);

                if (seq != tcp_rsk(req)->snt_isn) {
                        NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                tcp_synq_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, if SYNs are crossed. --ANK */
                if (!sock_owned_by_user(sk)) {
                        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
                        sk->sk_err = err;
                        sk->sk_error_report(sk);        /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}

static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
                              struct dst_entry *dst)
{
        struct tcp6_request_sock *treq = tcp6_rsk(req);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        struct ipv6_txoptions *opt = NULL;
        struct in6_addr *final_p = NULL, final;
        struct flowi fl;
        int err = -1;

        memset(&fl, 0, sizeof(fl));
        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
        ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
        fl.fl6_flowlabel = 0;
        fl.oif = treq->iif;
        fl.fl_ip_dport = inet_rsk(req)->rmt_port;
        fl.fl_ip_sport = inet_sk(sk)->sport;

        if (dst == NULL) {
                opt = np->opt;
                if (opt == NULL &&
                    np->rxopt.bits.srcrt == 2 &&
                    treq->pktopts) {
                        struct sk_buff *pktopts = treq->pktopts;
                        struct inet6_skb_parm *rxopt = IP6CB(pktopts);
                        if (rxopt->srcrt)
                                opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
                }

                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }

                err = ip6_dst_lookup(sk, &dst, &fl);
                if (err)
                        goto done;
                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);
                if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto done;
        }

        skb = tcp_make_synack(sk, dst, req);
        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v6_check(th, skb->len,
                                         &treq->loc_addr, &treq->rmt_addr,
                                         csum_partial((char *)th, skb->len, skb->csum));

                ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                err = ip6_xmit(sk, skb, &fl, opt, 0);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

done:
        dst_release(dst);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        if (tcp6_rsk(req)->pktopts)
                kfree_skb(tcp6_rsk(req)->pktopts);
}

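/* The per-family request_sock_ops table: after the open_request ->
 * request_sock rename in this commit, SYN-ACK retransmission, ACK, RST
 * and destruction of pending requests are all routed through here.
 */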
static struct request_sock_ops tcp6_request_sock_ops = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_v6_send_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (np->rxopt.all) {
                if ((opt->hop && np->rxopt.bits.hopopts) ||
                    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) &&
                     np->rxopt.bits.rxflow) ||
                    (opt->srcrt && np->rxopt.bits.srcrt) ||
                    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
                        return 1;
        }
        return 0;
}

static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                              struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);

        if (skb->ip_summed == CHECKSUM_HW) {
                th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
                skb->csum = offsetof(struct tcphdr, check);
        } else {
                th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
                                            csum_partial((char *)th, th->doff << 2,
                                                         skb->csum));
        }
}

static void tcp_v6_send_reset(struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;

        if (th->rst)
                return;

        if (!ipv6_unicast_destination(skb))
                return;

        /*
         * We need to grab some memory, and put together an RST,
         * and then put it into the queue to be sent.
         */

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

        t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = sizeof(*t1)/4;
        t1->rst = 1;

        if (th->ack) {
                t1->seq = th->ack_seq;
        } else {
                t1->ack = 1;
                t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
                                    + skb->len - (th->doff << 2));
        }

        buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    sizeof(*t1), IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        /* sk = NULL, but it is safe for now. RST socket required. */
        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

                if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
                        dst_release(buff->dst);
                        return;
                }

                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

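/* tcp_v6_send_ack() mirrors the RST builder above, but echoes the given
 * seq/ack/window values and, when ts is set, appends a timestamp option;
 * this is what the TIME-WAIT and request_sock ACKs below need.
 */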
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;
        int tot_len = sizeof(struct tcphdr);

        if (ts)
                tot_len += 3*4;

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = (struct tcphdr *) skb_push(buff, tot_len);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len/4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = 1;
        t1->window = htons(win);

        if (ts) {
                u32 *ptr = (u32 *)(t1 + 1);
                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                               (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tcp_time_stamp);
                *ptr = htonl(ts);
        }

        buff->csum = csum_partial((char *)t1, tot_len, 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    tot_len, IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
                if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
                        dst_release(buff->dst);
                        return;
                }
                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

        tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
                        tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

        tcp_tw_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
        tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
        struct request_sock *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_sock *tp = tcp_sk(sk);
        struct sock *nsk;

        /* Find possible connection requests. */
        req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
                                &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
        if (req)
                return tcp_check_req(sk, skb, req, prev);

        nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
                                          th->source,
                                          &skb->nh.ipv6h->daddr,
                                          ntohs(th->dest),
                                          tcp_v6_iif(skb));

        if (nsk) {
                if (nsk->sk_state != TCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
                tcp_tw_put((struct tcp_tw_bucket *)nsk);
                return NULL;
        }

#if 0 /*def CONFIG_SYN_COOKIES*/
        if (!th->rst && !th->syn && th->ack)
                sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
        return sk;
}

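/* tcp_v6_synq_add() chains a fresh request into the listener's SYN table
 * under syn_wait_lock, so concurrent walkers such as tcp_v6_search_req()
 * always see a consistent chain.
 */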
static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_listen_opt *lopt = tp->listen_opt;
        u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

        req->sk = NULL;
        req->expires = jiffies + TCP_TIMEOUT_INIT;
        req->retrans = 0;
        req->dl_next = lopt->syn_table[h];

        write_lock(&tp->syn_wait_lock);
        lopt->syn_table[h] = req;
        write_unlock(&tp->syn_wait_lock);

        tcp_synq_added(sk);
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp6_request_sock *treq;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_options_received tmp_opt;
        struct tcp_sock *tp = tcp_sk(sk);
        struct request_sock *req = NULL;
        __u32 isn = TCP_SKB_CB(skb)->when;

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        /*
         *      There are no SYN attacks on IPv6, yet...
         */
        if (tcp_synq_is_full(sk) && !isn) {
                if (net_ratelimit())
                        printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                goto drop;
        }

        if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
                goto drop;

        req = reqsk_alloc(&tcp6_request_sock_ops);
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tmp_opt.user_mss = tp->rx_opt.user_mss;

        tcp_parse_options(skb, &tmp_opt, 0);

        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb);

        treq = tcp6_rsk(req);
        ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
        TCP_ECN_create_request(req, skb->h.th);
        treq->pktopts = NULL;
        if (ipv6_opt_accepted(sk, skb) ||
            np->rxopt.bits.rxinfo ||
            np->rxopt.bits.rxhlim) {
                atomic_inc(&skb->users);
                treq->pktopts = skb;
        }
        treq->iif = sk->sk_bound_dev_if;

        /* So that link locals have meaning */
        if (!sk->sk_bound_dev_if &&
            ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
                treq->iif = tcp_v6_iif(skb);

        if (isn == 0)
                isn = tcp_v6_init_sequence(sk, skb);

        tcp_rsk(req)->snt_isn = isn;

        if (tcp_v6_send_synack(sk, req, NULL))
                goto drop;

        tcp_v6_synq_add(sk, req);

        return 0;

drop:
        if (req)
                reqsk_free(req);

        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        return 0; /* don't send reset */
}

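/* tcp_v6_syn_recv_sock() turns an accepted request into a full child
 * socket; the ETH_P_IP branch below handles v4-mapped connections by
 * delegating to tcp_v4_syn_recv_sock() and then re-dressing the result
 * as an IPv6 socket.
 */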
1367 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1368                                           struct request_sock *req,
1369                                           struct dst_entry *dst)
1370 {
1371         struct tcp6_request_sock *treq = tcp6_rsk(req);
1372         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1373         struct tcp6_sock *newtcp6sk;
1374         struct inet_sock *newinet;
1375         struct tcp_sock *newtp;
1376         struct sock *newsk;
1377         struct ipv6_txoptions *opt;
1378
1379         if (skb->protocol == htons(ETH_P_IP)) {
1380                 /*
1381                  *      v6 mapped
1382                  */
1383
1384                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1385
1386                 if (newsk == NULL) 
1387                         return NULL;
1388
1389                 newtcp6sk = (struct tcp6_sock *)newsk;
1390                 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1391
1392                 newinet = inet_sk(newsk);
1393                 newnp = inet6_sk(newsk);
1394                 newtp = tcp_sk(newsk);
1395
1396                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1397
1398                 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1399                               newinet->daddr);
1400
1401                 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1402                               newinet->saddr);
1403
1404                 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1405
1406                 newtp->af_specific = &ipv6_mapped;
1407                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1408                 newnp->pktoptions  = NULL;
1409                 newnp->opt         = NULL;
1410                 newnp->mcast_oif   = tcp_v6_iif(skb);
1411                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1412
1413                 /* Charge newly allocated IPv6 socket. Though it is mapped,
1414                  * it is IPv6 yet.
1415                  */
1416 #ifdef INET_REFCNT_DEBUG
1417                 atomic_inc(&inet6_sock_nr);
1418 #endif
1419
1420                 /* It is tricky place. Until this moment IPv4 tcp
1421                    worked with IPv6 af_tcp.af_specific.
1422                    Sync it now.
1423                  */
1424                 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1425
1426                 return newsk;
1427         }
1428
1429         opt = np->opt;
1430
1431         if (sk_acceptq_is_full(sk))
1432                 goto out_overflow;
1433
1434         if (np->rxopt.bits.srcrt == 2 &&
1435             opt == NULL && treq->pktopts) {
1436                 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1437                 if (rxopt->srcrt)
1438                         opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1439         }
1440
1441         if (dst == NULL) {
1442                 struct in6_addr *final_p = NULL, final;
1443                 struct flowi fl;
1444
1445                 memset(&fl, 0, sizeof(fl));
1446                 fl.proto = IPPROTO_TCP;
1447                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1448                 if (opt && opt->srcrt) {
1449                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1450                         ipv6_addr_copy(&final, &fl.fl6_dst);
1451                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1452                         final_p = &final;
1453                 }
1454                 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1455                 fl.oif = sk->sk_bound_dev_if;
1456                 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1457                 fl.fl_ip_sport = inet_sk(sk)->sport;
1458
1459                 if (ip6_dst_lookup(sk, &dst, &fl))
1460                         goto out;
1461
1462                 if (final_p)
1463                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1464
1465                 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1466                         goto out;
1467         } 
1468
1469         newsk = tcp_create_openreq_child(sk, req, skb);
1470         if (newsk == NULL)
1471                 goto out;
1472
1473         /* Charge newly allocated IPv6 socket */
1474 #ifdef INET_REFCNT_DEBUG
1475         atomic_inc(&inet6_sock_nr);
1476 #endif
1477
1478         ip6_dst_store(newsk, dst, NULL);
1479         newsk->sk_route_caps = dst->dev->features &
1480                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1481
1482         newtcp6sk = (struct tcp6_sock *)newsk;
1483         inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1484
1485         newtp = tcp_sk(newsk);
1486         newinet = inet_sk(newsk);
1487         newnp = inet6_sk(newsk);
1488
1489         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1490
1491         ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1492         ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1493         ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1494         newsk->sk_bound_dev_if = treq->iif;
1495
1496         /* Now IPv6 options... 
1497
1498            First: no IPv4 options.
1499          */
1500         newinet->opt = NULL;
1501
1502         /* Clone RX bits */
1503         newnp->rxopt.all = np->rxopt.all;
1504
1505         /* Clone pktoptions received with SYN */
1506         newnp->pktoptions = NULL;
1507         if (treq->pktopts != NULL) {
1508                 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1509                 kfree_skb(treq->pktopts);
1510                 treq->pktopts = NULL;
1511                 if (newnp->pktoptions)
1512                         skb_set_owner_r(newnp->pktoptions, newsk);
1513         }
1514         newnp->opt        = NULL;
1515         newnp->mcast_oif  = tcp_v6_iif(skb);
1516         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1517
1518         /* Clone native IPv6 options from the listening socket (if any)
1519
1520            Yes, keeping a reference count would be much cleverer,
1521            but we do one more thing here: reattach the optmem
1522            to newsk.
1523          */
1524         if (opt) {
1525                 newnp->opt = ipv6_dup_options(newsk, opt);
1526                 if (opt != np->opt)
1527                         sock_kfree_s(sk, opt, opt->tot_len);
1528         }
1529
1530         newtp->ext_header_len = 0;
1531         if (newnp->opt)
1532                 newtp->ext_header_len = newnp->opt->opt_nflen +
1533                                         newnp->opt->opt_flen;
1534
1535         tcp_sync_mss(newsk, dst_mtu(dst));
1536         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1537         tcp_initialize_rcv_mss(newsk);
1538
1539         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1540
1541         __tcp_v6_hash(newsk);
1542         tcp_inherit_port(sk, newsk);
1543
1544         return newsk;
1545
1546 out_overflow:
1547         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1548 out:
1549         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1550         if (opt && opt != np->opt)
1551                 sock_kfree_s(sk, opt, opt->tot_len);
1552         dst_release(dst);
1553         return NULL;
1554 }
1555
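/* Initialize/verify the TCP checksum of an incoming segment. Packets already
 * checksummed in hardware are verified against the pseudo-header here; short
 * packets are fully verified in software right away, while longer ones only
 * get the pseudo-header seed stored in skb->csum so the checksum can be
 * completed later (e.g. while copying to user space).
 */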
1556 static int tcp_v6_checksum_init(struct sk_buff *skb)
1557 {
1558         if (skb->ip_summed == CHECKSUM_HW) {
1559                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1560                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1561                                   &skb->nh.ipv6h->daddr,skb->csum))
1562                         return 0;
1563                 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1564         }
1565         if (skb->len <= 76) {
1566                 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1567                                  &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1568                         return -1;
1569                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1570         } else {
1571                 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1572                                           &skb->nh.ipv6h->daddr,0);
1573         }
1574         return 0;
1575 }
1576
1577 /* The socket must have its spinlock held when we get
1578  * here.
1579  *
1580  * We have a potential double-lock case here, so even when
1581  * doing backlog processing we use the BH locking scheme.
1582  * This is because we cannot sleep with the original spinlock
1583  * held.
1584  */
1585 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1586 {
1587         struct ipv6_pinfo *np = inet6_sk(sk);
1588         struct tcp_sock *tp;
1589         struct sk_buff *opt_skb = NULL;
1590
1591         /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1592            goes to the IPv4 receive handler and is backlogged.
1593            From the backlog it always comes back here. Kerboom...
1594            Fortunately, tcp_rcv_established and rcv_established
1595            handle it correctly, but that is not the case with
1596            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1597          */
1598
1599         if (skb->protocol == htons(ETH_P_IP))
1600                 return tcp_v4_do_rcv(sk, skb);
1601
1602         if (sk_filter(sk, skb, 0))
1603                 goto discard;
1604
1605         /*
1606          *      socket locking is here for SMP purposes as backlog rcv
1607          *      is currently called with bh processing disabled.
1608          */
1609
1610         /* Do Stevens' IPV6_PKTOPTIONS.
1611
1612            Yes, guys, this is the only place in our code where we
1613            can make it not affect IPv4.
1614            The rest of the code is protocol independent,
1615            and I do not like the idea of uglifying IPv4.
1616
1617            Actually, the whole idea behind IPV6_PKTOPTIONS
1618            does not look very well thought out. For now we latch
1619            the options received in the last packet enqueued
1620            by tcp. Feel free to propose a better solution.
1621                                               --ANK (980728)
1622          */
1623         if (np->rxopt.all)
1624                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1625
1626         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1627                 TCP_CHECK_TIMER(sk);
1628                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1629                         goto reset;
1630                 TCP_CHECK_TIMER(sk);
1631                 if (opt_skb)
1632                         goto ipv6_pktoptions;
1633                 return 0;
1634         }
1635
1636         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1637                 goto csum_err;
1638
1639         if (sk->sk_state == TCP_LISTEN) { 
1640                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1641                 if (!nsk)
1642                         goto discard;
1643
1644                 /*
1645                  * Queue it on the new socket if the new socket is active,
1646                  * otherwise we just short-circuit this and continue with
1647                  * the new socket.
1648                  */
1649                 if(nsk != sk) {
1650                         if (tcp_child_process(sk, nsk, skb))
1651                                 goto reset;
1652                         if (opt_skb)
1653                                 __kfree_skb(opt_skb);
1654                         return 0;
1655                 }
1656         }
1657
1658         TCP_CHECK_TIMER(sk);
1659         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1660                 goto reset;
1661         TCP_CHECK_TIMER(sk);
1662         if (opt_skb)
1663                 goto ipv6_pktoptions;
1664         return 0;
1665
1666 reset:
1667         tcp_v6_send_reset(skb);
1668 discard:
1669         if (opt_skb)
1670                 __kfree_skb(opt_skb);
1671         kfree_skb(skb);
1672         return 0;
1673 csum_err:
1674         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1675         goto discard;
1676
1677
1678 ipv6_pktoptions:
1679         /* What is this, you ask? We get here only when:
1680
1681            1. the skb was enqueued by tcp,
1682            2. the skb was added to the tail of the read queue, not out of order,
1683            3. the socket is not in a passive state, and
1684            4. it really contains options that the user wants to receive.
1685          */
1686         tp = tcp_sk(sk);
1687         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1688             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1689                 if (np->rxopt.bits.rxinfo)
1690                         np->mcast_oif = tcp_v6_iif(opt_skb);
1691                 if (np->rxopt.bits.rxhlim)
1692                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1693                 if (ipv6_opt_accepted(sk, opt_skb)) {
1694                         skb_set_owner_r(opt_skb, sk);
1695                         opt_skb = xchg(&np->pktoptions, opt_skb);
1696                 } else {
1697                         __kfree_skb(opt_skb);
1698                         opt_skb = xchg(&np->pktoptions, NULL);
1699                 }
1700         }
1701
1702         if (opt_skb)
1703                 kfree_skb(opt_skb);
1704         return 0;
1705 }
1706
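/* Main IPv6 receive entry point, called from the inet6 protocol handler.
 * It validates the TCP header and checksum, fills in the TCP control block,
 * looks up the owning socket, and then either processes the segment
 * directly, prequeues it, or appends it to the socket backlog when the
 * socket is locked by user context.
 */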
1707 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1708 {
1709         struct sk_buff *skb = *pskb;
1710         struct tcphdr *th;      
1711         struct sock *sk;
1712         int ret;
1713
1714         if (skb->pkt_type != PACKET_HOST)
1715                 goto discard_it;
1716
1717         /*
1718          *      Count it even if it's bad.
1719          */
1720         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1721
1722         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1723                 goto discard_it;
1724
1725         th = skb->h.th;
1726
1727         if (th->doff < sizeof(struct tcphdr)/4)
1728                 goto bad_packet;
1729         if (!pskb_may_pull(skb, th->doff*4))
1730                 goto discard_it;
1731
1732         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1733              tcp_v6_checksum_init(skb) < 0))
1734                 goto bad_packet;
1735
1736         th = skb->h.th;
1737         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1738         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1739                                     skb->len - th->doff*4);
1740         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1741         TCP_SKB_CB(skb)->when = 0;
1742         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1743         TCP_SKB_CB(skb)->sacked = 0;
1744
1745         sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1746                              &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1747
1748         if (!sk)
1749                 goto no_tcp_socket;
1750
1751 process:
1752         if (sk->sk_state == TCP_TIME_WAIT)
1753                 goto do_time_wait;
1754
1755         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1756                 goto discard_and_relse;
1757
1758         if (sk_filter(sk, skb, 0))
1759                 goto discard_and_relse;
1760
1761         skb->dev = NULL;
1762
1763         bh_lock_sock(sk);
1764         ret = 0;
1765         if (!sock_owned_by_user(sk)) {
1766                 if (!tcp_prequeue(sk, skb))
1767                         ret = tcp_v6_do_rcv(sk, skb);
1768         } else
1769                 sk_add_backlog(sk, skb);
1770         bh_unlock_sock(sk);
1771
1772         sock_put(sk);
1773         return ret ? -1 : 0;
1774
1775 no_tcp_socket:
1776         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1777                 goto discard_it;
1778
1779         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1780 bad_packet:
1781                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1782         } else {
1783                 tcp_v6_send_reset(skb);
1784         }
1785
1786 discard_it:
1787
1788         /*
1789          *      Discard frame
1790          */
1791
1792         kfree_skb(skb);
1793         return 0;
1794
1795 discard_and_relse:
1796         sock_put(sk);
1797         goto discard_it;
1798
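        /* TIME_WAIT handling: depending on what tcp_timewait_state_process()
         * decides, a new SYN may be redirected to a matching listener
         * (recycling the old connection), or the segment is ACKed, reset,
         * or silently consumed.
         */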
1799 do_time_wait:
1800         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1802                 goto discard_it;
1803         }
1804
1805         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1806                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1807                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1808                 goto discard_it;
1809         }
1810
1811         switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1812                                           skb, th, skb->len)) {
1813         case TCP_TW_SYN:
1814         {
1815                 struct sock *sk2;
1816
1817                 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1818                 if (sk2 != NULL) {
1819                         tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1820                         tcp_tw_put((struct tcp_tw_bucket *)sk);
1821                         sk = sk2;
1822                         goto process;
1823                 }
1824                 /* Fall through to ACK */
1825         }
1826         case TCP_TW_ACK:
1827                 tcp_v6_timewait_ack(sk, skb);
1828                 break;
1829         case TCP_TW_RST:
1830                 goto no_tcp_socket;
1831         case TCP_TW_SUCCESS:;
1832         }
1833         goto discard_it;
1834 }
1835
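/* Re-validate the socket's cached route. If the cached dst has expired, do a
 * fresh flow lookup (following a type 0 routing header if one is set) and
 * store the new route and device features on the socket.
 */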
1836 static int tcp_v6_rebuild_header(struct sock *sk)
1837 {
1838         int err;
1839         struct dst_entry *dst;
1840         struct ipv6_pinfo *np = inet6_sk(sk);
1841
1842         dst = __sk_dst_check(sk, np->dst_cookie);
1843
1844         if (dst == NULL) {
1845                 struct inet_sock *inet = inet_sk(sk);
1846                 struct in6_addr *final_p = NULL, final;
1847                 struct flowi fl;
1848
1849                 memset(&fl, 0, sizeof(fl));
1850                 fl.proto = IPPROTO_TCP;
1851                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1852                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1853                 fl.fl6_flowlabel = np->flow_label;
1854                 fl.oif = sk->sk_bound_dev_if;
1855                 fl.fl_ip_dport = inet->dport;
1856                 fl.fl_ip_sport = inet->sport;
1857
1858                 if (np->opt && np->opt->srcrt) {
1859                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1860                         ipv6_addr_copy(&final, &fl.fl6_dst);
1861                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1862                         final_p = &final;
1863                 }
1864
1865                 err = ip6_dst_lookup(sk, &dst, &fl);
1866                 if (err) {
1867                         sk->sk_route_caps = 0;
1868                         return err;
1869                 }
1870                 if (final_p)
1871                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1872
1873                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1874                         sk->sk_err_soft = -err;
1875                         dst_release(dst);
1876                         return err;
1877                 }
1878
1879                 ip6_dst_store(sk, dst, NULL);
1880                 sk->sk_route_caps = dst->dev->features &
1881                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1882         }
1883
1884         return 0;
1885 }
1886
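/* Transmit one TCP segment over IPv6: build the flow from the socket state,
 * look up (or reuse) the cached route, attach it to the skb and hand the
 * packet to ip6_xmit() together with any IPv6 options.
 */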
1887 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1888 {
1889         struct sock *sk = skb->sk;
1890         struct inet_sock *inet = inet_sk(sk);
1891         struct ipv6_pinfo *np = inet6_sk(sk);
1892         struct flowi fl;
1893         struct dst_entry *dst;
1894         struct in6_addr *final_p = NULL, final;
1895
1896         memset(&fl, 0, sizeof(fl));
1897         fl.proto = IPPROTO_TCP;
1898         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1899         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1900         fl.fl6_flowlabel = np->flow_label;
1901         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1902         fl.oif = sk->sk_bound_dev_if;
1903         fl.fl_ip_sport = inet->sport;
1904         fl.fl_ip_dport = inet->dport;
1905
1906         if (np->opt && np->opt->srcrt) {
1907                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1908                 ipv6_addr_copy(&final, &fl.fl6_dst);
1909                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1910                 final_p = &final;
1911         }
1912
1913         dst = __sk_dst_check(sk, np->dst_cookie);
1914
1915         if (dst == NULL) {
1916                 int err = ip6_dst_lookup(sk, &dst, &fl);
1917
1918                 if (err) {
1919                         sk->sk_err_soft = -err;
1920                         return err;
1921                 }
1922
1923                 if (final_p)
1924                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1925
1926                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1927                         sk->sk_route_caps = 0;
1928                         dst_release(dst);
1929                         return err;
1930                 }
1931
1932                 ip6_dst_store(sk, dst, NULL);
1933                 sk->sk_route_caps = dst->dev->features &
1934                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1935         }
1936
1937         skb->dst = dst_clone(dst);
1938
1939         /* Restore the final destination after routing is done */
1940         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1941
1942         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1943 }
1944
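/* Report the peer's address as a sockaddr_in6; a scope id is filled in only
 * for link-local peers on a bound device.
 */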
1945 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1946 {
1947         struct ipv6_pinfo *np = inet6_sk(sk);
1948         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1949
1950         sin6->sin6_family = AF_INET6;
1951         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1952         sin6->sin6_port = inet_sk(sk)->dport;
1953         /* We do not store received flowlabel for TCP */
1954         sin6->sin6_flowinfo = 0;
1955         sin6->sin6_scope_id = 0;
1956         if (sk->sk_bound_dev_if &&
1957             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1958                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1959 }
1960
1961 static int tcp_v6_remember_stamp(struct sock *sk)
1962 {
1963         /* Alas, not yet... */
1964         return 0;
1965 }
1966
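/* AF-specific operation tables: ipv6_specific serves native IPv6
 * connections, while ipv6_mapped (further below) routes v4-mapped sockets
 * through the IPv4 transmit and header-rebuild paths.
 */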
1967 static struct tcp_func ipv6_specific = {
1968         .queue_xmit     =       tcp_v6_xmit,
1969         .send_check     =       tcp_v6_send_check,
1970         .rebuild_header =       tcp_v6_rebuild_header,
1971         .conn_request   =       tcp_v6_conn_request,
1972         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1973         .remember_stamp =       tcp_v6_remember_stamp,
1974         .net_header_len =       sizeof(struct ipv6hdr),
1975
1976         .setsockopt     =       ipv6_setsockopt,
1977         .getsockopt     =       ipv6_getsockopt,
1978         .addr2sockaddr  =       v6_addr2sockaddr,
1979         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1980 };
1981
1982 /*
1983  *      TCP over IPv4 via INET6 API
1984  */
1985
1986 static struct tcp_func ipv6_mapped = {
1987         .queue_xmit     =       ip_queue_xmit,
1988         .send_check     =       tcp_v4_send_check,
1989         .rebuild_header =       tcp_v4_rebuild_header,
1990         .conn_request   =       tcp_v6_conn_request,
1991         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1992         .remember_stamp =       tcp_v4_remember_stamp,
1993         .net_header_len =       sizeof(struct iphdr),
1994
1995         .setsockopt     =       ipv6_setsockopt,
1996         .getsockopt     =       ipv6_getsockopt,
1997         .addr2sockaddr  =       v6_addr2sockaddr,
1998         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1999 };
2000
2001
2002
2003 /* NOTE: A lot of things are set to zero explicitly by the call to
2004  *       sk_alloc(), so they need not be done here.
2005  */
2006 static int tcp_v6_init_sock(struct sock *sk)
2007 {
2008         struct tcp_sock *tp = tcp_sk(sk);
2009
2010         skb_queue_head_init(&tp->out_of_order_queue);
2011         tcp_init_xmit_timers(sk);
2012         tcp_prequeue_init(tp);
2013
2014         tp->rto  = TCP_TIMEOUT_INIT;
2015         tp->mdev = TCP_TIMEOUT_INIT;
2016
2017         /* So many TCP implementations out there (incorrectly) count the
2018          * initial SYN frame in their delayed-ACK and congestion control
2019          * algorithms that we must have the following bandaid to talk
2020          * efficiently to them.  -DaveM
2021          */
2022         tp->snd_cwnd = 2;
2023
2024         /* See draft-stevens-tcpca-spec-01 for discussion of the
2025          * initialization of these values.
2026          */
2027         tp->snd_ssthresh = 0x7fffffff;
2028         tp->snd_cwnd_clamp = ~0;
2029         tp->mss_cache_std = tp->mss_cache = 536;
2030
2031         tp->reordering = sysctl_tcp_reordering;
2032
2033         sk->sk_state = TCP_CLOSE;
2034
2035         tp->af_specific = &ipv6_specific;
2036
2037         sk->sk_write_space = sk_stream_write_space;
2038         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2039
2040         sk->sk_sndbuf = sysctl_tcp_wmem[1];
2041         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2042
2043         atomic_inc(&tcp_sockets_allocated);
2044
2045         return 0;
2046 }
2047
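/* Tear down the protocol-independent TCP state via tcp_v4_destroy_sock(),
 * then release the IPv6-specific state (options, cached route).
 */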
2048 static int tcp_v6_destroy_sock(struct sock *sk)
2049 {
2050         extern int tcp_v4_destroy_sock(struct sock *sk);
2051
2052         tcp_v4_destroy_sock(sk);
2053         return inet6_destroy_sock(sk);
2054 }
2055
2056 /* Proc filesystem TCPv6 sock list dumping. */
2057 static void get_openreq6(struct seq_file *seq, 
2058                          struct sock *sk, struct request_sock *req, int i, int uid)
2059 {
2060         struct in6_addr *dest, *src;
2061         int ttd = req->expires - jiffies;
2062
2063         if (ttd < 0)
2064                 ttd = 0;
2065
2066         src = &tcp6_rsk(req)->loc_addr;
2067         dest = &tcp6_rsk(req)->rmt_addr;
2068         seq_printf(seq,
2069                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2070                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2071                    i,
2072                    src->s6_addr32[0], src->s6_addr32[1],
2073                    src->s6_addr32[2], src->s6_addr32[3],
2074                    ntohs(inet_sk(sk)->sport),
2075                    dest->s6_addr32[0], dest->s6_addr32[1],
2076                    dest->s6_addr32[2], dest->s6_addr32[3],
2077                    ntohs(inet_rsk(req)->rmt_port),
2078                    TCP_SYN_RECV,
2079                    0,0, /* could print option size, but that is af dependent. */
2080                    1,   /* timers active (only the expire timer) */  
2081                    jiffies_to_clock_t(ttd), 
2082                    req->retrans,
2083                    uid,
2084                    0,  /* non standard timer */  
2085                    0, /* open_requests have no inode */
2086                    0, req);
2087 }
2088
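/* Dump one full socket in /proc/net/tcp6 format. The timer_active field
 * encodes which timer is pending: 1 retransmit, 2 keepalive/sk_timer,
 * 3 time-wait (see get_timewait6_sock()), 4 zero window probe, 0 none.
 */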
2089 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2090 {
2091         struct in6_addr *dest, *src;
2092         __u16 destp, srcp;
2093         int timer_active;
2094         unsigned long timer_expires;
2095         struct inet_sock *inet = inet_sk(sp);
2096         struct tcp_sock *tp = tcp_sk(sp);
2097         struct ipv6_pinfo *np = inet6_sk(sp);
2098
2099         dest  = &np->daddr;
2100         src   = &np->rcv_saddr;
2101         destp = ntohs(inet->dport);
2102         srcp  = ntohs(inet->sport);
2103         if (tp->pending == TCP_TIME_RETRANS) {
2104                 timer_active    = 1;
2105                 timer_expires   = tp->timeout;
2106         } else if (tp->pending == TCP_TIME_PROBE0) {
2107                 timer_active    = 4;
2108                 timer_expires   = tp->timeout;
2109         } else if (timer_pending(&sp->sk_timer)) {
2110                 timer_active    = 2;
2111                 timer_expires   = sp->sk_timer.expires;
2112         } else {
2113                 timer_active    = 0;
2114                 timer_expires = jiffies;
2115         }
2116
2117         seq_printf(seq,
2118                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2119                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2120                    i,
2121                    src->s6_addr32[0], src->s6_addr32[1],
2122                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2123                    dest->s6_addr32[0], dest->s6_addr32[1],
2124                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2125                    sp->sk_state, 
2126                    tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2127                    timer_active,
2128                    jiffies_to_clock_t(timer_expires - jiffies),
2129                    tp->retransmits,
2130                    sock_i_uid(sp),
2131                    tp->probes_out,
2132                    sock_i_ino(sp),
2133                    atomic_read(&sp->sk_refcnt), sp,
2134                    tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2135                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2136                    );
2137 }
2138
2139 static void get_timewait6_sock(struct seq_file *seq, 
2140                                struct tcp_tw_bucket *tw, int i)
2141 {
2142         struct in6_addr *dest, *src;
2143         __u16 destp, srcp;
2144         int ttd = tw->tw_ttd - jiffies;
2145
2146         if (ttd < 0)
2147                 ttd = 0;
2148
2149         dest  = &tw->tw_v6_daddr;
2150         src   = &tw->tw_v6_rcv_saddr;
2151         destp = ntohs(tw->tw_dport);
2152         srcp  = ntohs(tw->tw_sport);
2153
2154         seq_printf(seq,
2155                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2156                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2157                    i,
2158                    src->s6_addr32[0], src->s6_addr32[1],
2159                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2160                    dest->s6_addr32[0], dest->s6_addr32[1],
2161                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2162                    tw->tw_substate, 0, 0,
2163                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2164                    atomic_read(&tw->tw_refcnt), tw);
2165 }
2166
2167 #ifdef CONFIG_PROC_FS
2168 static int tcp6_seq_show(struct seq_file *seq, void *v)
2169 {
2170         struct tcp_iter_state *st;
2171
2172         if (v == SEQ_START_TOKEN) {
2173                 seq_puts(seq,
2174                          "  sl  "
2175                          "local_address                         "
2176                          "remote_address                        "
2177                          "st tx_queue rx_queue tr tm->when retrnsmt"
2178                          "   uid  timeout inode\n");
2179                 goto out;
2180         }
2181         st = seq->private;
2182
2183         switch (st->state) {
2184         case TCP_SEQ_STATE_LISTENING:
2185         case TCP_SEQ_STATE_ESTABLISHED:
2186                 get_tcp6_sock(seq, v, st->num);
2187                 break;
2188         case TCP_SEQ_STATE_OPENREQ:
2189                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2190                 break;
2191         case TCP_SEQ_STATE_TIME_WAIT:
2192                 get_timewait6_sock(seq, v, st->num);
2193                 break;
2194         }
2195 out:
2196         return 0;
2197 }
2198
2199 static struct file_operations tcp6_seq_fops;
2200 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2201         .owner          = THIS_MODULE,
2202         .name           = "tcp6",
2203         .family         = AF_INET6,
2204         .seq_show       = tcp6_seq_show,
2205         .seq_fops       = &tcp6_seq_fops,
2206 };
2207
2208 int __init tcp6_proc_init(void)
2209 {
2210         return tcp_proc_register(&tcp6_seq_afinfo);
2211 }
2212
2213 void tcp6_proc_exit(void)
2214 {
2215         tcp_proc_unregister(&tcp6_seq_afinfo);
2216 }
2217 #endif
2218
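/* The TCPv6 proto descriptor: generic TCP entry points combined with the
 * IPv6-specific init, hashing, port allocation and backlog receive hooks.
 */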
2219 struct proto tcpv6_prot = {
2220         .name                   = "TCPv6",
2221         .owner                  = THIS_MODULE,
2222         .close                  = tcp_close,
2223         .connect                = tcp_v6_connect,
2224         .disconnect             = tcp_disconnect,
2225         .accept                 = tcp_accept,
2226         .ioctl                  = tcp_ioctl,
2227         .init                   = tcp_v6_init_sock,
2228         .destroy                = tcp_v6_destroy_sock,
2229         .shutdown               = tcp_shutdown,
2230         .setsockopt             = tcp_setsockopt,
2231         .getsockopt             = tcp_getsockopt,
2232         .sendmsg                = tcp_sendmsg,
2233         .recvmsg                = tcp_recvmsg,
2234         .backlog_rcv            = tcp_v6_do_rcv,
2235         .hash                   = tcp_v6_hash,
2236         .unhash                 = tcp_unhash,
2237         .get_port               = tcp_v6_get_port,
2238         .enter_memory_pressure  = tcp_enter_memory_pressure,
2239         .sockets_allocated      = &tcp_sockets_allocated,
2240         .memory_allocated       = &tcp_memory_allocated,
2241         .memory_pressure        = &tcp_memory_pressure,
2242         .sysctl_mem             = sysctl_tcp_mem,
2243         .sysctl_wmem            = sysctl_tcp_wmem,
2244         .sysctl_rmem            = sysctl_tcp_rmem,
2245         .max_header             = MAX_TCP_HEADER,
2246         .obj_size               = sizeof(struct tcp6_sock),
2247         .rsk_prot               = &tcp6_request_sock_ops,
2248 };
2249
2250 static struct inet6_protocol tcpv6_protocol = {
2251         .handler        =       tcp_v6_rcv,
2252         .err_handler    =       tcp_v6_err,
2253         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2254 };
2255
2256 extern struct proto_ops inet6_stream_ops;
2257
2258 static struct inet_protosw tcpv6_protosw = {
2259         .type           =       SOCK_STREAM,
2260         .protocol       =       IPPROTO_TCP,
2261         .prot           =       &tcpv6_prot,
2262         .ops            =       &inet6_stream_ops,
2263         .capability     =       -1,
2264         .no_check       =       0,
2265         .flags          =       INET_PROTOSW_PERMANENT,
2266 };
2267
2268 void __init tcpv6_init(void)
2269 {
2270         /* register inet6 protocol */
2271         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2272                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2273         inet6_register_protosw(&tcpv6_protosw);
2274 }